feat: youtube_subs + dashboard includ descrierea video ca index

- tools/youtube_subs.py: get_subtitles() returneaza acum (title, desc, transcript).
  Functii noi is_description_about_video() si extract_relevant_description()
  detecteaza daca descrierea contine capitole/timestamps (nu doar promotie autori)
  si curata trailing-urile promotionale inainte sa includa descrierea in output.
- dashboard/handlers/youtube.py: aceleasi functii adaugate; nota KB generata
  include acum un bloc "Descriere / Index" daca descrierea e relevanta pentru video.
- memory/kb/youtube: nota Jeremy Grantham (AI bubble, investitii, toxicitate)
  cu descrierea ca index de capitole.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-27 17:00:59 +00:00
parent a8d024944d
commit 6e9dfd137c
4 changed files with 444 additions and 27 deletions

View File

@@ -36,6 +36,50 @@ def _clean_vtt(content):
return ' '.join(lines)
def _is_description_about_video(description):
"""Return True if description contains info about the video (chapters/topics)."""
if not description or len(description.strip()) < 50:
return False
timestamp_pattern = re.compile(r'\b\d{1,2}:\d{2}(:\d{2})?\b')
if len(timestamp_pattern.findall(description)) >= 3:
return True
lines = description.strip().split('\n')
bullet_lines = [l for l in lines if re.match(r'^\s*[◼•\-\*▶►]\s+\S', l)]
if len(bullet_lines) >= 3:
return True
numbered_lines = [l for l in lines if re.match(r'^\s*\d+[\.\)]\s+\S', l)]
if len(numbered_lines) >= 3:
return True
return False
def _extract_relevant_description(description):
"""Strip promotional tails (links, social media) from description."""
if not description:
return ""
promo_patterns = [
re.compile(r'https?://\S+'),
re.compile(r'instagram|twitter|facebook|tiktok|linkedin|patreon|spotify', re.I),
re.compile(r'follow|subscribe|newsletter|merch|sponsor|affiliate', re.I),
re.compile(r'purchase|buy|order|shop|store', re.I),
]
result_lines = []
promo_streak = 0
for line in description.strip().split('\n'):
stripped = line.strip()
is_promo = any(p.search(stripped) for p in promo_patterns)
if is_promo:
promo_streak += 1
if promo_streak >= 2:
break
else:
promo_streak = 0
result_lines.append(line)
while result_lines and not result_lines[-1].strip():
result_lines.pop()
return '\n'.join(result_lines)
def _process_youtube(url):
"""Download subtitles, save note."""
yt_dlp = os.path.expanduser('~/.local/bin/yt-dlp')
@@ -51,6 +95,7 @@ def _process_youtube(url):
info = json.loads(result.stdout)
title = info.get('title', 'Unknown')
duration = info.get('duration', 0)
description = info.get('description', '')
temp_dir = Path('/tmp/yt_subs')
temp_dir.mkdir(exist_ok=True)
@@ -78,6 +123,13 @@ def _process_youtube(url):
slug = re.sub(r'[^\w\s-]', '', title.lower())[:50].strip().replace(' ', '-')
filename = f"{date_str}_{slug}.md"
# Build optional description block
desc_block = ""
if _is_description_about_video(description):
relevant_desc = _extract_relevant_description(description)
if relevant_desc:
desc_block = f"\n## Descriere / Index\n\n{relevant_desc}\n\n---\n"
note_content = f"""# {title}
**Video:** {url}
@@ -86,7 +138,7 @@ def _process_youtube(url):
**Tags:** #youtube #to-summarize
---
{desc_block}
## Transcript
{transcript[:15000]}

View File

@@ -1,5 +1,185 @@
{
"notes": [
{
"file": "notes-data/youtube/2026-06-27_jeremy-grantham-ai-bubble-warning.md",
"title": "Billionaire's WARNING: I'm SELLING. The Crash Is Already Here! — Jeremy Grantham",
"date": "2026-06-27",
"tags": [
"investitii",
"bubble",
"AI",
"sanatate",
"economie"
],
"domains": [
"growth",
"work"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Jeremy Grantham (60 ani experienta, a gestionat 165 miliarde $) avertizeaza ca ne aflam in cea mai mare bula investitionala din istoria americana — AI. Sfatul sau: vinde actiunile US tech acum, nu ast..."
},
{
"file": "notes-data/youtube/2026-06-27_google-open-knowledge-format.md",
"title": "Google's New Release Just Fixed AI Systems (Open Knowledge Format)",
"date": "2026-06-27",
"tags": [],
"domains": [
"work",
"growth"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Google a lansat **Open Knowledge Format (OKF)** — un standard pentru organizarea knowledge base-urilor astfel încât agenții AI să navigheze mai eficient. Se bazează pe pattern-ul LLM Wiki al lui Andre..."
},
{
"file": "notes-data/youtube/2026-06-25_google-agentic-engineering-masterclass.md",
"title": "Google Just Dropped a Masterclass on Agentic Engineering",
"date": "2026-06-25",
"tags": [],
"domains": [
"work",
"growth"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Google a publicat un ghid de 51 de pagini despre AI-driven SDLC (Software Development Life Cycle). Concluzia centrală: **harness-ul (regulile, workflow-urile, tool-urile, guardrails) contează 90%, mod..."
},
{
"file": "notes-data/youtube/2026-06-24_codie-sanchez-3s-breakthrough.md",
"title": "#1 Biggest Mistake Blocking Your Breakthrough (Codie Sanchez)",
"date": "2026-06-24",
"tags": [],
"domains": [],
"types": [
"coaching"
],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Tony Robbins (neidentificat explicit, dar stilul și conținutul sunt clare) explică de ce oamenii eșuează să aibă un breakthrough: atacă problemele în ordinea greșită. Cei 3 S ai unui breakthrough treb..."
},
{
"file": "notes-data/youtube/2026-06-23_remote-boring-businesses.md",
"title": "100% REMOTE Boring Businesses (That Almost Never Fail)",
"date": "2026-06-23",
"tags": [],
"domains": [
"work",
"growth"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Fondatorul unui business de $23M/lună face un ranking al afacerilor remote. Concluzia: cele mai bune nu sunt cele \"sexy\" (dropshipping, SEO, FBA) ci **expertiza + proces + autoritate** — adică afaceri..."
},
{
"file": "notes-data/youtube/2026-06-21_claude-code-anki-setup.md",
"title": "This Claude Code Setup Changed My Life (Seriously…)",
"date": "2026-06-21",
"tags": [],
"domains": [
"growth",
"work"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Combini Claude Code cu Anki (prin Anki Connect add-on) pentru a automatiza crearea și optimizarea flashcard-urilor. Claude Code citește videoclipuri, lecturi, transcrieri și generează automat carduri ..."
},
{
"file": "notes-data/youtube/2026-06-19_business-gurus-5m-review.md",
"title": "We Spent $5M on Business Gurus, So You Don't Have To",
"date": "2026-06-19",
"tags": [],
"domains": [],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Doi antreprenori cu afaceri de 8-9 cifre (Nick Fischer - New Reach, $150M+/an) analizează cele mai valoroase cursuri și guru-uri în care au investit colectiv $5M+. Concluzia principală: primele câteva..."
},
{
"file": "notes-data/youtube/2026-06-19_matt-pocock-agentic-engineering-workflow.md",
"title": "Matt Pocock's Agentic Engineering Workflow (just copy him)",
"date": "2026-06-19",
"tags": [],
"domains": [
"work",
"growth"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Matt Pocock (educator TypeScript, autor skills pentru Claude Code) explica filosofia sa de lucru cu AI: nu modelul conteaza cel mai mult, ci harness-ul (setup-ul, skill-urile, codebase-ul). AI a \"manc..."
},
{
"file": "notes-data/youtube/2026-06-14_claude-trading-102k.md",
"title": "I Tested Letting Claude Trade For A Month and Made $102k",
"date": "2026-06-14",
"tags": [],
"domains": [],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Un trader cu background în matematică și finanțe a folosit Claude ca analist și portfolio manager timp de o lună (mai 2026), începând cu $66k și terminând cu ~$169k (+155%). Claude a propus strategia,..."
},
{
"file": "notes-data/youtube/2026-06-12_iulia-borcsa-suplimente.md",
"title": "Dezvoltator Suplimente: \"Producătorii De Vitamine Au Un Truc Ascuns\" | Iulia Borcsa | Gândește Diferit",
"date": "2026-06-12",
"tags": [],
"domains": [
"health",
"growth"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Iulia Borcsa, cercetător și dezvoltator de suplimente în Germania (~10 ani), explică ce nu știe consumatorul mediu despre industria suplimentelor: aditivi ascunși, forme cu biodisponibilitate scăzută,..."
},
{
"file": "notes-data/youtube/2026-06-09_top-1-percent-claude-code-loops.md",
"title": "How the Top 1% Actually Run Claude Code Now",
"date": "2026-06-09",
"tags": [
"loops",
"agents",
"automation",
"claude-code"
],
"domains": [
"work",
"growth"
],
"types": [],
"category": "youtube",
"project": null,
"subdir": null,
"video": "",
"tldr": "Videoul descrie tranziția de la Stage 2 (juglezi manual mai mulți agenți) la Stage 3 (proiectezi loop-uri autonome care promtează agenții în locul tău). Unitatea de muncă nu mai e prompt-ul individual..."
},
{
"file": "notes-data/facebook/2026-06-07_7-micro-habits-rewire-happiness.md",
"title": "7 Micro Habits That Rewire Your Happiness",
@@ -9878,11 +10058,11 @@
}
],
"stats": {
"total": 570,
"total": 581,
"by_domain": {
"work": 192,
"health": 100,
"growth": 259,
"work": 199,
"health": 101,
"growth": 267,
"sprijin": 39,
"scout": 9
},
@@ -9899,7 +10079,7 @@
"reflectii": 3,
"retete": 1,
"tools": 7,
"youtube": 140,
"youtube": 151,
"memory": 44
}
},

View File

@@ -0,0 +1,99 @@
# Billionaire's WARNING: I'm SELLING. The Crash Is Already Here! — Jeremy Grantham
**Sursa:** https://www.youtube.com/watch?v=32u5T6lO8qk
**Durata:** 1h 45min
**Data notita:** 2026-06-27
**Tags:** @growth @work #investitii #bubble #AI #sanatate #economie
---
## TL;DR
Jeremy Grantham (60 ani experienta, a gestionat 165 miliarde $) avertizeaza ca ne aflam in cea mai mare bula investitionala din istoria americana — AI. Sfatul sau: vinde actiunile US tech acum, nu astepta ajutor de la Wall Street. In plus, discuta despre impactul toxic al chimicalelor asupra fertilitatii si declinul societatii americane.
---
## Puncte cheie
- **Bula AI e reala si imensa.** Grantham a prezis prabusirea dot-com si criza din 2007. Spune ca bula AI va fi cea mai mare din istoria SUA. Indicatorii sunt identici cu cei din bulele anterioare: optimism irational, suprainvestitie, prospecte ridicole (ex: SpaceX promite sa mineze asteroizi si sa adreseze 25% din PIB-ul global).
- **Nu detine actiuni US.** Sfatul sau clar: vinde-le pe toate, mai ales tech. S&P 500? Nu. Actiuni tech mari? Nu. Alternativele: actiuni japoneze, europene, pietele emergente, resurse naturale.
- **Bitcoin merge la zero.** "O piesa inutila de nonsens care nu faciliteaza nimic in afara de miscarea banilor criminali care nu vor sa fie vazuti."
- **Wall Street nu te va avertiza niciodata.** Institutiile mari nu pot lua pozitii contrare pentru ca risca cariera. Trebuie sa te uiti singur la date. O bula e usor de vazut: e un plafon plat si apoi un varf himalayan — care mereu se intoarce in jos.
- **Chimicale toxice = criza de fertilitate.** Numarul de spermatozoizi a scazut dramatic in 50 ani. Cauzele: ftalati, BPA, pesticide. Problema e mai acuta in SUA decat in Europa (reglementari mai slabe). Solutie: mancare organica, evitare plasticuri, filtrare apa.
- **Inegalitate si declin social.** SUA arata spectaculos la media avutiei dar dezastruos pentru quartila de jos. "Singurii oameni care cred ca poti avea crestere compusa pe o planeta finita sunt nebunii si economistii."
- **Unde sa traiesti.** Refuza sa spuna direct, dar implicit: nu SUA. Danemarca si tarile nordice ofera speranta de viata mai mare, securitate sociala mai buna, rate ale criminalitatii mai mici.
---
## Descriere / Index capitole
The man who predicted the dot-com crash and the 2007 housing collapse warns that the AI bubble is the biggest in American history. Billionaire investor Jeremy Grantham reveals why it will burst, the exact strategy to protect your money, and why house prices need to fall 30%.
He explains:
◼ Why Wall Street will never warn you when to get out of the market, and what to do instead
◼ The exact portfolio Jeremy recommends to protect your money before the crash
◼ What everyday chemicals in your food and cosmetics are doing to your fertility
◼ Why house prices need to fall 30%, and what it means for your finances
◼ Why the AI boom won't automatically lead to higher profits, and what to buy instead
**Capitole:**
- 00:00:00 Who Is Jeremy Grantham?
- 00:02:54 Will AI Become The Next Financial Bubble?
- 00:06:57 How Jeremy Grantham Built An Investing Empire
- 00:09:18 What Happens When The AI Bubble Bursts?
- 00:11:35 How AI Will Change Everyday Life
- 00:12:53 The Investing Strategy For Right Now
- 00:18:12 Why You Should Avoid US Stocks
- 00:20:13 Why Investment Advisors Mislead Clients
- 00:26:09 Advice For Entrepreneurs Right Now
- 00:28:59 The Real Risks Of AI
- 00:36:21 The Battle Between The Magnificent 7
- 00:41:57 Which Jobs AI Will Replace First
- 00:44:18 Will SpaceX Eventually Fail?
- 00:50:40 The Most Valuable Skill For The Future
- 00:51:41 Is Society Declining And What Comes Next?
- 00:54:02 What History Says About Wealth Inequality
- 00:57:59 How To Build Wealth In Your 30s Today
- 01:00:08 How To Invest Your Salary Wisely
- 01:02:58 Should You Own Crypto?
- 01:04:05 Is Property Still A Good Investment?
- 01:07:27 What's Really Causing The Baby Bust?
- 01:14:24 How Microplastics Affect Fertility
- 01:16:42 How Pesticides Impact Fertility
- 01:21:43 How To Reduce Toxic Chemical Exposure
- 01:27:30 How To Stay Healthy In A Toxic World
- 01:35:55 The Flaw That Destroys Societies
- 01:39:22 The Best Places To Live Today
---
## Quote-uri memorabile
> "Don't own US stocks. That's a simple strategy that you can act on."
> "Bubbles always occur around the very most important ideas — railroads, internet, AI. The bigger the idea, the bigger the bubble, and the bigger the bust."
> "Amazon went up 6-7x in '99. In the crash, it went down 92%. Then out of the wreckage, it inherited the retail world. That's how it works."
> "The only people who think you can have compound growth on a finite planet are madmen and economists." — Kenneth Boulding
> "Large enterprises almost never get the big turning points because they can't take the career risk involved. The central political skill in life: never be wrong on your own."
> "Get out of the most dangerous part, and do it now. Don't wait for help because no help is coming."
> "Crypto is an unnecessary piece of nonsense that facilitates nothing except criminals moving money."
---
## Idei actionabile
- [ ] Evalueaza expunerea la actiuni US/tech din portofoliu
- [ ] Cerceteaza ETF-uri japoneze, europene, piete emergente ca alternative
- [ ] Verifica chimicalele din cosmetice/alimente — ftalati, BPA, pesticide
- [ ] Considera filtrarea apei potabile

View File

@@ -37,6 +37,81 @@ def clean_vtt(content):
return ' '.join(lines)
def is_description_about_video(description):
    """
    Report whether the description is about the video's content
    (chapters/timestamps, topic lists) and therefore worth including.

    Returns False for empty input or text shorter than 50 characters after
    trimming. Otherwise returns True as soon as one signal reaches three
    occurrences: timestamp-like tokens, bullet lines, or numbered lines.
    """
    body = (description or "").strip()
    if len(body) < 50:
        return False

    # Signal 1: timestamp markers such as 00:00, 0:00:00, 1:23.
    stamps = re.compile(r'\b\d{1,2}:\d{2}(:\d{2})?\b').findall(description)
    if len(stamps) >= 3:
        return True

    # Signals 2 and 3 are evaluated line by line and counted separately.
    bullets = 0
    numbered = 0
    for entry in body.split('\n'):
        if re.match(r'^\s*[◼•\-\*▶►]\s+\S', entry):
            bullets += 1
        if re.match(r'^\s*\d+[\.\)]\s+\S', entry):
            numbered += 1

    return bullets >= 3 or numbered >= 3
def extract_relevant_description(description):
    """
    Extract the part of the description that is about the video.

    Lines are scanned in order. A line is treated as promotional when it
    contains a URL or one of the social-media / call-to-action keywords
    listed below. One promotional line on its own (for example a single URL
    after a chapter list) is kept. At the first run of two consecutive
    promotional lines the text is cut and the whole run is discarded.
    Any non-promotional line, including a blank one, resets the run.

    Returns the kept lines joined with newlines and without trailing blank
    lines, or an empty string for falsy input.
    """
    if not description:
        return ""

    lines = description.strip().split('\n')

    promo_patterns = [
        re.compile(r'https?://\S+'),  # URLs
        re.compile(r'instagram|twitter|facebook|tiktok|linkedin|patreon|spotify', re.I),
        re.compile(r'follow|subscribe|newsletter|merch|sponsor|affiliate', re.I),
        re.compile(r'purchase|buy|order|shop|store', re.I),
    ]

    result_lines = []
    promo_streak = 0

    for line in lines:
        stripped = line.strip()
        is_promo = any(p.search(stripped) for p in promo_patterns)

        if is_promo:
            promo_streak += 1
            if promo_streak >= 2:
                # The previous line was kept on the assumption that it was
                # an isolated promo line. It actually started the
                # promotional tail, so remove it before stopping.
                result_lines.pop()
                break
        else:
            promo_streak = 0

        result_lines.append(line)

    # Strip blank lines left at the end after the cut.
    while result_lines and not result_lines[-1].strip():
        result_lines.pop()

    return '\n'.join(result_lines)
def get_subtitles(url, lang='en'):
"""Download subtitles for a YouTube video."""
@@ -50,6 +125,7 @@ def get_subtitles(url, lang='en'):
# First, get video info
title = "Unknown"
description = ""
info_cmd = [yt_dlp, '--js-runtimes', 'node', '--dump-json', '--no-download', url]
result = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
print(f"INFO: returncode={result.returncode}, stderr={result.stderr[:200]}", file=sys.stderr)
@@ -57,6 +133,7 @@ def get_subtitles(url, lang='en'):
try:
info = json.loads(result.stdout)
title = info.get('title', 'Unknown')
description = info.get('description', '')
duration = info.get('duration', 0)
print(f"Title: {title}", file=sys.stderr)
print(f"Duration: {duration//60}:{duration%60:02d}", file=sys.stderr)
@@ -86,7 +163,7 @@ def get_subtitles(url, lang='en'):
for ext in ['vtt', 'srt', 'ass']:
for sub_file in temp_dir.glob(f'*.{try_lang}*.{ext}'):
content = sub_file.read_text(encoding='utf-8', errors='replace')
return title, clean_vtt(content)
return title, description, clean_vtt(content)
# Try auto-generated subtitles
for try_lang in lang_preferences:
@@ -107,9 +184,9 @@ def get_subtitles(url, lang='en'):
content = sub_file.read_text(encoding='utf-8', errors='replace')
text = clean_vtt(content)
if text:
return title, text
return title, description, text
return title or "Unknown", None
return title or "Unknown", description, None
if __name__ == '__main__':
if len(sys.argv) < 2:
@@ -119,10 +196,19 @@ if __name__ == '__main__':
url = sys.argv[1]
lang = sys.argv[2] if len(sys.argv) > 2 else 'en'
title, transcript = get_subtitles(url, lang)
title, description, transcript = get_subtitles(url, lang)
if transcript:
print(f"\n=== {title} ===\n")
# Include description if it's about the video content
if description and is_description_about_video(description):
relevant_desc = extract_relevant_description(description)
if relevant_desc:
print("--- Descriere / Index ---")
print(relevant_desc)
print("--- Transcript ---")
print(transcript)
else:
print(f"No subtitles found for: {title}", file=sys.stderr)