From 6e9dfd137c78331e28957ff800738a314436cbf4 Mon Sep 17 00:00:00 2001 From: Marius Mutu Date: Sat, 27 Jun 2026 17:00:59 +0000 Subject: [PATCH] feat: youtube_subs + dashboard includ descrierea video ca index - tools/youtube_subs.py: get_subtitles() returneaza acum (title, desc, transcript). Functii noi is_description_about_video() si extract_relevant_description() detecteaza daca descrierea contine capitole/timestamps (nu doar promotie autori) si curata trailing-urile promotionale inainte sa includa descrierea in output. - dashboard/handlers/youtube.py: aceleasi functii adaugate; nota KB generata include acum un bloc "Descriere / Index" daca descrierea e relevanta pentru video. - memory/kb/youtube: nota Jeremy Grantham (AI bubble, investitii, toxicitate) cu descrierea ca index de capitole. Co-Authored-By: Claude Sonnet 4.6 --- dashboard/handlers/youtube.py | 54 ++++- memory/kb/index.json | 190 +++++++++++++++++- ...06-27_jeremy-grantham-ai-bubble-warning.md | 99 +++++++++ tools/youtube_subs.py | 128 ++++++++++-- 4 files changed, 444 insertions(+), 27 deletions(-) create mode 100644 memory/kb/youtube/2026-06-27_jeremy-grantham-ai-bubble-warning.md diff --git a/dashboard/handlers/youtube.py b/dashboard/handlers/youtube.py index 8e6485b..7bef16b 100644 --- a/dashboard/handlers/youtube.py +++ b/dashboard/handlers/youtube.py @@ -36,6 +36,50 @@ def _clean_vtt(content): return ' '.join(lines) +def _is_description_about_video(description): + """Return True if description contains info about the video (chapters/topics).""" + if not description or len(description.strip()) < 50: + return False + timestamp_pattern = re.compile(r'\b\d{1,2}:\d{2}(:\d{2})?\b') + if len(timestamp_pattern.findall(description)) >= 3: + return True + lines = description.strip().split('\n') + bullet_lines = [l for l in lines if re.match(r'^\s*[◼•\-\*▶►]\s+\S', l)] + if len(bullet_lines) >= 3: + return True + numbered_lines = [l for l in lines if re.match(r'^\s*\d+[\.\)]\s+\S', l)] + if 
len(numbered_lines) >= 3: + return True + return False + + +def _extract_relevant_description(description): + """Strip promotional tails (links, social media) from description.""" + if not description: + return "" + promo_patterns = [ + re.compile(r'https?://\S+'), + re.compile(r'instagram|twitter|facebook|tiktok|linkedin|patreon|spotify', re.I), + re.compile(r'follow|subscribe|newsletter|merch|sponsor|affiliate', re.I), + re.compile(r'purchase|buy|order|shop|store', re.I), + ] + result_lines = [] + promo_streak = 0 + for line in description.strip().split('\n'): + stripped = line.strip() + is_promo = any(p.search(stripped) for p in promo_patterns) + if is_promo: + promo_streak += 1 + if promo_streak >= 2: + break + else: + promo_streak = 0 + result_lines.append(line) + while result_lines and not result_lines[-1].strip(): + result_lines.pop() + return '\n'.join(result_lines) + + def _process_youtube(url): """Download subtitles, save note.""" yt_dlp = os.path.expanduser('~/.local/bin/yt-dlp') @@ -51,6 +95,7 @@ def _process_youtube(url): info = json.loads(result.stdout) title = info.get('title', 'Unknown') duration = info.get('duration', 0) + description = info.get('description', '') temp_dir = Path('/tmp/yt_subs') temp_dir.mkdir(exist_ok=True) @@ -78,6 +123,13 @@ def _process_youtube(url): slug = re.sub(r'[^\w\s-]', '', title.lower())[:50].strip().replace(' ', '-') filename = f"{date_str}_{slug}.md" + # Build optional description block + desc_block = "" + if _is_description_about_video(description): + relevant_desc = _extract_relevant_description(description) + if relevant_desc: + desc_block = f"\n## Descriere / Index\n\n{relevant_desc}\n\n---\n" + note_content = f"""# {title} **Video:** {url} @@ -86,7 +138,7 @@ def _process_youtube(url): **Tags:** #youtube #to-summarize --- - +{desc_block} ## Transcript {transcript[:15000]} diff --git a/memory/kb/index.json b/memory/kb/index.json index 2bed2d3..50192de 100644 --- a/memory/kb/index.json +++ b/memory/kb/index.json @@ 
-1,5 +1,185 @@ { "notes": [ + { + "file": "notes-data/youtube/2026-06-27_jeremy-grantham-ai-bubble-warning.md", + "title": "Billionaire's WARNING: I'm SELLING. The Crash Is Already Here! — Jeremy Grantham", + "date": "2026-06-27", + "tags": [ + "investitii", + "bubble", + "AI", + "sanatate", + "economie" + ], + "domains": [ + "growth", + "work" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Jeremy Grantham (60 ani experienta, a gestionat 165 miliarde $) avertizeaza ca ne aflam in cea mai mare bula investitionala din istoria americana — AI. Sfatul sau: vinde actiunile US tech acum, nu ast..." + }, + { + "file": "notes-data/youtube/2026-06-27_google-open-knowledge-format.md", + "title": "Google's New Release Just Fixed AI Systems (Open Knowledge Format)", + "date": "2026-06-27", + "tags": [], + "domains": [ + "work", + "growth" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Google a lansat **Open Knowledge Format (OKF)** — un standard pentru organizarea knowledge base-urilor astfel încât agenții AI să navigheze mai eficient. Se bazează pe pattern-ul LLM Wiki al lui Andre..." + }, + { + "file": "notes-data/youtube/2026-06-25_google-agentic-engineering-masterclass.md", + "title": "Google Just Dropped a Masterclass on Agentic Engineering", + "date": "2026-06-25", + "tags": [], + "domains": [ + "work", + "growth" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Google a publicat un ghid de 51 de pagini despre AI-driven SDLC (Software Development Life Cycle). Concluzia centrală: **harness-ul (regulile, workflow-urile, tool-urile, guardrails) contează 90%, mod..." 
+ }, + { + "file": "notes-data/youtube/2026-06-24_codie-sanchez-3s-breakthrough.md", + "title": "#1 Biggest Mistake Blocking Your Breakthrough (Codie Sanchez)", + "date": "2026-06-24", + "tags": [], + "domains": [], + "types": [ + "coaching" + ], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Tony Robbins (neidentificat explicit, dar stilul și conținutul sunt clare) explică de ce oamenii eșuează să aibă un breakthrough: atacă problemele în ordinea greșită. Cei 3 S ai unui breakthrough treb..." + }, + { + "file": "notes-data/youtube/2026-06-23_remote-boring-businesses.md", + "title": "100% REMOTE Boring Businesses (That Almost Never Fail)", + "date": "2026-06-23", + "tags": [], + "domains": [ + "work", + "growth" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Fondatorul unui business de $23M/lună face un ranking al afacerilor remote. Concluzia: cele mai bune nu sunt cele \"sexy\" (dropshipping, SEO, FBA) ci **expertiza + proces + autoritate** — adică afaceri..." + }, + { + "file": "notes-data/youtube/2026-06-21_claude-code-anki-setup.md", + "title": "This Claude Code Setup Changed My Life (Seriously…)", + "date": "2026-06-21", + "tags": [], + "domains": [ + "growth", + "work" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Combini Claude Code cu Anki (prin Anki Connect add-on) pentru a automatiza crearea și optimizarea flashcard-urilor. Claude Code citește videoclipuri, lecturi, transcrieri și generează automat carduri ..." 
+ }, + { + "file": "notes-data/youtube/2026-06-19_business-gurus-5m-review.md", + "title": "We Spent $5M on Business Gurus, So You Don't Have To", + "date": "2026-06-19", + "tags": [], + "domains": [], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Doi antreprenori cu afaceri de 8-9 cifre (Nick Fischer - New Reach, $150M+/an) analizează cele mai valoroase cursuri și guru-uri în care au investit colectiv $5M+. Concluzia principală: primele câteva..." + }, + { + "file": "notes-data/youtube/2026-06-19_matt-pocock-agentic-engineering-workflow.md", + "title": "Matt Pocock's Agentic Engineering Workflow (just copy him)", + "date": "2026-06-19", + "tags": [], + "domains": [ + "work", + "growth" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Matt Pocock (educator TypeScript, autor skills pentru Claude Code) explica filosofia sa de lucru cu AI: nu modelul conteaza cel mai mult, ci harness-ul (setup-ul, skill-urile, codebase-ul). AI a \"manc..." + }, + { + "file": "notes-data/youtube/2026-06-14_claude-trading-102k.md", + "title": "I Tested Letting Claude Trade For A Month and Made $102k", + "date": "2026-06-14", + "tags": [], + "domains": [], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Un trader cu background în matematică și finanțe a folosit Claude ca analist și portfolio manager timp de o lună (mai 2026), începând cu $66k și terminând cu ~$169k (+155%). Claude a propus strategia,..." 
+ }, + { + "file": "notes-data/youtube/2026-06-12_iulia-borcsa-suplimente.md", + "title": "Dezvoltator Suplimente: \"Producătorii De Vitamine Au Un Truc Ascuns\" | Iulia Borcsa | Gândește Diferit", + "date": "2026-06-12", + "tags": [], + "domains": [ + "health", + "growth" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Iulia Borcsa, cercetător și dezvoltator de suplimente în Germania (~10 ani), explică ce nu știe consumatorul mediu despre industria suplimentelor: aditivi ascunși, forme cu biodisponibilitate scăzută,..." + }, + { + "file": "notes-data/youtube/2026-06-09_top-1-percent-claude-code-loops.md", + "title": "How the Top 1% Actually Run Claude Code Now", + "date": "2026-06-09", + "tags": [ + "loops", + "agents", + "automation", + "claude-code" + ], + "domains": [ + "work", + "growth" + ], + "types": [], + "category": "youtube", + "project": null, + "subdir": null, + "video": "", + "tldr": "Videoul descrie tranziția de la Stage 2 (juglezi manual mai mulți agenți) la Stage 3 (proiectezi loop-uri autonome care promtează agenții în locul tău). Unitatea de muncă nu mai e prompt-ul individual..." + }, { "file": "notes-data/facebook/2026-06-07_7-micro-habits-rewire-happiness.md", "title": "7 Micro Habits That Rewire Your Happiness", @@ -9878,11 +10058,11 @@ } ], "stats": { - "total": 570, + "total": 581, "by_domain": { - "work": 192, - "health": 100, - "growth": 259, + "work": 199, + "health": 101, + "growth": 267, "sprijin": 39, "scout": 9 }, @@ -9899,7 +10079,7 @@ "reflectii": 3, "retete": 1, "tools": 7, - "youtube": 140, + "youtube": 151, "memory": 44 } }, diff --git a/memory/kb/youtube/2026-06-27_jeremy-grantham-ai-bubble-warning.md b/memory/kb/youtube/2026-06-27_jeremy-grantham-ai-bubble-warning.md new file mode 100644 index 0000000..c188115 --- /dev/null +++ b/memory/kb/youtube/2026-06-27_jeremy-grantham-ai-bubble-warning.md @@ -0,0 +1,99 @@ +# Billionaire's WARNING: I'm SELLING. 
The Crash Is Already Here! — Jeremy Grantham + +**Sursa:** https://www.youtube.com/watch?v=32u5T6lO8qk +**Durata:** 1h 45min +**Data notita:** 2026-06-27 +**Tags:** @growth @work #investitii #bubble #AI #sanatate #economie + +--- + +## TL;DR + +Jeremy Grantham (60 ani experienta, a gestionat 165 miliarde $) avertizeaza ca ne aflam in cea mai mare bula investitionala din istoria americana — AI. Sfatul sau: vinde actiunile US tech acum, nu astepta ajutor de la Wall Street. In plus, discuta despre impactul toxic al chimicalelor asupra fertilitatii si declinul societatii americane. + +--- + +## Puncte cheie + +- **Bula AI e reala si imensa.** Grantham a prezis prabusirea dot-com si criza din 2007. Spune ca bula AI va fi cea mai mare din istoria SUA. Indicatorii sunt identici cu cele anterioare: optimism irational, suprainvestitie, prospecte ridicole (ex: SpaceX promite sa mineze asteroizi si sa adreseze 25% din PIB-ul global). + +- **Nu detine actiuni US.** Sfatul sau clar: vinde-le pe toate, mai ales tech. S&P 500? Nu. Actiuni tech mari? Nu. Alternativele: actiuni japoneze, europene, pietele emergente, resurse naturale. + +- **Bitcoin merge la zero.** "O piesa inutila de nonsens care nu faciliteaza nimic in afara de miscarea banilor criminali care nu vor sa fie vazuti." + +- **Wall Street nu te va avertiza niciodata.** Institutiile mari nu pot lua pozitii contrare pentru ca risca cariera. Trebuie sa te uiti singur la date. O bula e usor de vazut: e un plafon plat si apoi un varf himalayan — care mereu se intoarce in jos. + +- **Chimicale toxice = criza de fertilitate.** Numarul de spermatozoizi a scazut dramatic in 50 ani. Cauzele: ftalati, BPA, pesticide. Problema e mai acuta in SUA decat in Europa (reglementari mai slabe). Solutie: mancare organica, evitare plasticuri, filtrare apa. + +- **Inegalitate si declin social.** SUA arata spectaculos la media avutiei dar dezastruos pentru quartila de jos. 
"Singurii oameni care cred ca poti avea crestere compusa pe o planeta finita sunt nebunii si economistii." + +- **Unde sa traiesti.** Refuza sa spuna direct, dar implicit: nu SUA. Danemarca si tarile nordice ofera speranta de viata mai mare, securitate sociala mai buna, rate ale criminalitatii mai mici. + +--- + +## Descriere / Index capitole + +The man who predicted the dot-com crash and the 2007 housing collapse warns that the AI bubble is the biggest in American history. Billionaire investor Jeremy Grantham reveals why it will burst, the exact strategy to protect your money, and why house prices need to fall 30%. + +He explains: +◼ Why Wall Street will never warn you when to get out of the market, and what to do instead +◼ The exact portfolio Jeremy recommends to protect your money before the crash +◼ What everyday chemicals in your food and cosmetics are doing to your fertility +◼ Why house prices need to fall 30%, and what it means for your finances +◼ Why the AI boom won't automatically lead to higher profits, and what to buy instead + +**Capitole:** +- 00:00:00 Who Is Jeremy Grantham? +- 00:02:54 Will AI Become The Next Financial Bubble? +- 00:06:57 How Jeremy Grantham Built An Investing Empire +- 00:09:18 What Happens When The AI Bubble Bursts? +- 00:11:35 How AI Will Change Everyday Life +- 00:12:53 The Investing Strategy For Right Now +- 00:18:12 Why You Should Avoid US Stocks +- 00:20:13 Why Investment Advisors Mislead Clients +- 00:26:09 Advice For Entrepreneurs Right Now +- 00:28:59 The Real Risks Of AI +- 00:36:21 The Battle Between The Magnificent 7 +- 00:41:57 Which Jobs AI Will Replace First +- 00:44:18 Will SpaceX Eventually Fail? +- 00:50:40 The Most Valuable Skill For The Future +- 00:51:41 Is Society Declining And What Comes Next? +- 00:54:02 What History Says About Wealth Inequality +- 00:57:59 How To Build Wealth In Your 30s Today +- 01:00:08 How To Invest Your Salary Wisely +- 01:02:58 Should You Own Crypto? 
+- 01:04:05 Is Property Still A Good Investment? +- 01:07:27 What's Really Causing The Baby Bust? +- 01:14:24 How Microplastics Affect Fertility +- 01:16:42 How Pesticides Impact Fertility +- 01:21:43 How To Reduce Toxic Chemical Exposure +- 01:27:30 How To Stay Healthy In A Toxic World +- 01:35:55 The Flaw That Destroys Societies +- 01:39:22 The Best Places To Live Today + +--- + +## Quote-uri memorabile + +> "Don't own US stocks. That's a simple strategy that you can act on." + +> "Bubbles always occur around the very most important ideas — railroads, internet, AI. The bigger the idea, the bigger the bubble, and the bigger the bust." + +> "Amazon went up 6-7x in '99. In the crash, it went down 92%. Then out of the wreckage, it inherited the retail world. That's how it works." + +> "The only people who think you can have compound growth on a finite planet are madmen and economists." — Kenneth Boulding + +> "Large enterprises almost never get the big turning points because they can't take the career risk involved. The central political skill in life: never be wrong on your own." + +> "Get out of the most dangerous part, and do it now. Don't wait for help because no help is coming." + +> "Crypto is an unnecessary piece of nonsense that facilitates nothing except criminals moving money." 
+ +--- + +## Idei actionabile + +- [ ] Evalueaza expunerea la actiuni US/tech din portofoliu +- [ ] Cerceteaza ETF-uri japoneze, europene, piete emergente ca alternative +- [ ] Verifica chimicalele din cosmetice/alimente — ftalati, BPA, pesticide +- [ ] Considera filtrarea apei potabile diff --git a/tools/youtube_subs.py b/tools/youtube_subs.py index cc04672..27df629 100755 --- a/tools/youtube_subs.py +++ b/tools/youtube_subs.py @@ -15,7 +15,7 @@ def clean_vtt(content): """Convert VTT to plain text, removing timestamps and duplicates.""" lines = [] seen = set() - + for line in content.split('\n'): # Skip VTT headers, timestamps, positioning if line.startswith('WEBVTT') or line.startswith('Kind:') or line.startswith('Language:'): @@ -28,28 +28,104 @@ def clean_vtt(content): continue if re.match(r'^\d+$', line.strip()): # Sequence numbers continue - + # Clean HTML tags clean = re.sub(r'<[^>]+>', '', line).strip() if clean and clean not in seen: seen.add(clean) lines.append(clean) - + return ' '.join(lines) + +def is_description_about_video(description): + """ + Determine if the description contains info about the video content + (chapters/timestamps, topics) vs. just author promotion/ads. + Returns True if description is worth including. 
+ """ + if not description or len(description.strip()) < 50: + return False + + # Strong signal: contains timestamp markers like 00:00, 0:00:00, 1:23 + timestamp_pattern = re.compile(r'\b\d{1,2}:\d{2}(:\d{2})?\b') + timestamp_count = len(timestamp_pattern.findall(description)) + if timestamp_count >= 3: + return True + + # Strong signal: contains chapter/topic-like bullet lines + lines = description.strip().split('\n') + bullet_lines = [l for l in lines if re.match(r'^\s*[◼•\-\*▶►]\s+\S', l)] + if len(bullet_lines) >= 3: + return True + + # Signal: numbered list or clear topic breakdown + numbered_lines = [l for l in lines if re.match(r'^\s*\d+[\.\)]\s+\S', l)] + if len(numbered_lines) >= 3: + return True + + return False + + +def extract_relevant_description(description): + """ + Extract only the relevant parts of the description (about the video). + Removes trailing promotional links, author bio boilerplate, etc. + """ + if not description: + return "" + + lines = description.strip().split('\n') + + # Find the last line that looks like content (timestamps or bullets or substantive text) + # Cut off at lines that are clearly promotional (links, social media, etc.) 
+ promo_patterns = [ + re.compile(r'https?://\S+'), # URLs + re.compile(r'instagram|twitter|facebook|tiktok|linkedin|patreon|spotify', re.I), + re.compile(r'follow|subscribe|newsletter|merch|sponsor|affiliate', re.I), + re.compile(r'purchase|buy|order|shop|store', re.I), + ] + + result_lines = [] + promo_streak = 0 + + for line in lines: + stripped = line.strip() + + # Check if this line is promotional + is_promo = any(p.search(stripped) for p in promo_patterns) + + if is_promo: + promo_streak += 1 + # Allow isolated promo lines (like a single URL after a chapter list) + # but stop if we hit multiple consecutive promo lines + if promo_streak >= 2: + break + else: + promo_streak = 0 + result_lines.append(line) + + # Also strip trailing empty lines + while result_lines and not result_lines[-1].strip(): + result_lines.pop() + + return '\n'.join(result_lines) + + def get_subtitles(url, lang='en'): """Download subtitles for a YouTube video.""" - + yt_dlp = os.path.expanduser('~/.local/bin/yt-dlp') temp_dir = Path('/tmp/yt_subs') temp_dir.mkdir(exist_ok=True) - + # Clean old files for f in temp_dir.glob('*'): f.unlink() - + # First, get video info title = "Unknown" + description = "" info_cmd = [yt_dlp, '--js-runtimes', 'node', '--dump-json', '--no-download', url] result = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30) print(f"INFO: returncode={result.returncode}, stderr={result.stderr[:200]}", file=sys.stderr) @@ -57,6 +133,7 @@ def get_subtitles(url, lang='en'): try: info = json.loads(result.stdout) title = info.get('title', 'Unknown') + description = info.get('description', '') duration = info.get('duration', 0) print(f"Title: {title}", file=sys.stderr) print(f"Duration: {duration//60}:{duration%60:02d}", file=sys.stderr) @@ -64,10 +141,10 @@ def get_subtitles(url, lang='en'): print(f"JSON parse error: {e}", file=sys.stderr) else: print(f"yt-dlp failed: {result.stderr[:500]}", file=sys.stderr) - + # Try to get subtitles in order of preference 
lang_preferences = [lang, 'ro', 'en', 'en-US', 'en-GB'] - + for try_lang in lang_preferences: # Try manual subtitles first cmd = [ @@ -79,15 +156,15 @@ def get_subtitles(url, lang='en'): '-o', str(temp_dir / '%(id)s.%(ext)s'), url ] - + subprocess.run(cmd, capture_output=True, timeout=60) - + # Check if we got subtitles for ext in ['vtt', 'srt', 'ass']: for sub_file in temp_dir.glob(f'*.{try_lang}*.{ext}'): content = sub_file.read_text(encoding='utf-8', errors='replace') - return title, clean_vtt(content) - + return title, description, clean_vtt(content) + # Try auto-generated subtitles for try_lang in lang_preferences: cmd = [ @@ -99,30 +176,39 @@ def get_subtitles(url, lang='en'): '-o', str(temp_dir / '%(id)s.%(ext)s'), url ] - + subprocess.run(cmd, capture_output=True, timeout=60) - + for ext in ['vtt', 'srt', 'ass']: for sub_file in temp_dir.glob(f'*.{ext}'): content = sub_file.read_text(encoding='utf-8', errors='replace') text = clean_vtt(content) if text: - return title, text - - return title or "Unknown", None + return title, description, text + + return title or "Unknown", description, None if __name__ == '__main__': if len(sys.argv) < 2: print("Usage: python3 youtube_subs.py [language]") sys.exit(1) - + url = sys.argv[1] lang = sys.argv[2] if len(sys.argv) > 2 else 'en' - - title, transcript = get_subtitles(url, lang) - + + title, description, transcript = get_subtitles(url, lang) + if transcript: print(f"\n=== {title} ===\n") + + # Include description if it's about the video content + if description and is_description_about_video(description): + relevant_desc = extract_relevant_description(description) + if relevant_desc: + print("--- Descriere / Index ---") + print(relevant_desc) + print("--- Transcript ---") + print(transcript) else: print(f"No subtitles found for: {title}", file=sys.stderr)