feat: youtube_subs + dashboard includ descrierea video ca index
- tools/youtube_subs.py: get_subtitles() returneaza acum (title, desc, transcript). Functii noi is_description_about_video() si extract_relevant_description() detecteaza daca descrierea contine capitole/timestamps (nu doar promotie autori) si curata trailing-urile promotionale inainte sa includa descrierea in output.
- dashboard/handlers/youtube.py: aceleasi functii adaugate; nota KB generata include acum un bloc "Descriere / Index" daca descrierea e relevanta pentru video.
- memory/kb/youtube: nota Jeremy Grantham (AI bubble, investitii, toxicitate) cu descrierea ca index de capitole.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,50 @@ def _clean_vtt(content):
|
||||
return ' '.join(lines)
|
||||
|
||||
|
||||
def _is_description_about_video(description):
|
||||
"""Return True if description contains info about the video (chapters/topics)."""
|
||||
if not description or len(description.strip()) < 50:
|
||||
return False
|
||||
timestamp_pattern = re.compile(r'\b\d{1,2}:\d{2}(:\d{2})?\b')
|
||||
if len(timestamp_pattern.findall(description)) >= 3:
|
||||
return True
|
||||
lines = description.strip().split('\n')
|
||||
bullet_lines = [l for l in lines if re.match(r'^\s*[◼•\-\*▶►]\s+\S', l)]
|
||||
if len(bullet_lines) >= 3:
|
||||
return True
|
||||
numbered_lines = [l for l in lines if re.match(r'^\s*\d+[\.\)]\s+\S', l)]
|
||||
if len(numbered_lines) >= 3:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _extract_relevant_description(description):
|
||||
"""Strip promotional tails (links, social media) from description."""
|
||||
if not description:
|
||||
return ""
|
||||
promo_patterns = [
|
||||
re.compile(r'https?://\S+'),
|
||||
re.compile(r'instagram|twitter|facebook|tiktok|linkedin|patreon|spotify', re.I),
|
||||
re.compile(r'follow|subscribe|newsletter|merch|sponsor|affiliate', re.I),
|
||||
re.compile(r'purchase|buy|order|shop|store', re.I),
|
||||
]
|
||||
result_lines = []
|
||||
promo_streak = 0
|
||||
for line in description.strip().split('\n'):
|
||||
stripped = line.strip()
|
||||
is_promo = any(p.search(stripped) for p in promo_patterns)
|
||||
if is_promo:
|
||||
promo_streak += 1
|
||||
if promo_streak >= 2:
|
||||
break
|
||||
else:
|
||||
promo_streak = 0
|
||||
result_lines.append(line)
|
||||
while result_lines and not result_lines[-1].strip():
|
||||
result_lines.pop()
|
||||
return '\n'.join(result_lines)
|
||||
|
||||
|
||||
def _process_youtube(url):
|
||||
"""Download subtitles, save note."""
|
||||
yt_dlp = os.path.expanduser('~/.local/bin/yt-dlp')
|
||||
@@ -51,6 +95,7 @@ def _process_youtube(url):
|
||||
info = json.loads(result.stdout)
|
||||
title = info.get('title', 'Unknown')
|
||||
duration = info.get('duration', 0)
|
||||
description = info.get('description', '')
|
||||
|
||||
temp_dir = Path('/tmp/yt_subs')
|
||||
temp_dir.mkdir(exist_ok=True)
|
||||
@@ -78,6 +123,13 @@ def _process_youtube(url):
|
||||
slug = re.sub(r'[^\w\s-]', '', title.lower())[:50].strip().replace(' ', '-')
|
||||
filename = f"{date_str}_{slug}.md"
|
||||
|
||||
# Build optional description block
|
||||
desc_block = ""
|
||||
if _is_description_about_video(description):
|
||||
relevant_desc = _extract_relevant_description(description)
|
||||
if relevant_desc:
|
||||
desc_block = f"\n## Descriere / Index\n\n{relevant_desc}\n\n---\n"
|
||||
|
||||
note_content = f"""# {title}
|
||||
|
||||
**Video:** {url}
|
||||
@@ -86,7 +138,7 @@ def _process_youtube(url):
|
||||
**Tags:** #youtube #to-summarize
|
||||
|
||||
---
|
||||
|
||||
{desc_block}
|
||||
## Transcript
|
||||
|
||||
{transcript[:15000]}
|
||||
|
||||
Reference in New Issue
Block a user