Files
echo-core/dashboard/handlers/youtube.py

149 lines
4.2 KiB
Python

"""YouTube subtitle-download + note-creation endpoint."""
import json
import logging
import os
import re
import subprocess
import sys
import traceback
from datetime import datetime
from pathlib import Path
import constants
log = logging.getLogger(__name__)
def _clean_vtt(content):
    """Convert WebVTT caption text to a single plain-text string.

    The following lines are dropped entirely:

    * header lines starting with ``WEBVTT``, ``Kind:`` or ``Language:``;
    * cue timing lines (anything containing ``-->``);
    * numeric cue identifiers (lines made only of digits);
    * blank lines and lines whose first non-blank character is ``<``.

    Inline tags such as ``<c>`` or ``<00:00:01.000>`` are removed from the
    remaining lines.  Each distinct line of text is kept only once (the first
    occurrence wins), so captions repeated across cues appear a single time.

    Args:
        content: Full text of a ``.vtt`` file.

    Returns:
        The surviving caption lines joined with single spaces, or ``''`` when
        nothing remains.
    """
    header_prefixes = ('WEBVTT', 'Kind:', 'Language:')
    tag_re = re.compile(r'<[^>]+>')
    cue_number_re = re.compile(r'^\d+$')

    # A WebVTT file may start with a byte order mark.  Reading the file as
    # plain UTF-8 keeps it, and str.strip() does not treat it as whitespace,
    # so without this the "WEBVTT" header would leak into the transcript.
    content = content.lstrip('\ufeff')

    lines = []
    seen = set()
    for line in content.split('\n'):
        stripped = line.strip()
        if (
            not stripped
            or line.startswith(header_prefixes)
            or '-->' in line
            or stripped.startswith('<')
            or cue_number_re.match(stripped)
        ):
            continue
        clean = tag_re.sub('', line).strip()
        if clean and clean not in seen:
            seen.add(clean)
            lines.append(clean)
    return ' '.join(lines)
def _process_youtube(url):
    """Download a video's English auto-subtitles and save them as a note.

    Steps:
      1. Run ``yt-dlp --dump-json`` to read the video's title and duration.
      2. Run ``yt-dlp --write-auto-subs`` into a private temporary directory.
      3. Convert the first ``.vtt`` file found to plain text with
         ``_clean_vtt``.
      4. Write a Markdown note into ``constants.NOTES_DIR``, run
         ``update_notes_index.py`` and, best effort, index the note through
         ``src.memory_search.index_file``.

    Args:
        url: Video URL.  It is treated as untrusted input and is always
            passed to yt-dlp after a ``--`` separator.

    Returns:
        The file name of the created note, or ``None`` when the metadata
        lookup failed or no subtitles were produced.

    Raises:
        subprocess.TimeoutExpired: A yt-dlp call exceeded its timeout
            (30 s for metadata, 120 s for subtitles).
        json.JSONDecodeError: yt-dlp's metadata output was not valid JSON.
        OSError: yt-dlp could not be started or the note could not be
            written.
    """
    import tempfile  # local import keeps this block self-contained

    yt_dlp = os.path.expanduser('~/.local/bin/yt-dlp')

    # '--' marks the end of options so a URL beginning with '-' can never be
    # interpreted as a yt-dlp flag.  '--no-playlist' keeps a watch URL that
    # also carries a list= parameter from being expanded into the whole
    # playlist, which would emit one JSON document per entry.
    result = subprocess.run(
        [yt_dlp, '--dump-json', '--no-download', '--no-playlist', '--', url],
        capture_output=True, text=True, timeout=30,
    )
    if result.returncode != 0:
        print(f"Failed to get video info: {result.stderr}")
        return None

    info = json.loads(result.stdout)
    # The keys may be present with a null value, and duration may be a float;
    # normalise both so the string handling and ':02d' format cannot fail.
    title = info.get('title') or 'Unknown'
    duration = int(info.get('duration') or 0)

    # A per-call temporary directory avoids concurrent requests deleting or
    # picking up each other's subtitle files, and is removed on exit.
    transcript = None
    with tempfile.TemporaryDirectory(prefix='yt_subs_') as tmp:
        temp_dir = Path(tmp)
        subprocess.run([
            yt_dlp, '--write-auto-subs', '--sub-langs', 'en',
            '--skip-download', '--sub-format', 'vtt', '--no-playlist',
            '-o', str(temp_dir / '%(id)s'),
            '--', url,
        ], capture_output=True, timeout=120)

        sub_file = next(iter(sorted(temp_dir.glob('*.vtt'))), None)
        if sub_file is not None:
            content = sub_file.read_text(encoding='utf-8', errors='replace')
            transcript = _clean_vtt(content)

    if not transcript:
        print("No subtitles found")
        return None

    date_str = datetime.now().strftime('%Y-%m-%d')
    slug = re.sub(r'[^\w\s-]', '', title.lower())[:50].strip().replace(' ', '-')
    if not slug:
        # A title made only of punctuation or symbols would give "DATE_.md"
        # for every such video; fall back to the video id.
        slug = re.sub(r'[^\w-]', '', str(info.get('id') or '')) or 'video'
    filename = f"{date_str}_{slug}.md"

    note_content = f"""# {title}
**Video:** {url}
**Duration:** {duration // 60}:{duration % 60:02d}
**Saved:** {date_str}
**Tags:** #youtube #to-summarize
---
## Transcript
{transcript[:15000]}
---
*Notă: Sumarizarea va fi adăugată de Echo.*
"""

    constants.NOTES_DIR.mkdir(parents=True, exist_ok=True)
    note_path = constants.NOTES_DIR / filename
    note_path.write_text(note_content, encoding='utf-8')

    subprocess.run(
        [sys.executable, str(constants.TOOLS_DIR / 'update_notes_index.py')],
        capture_output=True,
    )

    # Index new note with Ollama semantic embeddings (best effort: a failure
    # here is logged and does not undo the note that was already written).
    try:
        base_dir = str(constants.BASE_DIR)
        if base_dir not in sys.path:
            # Only add the project root once instead of on every request.
            sys.path.insert(0, base_dir)
        from src.memory_search import index_file
        n = index_file(note_path)
        log.info("Ollama indexed %s (%d chunks)", filename, n)
    except Exception as e:
        log.warning("Ollama indexing failed for %s: %s", filename, e)

    print(f"Created note: {filename}")
    return filename
class YoutubeHandlers:
    """Mixin for /api/youtube.

    The methods use ``self.headers``, ``self.rfile`` and
    ``self.send_json(payload[, status])``, which the class this is mixed
    into must provide.
    """

    @staticmethod
    def _is_youtube_url(url):
        """Return True when *url* points at youtube.com or youtu.be.

        The check is made on the parsed host name rather than on a substring,
        so ``https://evil.example/?q=youtube.com`` is rejected.  Values that
        start with ``-`` or contain whitespace are rejected as well, because
        the URL is later handed to a command-line tool.  A URL without a
        scheme (``youtu.be/abc``) is accepted and parsed as if it were https.
        """
        from urllib.parse import urlsplit  # local import: block stays self-contained

        if not url or url.startswith('-') or any(ch.isspace() for ch in url):
            return False
        has_scheme = re.match(r'^[A-Za-z][A-Za-z0-9+.-]*://', url) is not None
        candidate = url if has_scheme else f'https://{url}'
        try:
            parts = urlsplit(candidate)
            host = (parts.hostname or '').lower()
        except ValueError:
            # urlsplit rejects some malformed inputs (e.g. a bad IPv6 literal).
            return False
        if parts.scheme not in ('http', 'https'):
            return False
        return host in ('youtube.com', 'youtu.be') or host.endswith('.youtube.com')

    def handle_youtube(self):
        """Process a YouTube URL: download subs, save note.

        Reads a JSON body of the form ``{"url": "..."}`` and replies through
        ``send_json``:

        * 400 ``{'error': ...}`` for an unreadable body or a non-YouTube URL;
        * 200 ``{'status': 'done', ...}`` when a note was created;
        * 422 ``{'status': 'error', ...}`` when processing finished without
          creating a note (video info or subtitles unavailable);
        * 500 when processing or the handler itself raised.
        """
        try:
            try:
                content_length = int(self.headers.get('Content-Length') or 0)
                if content_length < 0:
                    # read() with a negative size would block until EOF.
                    raise ValueError('negative Content-Length')
                post_data = self.rfile.read(content_length).decode('utf-8')
                data = json.loads(post_data)
            except (TypeError, ValueError):
                # Covers a bad length header, non-UTF-8 bytes and invalid
                # JSON: all of these are client errors, not server errors.
                self.send_json({'error': 'Cerere JSON invalidă'}, 400)
                return

            url = data.get('url', '') if isinstance(data, dict) else ''
            url = url.strip() if isinstance(url, str) else ''
            if not self._is_youtube_url(url):
                self.send_json({'error': 'URL YouTube invalid'}, 400)
                return

            try:
                print(f"Processing YouTube URL: {url}")
                filename = _process_youtube(url)
            except Exception as e:
                print(f"YouTube processing error: {e}")
                traceback.print_exc()
                self.send_json({'status': 'error', 'message': f'Eroare: {str(e)}'}, 500)
                return

            if not filename:
                # _process_youtube returns None when it could not fetch the
                # video info or found no subtitles; do not report success.
                self.send_json({
                    'status': 'error',
                    'message': 'Eroare: notița nu a fost creată '
                               '(informații video sau subtitrări indisponibile).',
                }, 422)
                return

            self.send_json({
                'status': 'done',
                'message': 'Notița a fost creată! Refresh pagina Notes.',
            })
        except Exception as e:
            self.send_json({'error': str(e)}, 500)