"""YouTube subtitle-download + note-creation endpoint."""
import json
import logging
import os
import re
import subprocess
import sys
import traceback
from datetime import datetime
from pathlib import Path

import constants

log = logging.getLogger(__name__)

# Line prefixes that mark VTT metadata rather than caption text.
_VTT_HEADER_PREFIXES = ('WEBVTT', 'Kind:', 'Language:')


def _clean_vtt(content):
    """Convert VTT captions to plain text.

    Drops the WEBVTT header, cue-timing lines (``-->``), markup-only lines,
    blank lines and numeric cue identifiers, strips inline tags, and
    de-duplicates repeated caption lines (auto-generated subtitles repeat
    each line as the caption window scrolls).

    Args:
        content: raw ``.vtt`` file contents as a single string.

    Returns:
        The caption text joined into one space-separated string.
    """
    lines = []
    seen = set()
    for line in content.split('\n'):
        stripped = line.strip()
        if (
            line.startswith(_VTT_HEADER_PREFIXES)
            or '-->' in line                      # cue timing line
            or stripped.startswith('<')           # markup-only line
            or not stripped                       # blank line
            or re.match(r'^\d+$', stripped)       # numeric cue identifier
        ):
            continue
        clean = re.sub(r'<[^>]+>', '', line).strip()  # strip inline tags
        if clean and clean not in seen:
            seen.add(clean)
            lines.append(clean)
    return ' '.join(lines)


def _process_youtube(url):
    """Download English auto-subtitles for *url* and save them as a note.

    Fetches video metadata via yt-dlp, downloads auto-generated English
    subtitles into a scratch directory, converts them to plain text, writes
    a markdown note into ``constants.NOTES_DIR``, refreshes the notes index,
    and best-effort indexes the note with Ollama embeddings.

    Args:
        url: a YouTube video URL.

    Returns:
        The created note's filename, or ``None`` if metadata or subtitles
        could not be obtained.
    """
    yt_dlp = os.path.expanduser('~/.local/bin/yt-dlp')

    # Probe the video for title/duration without downloading anything.
    result = subprocess.run(
        [yt_dlp, '--dump-json', '--no-download', url],
        capture_output=True,
        text=True,
        timeout=30,
    )
    if result.returncode != 0:
        print(f"Failed to get video info: {result.stderr}")
        return

    info = json.loads(result.stdout)
    title = info.get('title', 'Unknown')
    # `duration` may be present but null in the JSON; coerce to 0 so the
    # minutes/seconds arithmetic below cannot crash on None.
    duration = info.get('duration') or 0

    # Use a dedicated scratch directory, emptied each run so the glob below
    # only ever sees this video's subtitle file.
    temp_dir = Path('/tmp/yt_subs')
    temp_dir.mkdir(exist_ok=True)
    for f in temp_dir.glob('*'):
        f.unlink()

    subprocess.run([
        yt_dlp,
        '--write-auto-subs', '--sub-langs', 'en',
        '--skip-download', '--sub-format', 'vtt',
        '-o', str(temp_dir / '%(id)s'),
        url,
    ], capture_output=True, timeout=120)

    # At most one .vtt is expected (directory was emptied above); take it.
    transcript = None
    for sub_file in temp_dir.glob('*.vtt'):
        content = sub_file.read_text(encoding='utf-8', errors='replace')
        transcript = _clean_vtt(content)
        break

    if not transcript:
        print("No subtitles found")
        return

    date_str = datetime.now().strftime('%Y-%m-%d')
    # Filesystem-safe slug: keep word chars/spaces/hyphens, cap at 50 chars.
    slug = re.sub(r'[^\w\s-]', '', title.lower())[:50].strip().replace(' ', '-')
    filename = f"{date_str}_{slug}.md"

    # Transcript is capped at 15000 chars to keep notes a manageable size.
    note_content = f"""# {title}

**Video:** {url}
**Duration:** {duration // 60}:{duration % 60:02d}
**Saved:** {date_str}
**Tags:** #youtube #to-summarize

---

## Transcript

{transcript[:15000]}

---

*Notă: Sumarizarea va fi adăugată de Echo.*
"""

    constants.NOTES_DIR.mkdir(parents=True, exist_ok=True)
    note_path = constants.NOTES_DIR / filename
    note_path.write_text(note_content, encoding='utf-8')

    # Refresh the notes index; failures are intentionally ignored.
    subprocess.run(
        [sys.executable, str(constants.TOOLS_DIR / 'update_notes_index.py')],
        capture_output=True,
    )

    # Index new note with Ollama semantic embeddings (best effort).
    try:
        sys.path.insert(0, str(constants.BASE_DIR))
        from src.memory_search import index_file, MEMORY_DIR
        n = index_file(note_path)
        log.info("Ollama indexed %s (%d chunks)", filename, n)
    except Exception as e:
        log.warning("Ollama indexing failed for %s: %s", filename, e)

    # Bug fix: the original printed the literal "(unknown)" instead of the
    # actual filename (f-string had no placeholder).
    print(f"Created note: {filename}")
    return filename


class YoutubeHandlers:
    """Mixin for /api/youtube."""

    def handle_youtube(self):
        """Process a YouTube URL: download subs, save note.

        Reads a JSON body ``{"url": ...}`` from the request, validates that
        it is a YouTube URL, runs :func:`_process_youtube`, and replies with
        a JSON status. Processing errors are reported as HTTP 500 with the
        error message; malformed requests as HTTP 400.
        """
        try:
            content_length = int(self.headers['Content-Length'])
            post_data = self.rfile.read(content_length).decode('utf-8')
            data = json.loads(post_data)
            url = data.get('url', '').strip()

            # Accept only youtube.com / youtu.be URLs.
            if not url or ('youtube.com' not in url and 'youtu.be' not in url):
                self.send_json({'error': 'URL YouTube invalid'}, 400)
                return

            try:
                print(f"Processing YouTube URL: {url}")
                _process_youtube(url)
                self.send_json({
                    'status': 'done',
                    'message': 'Notița a fost creată! Refresh pagina Notes.',
                })
            except Exception as e:
                print(f"YouTube processing error: {e}")
                traceback.print_exc()
                self.send_json({'status': 'error', 'message': f'Eroare: {str(e)}'}, 500)
        except Exception as e:
            self.send_json({'error': str(e)}, 500)