#!/usr/bin/env python3
"""
YouTube subtitle downloader with rate limit protection and progressive retry.

Usage:
    python3 yt_download.py URL [URL2] [URL3] ...

Features:
- Cookies support for higher limits
- Sleep between downloads to avoid rate limiting
- Progressive retry: 2h → 4h → 24h on 429
- Tracks rate limit state in JSON file
"""

import json
import os
import subprocess
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path

# Add deno to PATH (needed by yt-dlp's ejs remote components).
os.environ["PATH"] = f"{Path.home()}/.deno/bin:" + os.environ.get("PATH", "")

COOKIES_FILE = Path(__file__).parent.parent / "credentials" / "youtube-cookies.txt"
RATE_LIMIT_FILE = Path(__file__).parent.parent / "memory" / "youtube-rate-limit.json"
SLEEP_BETWEEN = 20  # seconds between downloads
MAX_PER_SESSION = 30

# Progressive retry delays (in hours): escalates on consecutive 429s.
RETRY_DELAYS = [2, 4, 24]


def load_rate_limit_state() -> dict:
    """Load rate limit state from the JSON file.

    Returns a clean default state if the file is missing, unreadable,
    or corrupt (a stale/garbled state file should never block downloads).
    """
    if RATE_LIMIT_FILE.exists():
        try:
            with open(RATE_LIMIT_FILE) as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            # Was a bare `except:` — narrowed so real bugs (and Ctrl-C)
            # are not silently swallowed.
            pass
    return {"last_429": None, "retry_count": 0, "blocked_until": None}


def save_rate_limit_state(state: dict) -> None:
    """Persist rate limit state to the JSON file, creating parent dirs."""
    RATE_LIMIT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(RATE_LIMIT_FILE, "w") as f:
        # default=str is a safety net; datetimes are stored pre-serialized
        # as isoformat strings by record_rate_limit().
        json.dump(state, f, indent=2, default=str)


def check_rate_limit() -> tuple[bool, str]:
    """
    Check if we're still in a rate limit cooldown period.

    Returns:
        (can_proceed, message)
    """
    state = load_rate_limit_state()

    if state.get("blocked_until"):
        # Naive local datetimes throughout — comparisons stay consistent
        # because record_rate_limit() also uses datetime.now().
        blocked_until = datetime.fromisoformat(state["blocked_until"])
        now = datetime.now()
        if now < blocked_until:
            remaining = blocked_until - now
            hours = remaining.total_seconds() / 3600
            return False, f"⏳ Rate limited. Retry în {hours:.1f}h ({blocked_until.strftime('%H:%M')})"
        else:
            # Cooldown period expired, reset retry count
            state["retry_count"] = 0
            state["blocked_until"] = None
            save_rate_limit_state(state)

    return True, "OK"


def record_rate_limit():
    """Record a rate limit hit and calculate next retry time.

    Returns:
        (delay_hours, blocked_until): the cooldown length applied and the
        datetime until which downloads are blocked.
    """
    state = load_rate_limit_state()
    retry_count = state.get("retry_count", 0)

    # Clamp into RETRY_DELAYS so repeated 429s stay at the max delay.
    delay_hours = RETRY_DELAYS[min(retry_count, len(RETRY_DELAYS) - 1)]
    blocked_until = datetime.now() + timedelta(hours=delay_hours)

    state["last_429"] = datetime.now().isoformat()
    state["retry_count"] = retry_count + 1
    state["blocked_until"] = blocked_until.isoformat()
    save_rate_limit_state(state)

    return delay_hours, blocked_until


def clear_rate_limit() -> None:
    """Clear rate limit state after successful downloads."""
    state = load_rate_limit_state()
    if state.get("retry_count", 0) > 0:
        state["retry_count"] = 0
        state["blocked_until"] = None
        save_rate_limit_state(state)


def download_subtitles(url: str, use_cookies: bool = True) -> tuple[bool, bool]:
    """
    Download subtitles for a single video.

    Args:
        url: Video URL passed straight to yt-dlp.
        use_cookies: Attach the cookies file if it exists (higher limits).

    Returns:
        (success, rate_limited)
    """
    cmd = [
        "yt-dlp",
        "--remote-components", "ejs:github",
        "--write-auto-sub",
        "--sub-lang", "en,ro",
        "--skip-download",
        "-o", "temp_%(id)s",
    ]
    if use_cookies and COOKIES_FILE.exists():
        cmd.extend(["--cookies", str(COOKIES_FILE)])
    cmd.append(url)

    print(f"📥 Downloading: {url}")
    result = subprocess.run(cmd, capture_output=True, text=True)
    # yt-dlp reports 429s on either stream depending on failure mode.
    combined_output = result.stdout + result.stderr

    if "429" in combined_output:
        print(" ⚠️ Rate limited (429)")
        return False, True
    # Merged two previously duplicated success branches: subtitles written
    # OR a clean exit both count as success.
    if "Writing video subtitles" in combined_output or result.returncode == 0:
        print(" ✅ Success")
        return True, False
    print(f" ❌ Error: {combined_output[:200]}")
    return False, False


def main():
    """CLI entry point.

    Exit codes:
        0 - all URLs succeeded
        1 - some URLs failed (or usage error)
        2 - aborted mid-run on a 429
        3 - still inside a cooldown window, nothing attempted
    """
    urls = sys.argv[1:]
    if not urls:
        print("Usage: python3 yt_download.py URL [URL2] ...")
        print("\nRate limit state:")
        state = load_rate_limit_state()
        print(f" Retry count: {state.get('retry_count', 0)}")
        print(f" Blocked until: {state.get('blocked_until', 'Not blocked')}")
        sys.exit(1)

    # Check if we're in cooldown
    can_proceed, message = check_rate_limit()
    if not can_proceed:
        print(message)
        sys.exit(3)  # Special exit code for cooldown

    if len(urls) > MAX_PER_SESSION:
        print(f"⚠️ Max {MAX_PER_SESSION} per session. Processing first {MAX_PER_SESSION}.")
        urls = urls[:MAX_PER_SESSION]

    has_cookies = COOKIES_FILE.exists()
    state = load_rate_limit_state()

    print(f"🍪 Cookies: {'YES' if has_cookies else 'NO'}")
    print(f"⏱️ Sleep: {SLEEP_BETWEEN}s between videos")
    print(f"📊 Videos: {len(urls)}")
    print(f"🔄 Retry count: {state.get('retry_count', 0)}")
    print("-" * 40)

    success = 0
    rate_limited = False
    for i, url in enumerate(urls):
        ok, limited = download_subtitles(url, has_cookies)
        if ok:
            success += 1
        if limited:
            rate_limited = True
            delay_hours, blocked_until = record_rate_limit()
            print(f"🛑 Rate limit hit! Retry în {delay_hours}h ({blocked_until.strftime('%H:%M')})")
            print(f" {len(urls) - i - 1} videos rămase pentru retry.")
            break
        if i < len(urls) - 1:
            print(f" 💤 Sleeping {SLEEP_BETWEEN}s...")
            time.sleep(SLEEP_BETWEEN)

    print("-" * 40)
    print(f"✅ Done: {success}/{len(urls)} videos")

    # Clear rate limit state if we had successful downloads without hitting limit
    if success > 0 and not rate_limited:
        clear_rate_limit()

    if rate_limited:
        sys.exit(2)  # Rate limit hit
    sys.exit(0 if success == len(urls) else 1)


if __name__ == "__main__":
    main()