""" Auto-download and setup whisper.cpp (CPU build) + model for Windows. Called by run.bat when prerequisites are missing. """ import io import json import os import sys import zipfile from pathlib import Path from urllib.request import urlopen, Request MODELS_DIR = Path("models") MODEL_NAME = "ggml-medium-q5_0.bin" MODEL_URL = f"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/{MODEL_NAME}" GITHUB_API = "https://api.github.com/repos/ggml-org/whisper.cpp/releases/latest" # Community Vulkan builds (for AMD GPUs) VULKAN_BUILDS_API = "https://api.github.com/repos/jerryshell/whisper.cpp-windows-vulkan-bin/releases/latest" WHISPER_DIR = Path("whisper-bin") def progress_bar(current: int, total: int, width: int = 40): if total <= 0: return pct = current / total filled = int(width * pct) bar = "=" * filled + "-" * (width - filled) mb_done = current / 1_048_576 mb_total = total / 1_048_576 print(f"\r [{bar}] {pct:.0%} {mb_done:.0f}/{mb_total:.0f} MB", end="", flush=True) def download_file(url: str, dest: Path, desc: str): """Download a file with progress bar.""" print(f"\n Downloading {desc}...") print(f" URL: {url}") req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) resp = urlopen(req, timeout=60) total = int(resp.headers.get("Content-Length", 0)) downloaded = 0 tmp = dest.with_suffix(".tmp") with open(tmp, "wb") as f: while True: chunk = resp.read(1024 * 1024) if not chunk: break f.write(chunk) downloaded += len(chunk) progress_bar(downloaded, total) print() # newline after progress bar tmp.rename(dest) print(f" Saved: {dest} ({downloaded / 1_048_576:.0f} MB)") def fetch_release(api_url: str) -> dict | None: """Fetch a GitHub release JSON.""" req = Request(api_url, headers={"User-Agent": "Mozilla/5.0"}) try: resp = urlopen(req, timeout=30) return json.loads(resp.read()) except Exception as e: print(f" Could not fetch from {api_url}: {e}") return None def extract_zip(zip_path: Path): """Extract zip contents into WHISPER_DIR, flattened.""" print(f"\n Extracting to {WHISPER_DIR}/...") WHISPER_DIR.mkdir(exist_ok=True) with zipfile.ZipFile(zip_path) as zf: for member in zf.namelist(): filename = Path(member).name if not filename: continue target = WHISPER_DIR / filename with zf.open(member) as src, open(target, "wb") as dst: dst.write(src.read()) print(f" {filename}") zip_path.unlink() def find_whisper_exe() -> str | None: """Find whisper-cli.exe (or similar) in WHISPER_DIR.""" whisper_exe = WHISPER_DIR / "whisper-cli.exe" if whisper_exe.exists(): return str(whisper_exe) # Try main.exe (older naming) main_exe = WHISPER_DIR / "main.exe" if main_exe.exists(): return str(main_exe) exes = list(WHISPER_DIR.glob("*.exe")) for exe in exes: if "whisper" in exe.name.lower() and "cli" in exe.name.lower(): return str(exe) for exe in exes: if "whisper" in exe.name.lower(): return str(exe) if exes: return str(exes[0]) return None def try_community_vulkan_build() -> str | None: """Try downloading Vulkan build from jerryshell's community repo.""" print("\n Trying community Vulkan build (jerryshell/whisper.cpp-windows-vulkan-bin)...") release = fetch_release(VULKAN_BUILDS_API) if not release: return None tag = release.get("tag_name", "unknown") print(f" Community release: {tag}") # Find a zip asset for asset in release.get("assets", []): name = asset["name"].lower() if name.endswith(".zip"): print(f" Found: {asset['name']}") zip_path = Path(asset["name"]) download_file(asset["browser_download_url"], zip_path, asset["name"]) extract_zip(zip_path) return find_whisper_exe() print(" No zip asset found in community release") return None def try_official_vulkan_build() -> str | None: """Try downloading Vulkan build from official ggml-org releases.""" print("\n Fetching latest whisper.cpp release from ggml-org...") release = fetch_release(GITHUB_API) if not release: return None tag = release.get("tag_name", "unknown") print(f" Official release: {tag}") # Priority: CPU build (no GPU deps needed) cpu_asset = None for asset in release.get("assets", []): name = asset["name"].lower() if not name.endswith(".zip"): continue # Must be Windows if "win" not in name and "x64" not in name: continue # Skip GPU builds entirely if "cuda" in name or "vulkan" in name or "openblas" in name: continue if "noavx" not in name: cpu_asset = asset break if not cpu_asset: print(" No CPU build found in official releases") print(" Available assets:") for asset in release.get("assets", []): print(f" - {asset['name']}") return None print(f" Found CPU build: {cpu_asset['name']}") chosen = cpu_asset zip_path = Path(chosen["name"]) download_file(chosen["browser_download_url"], zip_path, chosen["name"]) extract_zip(zip_path) return find_whisper_exe() def setup_whisper_bin() -> str | None: """Download whisper.cpp CPU release. Returns path to whisper-cli.exe.""" whisper_exe = WHISPER_DIR / "whisper-cli.exe" if whisper_exe.exists(): print(f" whisper-cli.exe already exists at {whisper_exe}") return str(whisper_exe) exe_path = try_official_vulkan_build() if exe_path: print(f"\n whisper-cli.exe ready at: {exe_path}") return exe_path print("\n ERROR: Could not download whisper.cpp") print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases") return None FFMPEG_DIR = Path("ffmpeg-bin") FFMPEG_URL = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip" def setup_ffmpeg() -> str | None: """Download ffmpeg if not found. Returns path to ffmpeg.exe.""" import shutil # Already in PATH? if shutil.which("ffmpeg"): path = shutil.which("ffmpeg") print(f" ffmpeg already in PATH: {path}") return path # Already downloaded locally? local_exe = FFMPEG_DIR / "ffmpeg.exe" if local_exe.exists(): print(f" ffmpeg already exists at {local_exe}") return str(local_exe) print("\n Downloading ffmpeg (essentials build)...") zip_path = Path("ffmpeg-essentials.zip") download_file(FFMPEG_URL, zip_path, "ffmpeg") print(f"\n Extracting ffmpeg...") FFMPEG_DIR.mkdir(exist_ok=True) with zipfile.ZipFile(zip_path) as zf: for member in zf.namelist(): # Only extract the bin/*.exe files if member.endswith(".exe"): filename = Path(member).name target = FFMPEG_DIR / filename with zf.open(member) as src, open(target, "wb") as dst: dst.write(src.read()) print(f" {filename}") zip_path.unlink() if local_exe.exists(): print(f"\n ffmpeg ready at: {local_exe}") return str(local_exe) print(" ERROR: ffmpeg.exe not found after extraction") return None def setup_model() -> bool: """Download whisper model. Returns True on success.""" MODELS_DIR.mkdir(exist_ok=True) model_path = MODELS_DIR / MODEL_NAME if model_path.exists() and model_path.stat().st_size > 100_000_000: print(f" Model already exists: {model_path} ({model_path.stat().st_size / 1_048_576:.0f} MB)") return True download_file(MODEL_URL, model_path, f"Whisper model ({MODEL_NAME})") if model_path.exists() and model_path.stat().st_size > 100_000_000: return True print(" ERROR: Model file too small or missing after download") return False def main(): what = sys.argv[1] if len(sys.argv) > 1 else "all" if what in ("all", "ffmpeg"): print("=" * 60) print(" Setting up ffmpeg") print("=" * 60) ffmpeg_path = setup_ffmpeg() if ffmpeg_path: Path(".ffmpeg_bin_path").write_text(ffmpeg_path) else: print("\nFAILED to set up ffmpeg") if what == "ffmpeg": sys.exit(1) if what in ("all", "whisper"): print("=" * 60) print(" Setting up whisper.cpp") print("=" * 60) exe_path = setup_whisper_bin() if exe_path: # Write path to temp file so run.bat can read it Path(".whisper_bin_path").write_text(exe_path) else: print("\nFAILED to set up whisper.cpp") if what == "whisper": sys.exit(1) if what in ("all", "model"): print() print("=" * 60) print(f" Downloading Whisper model: {MODEL_NAME}") print("=" * 60) if not setup_model(): print("\nFAILED to download model") sys.exit(1) print() print("Setup complete!") if __name__ == "__main__": main()