""" Auto-download and setup whisper.cpp (Vulkan) + model for Windows. Called by run.bat when prerequisites are missing. """ import io import json import os import sys import zipfile from pathlib import Path from urllib.request import urlopen, Request MODELS_DIR = Path("models") MODEL_NAME = "ggml-medium-q5_0.bin" MODEL_URL = f"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/{MODEL_NAME}" GITHUB_API = "https://api.github.com/repos/ggml-org/whisper.cpp/releases/latest" # Community Vulkan builds (for AMD GPUs) VULKAN_BUILDS_API = "https://api.github.com/repos/jerryshell/whisper.cpp-windows-vulkan-bin/releases/latest" WHISPER_DIR = Path("whisper-bin") def progress_bar(current: int, total: int, width: int = 40): if total <= 0: return pct = current / total filled = int(width * pct) bar = "=" * filled + "-" * (width - filled) mb_done = current / 1_048_576 mb_total = total / 1_048_576 print(f"\r [{bar}] {pct:.0%} {mb_done:.0f}/{mb_total:.0f} MB", end="", flush=True) def download_file(url: str, dest: Path, desc: str): """Download a file with progress bar.""" print(f"\n Downloading {desc}...") print(f" URL: {url}") req = Request(url, headers={"User-Agent": "Mozilla/5.0"}) resp = urlopen(req, timeout=60) total = int(resp.headers.get("Content-Length", 0)) downloaded = 0 tmp = dest.with_suffix(".tmp") with open(tmp, "wb") as f: while True: chunk = resp.read(1024 * 1024) if not chunk: break f.write(chunk) downloaded += len(chunk) progress_bar(downloaded, total) print() # newline after progress bar tmp.rename(dest) print(f" Saved: {dest} ({downloaded / 1_048_576:.0f} MB)") def fetch_release(api_url: str) -> dict | None: """Fetch a GitHub release JSON.""" req = Request(api_url, headers={"User-Agent": "Mozilla/5.0"}) try: resp = urlopen(req, timeout=30) return json.loads(resp.read()) except Exception as e: print(f" Could not fetch from {api_url}: {e}") return None def extract_zip(zip_path: Path): """Extract zip contents into WHISPER_DIR, flattened.""" print(f"\n Extracting to {WHISPER_DIR}/...") WHISPER_DIR.mkdir(exist_ok=True) with zipfile.ZipFile(zip_path) as zf: for member in zf.namelist(): filename = Path(member).name if not filename: continue target = WHISPER_DIR / filename with zf.open(member) as src, open(target, "wb") as dst: dst.write(src.read()) print(f" {filename}") zip_path.unlink() def find_whisper_exe() -> str | None: """Find whisper-cli.exe (or similar) in WHISPER_DIR.""" whisper_exe = WHISPER_DIR / "whisper-cli.exe" if whisper_exe.exists(): return str(whisper_exe) # Try main.exe (older naming) main_exe = WHISPER_DIR / "main.exe" if main_exe.exists(): return str(main_exe) exes = list(WHISPER_DIR.glob("*.exe")) for exe in exes: if "whisper" in exe.name.lower() and "cli" in exe.name.lower(): return str(exe) for exe in exes: if "whisper" in exe.name.lower(): return str(exe) if exes: return str(exes[0]) return None def try_community_vulkan_build() -> str | None: """Try downloading Vulkan build from jerryshell's community repo.""" print("\n Trying community Vulkan build (jerryshell/whisper.cpp-windows-vulkan-bin)...") release = fetch_release(VULKAN_BUILDS_API) if not release: return None tag = release.get("tag_name", "unknown") print(f" Community release: {tag}") # Find a zip asset for asset in release.get("assets", []): name = asset["name"].lower() if name.endswith(".zip"): print(f" Found: {asset['name']}") zip_path = Path(asset["name"]) download_file(asset["browser_download_url"], zip_path, asset["name"]) extract_zip(zip_path) return find_whisper_exe() print(" No zip asset found in community release") return None def try_official_vulkan_build() -> str | None: """Try downloading Vulkan build from official ggml-org releases.""" print("\n Fetching latest whisper.cpp release from ggml-org...") release = fetch_release(GITHUB_API) if not release: return None tag = release.get("tag_name", "unknown") print(f" Official release: {tag}") # Priority: vulkan > noavx (cpu-only, no CUDA deps) > skip CUDA entirely vulkan_asset = None cpu_asset = None for asset in release.get("assets", []): name = asset["name"].lower() if not name.endswith(".zip"): continue # Must be Windows if "win" not in name and "x64" not in name: continue # Absolutely skip CUDA builds - they won't work on AMD if "cuda" in name: continue if "vulkan" in name: vulkan_asset = asset break if "noavx" not in name and "openblas" not in name: cpu_asset = asset chosen = vulkan_asset or cpu_asset if not chosen: print(" No Vulkan or CPU-only build found in official releases") print(" Available assets:") for asset in release.get("assets", []): print(f" - {asset['name']}") return None if vulkan_asset: print(f" Found official Vulkan build: {chosen['name']}") else: print(f" No Vulkan build in official release, using CPU build: {chosen['name']}") print(f" (Will work but without GPU acceleration)") zip_path = Path(chosen["name"]) download_file(chosen["browser_download_url"], zip_path, chosen["name"]) extract_zip(zip_path) return find_whisper_exe() def setup_whisper_bin() -> str | None: """Download whisper.cpp Vulkan release. Returns path to whisper-cli.exe.""" whisper_exe = WHISPER_DIR / "whisper-cli.exe" if whisper_exe.exists(): # Check if it's a CUDA build (has CUDA DLLs but no Vulkan DLL) has_cuda = (WHISPER_DIR / "ggml-cuda.dll").exists() has_vulkan = (WHISPER_DIR / "ggml-vulkan.dll").exists() if has_cuda and not has_vulkan: print(f" WARNING: Existing install is a CUDA build (won't work on AMD GPU)") print(f" Removing and re-downloading Vulkan build...") import shutil shutil.rmtree(WHISPER_DIR) else: print(f" whisper-cli.exe already exists at {whisper_exe}") return str(whisper_exe) # Strategy: try community Vulkan build first (reliable for AMD), # then fall back to official release exe_path = try_community_vulkan_build() if exe_path: print(f"\n whisper-cli.exe ready at: {exe_path} (Vulkan)") return exe_path print("\n Community build failed, trying official release...") exe_path = try_official_vulkan_build() if exe_path: print(f"\n whisper-cli.exe ready at: {exe_path}") return exe_path print("\n ERROR: Could not download whisper.cpp") print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases") print(" Build from source with: cmake -DGGML_VULKAN=1") return None FFMPEG_DIR = Path("ffmpeg-bin") FFMPEG_URL = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip" def setup_ffmpeg() -> str | None: """Download ffmpeg if not found. Returns path to ffmpeg.exe.""" import shutil # Already in PATH? if shutil.which("ffmpeg"): path = shutil.which("ffmpeg") print(f" ffmpeg already in PATH: {path}") return path # Already downloaded locally? local_exe = FFMPEG_DIR / "ffmpeg.exe" if local_exe.exists(): print(f" ffmpeg already exists at {local_exe}") return str(local_exe) print("\n Downloading ffmpeg (essentials build)...") zip_path = Path("ffmpeg-essentials.zip") download_file(FFMPEG_URL, zip_path, "ffmpeg") print(f"\n Extracting ffmpeg...") FFMPEG_DIR.mkdir(exist_ok=True) with zipfile.ZipFile(zip_path) as zf: for member in zf.namelist(): # Only extract the bin/*.exe files if member.endswith(".exe"): filename = Path(member).name target = FFMPEG_DIR / filename with zf.open(member) as src, open(target, "wb") as dst: dst.write(src.read()) print(f" {filename}") zip_path.unlink() if local_exe.exists(): print(f"\n ffmpeg ready at: {local_exe}") return str(local_exe) print(" ERROR: ffmpeg.exe not found after extraction") return None def setup_model() -> bool: """Download whisper model. Returns True on success.""" MODELS_DIR.mkdir(exist_ok=True) model_path = MODELS_DIR / MODEL_NAME if model_path.exists() and model_path.stat().st_size > 100_000_000: print(f" Model already exists: {model_path} ({model_path.stat().st_size / 1_048_576:.0f} MB)") return True download_file(MODEL_URL, model_path, f"Whisper model ({MODEL_NAME})") if model_path.exists() and model_path.stat().st_size > 100_000_000: return True print(" ERROR: Model file too small or missing after download") return False def main(): what = sys.argv[1] if len(sys.argv) > 1 else "all" if what in ("all", "ffmpeg"): print("=" * 60) print(" Setting up ffmpeg") print("=" * 60) ffmpeg_path = setup_ffmpeg() if ffmpeg_path: Path(".ffmpeg_bin_path").write_text(ffmpeg_path) else: print("\nFAILED to set up ffmpeg") if what == "ffmpeg": sys.exit(1) if what in ("all", "whisper"): print("=" * 60) print(" Setting up whisper.cpp") print("=" * 60) exe_path = setup_whisper_bin() if exe_path: # Write path to temp file so run.bat can read it Path(".whisper_bin_path").write_text(exe_path) else: print("\nFAILED to set up whisper.cpp") if what == "whisper": sys.exit(1) if what in ("all", "model"): print() print("=" * 60) print(f" Downloading Whisper model: {MODEL_NAME}") print("=" * 60) if not setup_model(): print("\nFAILED to download model") sys.exit(1) print() print("Setup complete!") if __name__ == "__main__": main()