From e83bd748139f043fe3e556569ead6ad57b1bb156 Mon Sep 17 00:00:00 2001 From: Marius Mutu Date: Tue, 24 Mar 2026 02:01:39 +0200 Subject: [PATCH] feat: switch to CPU-only whisper build (no GPU on this machine) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - setup_whisper.py: descarcă build CPU din release-urile oficiale, sare peste Vulkan/CUDA/OpenBLAS - run.bat: elimină env var GGML_VK_PREFER_HOST_MEMORY și check-ul Vulkan SDK - transcribe.py: --no-gpu era deja setat Co-Authored-By: Claude Sonnet 4.6 --- run.bat | 609 +++++++++++++++++++++++------------------------ setup_whisper.py | 51 +--- 2 files changed, 309 insertions(+), 351 deletions(-) diff --git a/run.bat b/run.bat index 6ed5f72..cac6222 100644 --- a/run.bat +++ b/run.bat @@ -1,313 +1,296 @@ -@echo off -setlocal enabledelayedexpansion -cd /d "%~dp0" - -:: Prevent Vulkan from exhausting VRAM — overflow to system RAM instead of crashing -set "GGML_VK_PREFER_HOST_MEMORY=ON" - -echo ============================================================ -echo NLP Master - Download + Transcribe Pipeline -echo ============================================================ -echo. - -:: ============================================================ -:: PREREQUISITES CHECK -:: ============================================================ -echo Checking prerequisites... -echo. -set "PREREQ_OK=1" -set "NEED_WHISPER=" -set "NEED_MODEL=" - -:: --- Python --- -python --version >nul 2>&1 -if errorlevel 1 ( - echo [X] Python NOT FOUND - echo Install from: https://www.python.org/downloads/ - echo Make sure to check "Add Python to PATH" during install. - echo. - echo Cannot continue without Python. Install it and re-run. - pause - exit /b 1 -) else ( - for /f "tokens=2" %%v in ('python --version 2^>^&1') do echo [OK] Python %%v -) - -:: --- .env credentials --- -if exist ".env" ( - findstr /m "COURSE_USERNAME=." ".env" >nul 2>&1 - if errorlevel 1 ( - echo [X] .env File exists but COURSE_USERNAME is empty - echo Edit .env and fill in your credentials. - set "PREREQ_OK=" - ) else ( - echo [OK] .env Credentials configured - ) -) else ( - echo [X] .env NOT FOUND - echo Create .env with: - echo COURSE_USERNAME=your_email - echo COURSE_PASSWORD=your_password - set "PREREQ_OK=" -) - -:: --- ffmpeg --- -set "FFMPEG_FOUND=" -set "NEED_FFMPEG=" -where ffmpeg >nul 2>&1 -if not errorlevel 1 ( - set "FFMPEG_FOUND=1" - for /f "delims=" %%p in ('where ffmpeg 2^>nul') do set "FFMPEG_LOCATION=%%p" - echo [OK] ffmpeg !FFMPEG_LOCATION! -) else ( - if exist "ffmpeg.exe" ( - set "FFMPEG_FOUND=1" - echo [OK] ffmpeg .\ffmpeg.exe (local^) - ) else ( - echo [--] ffmpeg Not found - will auto-install - set "NEED_FFMPEG=1" - ) -) - -:: --- whisper-cli.exe --- -set "WHISPER_FOUND=" -set "WHISPER_LOCATION=" -if defined WHISPER_BIN ( - if exist "%WHISPER_BIN%" ( - set "WHISPER_FOUND=1" - set "WHISPER_LOCATION=%WHISPER_BIN% (env var)" - ) -) -if not defined WHISPER_FOUND ( - where whisper-cli.exe >nul 2>&1 - if not errorlevel 1 ( - set "WHISPER_FOUND=1" - for /f "delims=" %%p in ('where whisper-cli.exe 2^>nul') do set "WHISPER_LOCATION=%%p (PATH)" - ) -) -if not defined WHISPER_FOUND ( - if exist "whisper-cli.exe" ( - set "WHISPER_FOUND=1" - set "WHISPER_BIN=whisper-cli.exe" - set "WHISPER_LOCATION=.\whisper-cli.exe (local)" - ) -) -if not defined WHISPER_FOUND ( - if exist "whisper-bin\whisper-cli.exe" ( - set "WHISPER_FOUND=1" - set "WHISPER_BIN=whisper-bin\whisper-cli.exe" - set "WHISPER_LOCATION=whisper-bin\whisper-cli.exe (auto-installed)" - ) -) -if not defined WHISPER_FOUND ( - if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" ( - set "WHISPER_FOUND=1" - set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe" - set "WHISPER_LOCATION=whisper.cpp\build\... (local build)" - ) -) - -if defined WHISPER_FOUND ( - echo [OK] whisper-cli !WHISPER_LOCATION! -) else ( - echo [--] whisper-cli Not found - will auto-download - set "NEED_WHISPER=1" -) - -:: --- Whisper model --- -if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin" -if exist "%WHISPER_MODEL%" ( - for %%F in ("%WHISPER_MODEL%") do ( - set /a "MODEL_MB=%%~zF / 1048576" - ) - echo [OK] Whisper model %WHISPER_MODEL% (!MODEL_MB! MB^) -) else ( - echo [--] Whisper model Not found - will auto-download (~500 MB^) - set "NEED_MODEL=1" -) - -:: --- Vulkan GPU support --- -set "VULKAN_FOUND=" -where vulkaninfo >nul 2>&1 -if not errorlevel 1 ( - set "VULKAN_FOUND=1" - echo [OK] Vulkan SDK Installed -) else ( - if exist "%VULKAN_SDK%\Bin\vulkaninfo.exe" ( - set "VULKAN_FOUND=1" - echo [OK] Vulkan SDK %VULKAN_SDK% - ) else ( - echo [!!] Vulkan SDK Not detected (whisper.cpp may use CPU fallback^) - echo Install from: https://vulkan.lunarg.com/sdk/home - ) -) - -:: --- Disk space --- -echo. -for /f "tokens=3" %%a in ('dir /-c "%~dp0." 2^>nul ^| findstr /c:"bytes free"') do ( - set /a "FREE_GB=%%a / 1073741824" 2>nul -) -if defined FREE_GB ( - if !FREE_GB! LSS 50 ( - echo [!!] Disk space ~!FREE_GB! GB free (need ~50 GB for all audio + transcripts^) - ) else ( - echo [OK] Disk space ~!FREE_GB! GB free - ) -) - -echo. - -:: --- Stop if .env is broken (can't auto-fix that) --- -if not defined PREREQ_OK ( - echo ============================================================ - echo MISSING PREREQUISITES - fix the [X] items above and re-run. - echo ============================================================ - pause - exit /b 1 -) - -:: ============================================================ -:: AUTO-INSTALL MISSING COMPONENTS -:: ============================================================ -if defined NEED_FFMPEG ( - echo ============================================================ - echo Auto-downloading ffmpeg... - echo ============================================================ - python setup_whisper.py ffmpeg - if errorlevel 1 ( - echo. - echo ERROR: Could not install ffmpeg. - echo Download manually from: https://www.gyan.dev/ffmpeg/builds/ - echo Extract ffmpeg.exe to ffmpeg-bin\ and re-run. - pause - exit /b 1 - ) - if exist ".ffmpeg_bin_path" del .ffmpeg_bin_path - echo. -) - -:: Add ffmpeg-bin to PATH if it exists -if exist "ffmpeg-bin\ffmpeg.exe" ( - set "PATH=%~dp0ffmpeg-bin;%PATH%" -) - -if defined NEED_WHISPER ( - echo ============================================================ - echo Auto-downloading whisper.cpp (Vulkan build^)... - echo ============================================================ - python setup_whisper.py whisper - if errorlevel 1 ( - echo. - echo ERROR: Failed to auto-download whisper.cpp. - echo Download manually from: https://github.com/ggml-org/whisper.cpp/releases - pause - exit /b 1 - ) - :: Read the path that setup_whisper.py wrote - if exist ".whisper_bin_path" ( - set /p WHISPER_BIN=<.whisper_bin_path - del .whisper_bin_path - echo Using: !WHISPER_BIN! - ) - echo. -) - -if defined NEED_MODEL ( - echo ============================================================ - echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)... - echo This will take a few minutes depending on your connection. - echo ============================================================ - python setup_whisper.py model - if errorlevel 1 ( - echo. - echo ERROR: Failed to download model. - echo Download manually from: - echo https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin - echo Save to: models\ggml-medium-q5_0.bin - pause - exit /b 1 - ) - echo. -) - -echo All prerequisites OK! -echo. -echo ============================================================ -echo Starting pipeline... -echo ============================================================ -echo. - -:: ============================================================ -:: STEP 1: VENV + DEPENDENCIES -:: ============================================================ -if not exist ".venv\Scripts\python.exe" ( - echo [1/4] Creating Python virtual environment... - python -m venv .venv - if errorlevel 1 ( - echo ERROR: Failed to create venv. - pause - exit /b 1 - ) - echo Done. -) else ( - echo [1/4] Virtual environment already exists. -) - -echo [2/4] Installing Python dependencies... -.venv\Scripts\pip install -q -r requirements.txt -if errorlevel 1 ( - echo ERROR: Failed to install dependencies. - pause - exit /b 1 -) -echo Done. - -:: ============================================================ -:: STEP 2: DOWNLOAD -:: ============================================================ -echo. -echo [3/4] Downloading audio files... -echo ============================================================ -.venv\Scripts\python download.py -if errorlevel 1 ( - echo. - echo WARNING: Some downloads failed. Check download_errors.log - echo Press any key to continue to transcription anyway, or Ctrl+C to abort. - pause >nul -) - -:: ============================================================ -:: STEP 3: TRANSCRIBE -:: ============================================================ -echo. -echo [4/4] Transcribing with whisper.cpp... -echo ============================================================ -echo Using: %WHISPER_BIN% -echo Model: %WHISPER_MODEL% -echo. - -if "%~1"=="" ( - .venv\Scripts\python transcribe.py -) else ( - echo Modules filter: %~1 - .venv\Scripts\python transcribe.py --modules %~1 -) -if errorlevel 1 ( - echo. - echo WARNING: Some transcriptions failed. Check transcribe_errors.log -) - -:: ============================================================ -:: DONE -:: ============================================================ -echo. -echo ============================================================ -echo Pipeline complete! -echo - Audio files: audio\ -echo - Transcripts: transcripts\ -echo - Manifest: manifest.json -echo. -echo Next step: generate summaries from WSL2 with Claude Code -echo python summarize.py -echo ============================================================ -pause +@echo off +setlocal enabledelayedexpansion +cd /d "%~dp0" + + +echo ============================================================ +echo NLP Master - Download + Transcribe Pipeline +echo ============================================================ +echo. + +:: ============================================================ +:: PREREQUISITES CHECK +:: ============================================================ +echo Checking prerequisites... +echo. +set "PREREQ_OK=1" +set "NEED_WHISPER=" +set "NEED_MODEL=" + +:: --- Python --- +python --version >nul 2>&1 +if errorlevel 1 ( + echo [X] Python NOT FOUND + echo Install from: https://www.python.org/downloads/ + echo Make sure to check "Add Python to PATH" during install. + echo. + echo Cannot continue without Python. Install it and re-run. + pause + exit /b 1 +) else ( + for /f "tokens=2" %%v in ('python --version 2^>^&1') do echo [OK] Python %%v +) + +:: --- .env credentials --- +if exist ".env" ( + findstr /m "COURSE_USERNAME=." ".env" >nul 2>&1 + if errorlevel 1 ( + echo [X] .env File exists but COURSE_USERNAME is empty + echo Edit .env and fill in your credentials. + set "PREREQ_OK=" + ) else ( + echo [OK] .env Credentials configured + ) +) else ( + echo [X] .env NOT FOUND + echo Create .env with: + echo COURSE_USERNAME=your_email + echo COURSE_PASSWORD=your_password + set "PREREQ_OK=" +) + +:: --- ffmpeg --- +set "FFMPEG_FOUND=" +set "NEED_FFMPEG=" +where ffmpeg >nul 2>&1 +if not errorlevel 1 ( + set "FFMPEG_FOUND=1" + for /f "delims=" %%p in ('where ffmpeg 2^>nul') do set "FFMPEG_LOCATION=%%p" + echo [OK] ffmpeg !FFMPEG_LOCATION! +) else ( + if exist "ffmpeg.exe" ( + set "FFMPEG_FOUND=1" + echo [OK] ffmpeg .\ffmpeg.exe (local^) + ) else ( + echo [--] ffmpeg Not found - will auto-install + set "NEED_FFMPEG=1" + ) +) + +:: --- whisper-cli.exe --- +set "WHISPER_FOUND=" +set "WHISPER_LOCATION=" +if defined WHISPER_BIN ( + if exist "%WHISPER_BIN%" ( + set "WHISPER_FOUND=1" + set "WHISPER_LOCATION=%WHISPER_BIN% (env var)" + ) +) +if not defined WHISPER_FOUND ( + where whisper-cli.exe >nul 2>&1 + if not errorlevel 1 ( + set "WHISPER_FOUND=1" + for /f "delims=" %%p in ('where whisper-cli.exe 2^>nul') do set "WHISPER_LOCATION=%%p (PATH)" + ) +) +if not defined WHISPER_FOUND ( + if exist "whisper-cli.exe" ( + set "WHISPER_FOUND=1" + set "WHISPER_BIN=whisper-cli.exe" + set "WHISPER_LOCATION=.\whisper-cli.exe (local)" + ) +) +if not defined WHISPER_FOUND ( + if exist "whisper-bin\whisper-cli.exe" ( + set "WHISPER_FOUND=1" + set "WHISPER_BIN=whisper-bin\whisper-cli.exe" + set "WHISPER_LOCATION=whisper-bin\whisper-cli.exe (auto-installed)" + ) +) +if not defined WHISPER_FOUND ( + if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" ( + set "WHISPER_FOUND=1" + set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe" + set "WHISPER_LOCATION=whisper.cpp\build\... (local build)" + ) +) + +if defined WHISPER_FOUND ( + echo [OK] whisper-cli !WHISPER_LOCATION! +) else ( + echo [--] whisper-cli Not found - will auto-download + set "NEED_WHISPER=1" +) + +:: --- Whisper model --- +if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin" +if exist "%WHISPER_MODEL%" ( + for %%F in ("%WHISPER_MODEL%") do ( + set /a "MODEL_MB=%%~zF / 1048576" + ) + echo [OK] Whisper model %WHISPER_MODEL% (!MODEL_MB! MB^) +) else ( + echo [--] Whisper model Not found - will auto-download (~500 MB^) + set "NEED_MODEL=1" +) + + +:: --- Disk space --- +echo. +for /f "tokens=3" %%a in ('dir /-c "%~dp0." 2^>nul ^| findstr /c:"bytes free"') do ( + set /a "FREE_GB=%%a / 1073741824" 2>nul +) +if defined FREE_GB ( + if !FREE_GB! LSS 50 ( + echo [!!] Disk space ~!FREE_GB! GB free (need ~50 GB for all audio + transcripts^) + ) else ( + echo [OK] Disk space ~!FREE_GB! GB free + ) +) + +echo. + +:: --- Stop if .env is broken (can't auto-fix that) --- +if not defined PREREQ_OK ( + echo ============================================================ + echo MISSING PREREQUISITES - fix the [X] items above and re-run. + echo ============================================================ + pause + exit /b 1 +) + +:: ============================================================ +:: AUTO-INSTALL MISSING COMPONENTS +:: ============================================================ +if defined NEED_FFMPEG ( + echo ============================================================ + echo Auto-downloading ffmpeg... + echo ============================================================ + python setup_whisper.py ffmpeg + if errorlevel 1 ( + echo. + echo ERROR: Could not install ffmpeg. + echo Download manually from: https://www.gyan.dev/ffmpeg/builds/ + echo Extract ffmpeg.exe to ffmpeg-bin\ and re-run. + pause + exit /b 1 + ) + if exist ".ffmpeg_bin_path" del .ffmpeg_bin_path + echo. +) + +:: Add ffmpeg-bin to PATH if it exists +if exist "ffmpeg-bin\ffmpeg.exe" ( + set "PATH=%~dp0ffmpeg-bin;%PATH%" +) + +if defined NEED_WHISPER ( + echo ============================================================ + echo Auto-downloading whisper.cpp (CPU build^)... + echo ============================================================ + python setup_whisper.py whisper + if errorlevel 1 ( + echo. + echo ERROR: Failed to auto-download whisper.cpp. + echo Download manually from: https://github.com/ggml-org/whisper.cpp/releases + pause + exit /b 1 + ) + :: Read the path that setup_whisper.py wrote + if exist ".whisper_bin_path" ( + set /p WHISPER_BIN=<.whisper_bin_path + del .whisper_bin_path + echo Using: !WHISPER_BIN! + ) + echo. +) + +if defined NEED_MODEL ( + echo ============================================================ + echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)... + echo This will take a few minutes depending on your connection. + echo ============================================================ + python setup_whisper.py model + if errorlevel 1 ( + echo. + echo ERROR: Failed to download model. + echo Download manually from: + echo https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin + echo Save to: models\ggml-medium-q5_0.bin + pause + exit /b 1 + ) + echo. +) + +echo All prerequisites OK! +echo. +echo ============================================================ +echo Starting pipeline... +echo ============================================================ +echo. + +:: ============================================================ +:: STEP 1: VENV + DEPENDENCIES +:: ============================================================ +if not exist ".venv\Scripts\python.exe" ( + echo [1/4] Creating Python virtual environment... + python -m venv .venv + if errorlevel 1 ( + echo ERROR: Failed to create venv. + pause + exit /b 1 + ) + echo Done. +) else ( + echo [1/4] Virtual environment already exists. +) + +echo [2/4] Installing Python dependencies... +.venv\Scripts\pip install -q -r requirements.txt +if errorlevel 1 ( + echo ERROR: Failed to install dependencies. + pause + exit /b 1 +) +echo Done. + +:: ============================================================ +:: STEP 2: DOWNLOAD +:: ============================================================ +echo. +echo [3/4] Downloading audio files... +echo ============================================================ +.venv\Scripts\python download.py +if errorlevel 1 ( + echo. + echo WARNING: Some downloads failed. Check download_errors.log + echo Press any key to continue to transcription anyway, or Ctrl+C to abort. + pause >nul +) + +:: ============================================================ +:: STEP 3: TRANSCRIBE +:: ============================================================ +echo. +echo [4/4] Transcribing with whisper.cpp... +echo ============================================================ +echo Using: %WHISPER_BIN% +echo Model: %WHISPER_MODEL% +echo. + +if "%~1"=="" ( + .venv\Scripts\python transcribe.py +) else ( + echo Modules filter: %~1 + .venv\Scripts\python transcribe.py --modules %~1 +) +if errorlevel 1 ( + echo. + echo WARNING: Some transcriptions failed. Check transcribe_errors.log +) + +:: ============================================================ +:: DONE +:: ============================================================ +echo. +echo ============================================================ +echo Pipeline complete! +echo - Audio files: audio\ +echo - Transcripts: transcripts\ +echo - Manifest: manifest.json +echo. +echo Next step: generate summaries from WSL2 with Claude Code +echo python summarize.py +echo ============================================================ +pause diff --git a/setup_whisper.py b/setup_whisper.py index cc7fa18..607f5c0 100644 --- a/setup_whisper.py +++ b/setup_whisper.py @@ -1,5 +1,5 @@ """ -Auto-download and setup whisper.cpp (Vulkan) + model for Windows. +Auto-download and setup whisper.cpp (CPU build) + model for Windows. Called by run.bat when prerequisites are missing. """ @@ -142,8 +142,7 @@ def try_official_vulkan_build() -> str | None: tag = release.get("tag_name", "unknown") print(f" Official release: {tag}") - # Priority: vulkan > noavx (cpu-only, no CUDA deps) > skip CUDA entirely - vulkan_asset = None + # Priority: CPU build (no GPU deps needed) cpu_asset = None for asset in release.get("assets", []): name = asset["name"].lower() @@ -152,28 +151,22 @@ def try_official_vulkan_build() -> str | None: # Must be Windows if "win" not in name and "x64" not in name: continue - # Absolutely skip CUDA builds - they won't work on AMD - if "cuda" in name: + # Skip GPU builds entirely + if "cuda" in name or "vulkan" in name or "openblas" in name: continue - if "vulkan" in name: - vulkan_asset = asset - break - if "noavx" not in name and "openblas" not in name: + if "noavx" not in name: cpu_asset = asset + break - chosen = vulkan_asset or cpu_asset - if not chosen: - print(" No Vulkan or CPU-only build found in official releases") + if not cpu_asset: + print(" No CPU build found in official releases") print(" Available assets:") for asset in release.get("assets", []): print(f" - {asset['name']}") return None - if vulkan_asset: - print(f" Found official Vulkan build: {chosen['name']}") - else: - print(f" No Vulkan build in official release, using CPU build: {chosen['name']}") - print(f" (Will work but without GPU acceleration)") + print(f" Found CPU build: {cpu_asset['name']}") + chosen = cpu_asset zip_path = Path(chosen["name"]) download_file(chosen["browser_download_url"], zip_path, chosen["name"]) @@ -182,29 +175,12 @@ def try_official_vulkan_build() -> str | None: def setup_whisper_bin() -> str | None: - """Download whisper.cpp Vulkan release. Returns path to whisper-cli.exe.""" + """Download whisper.cpp CPU release. Returns path to whisper-cli.exe.""" whisper_exe = WHISPER_DIR / "whisper-cli.exe" if whisper_exe.exists(): - # Check if it's a CUDA build (has CUDA DLLs but no Vulkan DLL) - has_cuda = (WHISPER_DIR / "ggml-cuda.dll").exists() - has_vulkan = (WHISPER_DIR / "ggml-vulkan.dll").exists() - if has_cuda and not has_vulkan: - print(f" WARNING: Existing install is a CUDA build (won't work on AMD GPU)") - print(f" Removing and re-downloading Vulkan build...") - import shutil - shutil.rmtree(WHISPER_DIR) - else: - print(f" whisper-cli.exe already exists at {whisper_exe}") - return str(whisper_exe) + print(f" whisper-cli.exe already exists at {whisper_exe}") + return str(whisper_exe) - # Strategy: try community Vulkan build first (reliable for AMD), - # then fall back to official release - exe_path = try_community_vulkan_build() - if exe_path: - print(f"\n whisper-cli.exe ready at: {exe_path} (Vulkan)") - return exe_path - - print("\n Community build failed, trying official release...") exe_path = try_official_vulkan_build() if exe_path: print(f"\n whisper-cli.exe ready at: {exe_path}") @@ -212,7 +188,6 @@ def setup_whisper_bin() -> str | None: print("\n ERROR: Could not download whisper.cpp") print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases") - print(" Build from source with: cmake -DGGML_VULKAN=1") return None