feat: switch to CPU-only whisper build (no GPU on this machine)

- setup_whisper.py: descarcă build-ul CPU din release-urile oficiale
  și sare peste build-urile Vulkan/CUDA/OpenBLAS
- run.bat: elimină env var GGML_VK_PREFER_HOST_MEMORY și check-ul Vulkan SDK
- transcribe.py: nemodificat (--no-gpu era deja setat)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-24 02:01:39 +02:00
parent 60f564c107
commit e83bd74813
2 changed files with 309 additions and 351 deletions

609
run.bat
View File

@@ -1,313 +1,296 @@
@echo off @echo off
setlocal enabledelayedexpansion setlocal enabledelayedexpansion
cd /d "%~dp0" cd /d "%~dp0"
:: Prevent Vulkan from exhausting VRAM — overflow to system RAM instead of crashing
set "GGML_VK_PREFER_HOST_MEMORY=ON" echo ============================================================
echo NLP Master - Download + Transcribe Pipeline
echo ============================================================ echo ============================================================
echo NLP Master - Download + Transcribe Pipeline echo.
echo ============================================================
echo. :: ============================================================
:: PREREQUISITES CHECK
:: ============================================================ :: ============================================================
:: PREREQUISITES CHECK echo Checking prerequisites...
:: ============================================================ echo.
echo Checking prerequisites... set "PREREQ_OK=1"
echo. set "NEED_WHISPER="
set "PREREQ_OK=1" set "NEED_MODEL="
set "NEED_WHISPER="
set "NEED_MODEL=" :: --- Python ---
python --version >nul 2>&1
:: --- Python --- if errorlevel 1 (
python --version >nul 2>&1 echo [X] Python NOT FOUND
if errorlevel 1 ( echo Install from: https://www.python.org/downloads/
echo [X] Python NOT FOUND echo Make sure to check "Add Python to PATH" during install.
echo Install from: https://www.python.org/downloads/ echo.
echo Make sure to check "Add Python to PATH" during install. echo Cannot continue without Python. Install it and re-run.
echo. pause
echo Cannot continue without Python. Install it and re-run. exit /b 1
pause ) else (
exit /b 1 for /f "tokens=2" %%v in ('python --version 2^>^&1') do echo [OK] Python %%v
) else ( )
for /f "tokens=2" %%v in ('python --version 2^>^&1') do echo [OK] Python %%v
) :: --- .env credentials ---
if exist ".env" (
:: --- .env credentials --- findstr /m "COURSE_USERNAME=." ".env" >nul 2>&1
if exist ".env" ( if errorlevel 1 (
findstr /m "COURSE_USERNAME=." ".env" >nul 2>&1 echo [X] .env File exists but COURSE_USERNAME is empty
if errorlevel 1 ( echo Edit .env and fill in your credentials.
echo [X] .env File exists but COURSE_USERNAME is empty set "PREREQ_OK="
echo Edit .env and fill in your credentials. ) else (
set "PREREQ_OK=" echo [OK] .env Credentials configured
) else ( )
echo [OK] .env Credentials configured ) else (
) echo [X] .env NOT FOUND
) else ( echo Create .env with:
echo [X] .env NOT FOUND echo COURSE_USERNAME=your_email
echo Create .env with: echo COURSE_PASSWORD=your_password
echo COURSE_USERNAME=your_email set "PREREQ_OK="
echo COURSE_PASSWORD=your_password )
set "PREREQ_OK="
) :: --- ffmpeg ---
set "FFMPEG_FOUND="
:: --- ffmpeg --- set "NEED_FFMPEG="
set "FFMPEG_FOUND=" where ffmpeg >nul 2>&1
set "NEED_FFMPEG=" if not errorlevel 1 (
where ffmpeg >nul 2>&1 set "FFMPEG_FOUND=1"
if not errorlevel 1 ( for /f "delims=" %%p in ('where ffmpeg 2^>nul') do set "FFMPEG_LOCATION=%%p"
set "FFMPEG_FOUND=1" echo [OK] ffmpeg !FFMPEG_LOCATION!
for /f "delims=" %%p in ('where ffmpeg 2^>nul') do set "FFMPEG_LOCATION=%%p" ) else (
echo [OK] ffmpeg !FFMPEG_LOCATION! if exist "ffmpeg.exe" (
) else ( set "FFMPEG_FOUND=1"
if exist "ffmpeg.exe" ( echo [OK] ffmpeg .\ffmpeg.exe (local^)
set "FFMPEG_FOUND=1" ) else (
echo [OK] ffmpeg .\ffmpeg.exe (local^) echo [--] ffmpeg Not found - will auto-install
) else ( set "NEED_FFMPEG=1"
echo [--] ffmpeg Not found - will auto-install )
set "NEED_FFMPEG=1" )
)
) :: --- whisper-cli.exe ---
set "WHISPER_FOUND="
:: --- whisper-cli.exe --- set "WHISPER_LOCATION="
set "WHISPER_FOUND=" if defined WHISPER_BIN (
set "WHISPER_LOCATION=" if exist "%WHISPER_BIN%" (
if defined WHISPER_BIN ( set "WHISPER_FOUND=1"
if exist "%WHISPER_BIN%" ( set "WHISPER_LOCATION=%WHISPER_BIN% (env var)"
set "WHISPER_FOUND=1" )
set "WHISPER_LOCATION=%WHISPER_BIN% (env var)" )
) if not defined WHISPER_FOUND (
) where whisper-cli.exe >nul 2>&1
if not defined WHISPER_FOUND ( if not errorlevel 1 (
where whisper-cli.exe >nul 2>&1 set "WHISPER_FOUND=1"
if not errorlevel 1 ( for /f "delims=" %%p in ('where whisper-cli.exe 2^>nul') do set "WHISPER_LOCATION=%%p (PATH)"
set "WHISPER_FOUND=1" )
for /f "delims=" %%p in ('where whisper-cli.exe 2^>nul') do set "WHISPER_LOCATION=%%p (PATH)" )
) if not defined WHISPER_FOUND (
) if exist "whisper-cli.exe" (
if not defined WHISPER_FOUND ( set "WHISPER_FOUND=1"
if exist "whisper-cli.exe" ( set "WHISPER_BIN=whisper-cli.exe"
set "WHISPER_FOUND=1" set "WHISPER_LOCATION=.\whisper-cli.exe (local)"
set "WHISPER_BIN=whisper-cli.exe" )
set "WHISPER_LOCATION=.\whisper-cli.exe (local)" )
) if not defined WHISPER_FOUND (
) if exist "whisper-bin\whisper-cli.exe" (
if not defined WHISPER_FOUND ( set "WHISPER_FOUND=1"
if exist "whisper-bin\whisper-cli.exe" ( set "WHISPER_BIN=whisper-bin\whisper-cli.exe"
set "WHISPER_FOUND=1" set "WHISPER_LOCATION=whisper-bin\whisper-cli.exe (auto-installed)"
set "WHISPER_BIN=whisper-bin\whisper-cli.exe" )
set "WHISPER_LOCATION=whisper-bin\whisper-cli.exe (auto-installed)" )
) if not defined WHISPER_FOUND (
) if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" (
if not defined WHISPER_FOUND ( set "WHISPER_FOUND=1"
if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" ( set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe"
set "WHISPER_FOUND=1" set "WHISPER_LOCATION=whisper.cpp\build\... (local build)"
set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe" )
set "WHISPER_LOCATION=whisper.cpp\build\... (local build)" )
)
) if defined WHISPER_FOUND (
echo [OK] whisper-cli !WHISPER_LOCATION!
if defined WHISPER_FOUND ( ) else (
echo [OK] whisper-cli !WHISPER_LOCATION! echo [--] whisper-cli Not found - will auto-download
) else ( set "NEED_WHISPER=1"
echo [--] whisper-cli Not found - will auto-download )
set "NEED_WHISPER=1"
) :: --- Whisper model ---
if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin"
:: --- Whisper model --- if exist "%WHISPER_MODEL%" (
if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin" for %%F in ("%WHISPER_MODEL%") do (
if exist "%WHISPER_MODEL%" ( set /a "MODEL_MB=%%~zF / 1048576"
for %%F in ("%WHISPER_MODEL%") do ( )
set /a "MODEL_MB=%%~zF / 1048576" echo [OK] Whisper model %WHISPER_MODEL% (!MODEL_MB! MB^)
) ) else (
echo [OK] Whisper model %WHISPER_MODEL% (!MODEL_MB! MB^) echo [--] Whisper model Not found - will auto-download (~500 MB^)
) else ( set "NEED_MODEL=1"
echo [--] Whisper model Not found - will auto-download (~500 MB^) )
set "NEED_MODEL=1"
)
:: --- Disk space ---
:: --- Vulkan GPU support --- echo.
set "VULKAN_FOUND=" for /f "tokens=3" %%a in ('dir /-c "%~dp0." 2^>nul ^| findstr /c:"bytes free"') do (
where vulkaninfo >nul 2>&1 set /a "FREE_GB=%%a / 1073741824" 2>nul
if not errorlevel 1 ( )
set "VULKAN_FOUND=1" if defined FREE_GB (
echo [OK] Vulkan SDK Installed if !FREE_GB! LSS 50 (
) else ( echo [!!] Disk space ~!FREE_GB! GB free (need ~50 GB for all audio + transcripts^)
if exist "%VULKAN_SDK%\Bin\vulkaninfo.exe" ( ) else (
set "VULKAN_FOUND=1" echo [OK] Disk space ~!FREE_GB! GB free
echo [OK] Vulkan SDK %VULKAN_SDK% )
) else ( )
echo [!!] Vulkan SDK Not detected (whisper.cpp may use CPU fallback^)
echo Install from: https://vulkan.lunarg.com/sdk/home echo.
)
) :: --- Stop if .env is broken (can't auto-fix that) ---
if not defined PREREQ_OK (
:: --- Disk space --- echo ============================================================
echo. echo MISSING PREREQUISITES - fix the [X] items above and re-run.
for /f "tokens=3" %%a in ('dir /-c "%~dp0." 2^>nul ^| findstr /c:"bytes free"') do ( echo ============================================================
set /a "FREE_GB=%%a / 1073741824" 2>nul pause
) exit /b 1
if defined FREE_GB ( )
if !FREE_GB! LSS 50 (
echo [!!] Disk space ~!FREE_GB! GB free (need ~50 GB for all audio + transcripts^) :: ============================================================
) else ( :: AUTO-INSTALL MISSING COMPONENTS
echo [OK] Disk space ~!FREE_GB! GB free :: ============================================================
) if defined NEED_FFMPEG (
) echo ============================================================
echo Auto-downloading ffmpeg...
echo. echo ============================================================
python setup_whisper.py ffmpeg
:: --- Stop if .env is broken (can't auto-fix that) --- if errorlevel 1 (
if not defined PREREQ_OK ( echo.
echo ============================================================ echo ERROR: Could not install ffmpeg.
echo MISSING PREREQUISITES - fix the [X] items above and re-run. echo Download manually from: https://www.gyan.dev/ffmpeg/builds/
echo ============================================================ echo Extract ffmpeg.exe to ffmpeg-bin\ and re-run.
pause pause
exit /b 1 exit /b 1
) )
if exist ".ffmpeg_bin_path" del .ffmpeg_bin_path
:: ============================================================ echo.
:: AUTO-INSTALL MISSING COMPONENTS )
:: ============================================================
if defined NEED_FFMPEG ( :: Add ffmpeg-bin to PATH if it exists
echo ============================================================ if exist "ffmpeg-bin\ffmpeg.exe" (
echo Auto-downloading ffmpeg... set "PATH=%~dp0ffmpeg-bin;%PATH%"
echo ============================================================ )
python setup_whisper.py ffmpeg
if errorlevel 1 ( if defined NEED_WHISPER (
echo. echo ============================================================
echo ERROR: Could not install ffmpeg. echo Auto-downloading whisper.cpp (CPU build^)...
echo Download manually from: https://www.gyan.dev/ffmpeg/builds/ echo ============================================================
echo Extract ffmpeg.exe to ffmpeg-bin\ and re-run. python setup_whisper.py whisper
pause if errorlevel 1 (
exit /b 1 echo.
) echo ERROR: Failed to auto-download whisper.cpp.
if exist ".ffmpeg_bin_path" del .ffmpeg_bin_path echo Download manually from: https://github.com/ggml-org/whisper.cpp/releases
echo. pause
) exit /b 1
)
:: Add ffmpeg-bin to PATH if it exists :: Read the path that setup_whisper.py wrote
if exist "ffmpeg-bin\ffmpeg.exe" ( if exist ".whisper_bin_path" (
set "PATH=%~dp0ffmpeg-bin;%PATH%" set /p WHISPER_BIN=<.whisper_bin_path
) del .whisper_bin_path
echo Using: !WHISPER_BIN!
if defined NEED_WHISPER ( )
echo ============================================================ echo.
echo Auto-downloading whisper.cpp (Vulkan build^)... )
echo ============================================================
python setup_whisper.py whisper if defined NEED_MODEL (
if errorlevel 1 ( echo ============================================================
echo. echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)...
echo ERROR: Failed to auto-download whisper.cpp. echo This will take a few minutes depending on your connection.
echo Download manually from: https://github.com/ggml-org/whisper.cpp/releases echo ============================================================
pause python setup_whisper.py model
exit /b 1 if errorlevel 1 (
) echo.
:: Read the path that setup_whisper.py wrote echo ERROR: Failed to download model.
if exist ".whisper_bin_path" ( echo Download manually from:
set /p WHISPER_BIN=<.whisper_bin_path echo https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin
del .whisper_bin_path echo Save to: models\ggml-medium-q5_0.bin
echo Using: !WHISPER_BIN! pause
) exit /b 1
echo. )
) echo.
)
if defined NEED_MODEL (
echo ============================================================ echo All prerequisites OK!
echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)... echo.
echo This will take a few minutes depending on your connection. echo ============================================================
echo ============================================================ echo Starting pipeline...
python setup_whisper.py model echo ============================================================
if errorlevel 1 ( echo.
echo.
echo ERROR: Failed to download model. :: ============================================================
echo Download manually from: :: STEP 1: VENV + DEPENDENCIES
echo https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin :: ============================================================
echo Save to: models\ggml-medium-q5_0.bin if not exist ".venv\Scripts\python.exe" (
pause echo [1/4] Creating Python virtual environment...
exit /b 1 python -m venv .venv
) if errorlevel 1 (
echo. echo ERROR: Failed to create venv.
) pause
exit /b 1
echo All prerequisites OK! )
echo. echo Done.
echo ============================================================ ) else (
echo Starting pipeline... echo [1/4] Virtual environment already exists.
echo ============================================================ )
echo.
echo [2/4] Installing Python dependencies...
:: ============================================================ .venv\Scripts\pip install -q -r requirements.txt
:: STEP 1: VENV + DEPENDENCIES if errorlevel 1 (
:: ============================================================ echo ERROR: Failed to install dependencies.
if not exist ".venv\Scripts\python.exe" ( pause
echo [1/4] Creating Python virtual environment... exit /b 1
python -m venv .venv )
if errorlevel 1 ( echo Done.
echo ERROR: Failed to create venv.
pause :: ============================================================
exit /b 1 :: STEP 2: DOWNLOAD
) :: ============================================================
echo Done. echo.
) else ( echo [3/4] Downloading audio files...
echo [1/4] Virtual environment already exists. echo ============================================================
) .venv\Scripts\python download.py
if errorlevel 1 (
echo [2/4] Installing Python dependencies... echo.
.venv\Scripts\pip install -q -r requirements.txt echo WARNING: Some downloads failed. Check download_errors.log
if errorlevel 1 ( echo Press any key to continue to transcription anyway, or Ctrl+C to abort.
echo ERROR: Failed to install dependencies. pause >nul
pause )
exit /b 1
) :: ============================================================
echo Done. :: STEP 3: TRANSCRIBE
:: ============================================================
:: ============================================================ echo.
:: STEP 2: DOWNLOAD echo [4/4] Transcribing with whisper.cpp...
:: ============================================================ echo ============================================================
echo. echo Using: %WHISPER_BIN%
echo [3/4] Downloading audio files... echo Model: %WHISPER_MODEL%
echo ============================================================ echo.
.venv\Scripts\python download.py
if errorlevel 1 ( if "%~1"=="" (
echo. .venv\Scripts\python transcribe.py
echo WARNING: Some downloads failed. Check download_errors.log ) else (
echo Press any key to continue to transcription anyway, or Ctrl+C to abort. echo Modules filter: %~1
pause >nul .venv\Scripts\python transcribe.py --modules %~1
) )
if errorlevel 1 (
:: ============================================================ echo.
:: STEP 3: TRANSCRIBE echo WARNING: Some transcriptions failed. Check transcribe_errors.log
:: ============================================================ )
echo.
echo [4/4] Transcribing with whisper.cpp... :: ============================================================
echo ============================================================ :: DONE
echo Using: %WHISPER_BIN% :: ============================================================
echo Model: %WHISPER_MODEL% echo.
echo. echo ============================================================
echo Pipeline complete!
if "%~1"=="" ( echo - Audio files: audio\
.venv\Scripts\python transcribe.py echo - Transcripts: transcripts\
) else ( echo - Manifest: manifest.json
echo Modules filter: %~1 echo.
.venv\Scripts\python transcribe.py --modules %~1 echo Next step: generate summaries from WSL2 with Claude Code
) echo python summarize.py
if errorlevel 1 ( echo ============================================================
echo. pause
echo WARNING: Some transcriptions failed. Check transcribe_errors.log
)
:: ============================================================
:: DONE
:: ============================================================
echo.
echo ============================================================
echo Pipeline complete!
echo - Audio files: audio\
echo - Transcripts: transcripts\
echo - Manifest: manifest.json
echo.
echo Next step: generate summaries from WSL2 with Claude Code
echo python summarize.py
echo ============================================================
pause

View File

@@ -1,5 +1,5 @@
""" """
Auto-download and setup whisper.cpp (Vulkan) + model for Windows. Auto-download and setup whisper.cpp (CPU build) + model for Windows.
Called by run.bat when prerequisites are missing. Called by run.bat when prerequisites are missing.
""" """
@@ -142,8 +142,7 @@ def try_official_vulkan_build() -> str | None:
tag = release.get("tag_name", "unknown") tag = release.get("tag_name", "unknown")
print(f" Official release: {tag}") print(f" Official release: {tag}")
# Priority: vulkan > noavx (cpu-only, no CUDA deps) > skip CUDA entirely # Priority: CPU build (no GPU deps needed)
vulkan_asset = None
cpu_asset = None cpu_asset = None
for asset in release.get("assets", []): for asset in release.get("assets", []):
name = asset["name"].lower() name = asset["name"].lower()
@@ -152,28 +151,22 @@ def try_official_vulkan_build() -> str | None:
# Must be Windows # Must be Windows
if "win" not in name and "x64" not in name: if "win" not in name and "x64" not in name:
continue continue
# Absolutely skip CUDA builds - they won't work on AMD # Skip GPU builds entirely
if "cuda" in name: if "cuda" in name or "vulkan" in name or "openblas" in name:
continue continue
if "vulkan" in name: if "noavx" not in name:
vulkan_asset = asset
break
if "noavx" not in name and "openblas" not in name:
cpu_asset = asset cpu_asset = asset
break
chosen = vulkan_asset or cpu_asset if not cpu_asset:
if not chosen: print(" No CPU build found in official releases")
print(" No Vulkan or CPU-only build found in official releases")
print(" Available assets:") print(" Available assets:")
for asset in release.get("assets", []): for asset in release.get("assets", []):
print(f" - {asset['name']}") print(f" - {asset['name']}")
return None return None
if vulkan_asset: print(f" Found CPU build: {cpu_asset['name']}")
print(f" Found official Vulkan build: {chosen['name']}") chosen = cpu_asset
else:
print(f" No Vulkan build in official release, using CPU build: {chosen['name']}")
print(f" (Will work but without GPU acceleration)")
zip_path = Path(chosen["name"]) zip_path = Path(chosen["name"])
download_file(chosen["browser_download_url"], zip_path, chosen["name"]) download_file(chosen["browser_download_url"], zip_path, chosen["name"])
@@ -182,29 +175,12 @@ def try_official_vulkan_build() -> str | None:
def setup_whisper_bin() -> str | None: def setup_whisper_bin() -> str | None:
"""Download whisper.cpp Vulkan release. Returns path to whisper-cli.exe.""" """Download whisper.cpp CPU release. Returns path to whisper-cli.exe."""
whisper_exe = WHISPER_DIR / "whisper-cli.exe" whisper_exe = WHISPER_DIR / "whisper-cli.exe"
if whisper_exe.exists(): if whisper_exe.exists():
# Check if it's a CUDA build (has CUDA DLLs but no Vulkan DLL) print(f" whisper-cli.exe already exists at {whisper_exe}")
has_cuda = (WHISPER_DIR / "ggml-cuda.dll").exists() return str(whisper_exe)
has_vulkan = (WHISPER_DIR / "ggml-vulkan.dll").exists()
if has_cuda and not has_vulkan:
print(f" WARNING: Existing install is a CUDA build (won't work on AMD GPU)")
print(f" Removing and re-downloading Vulkan build...")
import shutil
shutil.rmtree(WHISPER_DIR)
else:
print(f" whisper-cli.exe already exists at {whisper_exe}")
return str(whisper_exe)
# Strategy: try community Vulkan build first (reliable for AMD),
# then fall back to official release
exe_path = try_community_vulkan_build()
if exe_path:
print(f"\n whisper-cli.exe ready at: {exe_path} (Vulkan)")
return exe_path
print("\n Community build failed, trying official release...")
exe_path = try_official_vulkan_build() exe_path = try_official_vulkan_build()
if exe_path: if exe_path:
print(f"\n whisper-cli.exe ready at: {exe_path}") print(f"\n whisper-cli.exe ready at: {exe_path}")
@@ -212,7 +188,6 @@ def setup_whisper_bin() -> str | None:
print("\n ERROR: Could not download whisper.cpp") print("\n ERROR: Could not download whisper.cpp")
print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases") print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases")
print(" Build from source with: cmake -DGGML_VULKAN=1")
return None return None