@echo off
:: ============================================================
:: NLP Master - Download + Transcribe Pipeline
::
:: Checks prerequisites (Python, .env, ffmpeg, whisper-cli, model),
:: auto-installs the missing ones through setup_whisper.py, creates
:: a .venv, installs requirements.txt, then runs download.py and
:: transcribe.py.
::
:: Arguments:
::   %1  (optional) forwarded as "--modules <value>" to both
::       download.py and transcribe.py.
::
:: Optional environment overrides:
::   WHISPER_BIN    path to an existing whisper-cli.exe
::   WHISPER_MODEL  path to the model file
::                  (default: models\ggml-medium-q5_0.bin)
::
:: Exit code: 1 when a prerequisite is missing or a setup step
:: fails. Download / transcription failures only print a warning.
:: ============================================================
setlocal enabledelayedexpansion
cd /d "%~dp0"

echo ============================================================
echo NLP Master - Download + Transcribe Pipeline
echo ============================================================
echo.

:: ============================================================
:: PREREQUISITES CHECK
:: ============================================================
echo Checking prerequisites...
echo.

set "PREREQ_OK=1"
set "NEED_WHISPER="
set "NEED_MODEL="

:: --- Python ---
:: Avoid executing python.exe directly — the Microsoft Store stub terminates cmd.exe.
:: Use 'py' launcher first (safe), then find python.exe excluding WindowsApps stub.
set "PYTHON_CMD="
where py >nul 2>&1
if not errorlevel 1 (
    rem Only accept the launcher when it can actually start an interpreter;
    rem otherwise fall through to the python.exe search below.
    py --version >nul 2>&1
    if not errorlevel 1 (
        set "PYTHON_CMD=py"
        for /f "tokens=2" %%v in ('py --version 2^>^&1') do echo [OK] Python %%v (py launcher^)
    )
)
if not defined PYTHON_CMD (
    for /f "delims=" %%p in ('where python 2^>nul ^| findstr /v /i "WindowsApps"') do (
        rem keep only the first non-stub hit
        if not defined PYTHON_CMD (
            set "PYTHON_CMD=%%p"
            for /f "tokens=2" %%v in ('"%%p" --version 2^>^&1') do echo [OK] Python %%v
        )
    )
)
if not defined PYTHON_CMD (
    echo [X] Python NOT FOUND
    echo The Microsoft Store stub does not count as a real Python install.
    echo Install from: https://www.python.org/downloads/
    echo Make sure to check "Add Python to PATH" during install.
    echo.
    echo Cannot continue without Python. Install it and re-run.
    pause
    exit /b 1
)

:: --- .env credentials ---
:: The file must contain COURSE_USERNAME= followed by at least one character.
if exist ".env" (
    findstr /m "COURSE_USERNAME=." ".env" >nul 2>&1
    if errorlevel 1 (
        echo [X] .env File exists but COURSE_USERNAME is empty
        echo Edit .env and fill in your credentials.
        set "PREREQ_OK="
    ) else (
        echo [OK] .env Credentials configured
    )
) else (
    echo [X] .env NOT FOUND
    echo Create .env with:
    echo COURSE_USERNAME=your_email
    echo COURSE_PASSWORD=your_password
    set "PREREQ_OK="
)

:: --- ffmpeg ---
set "FFMPEG_FOUND="
set "NEED_FFMPEG="
set "FFMPEG_LOCATION="
where ffmpeg >nul 2>&1
if not errorlevel 1 (
    set "FFMPEG_FOUND=1"
    rem report the first match listed by 'where'
    for /f "delims=" %%p in ('where ffmpeg 2^>nul') do if not defined FFMPEG_LOCATION set "FFMPEG_LOCATION=%%p"
    echo [OK] ffmpeg !FFMPEG_LOCATION!
) else (
    if exist "ffmpeg.exe" (
        set "FFMPEG_FOUND=1"
        echo [OK] ffmpeg .\ffmpeg.exe (local^)
    ) else (
        echo [--] ffmpeg Not found - will auto-install
        set "NEED_FFMPEG=1"
    )
)

:: --- whisper-cli.exe ---
:: Search order: WHISPER_BIN env var, PATH, script folder, whisper-bin\,
:: local whisper.cpp build. Every hit except the env-var one (which is
:: already correct) records the chosen binary in WHISPER_BIN.
set "WHISPER_FOUND="
set "WHISPER_LOCATION="
if defined WHISPER_BIN (
    if exist "%WHISPER_BIN%" (
        set "WHISPER_FOUND=1"
        set "WHISPER_LOCATION=%WHISPER_BIN% (env var)"
    )
)
if not defined WHISPER_FOUND (
    rem The loop body runs only when 'where' prints a match. Take the first
    rem one and store it in WHISPER_BIN so a stale or empty value is replaced.
    for /f "delims=" %%p in ('where whisper-cli.exe 2^>nul') do (
        if not defined WHISPER_FOUND (
            set "WHISPER_FOUND=1"
            set "WHISPER_BIN=%%p"
            set "WHISPER_LOCATION=%%p (PATH)"
        )
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper-cli.exe"
        set "WHISPER_LOCATION=.\whisper-cli.exe (local)"
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper-bin\whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper-bin\whisper-cli.exe"
        set "WHISPER_LOCATION=whisper-bin\whisper-cli.exe (auto-installed)"
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe"
        set "WHISPER_LOCATION=whisper.cpp\build\... (local build)"
    )
)
if defined WHISPER_FOUND (
    echo [OK] whisper-cli !WHISPER_LOCATION!
) else (
    echo [--] whisper-cli Not found - will auto-download
    set "NEED_WHISPER=1"
)

:: --- Whisper model ---
if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin"
if exist "%WHISPER_MODEL%" (
    for %%F in ("%WHISPER_MODEL%") do set "MODEL_BYTES=%%~zF"
    rem set /a is limited to signed 32-bit values, so a model larger than
    rem 2 GB cannot be divided directly. Drop the last three digits as text
    rem first, then scale by 1000/1048576 = 125/131072.
    set "MODEL_KB=!MODEL_BYTES:~0,-3!"
    set /a "MODEL_MB=MODEL_KB * 125 / 131072"
    rem delayed expansion keeps a ")" in the path from closing this block
    echo [OK] Whisper model !WHISPER_MODEL! (!MODEL_MB! MB^)
) else (
    echo [--] Whisper model Not found - will auto-download (~500 MB^)
    set "NEED_MODEL=1"
)

:: --- Disk space ---
:: The free-byte count reported by 'dir' normally exceeds the signed 32-bit
:: range of set /a, so the last six digits are removed as text before
:: dividing (1073741824 / 1000000 is about 1074).
echo.
set "FREE_GB="
for /f "tokens=3" %%a in ('dir /-c "%~dp0." 2^>nul ^| findstr /c:"bytes free"') do (
    set "FREE_BYTES=%%a"
    set "FREE_MB=!FREE_BYTES:~0,-6!"
    set /a "FREE_GB=FREE_MB / 1074" 2>nul
)
if defined FREE_GB (
    if !FREE_GB! LSS 50 (
        rem literal exclamation marks must be written as ^^! while delayed expansion is on
        echo [^^!^^!] Disk space ~!FREE_GB! GB free (need ~50 GB for all audio + transcripts^)
    ) else (
        echo [OK] Disk space ~!FREE_GB! GB free
    )
)
echo.

:: --- Stop if .env is broken (can't auto-fix that) ---
if not defined PREREQ_OK (
    echo ============================================================
    echo MISSING PREREQUISITES - fix the [X] items above and re-run.
    echo ============================================================
    pause
    exit /b 1
)

:: ============================================================
:: AUTO-INSTALL MISSING COMPONENTS
:: ============================================================
if defined NEED_FFMPEG (
    echo ============================================================
    echo Auto-downloading ffmpeg...
    echo ============================================================
    "!PYTHON_CMD!" setup_whisper.py ffmpeg
    if errorlevel 1 (
        echo.
        echo ERROR: Could not install ffmpeg.
        echo Download manually from: https://www.gyan.dev/ffmpeg/builds/
        echo Extract ffmpeg.exe to ffmpeg-bin\ and re-run.
        pause
        exit /b 1
    )
    if exist ".ffmpeg_bin_path" del .ffmpeg_bin_path
    echo.
)

:: Add ffmpeg-bin to PATH if it exists
if exist "ffmpeg-bin\ffmpeg.exe" (
    set "PATH=%~dp0ffmpeg-bin;%PATH%"
)

if defined NEED_WHISPER (
    echo ============================================================
    echo Auto-downloading whisper.cpp (CPU build^)...
    echo ============================================================
    "!PYTHON_CMD!" setup_whisper.py whisper
    if errorlevel 1 (
        echo.
        echo ERROR: Failed to auto-download whisper.cpp.
        echo Download manually from: https://github.com/ggml-org/whisper.cpp/releases
        pause
        exit /b 1
    )
    rem Read the path that setup_whisper.py wrote
    if exist ".whisper_bin_path" (
        set /p WHISPER_BIN=<.whisper_bin_path
        del .whisper_bin_path
        echo Using: !WHISPER_BIN!
    )
    echo.
)

if defined NEED_MODEL (
    echo ============================================================
    echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)...
    echo This will take a few minutes depending on your connection.
    echo ============================================================
    "!PYTHON_CMD!" setup_whisper.py model
    if errorlevel 1 (
        echo.
        echo ERROR: Failed to download model.
        echo Download manually from:
        echo https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin
        echo Save to: models\ggml-medium-q5_0.bin
        pause
        exit /b 1
    )
    echo.
)

echo All prerequisites OK!
echo.
echo ============================================================
echo Starting pipeline...
echo ============================================================
echo.

:: ============================================================
:: STEP 1: VENV + DEPENDENCIES
:: ============================================================
if not exist ".venv\Scripts\python.exe" (
    echo [1/4] Creating Python virtual environment...
    "!PYTHON_CMD!" -m venv .venv
    if errorlevel 1 (
        echo ERROR: Failed to create venv.
        pause
        exit /b 1
    )
    echo Done.
) else (
    echo [1/4] Virtual environment already exists.
)

echo [2/4] Installing Python dependencies...
.venv\Scripts\pip install -q -r requirements.txt
if errorlevel 1 (
    echo ERROR: Failed to install dependencies.
    pause
    exit /b 1
)
echo Done.

:: ============================================================
:: STEP 2: DOWNLOAD
:: ============================================================
echo.
echo [3/4] Downloading audio files...
echo ============================================================
if "%~1"=="" (
    .venv\Scripts\python download.py
) else (
    echo Modules filter: %~1
    .venv\Scripts\python download.py --modules %~1
)
:: A failed download is not fatal: transcription still runs on what exists.
if errorlevel 1 (
    echo.
    echo WARNING: Some downloads failed. Check download_errors.log
    echo Continuing to transcription automatically...
)

:: ============================================================
:: STEP 3: TRANSCRIBE
:: ============================================================
echo.
echo [4/4] Transcribing with whisper.cpp...
echo ============================================================
echo Using: %WHISPER_BIN%
echo Model: %WHISPER_MODEL%
echo.
if "%~1"=="" (
    .venv\Scripts\python transcribe.py
) else (
    echo Modules filter: %~1
    .venv\Scripts\python transcribe.py --modules %~1
)
if errorlevel 1 (
    echo.
    echo WARNING: Some transcriptions failed. Check transcribe_errors.log
)

:: ============================================================
:: DONE
:: ============================================================
echo.
echo ============================================================
echo Pipeline complete!
echo - Audio files: audio\
echo - Transcripts: transcripts\
echo - Manifest: manifest.json
echo.
echo Next step: generate summaries from WSL2 with Claude Code
echo python summarize.py
echo ============================================================