nlp-master/run.bat

@echo off
setlocal enabledelayedexpansion
cd /d "%~dp0"

:: ------------------------------------------------------------
:: Command-line handling: optional course name, optional module filter
::   run.bat                    -> course "master", every module
::   run.bat 1-3                -> course "master", modules 1-3
::   run.bat master 1-3         -> same, with the course named explicitly
::   run.bat practitioner       -> course "practitioner", every module
::   run.bat practitioner 1-3   -> course "practitioner", modules 1-3
:: ------------------------------------------------------------
:: Start from the backward-compatible reading: no course was given, so the
:: first argument (possibly empty) is the module filter.
set "COURSE_KEY=master"
set "MODULE_FILTER=%~1"

:: When the first argument names a known course (case-insensitive), it selects
:: the course and the module filter is taken from the second argument instead.
for %%c in (master practitioner) do (
    if /i "%~1"=="%%c" (
        set "COURSE_KEY=%%c"
        set "MODULE_FILTER=%~2"
    )
)

echo ============================================================
echo  NLP Course Pipeline  (course: %COURSE_KEY%)
echo ============================================================
echo.

:: ------------------------------------------------------------
:: Prerequisite checks
:: ------------------------------------------------------------
echo Checking prerequisites...
echo.

:: PREREQ_OK starts out set and is cleared by any check that cannot be fixed
:: automatically. The NEED_* flags start out cleared and are set by checks
:: whose missing component can be downloaded later in the script.
for %%f in (NEED_WHISPER NEED_MODEL) do set "%%f="
set "PREREQ_OK=1"

:: --- Python ---
:: Resolves PYTHON_CMD, the interpreter used for setup_whisper.py and for
:: creating the virtual environment.
:: Avoid executing python.exe directly — the Microsoft Store stub terminates cmd.exe.
:: Use 'py' launcher first (safe), then find python.exe excluding WindowsApps stub.
set "PYTHON_CMD="
where py >nul 2>&1
if not errorlevel 1 (
    rem The launcher can stay installed with no interpreter behind it, so only
    rem accept it when it can actually report a version.
    py --version >nul 2>&1
    if not errorlevel 1 (
        set "PYTHON_CMD=py"
        for /f "tokens=2" %%v in ('py --version 2^>^&1') do echo [OK] Python         %%v (py launcher^)
    )
)
:: Fallback: first python.exe on PATH that is not the WindowsApps stub.
if not defined PYTHON_CMD (
    for /f "delims=" %%p in ('where python 2^>nul ^| findstr /v /i "WindowsApps"') do (
        if not defined PYTHON_CMD (
            set "PYTHON_CMD=%%p"
            for /f "tokens=2" %%v in ('"%%p" --version 2^>^&1') do echo [OK] Python         %%v
        )
    )
)
:: Nothing usable was found: Python cannot be auto-installed, so stop here.
if not defined PYTHON_CMD (
    echo [X] Python         NOT FOUND
    echo     The Microsoft Store stub does not count as a real Python install.
    echo     Install from: https://www.python.org/downloads/
    echo     Make sure to check "Add Python to PATH" during install.
    echo.
    echo     Cannot continue without Python. Install it and re-run.
    pause
    exit /b 1
)

:: --- .env credentials ---
:: Each course uses its own env var pair. Pick the pair for the selected
:: course, then require BOTH names to carry a non-empty value in .env.
if /i "%COURSE_KEY%"=="practitioner" (
    set "ENV_USER=PRACTITIONER_USERNAME"
    set "ENV_PASS=PRACTITIONER_PASSWORD"
) else (
    set "ENV_USER=COURSE_USERNAME"
    set "ENV_PASS=COURSE_PASSWORD"
)
:: ENV_MISSING receives the name of the first variable found without a value.
:: The findstr pattern NAME=. means: the name, an equals sign, then at least
:: one more character on the same line.
set "ENV_MISSING="
if exist ".env" (
    findstr /m "!ENV_USER!=." ".env" >nul 2>&1
    if errorlevel 1 set "ENV_MISSING=!ENV_USER!"
    if not defined ENV_MISSING (
        findstr /m "!ENV_PASS!=." ".env" >nul 2>&1
        if errorlevel 1 set "ENV_MISSING=!ENV_PASS!"
    )
    if defined ENV_MISSING (
        echo [X] .env           File exists but !ENV_MISSING! is empty
        echo     Edit .env and set !ENV_USER! and !ENV_PASS!.
        set "PREREQ_OK="
    ) else (
        echo [OK] .env           Credentials configured for %COURSE_KEY%
    )
) else (
    echo [X] .env           NOT FOUND
    echo     Create .env with:
    echo       !ENV_USER!=your_email
    echo       !ENV_PASS!=your_password
    set "PREREQ_OK="
)

:: --- ffmpeg ---
:: Search order: PATH, ffmpeg.exe next to this script, then ffmpeg-bin\.
:: ffmpeg-bin\ is the folder the install-failure message tells the user to
:: extract into and the folder this script later prepends to PATH, so a copy
:: there counts as installed and must not trigger another download.
:: NEED_FFMPEG is set only when none of the three locations has ffmpeg.
set "FFMPEG_FOUND="
set "NEED_FFMPEG="
set "FFMPEG_LOCATION="
for /f "delims=" %%p in ('where ffmpeg 2^>nul') do (
    rem where lists matches in search order, so keep only the first one
    if not defined FFMPEG_LOCATION set "FFMPEG_LOCATION=%%p"
)
if defined FFMPEG_LOCATION (
    set "FFMPEG_FOUND=1"
    echo [OK] ffmpeg          !FFMPEG_LOCATION!
)
if not defined FFMPEG_FOUND if exist "ffmpeg.exe" (
    set "FFMPEG_FOUND=1"
    echo [OK] ffmpeg          .\ffmpeg.exe (local^)
)
if not defined FFMPEG_FOUND if exist "ffmpeg-bin\ffmpeg.exe" (
    set "FFMPEG_FOUND=1"
    echo [OK] ffmpeg          ffmpeg-bin\ffmpeg.exe (auto-installed^)
)
if not defined FFMPEG_FOUND (
    echo [--] ffmpeg          Not found - will auto-install
    set "NEED_FFMPEG=1"
)

:: --- whisper-cli.exe ---
:: Search order: WHISPER_BIN from the environment, PATH, the script folder,
:: whisper-bin\, then a local whisper.cpp Release build.
:: Every branch that succeeds leaves WHISPER_BIN pointing at the binary it
:: reported, so a stale WHISPER_BIN naming a missing file cannot survive a
:: successful lookup elsewhere.
:: NEED_WHISPER is set when no location has the binary.
set "WHISPER_FOUND="
set "WHISPER_LOCATION="
if defined WHISPER_BIN (
    if exist "%WHISPER_BIN%" (
        set "WHISPER_FOUND=1"
        set "WHISPER_LOCATION=%WHISPER_BIN% (env var)"
    )
)
if not defined WHISPER_FOUND (
    for /f "delims=" %%p in ('where whisper-cli.exe 2^>nul') do (
        rem where lists matches in search order, so keep only the first one
        if not defined WHISPER_FOUND (
            set "WHISPER_FOUND=1"
            set "WHISPER_BIN=%%p"
            set "WHISPER_LOCATION=%%p (PATH)"
        )
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper-cli.exe"
        set "WHISPER_LOCATION=.\whisper-cli.exe (local)"
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper-bin\whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper-bin\whisper-cli.exe"
        set "WHISPER_LOCATION=whisper-bin\whisper-cli.exe (auto-installed)"
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe"
        set "WHISPER_LOCATION=whisper.cpp\build\... (local build)"
    )
)

if defined WHISPER_FOUND (
    echo [OK] whisper-cli     !WHISPER_LOCATION!
) else (
    echo [--] whisper-cli     Not found - will auto-download
    set "NEED_WHISPER=1"
)

:: --- Whisper model ---
:: WHISPER_MODEL may come from the environment; otherwise the default path is
:: used. When the file exists its size is reported in MB, otherwise NEED_MODEL
:: is set so the model is downloaded later.
:: set /a is limited to signed 32-bit numbers, so a byte count of 2 GiB or more
:: cannot be divided directly. The last three digits are dropped as text
:: first (bytes to decimal KB), and 1 MiB is 1048.576 decimal KB, hence 1049.
if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin"
if exist "%WHISPER_MODEL%" (
    set "MODEL_BYTES="
    for %%F in ("%WHISPER_MODEL%") do set "MODEL_BYTES=%%~zF"
    if not defined MODEL_BYTES set "MODEL_BYTES=0"
    rem Files under 1000 bytes leave nothing after truncation, so fall back to 0
    set "MODEL_KB=!MODEL_BYTES:~0,-3!"
    if not defined MODEL_KB set "MODEL_KB=0"
    set /a "MODEL_MB=MODEL_KB / 1049"
    echo [OK] Whisper model   %WHISPER_MODEL% (!MODEL_MB! MB^)
) else (
    echo [--] Whisper model   Not found - will auto-download (~500 MB^)
    set "NEED_MODEL=1"
)


:: --- Disk space ---
:: Takes the free-byte figure from the "bytes free" line of DIR for the script
:: folder and warns below 50 GB. If DIR prints no line containing that phrase,
:: FREE_GB stays undefined and the check is skipped without a message.
:: set /a is limited to signed 32-bit numbers, so the byte count cannot be
:: divided directly. The last six digits are dropped as text first (bytes to
:: decimal MB), and 1 GiB is 1073.74 decimal MB, hence 1074.
echo.
set "FREE_BYTES="
set "FREE_GB="
for /f "tokens=3" %%a in ('dir /-c "%~dp0." 2^>nul ^| findstr /c:"bytes free"') do set "FREE_BYTES=%%a"
if defined FREE_BYTES (
    rem Less than 1 MB free leaves nothing after truncation, so fall back to 0
    set "FREE_MB=!FREE_BYTES:~0,-6!"
    if not defined FREE_MB set "FREE_MB=0"
    set /a "FREE_GB=FREE_MB / 1074" 2>nul
)
if defined FREE_GB (
    rem Literal exclamation marks need a doubled caret under delayed expansion
    if !FREE_GB! LSS 50 (
        echo [^^!^^!] Disk space      ~!FREE_GB! GB free (need ~50 GB for all audio + transcripts^)
    ) else (
        echo [OK] Disk space      ~!FREE_GB! GB free
    )
)

echo.

:: --- Abort when a check that cannot be repaired automatically has failed ---
:: PREREQ_OK is still defined only if none of the checks above cleared it.
if defined PREREQ_OK goto :prereq_gate_passed
echo ============================================================
echo  MISSING PREREQUISITES - fix the [X] items above and re-run.
echo ============================================================
pause
exit /b 1
:prereq_gate_passed

:: ============================================================
:: AUTO-INSTALL MISSING COMPONENTS
:: ============================================================
:: ffmpeg: run the setup helper only when the prerequisite check asked for it.
if not defined NEED_FFMPEG goto :ffmpeg_install_done
echo ============================================================
echo  Auto-downloading ffmpeg...
echo ============================================================
"!PYTHON_CMD!" setup_whisper.py ffmpeg
if errorlevel 1 (
    echo.
    echo ERROR: Could not install ffmpeg.
    echo Download manually from: https://www.gyan.dev/ffmpeg/builds/
    echo Extract ffmpeg.exe to ffmpeg-bin\ and re-run.
    pause
    exit /b 1
)
:: Remove the marker file if the helper run left one behind; it is not read here.
if exist ".ffmpeg_bin_path" del .ffmpeg_bin_path
echo.
:ffmpeg_install_done

:: Put a bundled ffmpeg-bin folder, when present, at the front of PATH.
if exist "ffmpeg-bin\ffmpeg.exe" set "PATH=%~dp0ffmpeg-bin;%PATH%"

:: whisper.cpp: run the setup helper only when the prerequisite check asked for it.
if not defined NEED_WHISPER goto :whisper_install_done
echo ============================================================
echo  Auto-downloading whisper.cpp (CPU build^)...
echo ============================================================
"!PYTHON_CMD!" setup_whisper.py whisper
if errorlevel 1 (
    echo.
    echo ERROR: Failed to auto-download whisper.cpp.
    echo Download manually from: https://github.com/ggml-org/whisper.cpp/releases
    pause
    exit /b 1
)
:: setup_whisper.py reports the binary location through .whisper_bin_path;
:: take its first line as WHISPER_BIN, then delete the file.
if exist ".whisper_bin_path" (
    set /p WHISPER_BIN=<.whisper_bin_path
    del .whisper_bin_path
    echo Using: !WHISPER_BIN!
)
echo.
:whisper_install_done

:: Whisper model: run the setup helper only when the prerequisite check asked for it.
if not defined NEED_MODEL goto :model_install_done
echo ============================================================
echo  Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)...
echo  This will take a few minutes depending on your connection.
echo ============================================================
"!PYTHON_CMD!" setup_whisper.py model
if errorlevel 1 (
    echo.
    echo ERROR: Failed to download model.
    echo Download manually from:
    echo   https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin
    echo Save to: models\ggml-medium-q5_0.bin
    pause
    exit /b 1
)
echo.
:model_install_done

:: A literal exclamation mark needs a doubled caret while delayed expansion is
:: enabled; written bare, it is stripped from the output.
echo All prerequisites OK^^!
echo.
echo ============================================================
echo  Starting pipeline...
echo ============================================================
echo.

:: ============================================================
:: STEP 1: VENV + DEPENDENCIES
:: ============================================================
:: The venv is created once with the interpreter found during the prerequisite
:: check; later runs reuse it when .venv\Scripts\python.exe is present.
if not exist ".venv\Scripts\python.exe" (
    echo [1/4] Creating Python virtual environment...
    "!PYTHON_CMD!" -m venv .venv
    if errorlevel 1 (
        echo ERROR: Failed to create venv.
        pause
        exit /b 1
    )
    echo      Done.
) else (
    echo [1/4] Virtual environment already exists.
)

:: Dependencies are installed on every run, using the venv's own pip.
echo [2/4] Installing Python dependencies...
.venv\Scripts\pip install -q -r requirements.txt
if errorlevel 1 (
    echo ERROR: Failed to install dependencies.
    pause
    exit /b 1
)
echo      Done.

:: ============================================================
:: STEP 2: DOWNLOAD
:: ============================================================
echo.
echo [3/4] Downloading audio files...
echo ============================================================
:: Build the argument list once: the course is always passed, and the module
:: filter is appended only when one was given on the command line.
set "DOWNLOAD_ARGS=--course %COURSE_KEY%"
if defined MODULE_FILTER (
    echo Modules filter: !MODULE_FILTER!
    set "DOWNLOAD_ARGS=!DOWNLOAD_ARGS! --modules !MODULE_FILTER!"
)
.venv\Scripts\python download.py !DOWNLOAD_ARGS!
:: A failed download is reported but does not stop the pipeline.
if errorlevel 1 (
    echo.
    echo WARNING: Some downloads failed. Check download_errors.log
    echo Continuing to transcription automatically...
)

:: ============================================================
:: STEP 3: TRANSCRIBE
:: ============================================================
echo.
echo [4/4] Transcribing with whisper.cpp...
echo ============================================================
echo Using: %WHISPER_BIN%
echo Model: %WHISPER_MODEL%
echo.

:: The module filter is passed through only when one was given.
if "!MODULE_FILTER!"=="" (
    .venv\Scripts\python transcribe.py --course %COURSE_KEY%
) else (
    echo Modules filter: !MODULE_FILTER!
    .venv\Scripts\python transcribe.py --course %COURSE_KEY% --modules !MODULE_FILTER!
)
:: A failed transcription is reported but the summary below is still printed.
if errorlevel 1 (
    echo.
    echo WARNING: Some transcriptions failed. Check transcribe_errors.log
)

:: ============================================================
:: DONE
:: ============================================================
:: A literal exclamation mark needs a doubled caret while delayed expansion is
:: enabled; written bare, it is stripped from the output.
echo.
echo ============================================================
echo  Pipeline complete^^!
echo  - Audio files:   audio\
echo  - Transcripts:   transcripts\
echo  - Manifest:      manifest.json
echo.
echo  Next step: generate summaries from WSL2 with Claude Code
echo    python summarize.py
echo ============================================================