Un singur set de scripturi acum rulează pe orice curs configurat în courses.py. Master rămâne la rădăcina repo (backward-compat M1-M6); cursuri noi (ex. practitioner la shop.cursnlp.ro) primesc un root dedicat (nlp-practitioner/) cu propriile artefacte. - courses.py: config dict (master, practitioner) + course_paths() + validate_manifest_course() (manifest fără course_key = master). - download.py: --course + --modules; trei tipuri de lecții (audio HTTP, Vimeo iframe via yt-dlp audio-only, text-only cu captură HTML); merge cu manifest existent în loc de replace; strip [Audio] pentru backward-compat paths. - transcribe.py: --course + --modules; skip type==text; path-uri prin course_paths(); validare course_key. - summarize.py: --course + --compile; template prompt folosește course['name']; scrie SUPORT_CURS.md cu LF explicit (WSL2 baseline). - md_to_pdf.py: --course resolv-ă summaries_dir / pdf_dir per curs. - run.bat: detectează master|practitioner ca primul argument, propagă --course la sub-scripturi; backward-compat run.bat [modules]. - requirements.txt: + yt-dlp. - .gitignore: nlp-practitioner/audio/, audio_wav/, scratch_recon.py, tmp_recon/. - tests/test_regression.sh: 5 gate-uri read-only (import, schema, disk-coherence, SUPORT_CURS byte-identic, cross-course isolation). Regression curs master: PASS (manifest + SUPORT_CURS.md hash identic cu baseline /tmp/suport_before.md). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
340 lines
11 KiB
Batchfile
340 lines
11 KiB
Batchfile
@echo off
setlocal enabledelayedexpansion
cd /d "%~dp0"

:: ============================================================
::  Command-line handling: optional course key, optional module filter
::
::    run.bat                    master course, every module
::    run.bat 1-3                master course, modules 1-3
::    run.bat practitioner       practitioner course, every module
::    run.bat practitioner 1-3   practitioner course, modules 1-3
::
::  The first argument counts as a course key only when it equals one of
::  the known names (case-insensitive). In that case the second argument
::  becomes the module filter. Otherwise the first argument itself is the
::  module filter and the course stays "master".
:: ============================================================
set "COURSE_KEY=master"
set "MODULE_FILTER=%~1"
for %%K in (master practitioner) do (
    if /i "%~1"=="%%K" (
        set "COURSE_KEY=%%K"
        set "MODULE_FILTER=%~2"
    )
)
|
|
|
|
echo ============================================================
echo NLP Course Pipeline (course: %COURSE_KEY%)
echo ============================================================
echo.

:: ============================================================
:: PREREQUISITES CHECK
:: ============================================================
echo Checking prerequisites...
echo.
:: PREREQ_OK is cleared by checks that cannot be repaired automatically.
:: The NEED_* flags are raised by later checks for components that the
:: auto-install section can fetch.
set "NEED_MODEL="
set "NEED_WHISPER="
set "PREREQ_OK=1"

:: --- Python ---
:: python.exe is never started blindly: the Microsoft Store stub terminates
:: cmd.exe. Preference order is the py launcher, then the first python.exe
:: reported by where whose path does not contain WindowsApps.
set "PYTHON_CMD="
where py >nul 2>&1 && set "PYTHON_CMD=py"
if defined PYTHON_CMD (
    for /f "tokens=2" %%v in ('py --version 2^>^&1') do echo [OK] Python %%v (py launcher^)
    goto :python_located
)

for /f "delims=" %%p in ('where python 2^>nul ^| findstr /v /i "WindowsApps"') do (
    rem Only the first surviving candidate is kept
    if not defined PYTHON_CMD (
        set "PYTHON_CMD=%%p"
        for /f "tokens=2" %%v in ('"%%p" --version 2^>^&1') do echo [OK] Python %%v
    )
)
if defined PYTHON_CMD goto :python_located

:: Nothing usable was found: explain and abort, since every later step needs Python.
echo [X] Python NOT FOUND
echo The Microsoft Store stub does not count as a real Python install.
echo Install from: https://www.python.org/downloads/
echo Make sure to check "Add Python to PATH" during install.
echo.
echo Cannot continue without Python. Install it and re-run.
pause
exit /b 1

:python_located
|
|
|
|
:: --- .env credentials ---
:: Each course reads its own pair of variables from .env. Select the pair
:: for the chosen course, then require BOTH names to be followed by at least
:: one character. Previously only the username was tested, so a blank
:: password passed this check even though the hint asks for both values.
:: The search string NAME=. is a findstr regular expression: the name, an
:: equals sign, and one more character of any kind.
if /i "%COURSE_KEY%"=="practitioner" (
    set "ENV_USER=PRACTITIONER_USERNAME"
    set "ENV_PASS=PRACTITIONER_PASSWORD"
) else (
    set "ENV_USER=COURSE_USERNAME"
    set "ENV_PASS=COURSE_PASSWORD"
)
set "ENV_INCOMPLETE="
if exist ".env" (
    findstr /m "!ENV_USER!=." ".env" >nul 2>&1
    if errorlevel 1 (
        echo [X] .env File exists but !ENV_USER! is empty
        set "ENV_INCOMPLETE=1"
    )
    findstr /m "!ENV_PASS!=." ".env" >nul 2>&1
    if errorlevel 1 (
        echo [X] .env File exists but !ENV_PASS! is empty
        set "ENV_INCOMPLETE=1"
    )
    if defined ENV_INCOMPLETE (
        echo Edit .env and set !ENV_USER! and !ENV_PASS!.
        set "PREREQ_OK="
    ) else (
        echo [OK] .env Credentials configured for %COURSE_KEY%
    )
) else (
    echo [X] .env NOT FOUND
    echo Create .env with:
    echo !ENV_USER!=your_email
    echo !ENV_PASS!=your_password
    set "PREREQ_OK="
)
|
|
|
|
:: --- ffmpeg ---
:: Search order: PATH, then .\ffmpeg.exe, then ffmpeg-bin\ffmpeg.exe.
:: ffmpeg-bin is the folder this script later prepends to PATH and the one
:: the manual-install hint tells the user to extract into. It was not
:: checked here before, so an existing ffmpeg-bin copy was reported as
:: missing and the install step was run again.
:: NEED_FFMPEG is raised only when none of the three locations has ffmpeg.
set "FFMPEG_FOUND="
set "NEED_FFMPEG="
where ffmpeg >nul 2>&1
if not errorlevel 1 (
    set "FFMPEG_FOUND=1"
    for /f "delims=" %%p in ('where ffmpeg 2^>nul') do set "FFMPEG_LOCATION=%%p"
    echo [OK] ffmpeg !FFMPEG_LOCATION!
) else if exist "ffmpeg.exe" (
    set "FFMPEG_FOUND=1"
    echo [OK] ffmpeg .\ffmpeg.exe (local^)
) else if exist "ffmpeg-bin\ffmpeg.exe" (
    set "FFMPEG_FOUND=1"
    echo [OK] ffmpeg ffmpeg-bin\ffmpeg.exe (auto-installed^)
) else (
    echo [--] ffmpeg Not found - will auto-install
    set "NEED_FFMPEG=1"
)
|
|
|
|
:: --- whisper-cli.exe ---
:: Search order:
::   1. WHISPER_BIN from the environment, when the file it names exists
::   2. whisper-cli.exe as resolved by where
::   3. .\whisper-cli.exe
::   4. whisper-bin\whisper-cli.exe
::   5. whisper.cpp\build\bin\Release\whisper-cli.exe
:: Every successful branch leaves WHISPER_BIN pointing at the binary that
:: was found. The where branch used to skip that, so WHISPER_BIN stayed
:: empty, or kept a stale value that had just failed the existence test,
:: and the later Using line printed a blank or nonexistent path.
:: NEED_WHISPER is raised when no candidate exists.
set "WHISPER_FOUND="
set "WHISPER_LOCATION="
if defined WHISPER_BIN (
    if exist "%WHISPER_BIN%" (
        set "WHISPER_FOUND=1"
        set "WHISPER_LOCATION=%WHISPER_BIN% (env var)"
    )
)
if not defined WHISPER_FOUND (
    set "WHISPER_ON_PATH="
    rem Keep the first match reported by where
    for /f "delims=" %%p in ('where whisper-cli.exe 2^>nul') do if not defined WHISPER_ON_PATH set "WHISPER_ON_PATH=%%p"
    if defined WHISPER_ON_PATH (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=!WHISPER_ON_PATH!"
        set "WHISPER_LOCATION=!WHISPER_ON_PATH! (PATH)"
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper-cli.exe"
        set "WHISPER_LOCATION=.\whisper-cli.exe (local)"
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper-bin\whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper-bin\whisper-cli.exe"
        set "WHISPER_LOCATION=whisper-bin\whisper-cli.exe (auto-installed)"
    )
)
if not defined WHISPER_FOUND (
    if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" (
        set "WHISPER_FOUND=1"
        set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe"
        set "WHISPER_LOCATION=whisper.cpp\build\... (local build)"
    )
)

if defined WHISPER_FOUND (
    echo [OK] whisper-cli !WHISPER_LOCATION!
) else (
    echo [--] whisper-cli Not found - will auto-download
    set "NEED_WHISPER=1"
)
|
|
|
|
:: --- Whisper model ---
:: WHISPER_MODEL may be preset in the environment; otherwise the default
:: path below is used. The size is reported in decimal megabytes by dropping
:: the last six digits of the byte count. Arithmetic with set /a is avoided
:: because it is limited to signed 32-bit values and fails for a model file
:: of 2 GiB or more. A file under one million bytes is reported as 0 MB.
:: NEED_MODEL is raised when the file does not exist.
if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin"
if exist "%WHISPER_MODEL%" (
    set "MODEL_BYTES="
    set "MODEL_MB="
    for %%F in ("%WHISPER_MODEL%") do set "MODEL_BYTES=%%~zF"
    if defined MODEL_BYTES set "MODEL_MB=!MODEL_BYTES:~0,-6!"
    if not defined MODEL_MB set "MODEL_MB=0"
    echo [OK] Whisper model %WHISPER_MODEL% (!MODEL_MB! MB^)
) else (
    echo [--] Whisper model Not found - will auto-download (~500 MB^)
    set "NEED_MODEL=1"
)
|
|
|
|
|
|
:: --- Disk space ---
:: Free space comes from the summary line of dir: the third token of the
:: line containing the words bytes free. NOTE(review): that text match
:: assumes English-language dir output - confirm for localized Windows.
:: The byte count is converted to whole decimal gigabytes by dropping its
:: last nine digits. The earlier set /a division could not work here: set /a
:: only handles signed 32-bit numbers, so any drive with more than about
:: 2 GB free made it fail, the error was hidden by the redirect, FREE_GB
:: stayed undefined and the whole check was skipped.
:: The warning tag is written with escaped exclamation marks because
:: delayed expansion would otherwise consume them and garble the line.
echo.
set "FREE_BYTES="
set "FREE_GB="
for /f "tokens=3" %%a in ('dir /-c "%~dp0." 2^>nul ^| findstr /c:"bytes free"') do set "FREE_BYTES=%%a"
if defined FREE_BYTES (
    set "FREE_GB=!FREE_BYTES:~0,-9!"
    rem Fewer than ten digits means less than one gigabyte free
    if not defined FREE_GB set "FREE_GB=0"
)
if defined FREE_GB (
    if !FREE_GB! LSS 50 (
        echo [^^!^^!] Disk space ~!FREE_GB! GB free (need ~50 GB for all audio + transcripts^)
    ) else (
        echo [OK] Disk space ~!FREE_GB! GB free
    )
)
|
|
|
|
echo.

:: --- Abort here when a check cleared PREREQ_OK; those items cannot be auto-fixed ---
if defined PREREQ_OK goto :prereqs_satisfied
echo ============================================================
echo MISSING PREREQUISITES - fix the [X] items above and re-run.
echo ============================================================
pause
exit /b 1

:prereqs_satisfied
|
|
|
|
:: ============================================================
:: AUTO-INSTALL MISSING COMPONENTS
:: ============================================================
:: ffmpeg is fetched through setup_whisper.py only when the prerequisite
:: scan raised NEED_FFMPEG. A failed install stops the script.
if not defined NEED_FFMPEG goto :ffmpeg_install_done
echo ============================================================
echo Auto-downloading ffmpeg...
echo ============================================================
"!PYTHON_CMD!" setup_whisper.py ffmpeg
if not errorlevel 1 goto :ffmpeg_install_ok
echo.
echo ERROR: Could not install ffmpeg.
echo Download manually from: https://www.gyan.dev/ffmpeg/builds/
echo Extract ffmpeg.exe to ffmpeg-bin\ and re-run.
pause
exit /b 1

:ffmpeg_install_ok
:: Remove the .ffmpeg_bin_path marker file if one is present
if exist ".ffmpeg_bin_path" del .ffmpeg_bin_path
echo.

:ffmpeg_install_done
:: Put ffmpeg-bin at the front of PATH whenever it holds an ffmpeg.exe
if exist "ffmpeg-bin\ffmpeg.exe" set "PATH=%~dp0ffmpeg-bin;%PATH%"
|
|
|
|
:: whisper.cpp is fetched through setup_whisper.py only when the
:: prerequisite scan raised NEED_WHISPER. A failed download stops the script.
if not defined NEED_WHISPER goto :whisper_install_done
echo ============================================================
echo Auto-downloading whisper.cpp (CPU build^)...
echo ============================================================
"!PYTHON_CMD!" setup_whisper.py whisper
if not errorlevel 1 goto :whisper_install_ok
echo.
echo ERROR: Failed to auto-download whisper.cpp.
echo Download manually from: https://github.com/ggml-org/whisper.cpp/releases
pause
exit /b 1

:whisper_install_ok
:: Load the binary path from the file setup_whisper.py wrote, then delete that file
if exist ".whisper_bin_path" (
    set /p WHISPER_BIN=<.whisper_bin_path
    del .whisper_bin_path
    echo Using: !WHISPER_BIN!
)
echo.

:whisper_install_done
|
|
|
|
:: The Whisper model is fetched through setup_whisper.py only when the
:: prerequisite scan raised NEED_MODEL. A failed download stops the script.
if not defined NEED_MODEL goto :model_install_done
echo ============================================================
echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)...
echo This will take a few minutes depending on your connection.
echo ============================================================
"!PYTHON_CMD!" setup_whisper.py model
if not errorlevel 1 goto :model_install_ok
echo.
echo ERROR: Failed to download model.
echo Download manually from:
echo https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin
echo Save to: models\ggml-medium-q5_0.bin
pause
exit /b 1

:model_install_ok
echo.

:model_install_done
|
|
|
|
:: The exclamation mark is escaped: with delayed expansion enabled a bare
:: one is consumed by the parser and never reaches the screen.
echo All prerequisites OK^^!
echo.
echo ============================================================
echo Starting pipeline...
echo ============================================================
echo.

:: ============================================================
:: STEP 1: VENV + DEPENDENCIES
:: ============================================================
:: The virtual environment is created once; later runs reuse it.
if exist ".venv\Scripts\python.exe" (
    echo [1/4] Virtual environment already exists.
    goto :venv_ready
)
echo [1/4] Creating Python virtual environment...
"!PYTHON_CMD!" -m venv .venv
if errorlevel 1 (
    echo ERROR: Failed to create venv.
    pause
    exit /b 1
)
echo Done.

:venv_ready
:: Dependencies are installed on every run from requirements.txt
echo [2/4] Installing Python dependencies...
.venv\Scripts\pip install -q -r requirements.txt
if errorlevel 1 (
    echo ERROR: Failed to install dependencies.
    pause
    exit /b 1
)
echo Done.
|
|
|
|
:: ============================================================
:: STEP 2: DOWNLOAD
:: ============================================================
:: download.py always receives the course key; the module filter is added
:: only when one was given on the command line. A non-zero exit is reported
:: as a warning and the pipeline carries on to transcription.
echo.
echo [3/4] Downloading audio files...
echo ============================================================
if defined MODULE_FILTER (
    echo Modules filter: !MODULE_FILTER!
    .venv\Scripts\python download.py --course %COURSE_KEY% --modules !MODULE_FILTER!
) else (
    .venv\Scripts\python download.py --course %COURSE_KEY%
)
if errorlevel 1 (
    echo.
    echo WARNING: Some downloads failed. Check download_errors.log
    echo Continuing to transcription automatically...
)
|
|
|
|
:: ============================================================
:: STEP 3: TRANSCRIBE
:: ============================================================
:: transcribe.py always receives the course key; the module filter is added
:: only when one was given on the command line. A non-zero exit is reported
:: as a warning and does not stop the script.
echo.
echo [4/4] Transcribing with whisper.cpp...
echo ============================================================
echo Using: %WHISPER_BIN%
echo Model: %WHISPER_MODEL%
echo.

if defined MODULE_FILTER (
    echo Modules filter: !MODULE_FILTER!
    .venv\Scripts\python transcribe.py --course %COURSE_KEY% --modules !MODULE_FILTER!
) else (
    .venv\Scripts\python transcribe.py --course %COURSE_KEY%
)
if errorlevel 1 (
    echo.
    echo WARNING: Some transcriptions failed. Check transcribe_errors.log
)
|
|
|
|
:: ============================================================
:: DONE
:: ============================================================
:: The summary now names the selected course's own locations and passes the
:: course to the suggested summarize.py command. Before, the master paths
:: and a bare summarize.py call were printed for every course.
:: Master output is unchanged apart from the restored exclamation mark.
:: NOTE(review): the practitioner root is taken to be nlp-practitioner\ with
:: audio\, transcripts\ and manifest.json laid out as for master - confirm
:: against course_paths in courses.py.
set "COURSE_ROOT="
set "SUMMARIZE_CMD=python summarize.py"
if /i "%COURSE_KEY%"=="practitioner" (
    set "COURSE_ROOT=nlp-practitioner\"
    set "SUMMARIZE_CMD=python summarize.py --course practitioner"
)
echo.
echo ============================================================
:: Escaped so that delayed expansion does not swallow the exclamation mark
echo Pipeline complete^^!
echo - Audio files: !COURSE_ROOT!audio\
echo - Transcripts: !COURSE_ROOT!transcripts\
echo - Manifest: !COURSE_ROOT!manifest.json
echo.
echo Next step: generate summaries from WSL2 with Claude Code
echo !SUMMARIZE_CMD!
echo ============================================================
|