Compare commits

...

5 Commits

Author SHA1 Message Date
45e72bc89b feat: adauga --modules filter si la download.py
Parametrul din run.bat (ex: 4-5) era transmis doar la transcribe.py.
Acum download.py primeste acelasi filtru si descarca doar modulele
specificate. Sintaxa acceptata: '4-5', '1,3', '1-3,5'.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:10:33 +02:00
7b18e8fc41 fix(run.bat): evita Microsoft Store Python stub care termina cmd.exe
Store stub-ul pentru python.exe termina procesul batch cand e apelat
direct. Fix: foloseste 'py' launcher (safe) sau detecteaza python.exe
real via 'where | findstr /v WindowsApps', fara a executa python
in contextul check-ului. Toate apelurile python -> !PYTHON_CMD!.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:06:42 +02:00
e83bd74813 feat: switch to CPU-only whisper build (no GPU on this machine)
- setup_whisper.py: descarcă build CPU din release-urile oficiale,
  sare peste Vulkan/CUDA/OpenBLAS
- run.bat: elimină env var GGML_VK_PREFER_HOST_MEMORY și check-ul Vulkan SDK
- transcribe.py: --no-gpu era deja setat

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 02:01:39 +02:00
60f564c107 fix(run.bat): restore CRLF line endings, add .gitattributes
Windows batch files require CRLF — LF-only caused cmd.exe to exit
silently mid-script. .gitattributes ensures *.bat stays CRLF.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 01:55:03 +02:00
696c04c41c chore: normalize line endings from CRLF to LF across all files
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-24 01:53:35 +02:00
11 changed files with 1924 additions and 1916 deletions

7
.gitattributes vendored Normal file
View File

@@ -0,0 +1,7 @@
# Default: let Git auto-detect text files and normalize them to LF line endings
* text=auto eol=lf
# Windows-only script types must stay CRLF
# (an LF-only run.bat caused cmd.exe to exit silently mid-script)
*.bat text eol=crlf
*.cmd text eol=crlf
*.ps1 text eol=crlf

View File

@@ -49,6 +49,19 @@ def login(session: requests.Session, email: str, password: str) -> bool:
return True return True
def parse_module_filter(arg: str) -> set[int]:
    """Parse a module filter string into a set of 1-based module indices.

    Accepted syntax is a comma-separated list of single indices and
    inclusive ranges, e.g. ``'4-5'``, ``'1,3'`` or ``'1-3,5'``.
    Whitespace around components and range bounds is ignored, and empty
    components (such as a trailing comma) are skipped.

    Args:
        arg: The raw filter text, typically the value following ``--modules``.

    Returns:
        The set of selected indices. Empty when ``arg`` has no components.

    Raises:
        ValueError: If a component is not an integer or an ``a-b`` range
            of integers; the message names the offending component.
    """
    selected: set[int] = set()
    for part in arg.split(","):
        part = part.strip()
        if not part:
            # Tolerate "1,,3" or a trailing comma instead of failing on int("").
            continue
        try:
            if "-" in part:
                first, last = (int(bound) for bound in part.split("-", 1))
                if first > last:
                    # A reversed range ("5-3") would otherwise expand to
                    # nothing and silently produce an empty filter.
                    first, last = last, first
                selected.update(range(first, last + 1))
            else:
                selected.add(int(part))
        except ValueError:
            raise ValueError(
                f"Invalid module filter component: {part!r}"
            ) from None
    return selected
def discover_modules(session: requests.Session) -> list[dict]: def discover_modules(session: requests.Session) -> list[dict]:
"""Fetch course page and return list of {name, url, module_id}.""" """Fetch course page and return list of {name, url, module_id}."""
resp = session.get(COURSE_URL) resp = session.get(COURSE_URL)
@@ -157,6 +170,14 @@ def main():
log.error("Set COURSE_USERNAME and COURSE_PASSWORD in .env") log.error("Set COURSE_USERNAME and COURSE_PASSWORD in .env")
sys.exit(1) sys.exit(1)
# Parse --modules filter (e.g. "4-5" or "1,3,5")
module_filter = None
if "--modules" in sys.argv:
idx = sys.argv.index("--modules")
if idx + 1 < len(sys.argv):
module_filter = parse_module_filter(sys.argv[idx + 1])
log.info(f"Module filter: {sorted(module_filter)}")
AUDIO_DIR.mkdir(exist_ok=True) AUDIO_DIR.mkdir(exist_ok=True)
session = requests.Session() session = requests.Session()
@@ -185,7 +206,10 @@ def main():
skipped = 0 skipped = 0
failed = 0 failed = 0
for mod in modules: for mod_idx, mod in enumerate(modules, 1):
if module_filter and mod_idx not in module_filter:
log.info(f" Skipping module {mod_idx}: {mod['name']}")
continue
lectures = discover_lectures(session, mod) lectures = discover_lectures(session, mod)
module_entry = { module_entry = {
"name": mod["name"], "name": mod["name"],

54
run.bat
View File

@@ -2,8 +2,6 @@
setlocal enabledelayedexpansion setlocal enabledelayedexpansion
cd /d "%~dp0" cd /d "%~dp0"
:: Prevent Vulkan from exhausting VRAM — overflow to system RAM instead of crashing
set "GGML_VK_PREFER_HOST_MEMORY=ON"
echo ============================================================ echo ============================================================
echo NLP Master - Download + Transcribe Pipeline echo NLP Master - Download + Transcribe Pipeline
@@ -20,17 +18,31 @@ set "NEED_WHISPER="
set "NEED_MODEL=" set "NEED_MODEL="
:: --- Python --- :: --- Python ---
python --version >nul 2>&1 :: Avoid executing python.exe directly — the Microsoft Store stub terminates cmd.exe.
if errorlevel 1 ( :: Use 'py' launcher first (safe), then find python.exe excluding WindowsApps stub.
set "PYTHON_CMD="
where py >nul 2>&1
if not errorlevel 1 (
set "PYTHON_CMD=py"
for /f "tokens=2" %%v in ('py --version 2^>^&1') do echo [OK] Python %%v (py launcher^)
)
if not defined PYTHON_CMD (
for /f "delims=" %%p in ('where python 2^>nul ^| findstr /v /i "WindowsApps"') do (
if not defined PYTHON_CMD (
set "PYTHON_CMD=%%p"
for /f "tokens=2" %%v in ('"%%p" --version 2^>^&1') do echo [OK] Python %%v
)
)
)
if not defined PYTHON_CMD (
echo [X] Python NOT FOUND echo [X] Python NOT FOUND
echo The Microsoft Store stub does not count as a real Python install.
echo Install from: https://www.python.org/downloads/ echo Install from: https://www.python.org/downloads/
echo Make sure to check "Add Python to PATH" during install. echo Make sure to check "Add Python to PATH" during install.
echo. echo.
echo Cannot continue without Python. Install it and re-run. echo Cannot continue without Python. Install it and re-run.
pause pause
exit /b 1 exit /b 1
) else (
for /f "tokens=2" %%v in ('python --version 2^>^&1') do echo [OK] Python %%v
) )
:: --- .env credentials --- :: --- .env credentials ---
@@ -126,21 +138,6 @@ if exist "%WHISPER_MODEL%" (
set "NEED_MODEL=1" set "NEED_MODEL=1"
) )
:: --- Vulkan GPU support ---
set "VULKAN_FOUND="
where vulkaninfo >nul 2>&1
if not errorlevel 1 (
set "VULKAN_FOUND=1"
echo [OK] Vulkan SDK Installed
) else (
if exist "%VULKAN_SDK%\Bin\vulkaninfo.exe" (
set "VULKAN_FOUND=1"
echo [OK] Vulkan SDK %VULKAN_SDK%
) else (
echo [!!] Vulkan SDK Not detected (whisper.cpp may use CPU fallback^)
echo Install from: https://vulkan.lunarg.com/sdk/home
)
)
:: --- Disk space --- :: --- Disk space ---
echo. echo.
@@ -173,7 +170,7 @@ if defined NEED_FFMPEG (
echo ============================================================ echo ============================================================
echo Auto-downloading ffmpeg... echo Auto-downloading ffmpeg...
echo ============================================================ echo ============================================================
python setup_whisper.py ffmpeg "!PYTHON_CMD!" setup_whisper.py ffmpeg
if errorlevel 1 ( if errorlevel 1 (
echo. echo.
echo ERROR: Could not install ffmpeg. echo ERROR: Could not install ffmpeg.
@@ -193,9 +190,9 @@ if exist "ffmpeg-bin\ffmpeg.exe" (
if defined NEED_WHISPER ( if defined NEED_WHISPER (
echo ============================================================ echo ============================================================
echo Auto-downloading whisper.cpp (Vulkan build^)... echo Auto-downloading whisper.cpp (CPU build^)...
echo ============================================================ echo ============================================================
python setup_whisper.py whisper "!PYTHON_CMD!" setup_whisper.py whisper
if errorlevel 1 ( if errorlevel 1 (
echo. echo.
echo ERROR: Failed to auto-download whisper.cpp. echo ERROR: Failed to auto-download whisper.cpp.
@@ -217,7 +214,7 @@ if defined NEED_MODEL (
echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)... echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)...
echo This will take a few minutes depending on your connection. echo This will take a few minutes depending on your connection.
echo ============================================================ echo ============================================================
python setup_whisper.py model "!PYTHON_CMD!" setup_whisper.py model
if errorlevel 1 ( if errorlevel 1 (
echo. echo.
echo ERROR: Failed to download model. echo ERROR: Failed to download model.
@@ -242,7 +239,7 @@ echo.
:: ============================================================ :: ============================================================
if not exist ".venv\Scripts\python.exe" ( if not exist ".venv\Scripts\python.exe" (
echo [1/4] Creating Python virtual environment... echo [1/4] Creating Python virtual environment...
python -m venv .venv "!PYTHON_CMD!" -m venv .venv
if errorlevel 1 ( if errorlevel 1 (
echo ERROR: Failed to create venv. echo ERROR: Failed to create venv.
pause pause
@@ -268,7 +265,12 @@ echo Done.
echo. echo.
echo [3/4] Downloading audio files... echo [3/4] Downloading audio files...
echo ============================================================ echo ============================================================
if "%~1"=="" (
.venv\Scripts\python download.py .venv\Scripts\python download.py
) else (
echo Modules filter: %~1
.venv\Scripts\python download.py --modules %~1
)
if errorlevel 1 ( if errorlevel 1 (
echo. echo.
echo WARNING: Some downloads failed. Check download_errors.log echo WARNING: Some downloads failed. Check download_errors.log

View File

@@ -1,5 +1,5 @@
""" """
Auto-download and setup whisper.cpp (Vulkan) + model for Windows. Auto-download and setup whisper.cpp (CPU build) + model for Windows.
Called by run.bat when prerequisites are missing. Called by run.bat when prerequisites are missing.
""" """
@@ -142,8 +142,7 @@ def try_official_vulkan_build() -> str | None:
tag = release.get("tag_name", "unknown") tag = release.get("tag_name", "unknown")
print(f" Official release: {tag}") print(f" Official release: {tag}")
# Priority: vulkan > noavx (cpu-only, no CUDA deps) > skip CUDA entirely # Priority: CPU build (no GPU deps needed)
vulkan_asset = None
cpu_asset = None cpu_asset = None
for asset in release.get("assets", []): for asset in release.get("assets", []):
name = asset["name"].lower() name = asset["name"].lower()
@@ -152,28 +151,22 @@ def try_official_vulkan_build() -> str | None:
# Must be Windows # Must be Windows
if "win" not in name and "x64" not in name: if "win" not in name and "x64" not in name:
continue continue
# Absolutely skip CUDA builds - they won't work on AMD # Skip GPU builds entirely
if "cuda" in name: if "cuda" in name or "vulkan" in name or "openblas" in name:
continue continue
if "vulkan" in name: if "noavx" not in name:
vulkan_asset = asset
break
if "noavx" not in name and "openblas" not in name:
cpu_asset = asset cpu_asset = asset
break
chosen = vulkan_asset or cpu_asset if not cpu_asset:
if not chosen: print(" No CPU build found in official releases")
print(" No Vulkan or CPU-only build found in official releases")
print(" Available assets:") print(" Available assets:")
for asset in release.get("assets", []): for asset in release.get("assets", []):
print(f" - {asset['name']}") print(f" - {asset['name']}")
return None return None
if vulkan_asset: print(f" Found CPU build: {cpu_asset['name']}")
print(f" Found official Vulkan build: {chosen['name']}") chosen = cpu_asset
else:
print(f" No Vulkan build in official release, using CPU build: {chosen['name']}")
print(f" (Will work but without GPU acceleration)")
zip_path = Path(chosen["name"]) zip_path = Path(chosen["name"])
download_file(chosen["browser_download_url"], zip_path, chosen["name"]) download_file(chosen["browser_download_url"], zip_path, chosen["name"])
@@ -182,29 +175,12 @@ def try_official_vulkan_build() -> str | None:
def setup_whisper_bin() -> str | None: def setup_whisper_bin() -> str | None:
"""Download whisper.cpp Vulkan release. Returns path to whisper-cli.exe.""" """Download whisper.cpp CPU release. Returns path to whisper-cli.exe."""
whisper_exe = WHISPER_DIR / "whisper-cli.exe" whisper_exe = WHISPER_DIR / "whisper-cli.exe"
if whisper_exe.exists(): if whisper_exe.exists():
# Check if it's a CUDA build (has CUDA DLLs but no Vulkan DLL)
has_cuda = (WHISPER_DIR / "ggml-cuda.dll").exists()
has_vulkan = (WHISPER_DIR / "ggml-vulkan.dll").exists()
if has_cuda and not has_vulkan:
print(f" WARNING: Existing install is a CUDA build (won't work on AMD GPU)")
print(f" Removing and re-downloading Vulkan build...")
import shutil
shutil.rmtree(WHISPER_DIR)
else:
print(f" whisper-cli.exe already exists at {whisper_exe}") print(f" whisper-cli.exe already exists at {whisper_exe}")
return str(whisper_exe) return str(whisper_exe)
# Strategy: try community Vulkan build first (reliable for AMD),
# then fall back to official release
exe_path = try_community_vulkan_build()
if exe_path:
print(f"\n whisper-cli.exe ready at: {exe_path} (Vulkan)")
return exe_path
print("\n Community build failed, trying official release...")
exe_path = try_official_vulkan_build() exe_path = try_official_vulkan_build()
if exe_path: if exe_path:
print(f"\n whisper-cli.exe ready at: {exe_path}") print(f"\n whisper-cli.exe ready at: {exe_path}")
@@ -212,7 +188,6 @@ def setup_whisper_bin() -> str | None:
print("\n ERROR: Could not download whisper.cpp") print("\n ERROR: Could not download whisper.cpp")
print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases") print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases")
print(" Build from source with: cmake -DGGML_VULKAN=1")
return None return None