Compare commits
5 Commits
bbc5884545
...
45e72bc89b
| Author | SHA1 | Date | |
|---|---|---|---|
| 45e72bc89b | |||
| 7b18e8fc41 | |||
| e83bd74813 | |||
| 60f564c107 | |||
| 696c04c41c |
7
.gitattributes
vendored
Normal file
7
.gitattributes
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# Default: LF for all text files
|
||||||
|
* text=auto eol=lf
|
||||||
|
|
||||||
|
# Windows-only files must stay CRLF
|
||||||
|
*.bat text eol=crlf
|
||||||
|
*.cmd text eol=crlf
|
||||||
|
*.ps1 text eol=crlf
|
||||||
26
download.py
26
download.py
@@ -49,6 +49,19 @@ def login(session: requests.Session, email: str, password: str) -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def parse_module_filter(arg: str) -> set[int]:
    """Parse a module filter like '1-3', '4,5' or '1-3,5' into 1-based indices.

    Args:
        arg: Comma-separated list of single indices and inclusive ``a-b``
            ranges. Whitespace around items is ignored, and empty items
            (for example from a trailing comma) are skipped.

    Returns:
        The set of selected indices. Never empty.

    Raises:
        ValueError: If an item is not an integer or an ``a-b`` range, or if
            the filter selects nothing at all.
    """
    result: set[int] = set()
    for part in arg.split(","):
        part = part.strip()
        if not part:
            # Tolerate stray commas ("1,2,") instead of failing on int("").
            continue
        if "-" in part:
            a, b = part.split("-", 1)
            # Accept ranges written backwards ("3-1") rather than silently
            # producing an empty range.
            lo, hi = sorted((int(a), int(b)))
            result.update(range(lo, hi + 1))
        else:
            result.add(int(part))
    if not result:
        # An empty set is falsy, so a caller testing `if module_filter`
        # could not tell it apart from "no filter given".
        raise ValueError(f"module filter selects no modules: {arg!r}")
    return result
|
||||||
|
|
||||||
|
|
||||||
def discover_modules(session: requests.Session) -> list[dict]:
|
def discover_modules(session: requests.Session) -> list[dict]:
|
||||||
"""Fetch course page and return list of {name, url, module_id}."""
|
"""Fetch course page and return list of {name, url, module_id}."""
|
||||||
resp = session.get(COURSE_URL)
|
resp = session.get(COURSE_URL)
|
||||||
@@ -157,6 +170,14 @@ def main():
|
|||||||
log.error("Set COURSE_USERNAME and COURSE_PASSWORD in .env")
|
log.error("Set COURSE_USERNAME and COURSE_PASSWORD in .env")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Parse --modules filter (e.g. "4-5" or "1,3,5")
|
||||||
|
module_filter = None
|
||||||
|
if "--modules" in sys.argv:
|
||||||
|
idx = sys.argv.index("--modules")
|
||||||
|
if idx + 1 < len(sys.argv):
|
||||||
|
module_filter = parse_module_filter(sys.argv[idx + 1])
|
||||||
|
log.info(f"Module filter: {sorted(module_filter)}")
|
||||||
|
|
||||||
AUDIO_DIR.mkdir(exist_ok=True)
|
AUDIO_DIR.mkdir(exist_ok=True)
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
@@ -185,7 +206,10 @@ def main():
|
|||||||
skipped = 0
|
skipped = 0
|
||||||
failed = 0
|
failed = 0
|
||||||
|
|
||||||
for mod in modules:
|
for mod_idx, mod in enumerate(modules, 1):
|
||||||
|
if module_filter and mod_idx not in module_filter:
|
||||||
|
log.info(f" Skipping module {mod_idx}: {mod['name']}")
|
||||||
|
continue
|
||||||
lectures = discover_lectures(session, mod)
|
lectures = discover_lectures(session, mod)
|
||||||
module_entry = {
|
module_entry = {
|
||||||
"name": mod["name"],
|
"name": mod["name"],
|
||||||
|
|||||||
56
run.bat
56
run.bat
@@ -2,8 +2,6 @@
|
|||||||
setlocal enabledelayedexpansion
|
setlocal enabledelayedexpansion
|
||||||
cd /d "%~dp0"
|
cd /d "%~dp0"
|
||||||
|
|
||||||
:: Prevent Vulkan from exhausting VRAM — overflow to system RAM instead of crashing
|
|
||||||
set "GGML_VK_PREFER_HOST_MEMORY=ON"
|
|
||||||
|
|
||||||
echo ============================================================
|
echo ============================================================
|
||||||
echo NLP Master - Download + Transcribe Pipeline
|
echo NLP Master - Download + Transcribe Pipeline
|
||||||
@@ -20,17 +18,31 @@ set "NEED_WHISPER="
|
|||||||
set "NEED_MODEL="
|
set "NEED_MODEL="
|
||||||
|
|
||||||
:: --- Python ---
|
:: --- Python ---
|
||||||
python --version >nul 2>&1
|
:: Avoid executing python.exe directly — the Microsoft Store stub terminates cmd.exe.
|
||||||
if errorlevel 1 (
|
:: Use 'py' launcher first (safe), then find python.exe excluding WindowsApps stub.
|
||||||
|
set "PYTHON_CMD="
|
||||||
|
where py >nul 2>&1
|
||||||
|
if not errorlevel 1 (
|
||||||
|
set "PYTHON_CMD=py"
|
||||||
|
for /f "tokens=2" %%v in ('py --version 2^>^&1') do echo [OK] Python %%v (py launcher^)
|
||||||
|
)
|
||||||
|
if not defined PYTHON_CMD (
|
||||||
|
for /f "delims=" %%p in ('where python 2^>nul ^| findstr /v /i "WindowsApps"') do (
|
||||||
|
if not defined PYTHON_CMD (
|
||||||
|
set "PYTHON_CMD=%%p"
|
||||||
|
for /f "tokens=2" %%v in ('"%%p" --version 2^>^&1') do echo [OK] Python %%v
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if not defined PYTHON_CMD (
|
||||||
echo [X] Python NOT FOUND
|
echo [X] Python NOT FOUND
|
||||||
|
echo The Microsoft Store stub does not count as a real Python install.
|
||||||
echo Install from: https://www.python.org/downloads/
|
echo Install from: https://www.python.org/downloads/
|
||||||
echo Make sure to check "Add Python to PATH" during install.
|
echo Make sure to check "Add Python to PATH" during install.
|
||||||
echo.
|
echo.
|
||||||
echo Cannot continue without Python. Install it and re-run.
|
echo Cannot continue without Python. Install it and re-run.
|
||||||
pause
|
pause
|
||||||
exit /b 1
|
exit /b 1
|
||||||
) else (
|
|
||||||
for /f "tokens=2" %%v in ('python --version 2^>^&1') do echo [OK] Python %%v
|
|
||||||
)
|
)
|
||||||
|
|
||||||
:: --- .env credentials ---
|
:: --- .env credentials ---
|
||||||
@@ -126,21 +138,6 @@ if exist "%WHISPER_MODEL%" (
|
|||||||
set "NEED_MODEL=1"
|
set "NEED_MODEL=1"
|
||||||
)
|
)
|
||||||
|
|
||||||
:: --- Vulkan GPU support ---
|
|
||||||
set "VULKAN_FOUND="
|
|
||||||
where vulkaninfo >nul 2>&1
|
|
||||||
if not errorlevel 1 (
|
|
||||||
set "VULKAN_FOUND=1"
|
|
||||||
echo [OK] Vulkan SDK Installed
|
|
||||||
) else (
|
|
||||||
if exist "%VULKAN_SDK%\Bin\vulkaninfo.exe" (
|
|
||||||
set "VULKAN_FOUND=1"
|
|
||||||
echo [OK] Vulkan SDK %VULKAN_SDK%
|
|
||||||
) else (
|
|
||||||
echo [!!] Vulkan SDK Not detected (whisper.cpp may use CPU fallback^)
|
|
||||||
echo Install from: https://vulkan.lunarg.com/sdk/home
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
:: --- Disk space ---
|
:: --- Disk space ---
|
||||||
echo.
|
echo.
|
||||||
@@ -173,7 +170,7 @@ if defined NEED_FFMPEG (
|
|||||||
echo ============================================================
|
echo ============================================================
|
||||||
echo Auto-downloading ffmpeg...
|
echo Auto-downloading ffmpeg...
|
||||||
echo ============================================================
|
echo ============================================================
|
||||||
python setup_whisper.py ffmpeg
|
"!PYTHON_CMD!" setup_whisper.py ffmpeg
|
||||||
if errorlevel 1 (
|
if errorlevel 1 (
|
||||||
echo.
|
echo.
|
||||||
echo ERROR: Could not install ffmpeg.
|
echo ERROR: Could not install ffmpeg.
|
||||||
@@ -193,9 +190,9 @@ if exist "ffmpeg-bin\ffmpeg.exe" (
|
|||||||
|
|
||||||
if defined NEED_WHISPER (
|
if defined NEED_WHISPER (
|
||||||
echo ============================================================
|
echo ============================================================
|
||||||
echo Auto-downloading whisper.cpp (Vulkan build^)...
|
echo Auto-downloading whisper.cpp (CPU build^)...
|
||||||
echo ============================================================
|
echo ============================================================
|
||||||
python setup_whisper.py whisper
|
"!PYTHON_CMD!" setup_whisper.py whisper
|
||||||
if errorlevel 1 (
|
if errorlevel 1 (
|
||||||
echo.
|
echo.
|
||||||
echo ERROR: Failed to auto-download whisper.cpp.
|
echo ERROR: Failed to auto-download whisper.cpp.
|
||||||
@@ -217,7 +214,7 @@ if defined NEED_MODEL (
|
|||||||
echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)...
|
echo Auto-downloading Whisper model (ggml-medium-q5_0, ~500 MB^)...
|
||||||
echo This will take a few minutes depending on your connection.
|
echo This will take a few minutes depending on your connection.
|
||||||
echo ============================================================
|
echo ============================================================
|
||||||
python setup_whisper.py model
|
"!PYTHON_CMD!" setup_whisper.py model
|
||||||
if errorlevel 1 (
|
if errorlevel 1 (
|
||||||
echo.
|
echo.
|
||||||
echo ERROR: Failed to download model.
|
echo ERROR: Failed to download model.
|
||||||
@@ -242,7 +239,7 @@ echo.
|
|||||||
:: ============================================================
|
:: ============================================================
|
||||||
if not exist ".venv\Scripts\python.exe" (
|
if not exist ".venv\Scripts\python.exe" (
|
||||||
echo [1/4] Creating Python virtual environment...
|
echo [1/4] Creating Python virtual environment...
|
||||||
python -m venv .venv
|
"!PYTHON_CMD!" -m venv .venv
|
||||||
if errorlevel 1 (
|
if errorlevel 1 (
|
||||||
echo ERROR: Failed to create venv.
|
echo ERROR: Failed to create venv.
|
||||||
pause
|
pause
|
||||||
@@ -268,7 +265,12 @@ echo Done.
|
|||||||
echo.
|
echo.
|
||||||
echo [3/4] Downloading audio files...
|
echo [3/4] Downloading audio files...
|
||||||
echo ============================================================
|
echo ============================================================
|
||||||
.venv\Scripts\python download.py
|
if "%~1"=="" (
|
||||||
|
.venv\Scripts\python download.py
|
||||||
|
) else (
|
||||||
|
echo Modules filter: %~1
|
||||||
|
.venv\Scripts\python download.py --modules %~1
|
||||||
|
)
|
||||||
if errorlevel 1 (
|
if errorlevel 1 (
|
||||||
echo.
|
echo.
|
||||||
echo WARNING: Some downloads failed. Check download_errors.log
|
echo WARNING: Some downloads failed. Check download_errors.log
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Auto-download and setup whisper.cpp (Vulkan) + model for Windows.
|
Auto-download and setup whisper.cpp (CPU build) + model for Windows.
|
||||||
Called by run.bat when prerequisites are missing.
|
Called by run.bat when prerequisites are missing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -142,8 +142,7 @@ def try_official_vulkan_build() -> str | None:
|
|||||||
tag = release.get("tag_name", "unknown")
|
tag = release.get("tag_name", "unknown")
|
||||||
print(f" Official release: {tag}")
|
print(f" Official release: {tag}")
|
||||||
|
|
||||||
# Priority: vulkan > noavx (cpu-only, no CUDA deps) > skip CUDA entirely
|
# Priority: CPU build (no GPU deps needed)
|
||||||
vulkan_asset = None
|
|
||||||
cpu_asset = None
|
cpu_asset = None
|
||||||
for asset in release.get("assets", []):
|
for asset in release.get("assets", []):
|
||||||
name = asset["name"].lower()
|
name = asset["name"].lower()
|
||||||
@@ -152,28 +151,22 @@ def try_official_vulkan_build() -> str | None:
|
|||||||
# Must be Windows
|
# Must be Windows
|
||||||
if "win" not in name and "x64" not in name:
|
if "win" not in name and "x64" not in name:
|
||||||
continue
|
continue
|
||||||
# Absolutely skip CUDA builds - they won't work on AMD
|
# Skip GPU builds entirely
|
||||||
if "cuda" in name:
|
if "cuda" in name or "vulkan" in name or "openblas" in name:
|
||||||
continue
|
continue
|
||||||
if "vulkan" in name:
|
if "noavx" not in name:
|
||||||
vulkan_asset = asset
|
|
||||||
break
|
|
||||||
if "noavx" not in name and "openblas" not in name:
|
|
||||||
cpu_asset = asset
|
cpu_asset = asset
|
||||||
|
break
|
||||||
|
|
||||||
chosen = vulkan_asset or cpu_asset
|
if not cpu_asset:
|
||||||
if not chosen:
|
print(" No CPU build found in official releases")
|
||||||
print(" No Vulkan or CPU-only build found in official releases")
|
|
||||||
print(" Available assets:")
|
print(" Available assets:")
|
||||||
for asset in release.get("assets", []):
|
for asset in release.get("assets", []):
|
||||||
print(f" - {asset['name']}")
|
print(f" - {asset['name']}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if vulkan_asset:
|
print(f" Found CPU build: {cpu_asset['name']}")
|
||||||
print(f" Found official Vulkan build: {chosen['name']}")
|
chosen = cpu_asset
|
||||||
else:
|
|
||||||
print(f" No Vulkan build in official release, using CPU build: {chosen['name']}")
|
|
||||||
print(f" (Will work but without GPU acceleration)")
|
|
||||||
|
|
||||||
zip_path = Path(chosen["name"])
|
zip_path = Path(chosen["name"])
|
||||||
download_file(chosen["browser_download_url"], zip_path, chosen["name"])
|
download_file(chosen["browser_download_url"], zip_path, chosen["name"])
|
||||||
@@ -182,29 +175,12 @@ def try_official_vulkan_build() -> str | None:
|
|||||||
|
|
||||||
|
|
||||||
def setup_whisper_bin() -> str | None:
|
def setup_whisper_bin() -> str | None:
|
||||||
"""Download whisper.cpp Vulkan release. Returns path to whisper-cli.exe."""
|
"""Download whisper.cpp CPU release. Returns path to whisper-cli.exe."""
|
||||||
whisper_exe = WHISPER_DIR / "whisper-cli.exe"
|
whisper_exe = WHISPER_DIR / "whisper-cli.exe"
|
||||||
if whisper_exe.exists():
|
if whisper_exe.exists():
|
||||||
# Check if it's a CUDA build (has CUDA DLLs but no Vulkan DLL)
|
print(f" whisper-cli.exe already exists at {whisper_exe}")
|
||||||
has_cuda = (WHISPER_DIR / "ggml-cuda.dll").exists()
|
return str(whisper_exe)
|
||||||
has_vulkan = (WHISPER_DIR / "ggml-vulkan.dll").exists()
|
|
||||||
if has_cuda and not has_vulkan:
|
|
||||||
print(f" WARNING: Existing install is a CUDA build (won't work on AMD GPU)")
|
|
||||||
print(f" Removing and re-downloading Vulkan build...")
|
|
||||||
import shutil
|
|
||||||
shutil.rmtree(WHISPER_DIR)
|
|
||||||
else:
|
|
||||||
print(f" whisper-cli.exe already exists at {whisper_exe}")
|
|
||||||
return str(whisper_exe)
|
|
||||||
|
|
||||||
# Strategy: try community Vulkan build first (reliable for AMD),
|
|
||||||
# then fall back to official release
|
|
||||||
exe_path = try_community_vulkan_build()
|
|
||||||
if exe_path:
|
|
||||||
print(f"\n whisper-cli.exe ready at: {exe_path} (Vulkan)")
|
|
||||||
return exe_path
|
|
||||||
|
|
||||||
print("\n Community build failed, trying official release...")
|
|
||||||
exe_path = try_official_vulkan_build()
|
exe_path = try_official_vulkan_build()
|
||||||
if exe_path:
|
if exe_path:
|
||||||
print(f"\n whisper-cli.exe ready at: {exe_path}")
|
print(f"\n whisper-cli.exe ready at: {exe_path}")
|
||||||
@@ -212,7 +188,6 @@ def setup_whisper_bin() -> str | None:
|
|||||||
|
|
||||||
print("\n ERROR: Could not download whisper.cpp")
|
print("\n ERROR: Could not download whisper.cpp")
|
||||||
print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases")
|
print(" Manual install: https://github.com/ggml-org/whisper.cpp/releases")
|
||||||
print(" Build from source with: cmake -DGGML_VULKAN=1")
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user