refactor: parametrize pipeline cu --course flag + suport Vimeo/text
Un singur set de scripturi acum rulează pe orice curs configurat în courses.py. Master rămâne la rădăcina repo (backward-compat M1-M6); cursuri noi (ex. practitioner la shop.cursnlp.ro) primesc un root dedicat (nlp-practitioner/) cu propriile artefacte.

- courses.py: config dict (master, practitioner) + course_paths() + validate_manifest_course() (manifest fără course_key = master).
- download.py: --course + --modules; trei tipuri de lecții (audio HTTP, Vimeo iframe via yt-dlp audio-only, text-only cu captură HTML); merge cu manifest existent în loc de replace; strip [Audio] pentru backward-compat paths.
- transcribe.py: --course + --modules; skip type==text; path-uri prin course_paths(); validare course_key.
- summarize.py: --course + --compile; template prompt folosește course['name']; scrie SUPORT_CURS.md cu LF explicit (WSL2 baseline).
- md_to_pdf.py: --course rezolvă summaries_dir / pdf_dir per curs.
- run.bat: detectează master|practitioner ca primul argument, propagă --course la sub-scripturi; backward-compat run.bat [modules].
- requirements.txt: + yt-dlp.
- .gitignore: nlp-practitioner/audio/, audio_wav/, scratch_recon.py, tmp_recon/.
- tests/test_regression.sh: 5 gate-uri read-only (import, schema, disk-coherence, SUPORT_CURS byte-identic, cross-course isolation).

Regression curs master: PASS (manifest + SUPORT_CURS.md hash identic cu baseline /tmp/suport_before.md).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
80
courses.py
Normal file
80
courses.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
Shared course configuration for the NLP Master pipeline.
|
||||
|
||||
A single pipeline (download -> transcribe -> summarize -> pdf) runs on
|
||||
multiple courses by passing --course <key>. Scripts resolve all artifact
|
||||
paths against course["root"], so the master course remains in place at the repo root
|
||||
and subsequent courses land in their own subdirectory.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
# Registry of the courses the pipeline can run on, keyed by course key.
# Each entry carries the site URL parts, the env_user/env_pass names
# (presumably environment variables holding the login credentials --
# confirm against the download script) and the artifact root directory.
COURSES = {
    "master": dict(
        name="NLP Master Practitioner Bucuresti 2025",
        base_url="https://cursuri.aresens.ro",
        course_path="/curs/26",
        login_path="/login",
        env_user="COURSE_USERNAME",
        env_pass="COURSE_PASSWORD",
        # The master course stays at the repo root for backward-compat
        # with the M1-M6 outputs.
        root=Path("."),
    ),
    "practitioner": dict(
        name="NLP Practitioner (cursnlp.ro)",
        base_url="https://shop.cursnlp.ro",
        course_path="/curs/50",
        login_path="/login",
        env_user="PRACTITIONER_USERNAME",
        env_pass="PRACTITIONER_PASSWORD",
        root=Path("nlp-practitioner"),
    ),
}
|
||||
|
||||
|
||||
def get_course(key: str) -> dict:
    """Look up a course by key and return an enriched copy of its config.

    The result is a shallow copy of ``COURSES[key]`` extended with three
    entries: ``key`` (the lookup key itself), ``course_url``
    (``base_url + course_path``) and ``login_url``
    (``base_url + login_path``). The registry entry is not modified.

    Raises:
        SystemExit: if ``key`` is not a key of ``COURSES``; the message
            lists the available keys in sorted order.
    """
    if key not in COURSES:
        available = sorted(COURSES)
        raise SystemExit(f"Unknown course '{key}'. Available: {available}")

    base = COURSES[key]
    site = base["base_url"]
    # Unpack first so the derived entries are appended after the
    # configured ones, exactly as item-by-item assignment would do.
    return {
        **base,
        "key": key,
        "course_url": site + base["course_path"],
        "login_url": site + base["login_path"],
    }
|
||||
|
||||
|
||||
def course_paths(course: dict) -> dict:
    """Resolve the artifact paths that live under ``course['root']``.

    Args:
        course: course config containing a ``root`` entry. The root may be
            a ``Path`` or a plain string / path-like; it is coerced to
            ``Path`` so that a string root does not fail on the ``/``
            joins below.

    Returns:
        A dict of ``Path`` objects with the keys ``root``, ``manifest``,
        ``audio_dir``, ``wav_cache_dir``, ``transcripts_dir``,
        ``summaries_dir``, ``pdf_dir`` and ``master_guide``.

    Raises:
        KeyError: if ``course`` has no ``root`` entry.
    """
    root = Path(course["root"])
    summaries_dir = root / "summaries"
    return {
        "root": root,
        "manifest": root / "manifest.json",
        "audio_dir": root / "audio",
        "wav_cache_dir": root / "audio_wav",
        "transcripts_dir": root / "transcripts",
        "summaries_dir": summaries_dir,
        # PDFs are nested inside the summaries directory.
        "pdf_dir": summaries_dir / "pdf",
        "master_guide": root / "SUPORT_CURS.md",
    }
|
||||
|
||||
|
||||
def validate_manifest_course(manifest: dict, course_key: str) -> None:
    """Abort when an existing manifest belongs to a different course.

    Legacy policy: a manifest whose ``course_key`` is missing (or None),
    i.e. one written before the multi-course refactor, is treated as
    belonging to ``master``. This keeps backward-compat with the existing
    curs_26 manifest.json from M1-M6.

    Args:
        manifest: previously written manifest contents.
        course_key: key of the course currently being run.

    Raises:
        SystemExit: if the manifest's effective course differs from
            ``course_key``.
    """
    declared = manifest.get("course_key")
    effective = "master" if declared is None else declared

    if effective == course_key:
        return

    raise SystemExit(
        f"Manifest belongs to course '{effective}' but --course='{course_key}'. "
        f"Refusing to corrupt cross-course state. "
        f"Delete {course_key}'s manifest to start fresh, or run with --course={effective}."
    )
|
||||
Reference in New Issue
Block a user