Un singur set de scripturi acum rulează pe orice curs configurat în courses.py. Master rămâne la rădăcina repo (backward-compat M1-M6); cursuri noi (ex. practitioner la shop.cursnlp.ro) primesc un root dedicat (nlp-practitioner/) cu propriile artefacte. - courses.py: config dict (master, practitioner) + course_paths() + validate_manifest_course() (manifest fără course_key = master). - download.py: --course + --modules; trei tipuri de lecții (audio HTTP, Vimeo iframe via yt-dlp audio-only, text-only cu captură HTML); merge cu manifest existent în loc de replace; strip [Audio] pentru backward-compat paths. - transcribe.py: --course + --modules; skip type==text; path-uri prin course_paths(); validare course_key. - summarize.py: --course + --compile; template prompt folosește course['name']; scrie SUPORT_CURS.md cu LF explicit (WSL2 baseline). - md_to_pdf.py: --course resolv-ă summaries_dir / pdf_dir per curs. - run.bat: detectează master|practitioner ca primul argument, propagă --course la sub-scripturi; backward-compat run.bat [modules]. - requirements.txt: + yt-dlp. - .gitignore: nlp-practitioner/audio/, audio_wav/, scratch_recon.py, tmp_recon/. - tests/test_regression.sh: 5 gate-uri read-only (import, schema, disk-coherence, SUPORT_CURS byte-identic, cross-course isolation). Regression curs master: PASS (manifest + SUPORT_CURS.md hash identic cu baseline /tmp/suport_before.md). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
81 lines
2.7 KiB
Python
81 lines
2.7 KiB
Python
"""
|
|
Shared course configuration for the NLP Master pipeline.
|
|
|
|
A single pipeline (download -> transcribe -> summarize -> pdf) runs on
|
|
multiple courses by passing --course <key>. Scripts resolve all artifact
|
|
paths against course["root"], so the master course remains in place at repo root
|
|
and subsequent courses land in their own subdirectory.
|
|
"""
|
|
from pathlib import Path
|
|
|
|
# Settings for the "master" course.
# NOTE(review): env_user / env_pass look like names of environment variables
# holding the login credentials -- confirm against the download script.
_MASTER_COURSE = {
    "name": "NLP Master Practitioner Bucuresti 2025",
    "base_url": "https://cursuri.aresens.ro",
    "course_path": "/curs/26",
    "login_path": "/login",
    "env_user": "COURSE_USERNAME",
    "env_pass": "COURSE_PASSWORD",
    # The master course stays at the repo root for backward compatibility
    # with the M1-M6 outputs.
    "root": Path("."),
}

# Settings for the "practitioner" course; its artifacts get a dedicated
# subdirectory instead of the repo root.
_PRACTITIONER_COURSE = {
    "name": "NLP Practitioner (cursnlp.ro)",
    "base_url": "https://shop.cursnlp.ro",
    "course_path": "/curs/50",
    "login_path": "/login",
    "env_user": "PRACTITIONER_USERNAME",
    "env_pass": "PRACTITIONER_PASSWORD",
    "root": Path("nlp-practitioner"),
}

# Registry of every configured course, keyed by the value given to --course.
COURSES = {
    "master": _MASTER_COURSE,
    "practitioner": _PRACTITIONER_COURSE,
}
|
|
|
|
|
|
def get_course(key: str) -> dict:
    """Look up a course by key and return an enriched copy of its config.

    The result is a new dict holding every entry of ``COURSES[key]`` plus
    three derived entries:

    * ``key`` -- the lookup key itself,
    * ``course_url`` -- ``base_url`` concatenated with ``course_path``,
    * ``login_url`` -- ``base_url`` concatenated with ``login_path``.

    The copy is shallow, so adding or replacing entries on the result does
    not touch the shared ``COURSES`` registry.

    Raises:
        SystemExit: if ``key`` is not a configured course; the message lists
            the available keys in sorted order.
    """
    if key not in COURSES:
        raise SystemExit(
            f"Unknown course '{key}'. Available: {sorted(COURSES)}"
        )

    settings = COURSES[key]
    site = settings["base_url"]
    return {
        **settings,
        "key": key,
        "course_url": site + settings["course_path"],
        "login_url": site + settings["login_path"],
    }
|
|
|
|
|
|
def course_paths(course: dict) -> dict:
    """Build the table of artifact locations for one course.

    Args:
        course: course config; only ``course["root"]`` is read. It must
            support the ``/`` join operator (the entries in ``COURSES`` use
            ``pathlib.Path``).

    Returns:
        A dict with the keys ``root``, ``manifest``, ``audio_dir``,
        ``wav_cache_dir``, ``transcripts_dir``, ``summaries_dir``,
        ``pdf_dir`` and ``master_guide``, each located under the course root.
    """
    base = course["root"]
    # pdf_dir is nested inside summaries_dir, so derive both from one path.
    summaries = base / "summaries"

    resolved = {"root": base}
    resolved.update(
        manifest=base / "manifest.json",
        audio_dir=base / "audio",
        wav_cache_dir=base / "audio_wav",
        transcripts_dir=base / "transcripts",
        summaries_dir=summaries,
        pdf_dir=summaries / "pdf",
        master_guide=base / "SUPORT_CURS.md",
    )
    return resolved
|
|
|
|
|
|
def validate_manifest_course(manifest: dict, course_key: str) -> None:
    """Abort if an existing manifest was written for a different course.

    Legacy policy: a manifest whose ``course_key`` is missing (or ``None``)
    predates the multi-course refactor and is treated as belonging to
    ``master``. This keeps the existing curs_26 manifest.json from M1-M6
    usable without migration.

    Args:
        manifest: the already-loaded manifest mapping.
        course_key: the course selected for the current run.

    Raises:
        SystemExit: if the manifest's (effective) course differs from
            ``course_key``.
    """
    recorded = manifest.get("course_key")
    owner = "master" if recorded is None else recorded

    if owner != course_key:
        message = (
            f"Manifest belongs to course '{owner}' but --course='{course_key}'. "
            f"Refusing to corrupt cross-course state. "
            f"Delete {course_key}'s manifest to start fresh, or run with --course={owner}."
        )
        raise SystemExit(message)
|