feat: anti-hallucination params + retranscribe script for fixing broken transcripts
- transcribe.py: add --max-context 0, --entropy-thold 2.4, --max-len 60, --suppress-nst, and --no-fallback to the whisper.cpp invocation to prevent hallucination loops
- transcribe.py: remove the interactive quality gate (runs unattended now)
- run.bat: remove pause prompts for unattended operation
- retranscribe_tail.py: new script that detects hallucination bursts in SRT files, extracts and re-transcribes only the affected audio segments, then splices the results back together. Drops segments that re-hallucinate (silence/music). Backs up originals to transcripts/backup/.
- fix_hallucinations.bat: Windows wrapper for retranscribe_tail.py

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -131,6 +131,11 @@ def transcribe_file(audio_path: str, output_base: str) -> bool:
|
||||
"--threads", str(os.cpu_count() or 4),
|
||||
"--beam-size", "1",
|
||||
"--best-of", "1",
|
||||
"--max-context", "0", # don't carry context between segments (prevents hallucination loops)
|
||||
"--entropy-thold", "2.4", # reject high-entropy (hallucinated) segments
|
||||
"--max-len", "60", # shorter segments reduce drift
|
||||
"--suppress-nst", # suppress non-speech tokens (reduces hallucination on silence)
|
||||
"--no-fallback", # don't retry with higher temperature
|
||||
"--output-txt",
|
||||
"--output-srt",
|
||||
"--output-file", output_base,
|
||||
@@ -260,17 +265,9 @@ def main():
|
||||
# Save manifest after each file (checkpoint)
|
||||
save_manifest(manifest)
|
||||
|
||||
# Quality gate: pause after first module
|
||||
# Log milestone after first module (no longer pauses)
|
||||
if mod == manifest["modules"][0] and transcribed > 0:
|
||||
log.info("\n" + "!" * 60)
|
||||
log.info("QUALITY GATE: First module complete.")
|
||||
log.info("Spot-check 2-3 transcripts in transcripts/ before continuing.")
|
||||
log.info("Press Enter to continue, or Ctrl+C to abort...")
|
||||
log.info("!" * 60)
|
||||
try:
|
||||
input()
|
||||
except EOFError:
|
||||
pass # Non-interactive mode, continue
|
||||
log.info(f"First module complete ({transcribed} files). Continuing automatically...")
|
||||
|
||||
# Validation
|
||||
empty_outputs = [
|
||||
|
||||
Reference in New Issue
Block a user