- transcribe.py: add --max-context 0, --entropy-thold 2.4, --max-len 60, --suppress-nst, --no-fallback to whisper.cpp to prevent hallucination loops - transcribe.py: remove interactive quality gate (runs unattended now) - run.bat: remove pause prompts for unattended operation - retranscribe_tail.py: new script that detects hallucination bursts in SRT files, extracts and re-transcribes only the affected audio segments, then splices the result back together. Drops segments that re-hallucinate (silence/music). Backs up originals to transcripts/backup/. - fix_hallucinations.bat: Windows wrapper for retranscribe_tail.py Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
75 lines
1.9 KiB
Batchfile
75 lines
1.9 KiB
Batchfile
@echo off
setlocal enabledelayedexpansion

:: Every path used below is relative to the folder containing this script,
:: so stop immediately if that folder cannot be made the working directory
:: instead of continuing in whatever directory the caller happened to be in.
cd /d "%~dp0" || (
    echo [X] Could not change to the script directory.
    pause
    exit /b 1
)

echo ============================================================
echo Fix Hallucinated Transcripts
echo ============================================================
echo.
|
|
|
|
:: --- Find Python ---
:: Resolution order:
::   1. the "py" command, when "where" can locate it;
::   2. otherwise the first "python" reported by "where" whose path does not
::      contain "WindowsApps" (case-insensitive filter) -- presumably to skip
::      the Microsoft Store alias stubs; confirm if that filter ever misfires.
:: The result is left in PYTHON_CMD; the script aborts when nothing is found.
set "PYTHON_CMD="
where py >nul 2>&1 && set "PYTHON_CMD=py"
if not defined PYTHON_CMD for /f "delims=" %%p in ('where python 2^>nul ^| findstr /v /i "WindowsApps"') do if not defined PYTHON_CMD set "PYTHON_CMD=%%p"

if not defined PYTHON_CMD (
    echo [X] Python not found. Install from https://www.python.org/downloads/
    pause
    exit /b 1
)
|
|
|
|
:: --- Find whisper-cli.exe ---
:: A WHISPER_BIN value supplied by the caller is used untouched. Otherwise the
:: candidate locations are probed in priority order and the first existing
:: one wins; each guard is skipped once WHISPER_BIN has been set.
if not defined WHISPER_BIN if exist "whisper-cli.exe" set "WHISPER_BIN=whisper-cli.exe"
if not defined WHISPER_BIN if exist "whisper-bin\whisper-cli.exe" set "WHISPER_BIN=whisper-bin\whisper-cli.exe"
if not defined WHISPER_BIN if exist "whisper.cpp\build\bin\Release\whisper-cli.exe" set "WHISPER_BIN=whisper.cpp\build\bin\Release\whisper-cli.exe"

:: Nothing supplied and nothing found: give up.
if not defined WHISPER_BIN (
    echo [X] whisper-cli.exe not found
    pause
    exit /b 1
)

echo Using whisper: %WHISPER_BIN%
|
|
|
|
:: --- Find model ---
:: Fall back to the bundled default model path unless the caller already
:: provided WHISPER_MODEL. The path is not checked for existence here.
if not defined WHISPER_MODEL set "WHISPER_MODEL=models\ggml-medium-q5_0.bin"
|
|
|
|
:: --- Activate venv if available ---
:: Optional step: the activation script is only called when a .venv folder
:: with an activate.bat exists next to this script.
if exist ".venv\Scripts\activate.bat" call .venv\Scripts\activate.bat
|
|
|
|
:: --- Dry run first ---
:: Runs retranscribe_tail.py with --dry-run before the real pass.
echo.
echo [1/2] Scanning for hallucinations...
echo.

:: WHISPER_BIN and WHISPER_MODEL are already part of this process's
:: environment, so the child process started below inherits them; no
:: re-assignment is needed. (Presumably retranscribe_tail.py reads both from
:: the environment -- confirm against that script.)

:: Choose the interpreter: the project venv when it exists, otherwise the
:: Python located earlier in PYTHON_CMD, and plain "python" as a last resort.
:: Hard-coding the venv path made the script fail on machines without a .venv
:: even though venv activation is optional.
set "RETRANSCRIBE_PY=%PYTHON_CMD%"
if exist ".venv\Scripts\python.exe" set "RETRANSCRIBE_PY=.venv\Scripts\python.exe"
if not defined RETRANSCRIBE_PY set "RETRANSCRIBE_PY=python"

"%RETRANSCRIBE_PY%" retranscribe_tail.py --dry-run
echo.
|
|
|
|
:: --- Fix ---
:: Runs retranscribe_tail.py for real and warns when it exits non-zero.
echo [2/2] Fixing hallucinated transcripts...
echo.

:: Choose the interpreter: the project venv when it exists, otherwise the
:: Python located earlier in PYTHON_CMD, and plain "python" as a last resort.
:: Hard-coding the venv path made the script fail on machines without a .venv
:: even though venv activation is optional.
set "RETRANSCRIBE_PY=%PYTHON_CMD%"
if exist ".venv\Scripts\python.exe" set "RETRANSCRIBE_PY=.venv\Scripts\python.exe"
if not defined RETRANSCRIBE_PY set "RETRANSCRIBE_PY=python"

"%RETRANSCRIBE_PY%" retranscribe_tail.py
if errorlevel 1 (
    echo.
    echo WARNING: Some fixes failed. Check output above.
)
|
|
|
|
:: --- Summary banner ---
echo.
echo ============================================================
:: The script runs with delayed expansion enabled, and in that mode cmd drops
:: an unpaired "!" from the line, so "Done!" would print as "Done". Turn
:: delayed expansion off for just this one echo so the text prints intact.
setlocal disabledelayedexpansion
echo Done! Originals backed up to transcripts\backup\
endlocal
echo ============================================================
|