"""
voice_setup.py — One-shot setup for Discord voice pipeline.

Run after `pip install -r requirements.txt`. Idempotent.

Steps:
  1. Verify libopus0 loaded by discord.py (apt install libopus0 if missing)
  2. Verify ffmpeg in PATH
  3. Verify Supertonic TTS reachable at :7788
  4. Warm faster-whisper small int8 (downloads to ~/.cache/huggingface/ if cold)
  5. Warm silero-vad
  6. Generate assets/voice/{beep_200ms,mhm,thinking}.wav via Supertonic + ffmpeg

Exit code: 0 = all green, 1 = something needs human intervention.
"""
from __future__ import annotations

import http.client
import json
import os
import shutil
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent
ASSETS_DIR = REPO_ROOT / "assets" / "voice"

SUPERTONIC_URL = "http://127.0.0.1:7788/v1/audio/speech"
SUPERTONIC_VOICE = "M2"

# ANSI colour escapes used for the status tags printed by _ok/_fail/_warn.
GREEN = "\033[32m"
RED = "\033[31m"
YELLOW = "\033[33m"
RESET = "\033[0m"


def _ok(msg: str) -> None:
    """Print *msg* prefixed with a green ``[ OK ]`` tag."""
    print(f"{GREEN}[ OK ]{RESET} {msg}")


def _fail(msg: str) -> None:
    """Print *msg* prefixed with a red ``[FAIL]`` tag."""
    print(f"{RED}[FAIL]{RESET} {msg}")


def _warn(msg: str) -> None:
    """Print *msg* prefixed with a yellow ``[WARN]`` tag."""
    print(f"{YELLOW}[WARN]{RESET} {msg}")


def check_libopus() -> bool:
    """Check that discord.py reports libopus as loaded.

    If ``discord.opus.is_loaded()`` is initially false, one fallback attempt
    is made through ``discord.opus._load_default()`` before giving up.

    Returns:
        True when libopus is loaded, False when discord.py is missing or
        libopus could not be loaded.
    """
    try:
        import discord
    except ImportError:
        _fail("discord.py not installed — run `pip install -r requirements.txt`")
        return False

    if discord.opus.is_loaded():
        _ok("libopus loaded (discord.py)")
        return True

    # Best-effort fallback: never let it abort the setup run, but surface the
    # reason instead of swallowing it silently.
    try:
        discord.opus._load_default()
    except Exception as exc:
        _warn(f"libopus fallback load raised {type(exc).__name__}: {exc}")

    if discord.opus.is_loaded():
        _ok("libopus loaded after fallback")
        return True

    _fail(
        "libopus NOT loaded — Discord voice will fail silent. "
        "Run: sudo apt install -y libopus0"
    )
    return False


def check_ffmpeg() -> bool:
    """Check that an ``ffmpeg`` executable is on PATH.

    Returns:
        True when ``shutil.which("ffmpeg")`` finds an executable.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if not ffmpeg_path:
        _fail("ffmpeg not in PATH — required for audio asset generation")
        return False
    _ok(f"ffmpeg at {ffmpeg_path}")
    return True


def _speech_request(text: str) -> urllib.request.Request:
    """Build the JSON POST request asking Supertonic to synthesize *text*."""
    payload = {
        "model": "supertonic-3",
        "input": text,
        "voice": SUPERTONIC_VOICE,
        "response_format": "wav",
        "lang": "ro",
    }
    return urllib.request.Request(
        SUPERTONIC_URL,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )


def check_supertonic() -> bool:
    """Probe the Supertonic TTS endpoint with a short synthesis request.

    Returns:
        True only when the endpoint answers with HTTP 200. Connection
        failures, timeouts, protocol errors and any other status are
        reported via ``_fail`` and yield False.
    """
    try:
        with urllib.request.urlopen(_speech_request("test"), timeout=5) as resp:
            status = resp.status
    except urllib.error.HTTPError as e:
        # The server answered, so it is reachable; report the status rather
        # than telling the user to start a service that is already running.
        _fail(f"Supertonic returned HTTP {e.code} at {SUPERTONIC_URL}")
        return False
    except (OSError, http.client.HTTPException) as e:
        # OSError covers URLError, refused/reset connections and timeouts.
        _fail(
            f"Supertonic unreachable at :7788 — {e}. "
            "Start: systemctl --user start supertonic-tts"
        )
        return False

    if status == 200:
        _ok(f"Supertonic up at {SUPERTONIC_URL}")
        return True
    _fail(f"Supertonic returned non-200 (HTTP {status})")
    return False


def warm_whisper() -> bool:
    """Instantiate the faster-whisper ``small`` int8 CPU model once.

    Returns:
        True when the model was constructed, False when faster-whisper is
        not installed or construction raised.
    """
    try:
        from faster_whisper import WhisperModel
    except ImportError:
        _fail("faster-whisper not installed")
        return False

    print(" Warming faster-whisper small int8 (downloads if cold)...")
    t0 = time.perf_counter()
    try:
        WhisperModel("small", device="cpu", compute_type="int8", cpu_threads=4)
    except Exception as e:
        # Third-party loader: any failure is a failed check, not a crash.
        _fail(f"faster-whisper warm failed: {e}")
        return False
    elapsed = time.perf_counter() - t0
    _ok(f"faster-whisper small int8 warm ({elapsed:.1f}s)")
    return True


def warm_silero() -> bool:
    """Load the silero-vad model once.

    Returns:
        True when ``load_silero_vad()`` succeeded, False when silero-vad is
        not installed or loading raised.
    """
    try:
        from silero_vad import load_silero_vad
    except ImportError:
        _fail("silero-vad not installed")
        return False

    print(" Warming silero-vad...")
    t0 = time.perf_counter()
    try:
        load_silero_vad()
    except Exception as e:
        # Third-party loader: any failure is a failed check, not a crash.
        _fail(f"silero-vad warm failed: {e}")
        return False
    elapsed = time.perf_counter() - t0
    _ok(f"silero-vad warm ({elapsed:.1f}s)")
    return True


def _supertonic_synth(text: str, out_path: Path) -> bool:
    """Synthesize *text* with Supertonic and write the response to *out_path*.

    Parent directories of *out_path* are created as needed.

    Returns:
        True when a non-empty response body was written, False otherwise
        (the failure is reported via ``_fail``).
    """
    try:
        with urllib.request.urlopen(_speech_request(text), timeout=30) as resp:
            wav_bytes = resp.read()
        if not wav_bytes:
            # An empty body is not audio; do not leave a zero-byte asset.
            _fail(f"Supertonic synth failed for {out_path.name}: empty response body")
            return False
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_bytes(wav_bytes)
        return True
    except Exception as e:
        _fail(f"Supertonic synth failed for {out_path.name}: {e}")
        return False


def _asset_path(filename: str, assets_dir: Path | None) -> Path:
    """Return *filename* inside *assets_dir*, defaulting to ``ASSETS_DIR``."""
    base = ASSETS_DIR if assets_dir is None else assets_dir
    return base / filename


def _asset_size(path: Path) -> int:
    """Return the size of *path* in bytes, or 0 when it cannot be stat'ed."""
    try:
        return path.stat().st_size
    except OSError:
        return 0


def _discard(path: Path) -> None:
    """Remove *path* if possible; used to drop partial output after a failure."""
    try:
        path.unlink(missing_ok=True)
    except OSError:
        pass  # best effort: the caller already reports the real failure


def _gen_speech_asset(
    filename: str, text: str, min_bytes: int, assets_dir: Path | None
) -> bool:
    """Ensure the spoken asset *filename* exists, synthesizing it if needed.

    A file counts as present only when it is larger than *min_bytes*; the
    same threshold is applied to a freshly generated file so that a run
    never reports success for an asset the next run would regenerate.
    """
    path = _asset_path(filename, assets_dir)
    size = _asset_size(path)
    if size > min_bytes:
        _ok(f"{filename} exists ({size} bytes)")
        return True

    print(f" Generating {filename} via Supertonic...")
    if not _supertonic_synth(text, path):
        return False

    size = _asset_size(path)
    if size <= min_bytes:
        _fail(f"{filename} generated but too small ({size} bytes, need > {min_bytes})")
        return False
    _ok(f"{filename} generated ({size} bytes)")
    return True


def gen_thinking_wav(*, assets_dir: Path | None = None) -> bool:
    """Ensure ``thinking.wav`` exists (more than 1024 bytes), via Supertonic.

    Args:
        assets_dir: Directory holding the asset; defaults to ``ASSETS_DIR``.

    Returns:
        True when the asset already existed or was generated successfully.
    """
    return _gen_speech_asset(
        "thinking.wav", "Stai puțin să-mi adun gândurile.", 1024, assets_dir
    )


def gen_mhm_wav(*, assets_dir: Path | None = None) -> bool:
    """Ensure ``mhm.wav`` exists (more than 512 bytes), via Supertonic.

    Args:
        assets_dir: Directory holding the asset; defaults to ``ASSETS_DIR``.

    Returns:
        True when the asset already existed or was generated successfully.
    """
    return _gen_speech_asset("mhm.wav", "Mhm.", 512, assets_dir)


def gen_beep_wav(*, assets_dir: Path | None = None) -> bool:
    """Ensure ``beep_200ms.wav`` exists (more than 512 bytes), via ffmpeg.

    Args:
        assets_dir: Directory holding the asset; defaults to ``ASSETS_DIR``.

    Returns:
        True when the asset already existed or ffmpeg produced it. On any
        failure the partial output is removed so a later run does not
        mistake it for a valid asset.
    """
    min_bytes = 512
    path = _asset_path("beep_200ms.wav", assets_dir)
    size = _asset_size(path)
    if size > min_bytes:
        _ok(f"beep_200ms.wav exists ({size} bytes)")
        return True

    print(" Generating beep_200ms.wav via ffmpeg (880Hz sine, 200ms)...")
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        subprocess.run(
            [
                "ffmpeg",
                "-y",
                "-loglevel",
                "error",
                "-f",
                "lavfi",
                "-i",
                "sine=frequency=880:duration=0.2:sample_rate=48000",
                "-af",
                "afade=t=out:st=0.15:d=0.05,volume=0.3",
                "-ac",
                "2",
                str(path),
            ],
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers ffmpeg vanishing from PATH or an unwritable directory.
        _discard(path)
        _fail(f"ffmpeg beep gen failed: {e}")
        return False

    size = _asset_size(path)
    if size <= min_bytes:
        _discard(path)
        _fail(f"beep_200ms.wav generated but too small ({size} bytes, need > {min_bytes})")
        return False
    _ok(f"beep_200ms.wav generated ({size} bytes)")
    return True


def main() -> int:
    """Run every setup step in order and print a summary.

    Asset generation is skipped (and counted as failed) when the tool it
    depends on — Supertonic or ffmpeg — failed its own check.

    Returns:
        0 when every check passed, 1 otherwise.
    """
    print("voice_setup.py — Discord voice pipeline setup\n")

    # Keep the gating results in named variables rather than indexing into
    # the checks list, so reordering the list cannot silently break gating.
    libopus_ok = check_libopus()
    ffmpeg_ok = check_ffmpeg()
    supertonic_ok = check_supertonic()

    checks: list[tuple[str, bool]] = [
        ("libopus", libopus_ok),
        ("ffmpeg", ffmpeg_ok),
        ("Supertonic", supertonic_ok),
        ("faster-whisper", warm_whisper()),
        ("silero-vad", warm_silero()),
    ]

    if supertonic_ok:
        checks.append(("thinking.wav", gen_thinking_wav()))
        checks.append(("mhm.wav", gen_mhm_wav()))
    else:
        _warn("Skipping thinking.wav / mhm.wav generation — Supertonic down")
        checks.append(("thinking.wav", False))
        checks.append(("mhm.wav", False))

    if ffmpeg_ok:
        checks.append(("beep_200ms.wav", gen_beep_wav()))
    else:
        _warn("Skipping beep_200ms.wav — ffmpeg missing")
        checks.append(("beep_200ms.wav", False))

    print()
    failed = [name for name, ok in checks if not ok]
    if failed:
        print(
            f"{RED}FAILED:{RESET} {len(failed)}/{len(checks)} — "
            "fix above before /voice join works:"
        )
        for name in failed:
            print(f" - {name}")
        return 1

    print(f"{GREEN}ALL GREEN{RESET} ({len(checks)} checks). Voice pipeline ready.")
    return 0


if __name__ == "__main__":
    sys.exit(main())