# echo-core/tools/voice_setup.py

"""
voice_setup.py — One-shot setup for Discord voice pipeline.

Run after `pip install -r requirements.txt`. Idempotent.

Steps:
1. Verify libopus0 loaded by discord.py (apt install libopus0 if missing)
2. Verify ffmpeg in PATH
3. Verify Supertonic TTS reachable at :7788
4. Warm faster-whisper small int8 (downloads to ~/.cache/huggingface/ if cold)
5. Warm silero-vad
6. Generate assets/voice/{beep_200ms,mhm,thinking}.wav via Supertonic + ffmpeg

Exit code: 0 = all green, 1 = something needs human intervention.
"""

from __future__ import annotations

import os
import shutil
import subprocess
import sys
import time
import urllib.request
import urllib.error
import json
from pathlib import Path


# Repository root: two directory levels above this script's resolved location.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory where the generated voice assets (.wav files) are written.
ASSETS_DIR = REPO_ROOT / "assets" / "voice"
# Local Supertonic TTS speech endpoint, used for both the health probe and synthesis.
SUPERTONIC_URL = "http://127.0.0.1:7788/v1/audio/speech"
# Voice identifier sent with every Supertonic request.
SUPERTONIC_VOICE = "M2"

# ANSI escape sequences used to colour the status tags printed by _ok/_fail/_warn.
GREEN = "\033[32m"
RED = "\033[31m"
YELLOW = "\033[33m"
RESET = "\033[0m"


def _ok(msg: str) -> None:
    """Print *msg* to stdout behind a green ``[ OK ]`` status tag."""
    tag = f"{GREEN}[ OK ]{RESET}"
    # print() joins its arguments with a single space, giving "<tag> <msg>".
    print(tag, msg)


def _fail(msg: str) -> None:
    """Print *msg* to stdout behind a red ``[FAIL]`` status tag."""
    tag = f"{RED}[FAIL]{RESET}"
    # print() joins its arguments with a single space, giving "<tag> <msg>".
    print(tag, msg)


def _warn(msg: str) -> None:
    """Print *msg* to stdout behind a yellow ``[WARN]`` status tag."""
    tag = f"{YELLOW}[WARN]{RESET}"
    # print() joins its arguments with a single space, giving "<tag> <msg>".
    print(tag, msg)


def check_libopus() -> bool:
    """Verify that discord.py has the Opus library loaded.

    When Opus is not loaded yet, one best-effort attempt is made through
    discord.py's private ``_load_default`` loader before giving up.

    Returns:
        True when ``discord.opus.is_loaded()`` reports true, either right away
        or after the load attempt. False when discord.py cannot be imported or
        Opus is still not loaded; a [FAIL] line is printed in both cases.
    """
    try:
        import discord
    except ImportError:
        _fail("discord.py not installed — run `pip install -r requirements.txt`")
        return False

    opus = discord.opus
    if not opus.is_loaded():
        # Best-effort fallback: any error raised by the loader is deliberately
        # ignored because the is_loaded() re-check below decides the outcome.
        try:
            opus._load_default()
        except Exception:
            pass

        if not opus.is_loaded():
            _fail(
                "libopus NOT loaded — Discord voice will fail silent. "
                "Run: sudo apt install -y libopus0"
            )
            return False

        _ok("libopus loaded after fallback")
        return True

    _ok("libopus loaded (discord.py)")
    return True


def check_ffmpeg() -> bool:
    """Verify that an ``ffmpeg`` executable can be found on PATH.

    Returns:
        True (after printing the resolved location) when ``shutil.which``
        finds ffmpeg, False (after printing a [FAIL] line) otherwise.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path:
        _ok(f"ffmpeg at {ffmpeg_path}")
        return True

    _fail("ffmpeg not in PATH — required for audio asset generation")
    return False


def check_supertonic() -> bool:
    """Probe the Supertonic TTS endpoint with a tiny synthesis request.

    A POST with a short test input is sent to ``SUPERTONIC_URL`` using a
    5 second timeout. The response body is not read; only the status matters.

    Returns:
        True only when the server answers with HTTP 200. False, after printing
        a [FAIL] line, when the server answers with any other status or when
        the request fails at the transport level (connection refused, timeout,
        malformed HTTP response).
    """
    # Function-scope import: only needed for the HTTPException handler below.
    import http.client

    payload = {
        "model": "supertonic-3",
        "input": "test",
        "voice": SUPERTONIC_VOICE,
        "response_format": "wav",
        "lang": "ro",
    }
    req = urllib.request.Request(
        SUPERTONIC_URL,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            status = resp.status
    except urllib.error.HTTPError as e:
        # HTTPError subclasses URLError, so it must be handled first: the
        # server IS reachable, it just rejected the request (4xx/5xx).
        _fail(f"Supertonic returned non-200 (HTTP {e.code}) at {SUPERTONIC_URL}")
        return False
    except (OSError, http.client.HTTPException) as e:
        # OSError covers URLError, ConnectionError and socket timeouts, which
        # urlopen does not always wrap in URLError. HTTPException covers a
        # non-HTTP or malformed reply from whatever listens on the port.
        _fail(f"Supertonic unreachable at :7788 — {e}. Start: systemctl --user start supertonic-tts")
        return False

    if status == 200:
        _ok(f"Supertonic up at {SUPERTONIC_URL}")
        return True

    _fail(f"Supertonic returned non-200 (HTTP {status})")
    return False


def warm_whisper() -> bool:
    """Instantiate the faster-whisper ``small`` int8 CPU model once.

    The model object is discarded; constructing it is the whole point (the
    progress message notes that this downloads the model if it is cold).

    Returns:
        True when the model was constructed, False (after printing a [FAIL]
        line) when faster-whisper is not installed or construction raised.
    """
    try:
        from faster_whisper import WhisperModel
    except ImportError:
        _fail("faster-whisper not installed")
        return False

    print("    Warming faster-whisper small int8 (downloads if cold)...")
    model_options = {"device": "cpu", "compute_type": "int8", "cpu_threads": 4}
    started = time.perf_counter()
    try:
        WhisperModel("small", **model_options)
        took = time.perf_counter() - started
        _ok(f"faster-whisper small int8 warm ({took:.1f}s)")
    except Exception as exc:
        # Any failure (download, backend, ...) is reported instead of crashing.
        _fail(f"faster-whisper warm failed: {exc}")
        return False
    return True


def warm_silero() -> bool:
    """Load the silero-vad model once and report how long it took.

    The loaded model is discarded; only the success of the load matters.

    Returns:
        True when ``load_silero_vad()`` completed, False (after printing a
        [FAIL] line) when silero-vad is not installed or loading raised.
    """
    try:
        from silero_vad import load_silero_vad
    except ImportError:
        _fail("silero-vad not installed")
        return False

    print("    Warming silero-vad...")
    started = time.perf_counter()
    try:
        load_silero_vad()
        took = time.perf_counter() - started
        _ok(f"silero-vad warm ({took:.1f}s)")
    except Exception as exc:
        # Any load failure is reported instead of crashing the setup run.
        _fail(f"silero-vad warm failed: {exc}")
        return False
    return True


def _supertonic_synth(text: str, out_path: Path) -> bool:
    """Synthesize *text* via Supertonic and write the response to *out_path*.

    Args:
        text: Text to speak; sent as the ``input`` field of the request.
        out_path: Destination file. Missing parent directories are created.

    Returns:
        True when a non-empty response body was written to *out_path*.
        False, after printing a [FAIL] line, when the request or the write
        raised, or when the server returned an empty body.
    """
    payload = {
        "model": "supertonic-3",
        "input": text,
        "voice": SUPERTONIC_VOICE,
        "response_format": "wav",
        "lang": "ro",
    }
    req = urllib.request.Request(
        SUPERTONIC_URL,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            wav_bytes = resp.read()
        if not wav_bytes:
            # An empty success response would otherwise be written to disk
            # and reported as a successfully generated asset.
            _fail(f"Supertonic synth failed for {out_path.name}: empty response body")
            return False
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_bytes(wav_bytes)
        return True
    except Exception as e:
        # Boundary handler: network, HTTP and filesystem errors all end up
        # as a reported failure rather than a traceback.
        _fail(f"Supertonic synth failed for {out_path.name}: {e}")
        return False


def gen_thinking_wav() -> bool:
    """Ensure ``thinking.wav`` exists in ``ASSETS_DIR``, synthesizing it if needed.

    An existing file larger than 1024 bytes is kept as is. Otherwise the
    phrase is synthesized through Supertonic and the result must pass the same
    size check, so an output this run accepts is also accepted on the next run.

    Returns:
        True when a sufficiently large file is present afterwards, False when
        synthesis failed or produced a file at or below the size threshold.
    """
    min_bytes = 1024
    path = ASSETS_DIR / "thinking.wav"

    if path.exists():
        size = path.stat().st_size
        if size > min_bytes:
            _ok(f"thinking.wav exists ({size} bytes)")
            return True

    print("    Generating thinking.wav via Supertonic...")
    if not _supertonic_synth("Stai puțin să-mi adun gândurile.", path):
        return False

    size = path.stat().st_size
    if size <= min_bytes:
        # Apply the same criterion as the cached-file check above; otherwise
        # a truncated response would be reported as OK now and redone later.
        _fail(f"thinking.wav generated but too small ({size} bytes, expected > {min_bytes})")
        return False

    _ok(f"thinking.wav generated ({size} bytes)")
    return True


def gen_mhm_wav() -> bool:
    """Ensure ``mhm.wav`` exists in ``ASSETS_DIR``, synthesizing it if needed.

    An existing file larger than 512 bytes is kept as is. Otherwise "Mhm." is
    synthesized through Supertonic and the result must pass the same size
    check, so an output this run accepts is also accepted on the next run.

    Returns:
        True when a sufficiently large file is present afterwards, False when
        synthesis failed or produced a file at or below the size threshold.
    """
    min_bytes = 512
    path = ASSETS_DIR / "mhm.wav"

    if path.exists():
        size = path.stat().st_size
        if size > min_bytes:
            _ok(f"mhm.wav exists ({size} bytes)")
            return True

    print("    Generating mhm.wav via Supertonic...")
    if not _supertonic_synth("Mhm.", path):
        return False

    size = path.stat().st_size
    if size <= min_bytes:
        # Apply the same criterion as the cached-file check above; otherwise
        # a truncated response would be reported as OK now and redone later.
        _fail(f"mhm.wav generated but too small ({size} bytes, expected > {min_bytes})")
        return False

    _ok(f"mhm.wav generated ({size} bytes)")
    return True


def gen_beep_wav() -> bool:
    """Ensure ``beep_200ms.wav`` exists in ``ASSETS_DIR``, rendering it if needed.

    An existing file larger than 512 bytes is kept as is. Otherwise ffmpeg
    renders a 200 ms, 880 Hz sine at 48 kHz with a 50 ms fade-out, volume 0.3
    and two channels, and the output must pass the same size check.

    Returns:
        True when a sufficiently large file is present afterwards. False,
        after printing a [FAIL] line, when ffmpeg could not be started, exited
        non-zero, or produced a file at or below the size threshold.
    """
    min_bytes = 512
    path = ASSETS_DIR / "beep_200ms.wav"

    if path.exists():
        size = path.stat().st_size
        if size > min_bytes:
            _ok(f"beep_200ms.wav exists ({size} bytes)")
            return True

    print("    Generating beep_200ms.wav via ffmpeg (880Hz sine, 200ms)...")
    path.parent.mkdir(parents=True, exist_ok=True)
    command = [
        "ffmpeg",
        "-y",
        "-loglevel",
        "error",
        "-f",
        "lavfi",
        "-i",
        "sine=frequency=880:duration=0.2:sample_rate=48000",
        "-af",
        "afade=t=out:st=0.15:d=0.05,volume=0.3",
        "-ac",
        "2",
        str(path),
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ffmpeg binary, which
        # subprocess raises as FileNotFoundError/PermissionError instead of
        # CalledProcessError.
        _fail(f"ffmpeg beep gen failed: {e}")
        return False

    size = path.stat().st_size if path.exists() else 0
    if size <= min_bytes:
        # Apply the same criterion as the cached-file check above so a
        # truncated output is not reported as success.
        _fail(f"beep_200ms.wav generated but too small ({size} bytes, expected > {min_bytes})")
        return False

    _ok(f"beep_200ms.wav generated ({size} bytes)")
    return True


def main() -> int:
    """Run every setup step in order and print a summary.

    All five checks always run. The Supertonic-backed assets are generated
    only when the Supertonic probe passed, and the beep only when ffmpeg was
    found; skipped assets are recorded as failures.

    Returns:
        0 when every recorded step succeeded, 1 otherwise.
    """
    print("voice_setup.py — Discord voice pipeline setup\n")

    # Bind the results that gate asset generation to names up front.
    libopus_ok = check_libopus()
    ffmpeg_ok = check_ffmpeg()
    supertonic_ok = check_supertonic()

    checks: list[tuple[str, bool]] = [
        ("libopus", libopus_ok),
        ("ffmpeg", ffmpeg_ok),
        ("Supertonic", supertonic_ok),
        ("faster-whisper", warm_whisper()),
        ("silero-vad", warm_silero()),
    ]

    if not supertonic_ok:
        _warn("Skipping thinking.wav / mhm.wav generation — Supertonic down")
        checks.extend([("thinking.wav", False), ("mhm.wav", False)])
    else:
        checks.append(("thinking.wav", gen_thinking_wav()))
        checks.append(("mhm.wav", gen_mhm_wav()))

    if not ffmpeg_ok:
        _warn("Skipping beep_200ms.wav — ffmpeg missing")
        checks.append(("beep_200ms.wav", False))
    else:
        checks.append(("beep_200ms.wav", gen_beep_wav()))

    print()
    failed = [label for label, passed in checks if not passed]
    if not failed:
        print(f"{GREEN}ALL GREEN{RESET} ({len(checks)} checks). Voice pipeline ready.")
        return 0

    print(f"{RED}FAILED:{RESET} {len(failed)}/{len(checks)} — fix above before /voice join works:")
    for label in failed:
        print(f"  - {label}")
    return 1


# Script entry point: run the setup and use main()'s return value as the
# process exit code (0 = all green, 1 = something failed).
if __name__ == "__main__":
    sys.exit(main())