feat(voice): Pas 2 — install voice deps, vendor discord-ext-voice-recv, setup assets
Foundation pentru Discord voice-to-voice pipeline. - requirements.txt: faster-whisper, silero-vad, num2words, numpy, PyNaCl - vendor/discord-ext-voice-recv/: vendored la commit ac04ea7b09 (bump version 0.5.3a) — Discord voice protocol fragil, upstream hobby fork. Adapter layer in src/voice/_discord_voice_adapter.py izolează churn (swap la py-cord = doar acel fișier rescris). VENDOR_INFO.md documentează update procedure. - tools/voice_setup.py: idempotent setup script — libopus check, ffmpeg check, Supertonic reachable, faster-whisper/silero-vad warm, assets generation. Exit 0 = green, 1 = needs human (currently libopus missing needs `sudo apt install -y libopus0`). - assets/voice/: thinking.wav (filler "Stai puțin să-mi adun gândurile", ~2.8s), mhm.wav (listener noise), beep_200ms.wav (wake-up tone 880Hz). - src/voice/__init__.py: package stub. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
273
tools/voice_setup.py
Normal file
273
tools/voice_setup.py
Normal file
@@ -0,0 +1,273 @@
|
||||
"""
|
||||
voice_setup.py — One-shot setup for Discord voice pipeline.
|
||||
|
||||
Run after `pip install -r requirements.txt`. Idempotent.
|
||||
|
||||
Steps:
|
||||
1. Verify libopus0 loaded by discord.py (apt install libopus0 if missing)
|
||||
2. Verify ffmpeg in PATH
|
||||
3. Verify Supertonic TTS reachable at :7788
|
||||
4. Warm faster-whisper small int8 (downloads to ~/.cache/huggingface/ if cold)
|
||||
5. Warm silero-vad
|
||||
6. Generate assets/voice/{beep_200ms,mhm,thinking}.wav via Supertonic + ffmpeg
|
||||
|
||||
Exit code: 0 = all green, 1 = something needs human intervention.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Repository root: two directory levels above this script's resolved path.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory that receives the generated voice assets.
ASSETS_DIR = REPO_ROOT.joinpath("assets", "voice")
# Speech endpoint of the local Supertonic TTS service and the voice requested from it.
SUPERTONIC_URL = "http://127.0.0.1:7788/v1/audio/speech"
SUPERTONIC_VOICE = "M2"

# ANSI escape sequences used to colour the status tags printed by this script.
GREEN = "\033[32m"
RED = "\033[31m"
YELLOW = "\033[33m"
RESET = "\033[0m"
|
||||
|
||||
|
||||
def _ok(msg: str) -> None:
    """Print *msg* to stdout behind a green ``[ OK ]`` status tag."""
    tag = f"{GREEN}[ OK ]{RESET}"
    print(f"{tag} {msg}")
|
||||
|
||||
|
||||
def _fail(msg: str) -> None:
    """Print *msg* to stdout behind a red ``[FAIL]`` status tag."""
    tag = f"{RED}[FAIL]{RESET}"
    print(f"{tag} {msg}")
|
||||
|
||||
|
||||
def _warn(msg: str) -> None:
    """Print *msg* to stdout behind a yellow ``[WARN]`` status tag."""
    tag = f"{YELLOW}[WARN]{RESET}"
    print(f"{tag} {msg}")
|
||||
|
||||
|
||||
def check_libopus() -> bool:
    """Report whether discord.py has libopus loaded.

    If ``discord.opus.is_loaded()`` is false at first, discord.py's default
    loader is tried once and the state is checked again.

    Returns:
        True when libopus is loaded (immediately or after the fallback
        attempt); False when discord.py cannot be imported or libopus is
        still not loaded.
    """
    try:
        import discord
    except ImportError:
        _fail("discord.py not installed — run `pip install -r requirements.txt`")
        return False

    opus = discord.opus
    success_msg = "libopus loaded (discord.py)"
    loaded = opus.is_loaded()

    if not loaded:
        try:
            # Private discord.py helper. Errors are ignored on purpose: the
            # is_loaded() re-check right below is what decides the outcome.
            opus._load_default()
        except Exception:
            pass
        loaded = opus.is_loaded()
        success_msg = "libopus loaded after fallback"

    if loaded:
        _ok(success_msg)
        return True

    _fail(
        "libopus NOT loaded — Discord voice will fail silent. "
        "Run: sudo apt install -y libopus0"
    )
    return False
|
||||
|
||||
|
||||
def check_ffmpeg() -> bool:
    """Report whether an ``ffmpeg`` executable can be found on PATH.

    Returns:
        True (after printing the resolved location) when ``shutil.which``
        finds ffmpeg, False otherwise.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path:
        _ok(f"ffmpeg at {ffmpeg_path}")
        return True

    _fail("ffmpeg not in PATH — required for audio asset generation")
    return False
|
||||
|
||||
|
||||
def check_supertonic() -> bool:
    """Probe the Supertonic TTS endpoint with a minimal synthesis request.

    POSTs a short "test" utterance to ``SUPERTONIC_URL`` with a 5 second
    timeout and reports the outcome through ``_ok`` / ``_fail``.

    Returns:
        True only when the service answers with HTTP 200. False when it
        answers with any other status, or when it cannot be reached (connection
        failure, timeout, or a reply that is not valid HTTP).
    """
    # Function-scope import: only needed for the HTTPException handler below.
    import http.client

    body = json.dumps(
        {
            "model": "supertonic-3",
            "input": "test",
            "voice": SUPERTONIC_VOICE,
            "response_format": "wav",
            "lang": "ro",
        }
    ).encode("utf-8")
    req = urllib.request.Request(
        SUPERTONIC_URL,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            status = resp.status
    except urllib.error.HTTPError as e:
        # HTTPError subclasses URLError, so it has to be handled first: the
        # service IS reachable, it just rejected the request.
        _fail(f"Supertonic returned HTTP {e.code} at {SUPERTONIC_URL}")
        return False
    except (OSError, http.client.HTTPException) as e:
        # OSError covers URLError, ConnectionError and socket timeouts raised
        # while waiting for the response; HTTPException covers replies that
        # are not valid HTTP (e.g. another service listening on the port).
        _fail(f"Supertonic unreachable at :7788 — {e}. Start: systemctl --user start supertonic-tts")
        return False

    if status == 200:
        _ok(f"Supertonic up at {SUPERTONIC_URL}")
        return True

    _fail(f"Supertonic returned HTTP {status} (expected 200)")
    return False
|
||||
|
||||
|
||||
def warm_whisper() -> bool:
    """Instantiate the faster-whisper model once so later runs start warm.

    Builds a ``WhisperModel("small")`` on CPU with int8 compute and 4 CPU
    threads, then discards it. The progress message notes that this downloads
    the model when the cache is cold.

    Returns:
        True when the model was constructed, False when faster-whisper is not
        importable or construction raised.
    """
    try:
        from faster_whisper import WhisperModel
    except ImportError:
        _fail("faster-whisper not installed")
        return False

    print(" Warming faster-whisper small int8 (downloads if cold)...")
    started = time.perf_counter()
    try:
        WhisperModel("small", device="cpu", compute_type="int8", cpu_threads=4)
        took = time.perf_counter() - started
        _ok(f"faster-whisper small int8 warm ({took:.1f}s)")
    except Exception as exc:
        # Boundary of the setup script: report any failure instead of crashing.
        _fail(f"faster-whisper warm failed: {exc}")
        return False
    return True
|
||||
|
||||
|
||||
def warm_silero() -> bool:
    """Load the silero-vad model once and report how long it took.

    The loaded model is discarded; only success or failure is reported.

    Returns:
        True when ``load_silero_vad()`` completed, False when silero-vad is
        not importable or loading raised.
    """
    try:
        from silero_vad import load_silero_vad
    except ImportError:
        _fail("silero-vad not installed")
        return False

    print(" Warming silero-vad...")
    started = time.perf_counter()
    try:
        load_silero_vad()
        took = time.perf_counter() - started
        _ok(f"silero-vad warm ({took:.1f}s)")
    except Exception as exc:
        # Boundary of the setup script: report any failure instead of crashing.
        _fail(f"silero-vad warm failed: {exc}")
        return False
    return True
|
||||
|
||||
|
||||
def _supertonic_synth(text: str, out_path: Path) -> bool:
    """Synthesize *text* with Supertonic and store the WAV at *out_path*.

    The response is accepted only if it carries a RIFF/WAVE header. It is
    written to a sibling ``.part`` file first and then renamed over
    *out_path*, so an interrupted run never leaves a truncated asset that a
    size-only existence check would accept.

    Args:
        text: Utterance to synthesize.
        out_path: Destination file; parent directories are created as needed.

    Returns:
        True when a WAV payload was received and stored, False otherwise
        (the reason is reported through ``_fail``).
    """
    payload = {
        "model": "supertonic-3",
        "input": text,
        "voice": SUPERTONIC_VOICE,
        "response_format": "wav",
        "lang": "ro",
    }
    req = urllib.request.Request(
        SUPERTONIC_URL,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    # Staging file lives next to the target so the final rename stays on one
    # filesystem.
    staging = out_path.with_name(out_path.name + ".part")
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            wav_bytes = resp.read()

        # A WAV file is a RIFF container whose form type is "WAVE"; anything
        # else (empty body, JSON error document, ...) must not become an asset.
        if wav_bytes[:4] != b"RIFF" or wav_bytes[8:12] != b"WAVE":
            _fail(
                f"Supertonic synth failed for {out_path.name}: "
                f"response is not WAV audio ({len(wav_bytes)} bytes received)"
            )
            return False

        out_path.parent.mkdir(parents=True, exist_ok=True)
        staging.write_bytes(wav_bytes)
        staging.replace(out_path)
        return True
    except Exception as e:
        _fail(f"Supertonic synth failed for {out_path.name}: {e}")
        try:
            # Best-effort cleanup of a partially written staging file.
            staging.unlink()
        except OSError:
            pass
        return False
|
||||
|
||||
|
||||
def gen_thinking_wav() -> bool:
    """Ensure ``thinking.wav`` exists in ``ASSETS_DIR``, synthesizing it if needed.

    An existing file larger than 1024 bytes is kept untouched; otherwise the
    filler phrase is synthesized through Supertonic.

    Returns:
        True when the file already existed or was generated, False when
        synthesis failed.
    """
    target = ASSETS_DIR / "thinking.wav"
    already_present = target.exists() and target.stat().st_size > 1024
    if already_present:
        _ok(f"thinking.wav exists ({target.stat().st_size} bytes)")
        return True

    print(" Generating thinking.wav via Supertonic...")
    if not _supertonic_synth("Stai puțin să-mi adun gândurile.", target):
        return False

    _ok(f"thinking.wav generated ({target.stat().st_size} bytes)")
    return True
|
||||
|
||||
|
||||
def gen_mhm_wav() -> bool:
    """Ensure ``mhm.wav`` exists in ``ASSETS_DIR``, synthesizing it if needed.

    An existing file larger than 512 bytes is kept untouched; otherwise the
    utterance "Mhm." is synthesized through Supertonic.

    Returns:
        True when the file already existed or was generated, False when
        synthesis failed.
    """
    target = ASSETS_DIR / "mhm.wav"
    already_present = target.exists() and target.stat().st_size > 512
    if already_present:
        _ok(f"mhm.wav exists ({target.stat().st_size} bytes)")
        return True

    print(" Generating mhm.wav via Supertonic...")
    if not _supertonic_synth("Mhm.", target):
        return False

    _ok(f"mhm.wav generated ({target.stat().st_size} bytes)")
    return True
|
||||
|
||||
|
||||
def gen_beep_wav() -> bool:
    """Ensure ``beep_200ms.wav`` exists in ``ASSETS_DIR``, rendering it with ffmpeg if needed.

    An existing file larger than 512 bytes is kept untouched. Otherwise ffmpeg
    renders an 880 Hz sine of 0.2 s at 48000 Hz, faded out over the last
    0.05 s, scaled to volume 0.3, with two output channels.

    Returns:
        True when the file already existed or was generated, False when ffmpeg
        exited with a non-zero status.
    """
    target = ASSETS_DIR / "beep_200ms.wav"
    if target.exists() and target.stat().st_size > 512:
        _ok(f"beep_200ms.wav exists ({target.stat().st_size} bytes)")
        return True

    print(" Generating beep_200ms.wav via ffmpeg (880Hz sine, 200ms)...")
    target.parent.mkdir(parents=True, exist_ok=True)

    command = [
        "ffmpeg",
        "-y",  # overwrite a too-small leftover file without prompting
        "-loglevel",
        "error",
        "-f",
        "lavfi",
        "-i",
        "sine=frequency=880:duration=0.2:sample_rate=48000",
        "-af",
        "afade=t=out:st=0.15:d=0.05,volume=0.3",
        "-ac",
        "2",
        str(target),
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as exc:
        _fail(f"ffmpeg beep gen failed: {exc}")
        return False
    else:
        _ok(f"beep_200ms.wav generated ({target.stat().st_size} bytes)")
        return True
|
||||
|
||||
|
||||
def main() -> int:
    """Run every setup check in order and print a summary.

    Asset generation depends on earlier results: the two Supertonic-backed
    WAVs are only attempted when the Supertonic probe succeeded, and the beep
    only when ffmpeg was found. Skipped assets are recorded as failures.

    Returns:
        0 when every check passed, 1 when at least one failed or was skipped.
    """
    print(f"voice_setup.py — Discord voice pipeline setup\n")

    # Keep the prerequisite results under names so the asset steps below do
    # not depend on positions inside the results list.
    libopus_ok = check_libopus()
    ffmpeg_ok = check_ffmpeg()
    supertonic_ok = check_supertonic()

    checks: list[tuple[str, bool]] = [
        ("libopus", libopus_ok),
        ("ffmpeg", ffmpeg_ok),
        ("Supertonic", supertonic_ok),
        ("faster-whisper", warm_whisper()),
        ("silero-vad", warm_silero()),
    ]

    if supertonic_ok:
        checks.append(("thinking.wav", gen_thinking_wav()))
        checks.append(("mhm.wav", gen_mhm_wav()))
    else:
        _warn("Skipping thinking.wav / mhm.wav generation — Supertonic down")
        checks.extend([("thinking.wav", False), ("mhm.wav", False)])

    if ffmpeg_ok:
        checks.append(("beep_200ms.wav", gen_beep_wav()))
    else:
        _warn("Skipping beep_200ms.wav — ffmpeg missing")
        checks.append(("beep_200ms.wav", False))

    print()
    failed = [label for label, passed in checks if not passed]
    if not failed:
        print(f"{GREEN}ALL GREEN{RESET} ({len(checks)} checks). Voice pipeline ready.")
        return 0

    print(f"{RED}FAILED:{RESET} {len(failed)}/{len(checks)} — fix above before /voice join works:")
    for label in failed:
        print(f" - {label}")
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Hand main()'s status to the shell as the process exit code.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user