Update cron, dashboard, root +3 more (+1 ~11)
This commit is contained in:
@@ -15,7 +15,7 @@ from src.claude_session import (
|
||||
PROJECT_ROOT,
|
||||
VALID_MODELS,
|
||||
)
|
||||
from src.fast_commands import dispatch as fast_dispatch
|
||||
from src.fast_commands import dispatch as fast_dispatch, split_text_chunks, extract_url_text
|
||||
from src.router import (
|
||||
route_message,
|
||||
_ralph_propose,
|
||||
@@ -916,6 +916,37 @@ def create_bot(config: Config) -> discord.Client:
|
||||
rezumat: bool = False,
|
||||
) -> None:
|
||||
await interaction.response.defer()
|
||||
voice = voce or "M2"
|
||||
|
||||
# URL fără rezumat → fetch + split în chunks + trimite pe rând
|
||||
if text_sau_url and text_sau_url.startswith("http") and not rezumat:
|
||||
text = await asyncio.to_thread(extract_url_text, text_sau_url)
|
||||
if not text:
|
||||
await interaction.followup.send("Nu am putut extrage text din URL.")
|
||||
return
|
||||
chunks = split_text_chunks(text, max_chars=1500)
|
||||
total = len(chunks)
|
||||
for i, chunk in enumerate(chunks, 1):
|
||||
result = await asyncio.to_thread(fast_dispatch, "audio", [voice, chunk])
|
||||
if result and result.startswith("__AUDIO__:"):
|
||||
wav_path = result[len("__AUDIO__:"):]
|
||||
try:
|
||||
filename = f"echo-audio-{i}din{total}.wav" if total > 1 else "echo-audio.wav"
|
||||
await interaction.followup.send(
|
||||
content=f"Bucata {i}/{total}" if total > 1 else None,
|
||||
file=discord.File(wav_path, filename=filename),
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
os.unlink(wav_path)
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
await interaction.followup.send(result or f"Eroare TTS la bucata {i}.")
|
||||
return
|
||||
return
|
||||
|
||||
# Comportament existent: text direct, gol, sau rezumat URL
|
||||
args: list[str] = []
|
||||
if voce:
|
||||
args.append(voce)
|
||||
|
||||
@@ -285,6 +285,23 @@ def register(tree: app_commands.CommandTree, bot: discord.Client) -> app_command
|
||||
msg = f"Default voce setată {new_voice}. Va intra în vigoare la următorul /voice join."
|
||||
await interaction.followup.send(msg, ephemeral=True)
|
||||
|
||||
@voice_group.command(name="stop", description="Oprește audio-ul curent (golește coada TTS)")
async def stop_audio(interaction: discord.Interaction) -> None:
    """Slash command: empty the TTS queue of the caller's guild voice session.

    The interaction is deferred ephemerally and always answered with one
    ephemeral follow-up: a "not in voice" notice when the guild has no
    session (or the session has no TTS queue), an error message when
    clearing the queue raises, or a confirmation on success.
    """
    await interaction.response.defer(ephemeral=True)

    # Resolve the session for this guild; interactions outside a guild
    # (or with no usable id) have none.
    active = None
    if interaction.guild:
        gid = interaction.guild.id
        if gid is not None:
            active = _voice_sessions.get(gid)

    if active is None or active.ttsq is None:
        await interaction.followup.send("Nu sunt în voice.", ephemeral=True)
        return

    try:
        active.ttsq.clear()
        log.info("voice stop: TTS queue cleared by user %s", interaction.user)
    except Exception as exc:
        # Report the failure to the user instead of letting the command die.
        log.warning("voice stop: ttsq.clear failed: %s", exc)
        reply = f"Eroare la oprire: {exc}"
    else:
        reply = "Audio oprit."

    await interaction.followup.send(reply, ephemeral=True)
|
||||
|
||||
@voice_group.command(name="doctor", description="Verifică voice stack")
|
||||
async def doctor(interaction: discord.Interaction) -> None:
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
|
||||
@@ -812,6 +812,51 @@ def _tts_synthesize(text: str, voice: str) -> dict:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
|
||||
def split_text_chunks(text: str, max_chars: int = 1500) -> list[str]:
    """Split ``text`` into chunks of at most ``max_chars`` characters.

    Splitting prefers the coarsest boundary that still fits:

    1. Paragraphs (separated by a blank line) are packed together and
       re-joined with a blank line.
    2. A paragraph longer than the limit is broken into sentences (split on
       whitespace following ``.``, ``!`` or ``?``), which are packed and
       re-joined with a single space. Its chunks are never merged with
       neighbouring paragraphs.
    3. A sentence that is still too long is wrapped on whitespace, and a
       single over-long word is cut, so no chunk can exceed the limit.

    Args:
        text: Text to split.
        max_chars: Maximum chunk length; values below 1 are treated as 1.

    Returns:
        A non-empty list of chunks. Text with no non-blank content yields a
        single chunk holding the first ``max_chars`` characters of ``text``.
    """
    import re as _re
    import textwrap as _textwrap

    # Guard against a zero/negative limit, which textwrap rejects.
    limit = max(1, max_chars)

    def _pack(pieces: list[str], sep: str) -> list[str]:
        """Greedily join ``pieces`` (each <= limit) with ``sep`` within limit."""
        packed: list[str] = []
        bucket: list[str] = []
        size = 0  # exact length of sep.join(bucket)
        for piece in pieces:
            extra = len(piece) + (len(sep) if bucket else 0)
            if bucket and size + extra > limit:
                packed.append(sep.join(bucket))
                bucket = []
                size = 0
                extra = len(piece)
            bucket.append(piece)
            size += extra
        if bucket:
            packed.append(sep.join(bucket))
        return packed

    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]

    chunks: list[str] = []
    pending: list[str] = []  # short paragraphs waiting to be packed together
    for para in paragraphs:
        if len(para) <= limit:
            pending.append(para)
            continue

        # Oversized paragraph: emit what was collected so far, then pack this
        # paragraph's sentences on their own so separators stay consistent.
        chunks.extend(_pack(pending, "\n\n"))
        pending = []

        pieces: list[str] = []
        for sent in _re.split(r"(?<=[.!?])\s+", para):
            if len(sent) <= limit:
                pieces.append(sent)
            else:
                # No sentence boundary to use: fall back to whitespace
                # wrapping, cutting words only when a word alone is too long.
                pieces.extend(
                    _textwrap.wrap(
                        sent,
                        width=limit,
                        break_long_words=True,
                        break_on_hyphens=False,
                        replace_whitespace=False,
                    )
                )
        chunks.extend(_pack(pieces, " "))

    chunks.extend(_pack(pending, "\n\n"))

    return chunks if chunks else [text[:limit]]
|
||||
|
||||
|
||||
def extract_url_text(url: str) -> str | None:
    """Public wrapper: return whatever ``_extract_url_text`` yields for ``url``."""
    extracted = _extract_url_text(url)
    return extracted
|
||||
|
||||
|
||||
def _extract_url_text(url: str) -> str | None:
|
||||
"""Extrage textul principal dintr-un URL cu trafilatura."""
|
||||
try:
|
||||
|
||||
@@ -53,6 +53,24 @@ NO_SPEECH_DROP_THRESHOLD = 0.6
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
LOGS_DIR = PROJECT_ROOT / "logs"
|
||||
VOICE_METRICS_PATH = LOGS_DIR / "voice_metrics.jsonl"
|
||||
VOICE_STT_LOG_PATH = LOGS_DIR / "voice_stt_log.jsonl"
|
||||
_stt_log_lock = threading.Lock()
|
||||
|
||||
|
||||
def _append_stt_log(entry: dict) -> None:
    """Append ``entry`` as one JSON line to ``VOICE_STT_LOG_PATH``.

    This log is independent of ``record_enabled``/``transcripts_jsonl_path``
    (which feed the KB): it is always on and exists for STT debugging, i.e.
    collecting code-switching mistranscriptions (English words inside
    Romanian speech) over several days to build a personal vocabulary
    correction table.

    Writing is best-effort: any failure is logged at debug level and
    swallowed so the caller is never interrupted.
    """
    try:
        LOGS_DIR.mkdir(parents=True, exist_ok=True)
        # Hold the module lock for the whole open/write so lines written
        # through this function are not interleaved.
        with _stt_log_lock:
            with VOICE_STT_LOG_PATH.open("a", encoding="utf-8") as sink:
                line = json.dumps(entry, ensure_ascii=False)
                sink.write(line + "\n")
    except Exception as exc:  # noqa: BLE001
        log.debug("STT log write failed: %s", exc)
|
||||
|
||||
|
||||
# ---------- Lazy model singletons ----------
|
||||
@@ -100,24 +118,31 @@ def _get_silero_vad():
|
||||
def _pcm48_stereo_to_16_mono(pcm: bytes) -> np.ndarray:
    """Discord 48kHz s16le stereo bytes -> 16kHz mono float32 in [-1, 1].

    The two channels are averaged to mono, then the signal is resampled
    from ``SAMPLE_RATE_DISCORD`` to ``SAMPLE_RATE_WHISPER`` with
    ``torchaudio.functional.resample`` (windowed-sinc interpolation,
    ``lowpass_filter_width=16``). No ``resampling_method`` is passed, so
    torchaudio's default window is used. This replaces an earlier
    every-3-samples average that had no anti-aliasing, which folded HF
    content (sibilants, fricatives) back into the speech band and degraded
    Whisper's accuracy on short wake phrases like "Salut, Eco".

    Trailing bytes that do not form a complete stereo frame (4 bytes) are
    ignored, so a truncated buffer never raises.

    Args:
        pcm: Raw interleaved stereo PCM, signed 16-bit.

    Returns:
        A contiguous 1-D ``np.float32`` array; empty when ``pcm`` holds no
        complete frame. faster-whisper + silero-vad accept it directly.
    """
    if not pcm:
        return np.zeros(0, dtype=np.float32)

    # One stereo frame = 2 channels x 2 bytes. Reading an explicit item count
    # avoids np.frombuffer's ValueError on buffers with a stray odd byte.
    frame_count = len(pcm) // 4
    if frame_count == 0:
        return np.zeros(0, dtype=np.float32)
    samples = np.frombuffer(pcm, dtype=np.int16, count=frame_count * 2)

    stereo = samples.reshape(-1, 2)
    mono48 = stereo.mean(axis=1).astype(np.float32) / 32768.0

    # Imported lazily so loading this module does not pull in torch.
    import torch
    import torchaudio.functional as taF

    wav = torch.from_numpy(mono48).unsqueeze(0)  # (1, time) layout for resample
    mono16 = taF.resample(
        wav, SAMPLE_RATE_DISCORD, SAMPLE_RATE_WHISPER,
        lowpass_filter_width=16,
    ).squeeze(0).numpy()
    return np.ascontiguousarray(mono16, dtype=np.float32)
|
||||
|
||||
|
||||
# ---------- VoiceSession ----------
|
||||
@@ -646,19 +671,25 @@ class EchoVoiceSink(AudioSink):
|
||||
def _flush_to_stt(self, user_id: int, pcm48_stereo: bytes) -> None:
|
||||
"""Downsample, Whisper-transcribe RO, drop hallucinations, dispatch."""
|
||||
try:
|
||||
t_start = time.monotonic()
|
||||
mono16 = _pcm48_stereo_to_16_mono(pcm48_stereo)
|
||||
if mono16.size == 0:
|
||||
return
|
||||
audio_duration_s = float(mono16.size) / float(SAMPLE_RATE_WHISPER)
|
||||
model = _get_whisper_model()
|
||||
segments, _info = model.transcribe(
|
||||
mono16, language="ro", beam_size=5,
|
||||
initial_prompt=(
|
||||
"Echo Core, asistent personal AI românesc al lui Marius. "
|
||||
"Conversație colocvială în română. "
|
||||
"Comenzi voce recunoscute: schimbă vocea pe M1, M2, M3, M4, M5, "
|
||||
"F1, F2, F3, F4, F5. Exemple: vorbește cu vocea M5, voce F3, "
|
||||
"Conversatie in romana cu asistentul Eco (Echo Core). "
|
||||
"Marius i se adreseaza cu 'Salut, Eco', 'Eco' sau 'Echo Core' "
|
||||
"la inceputul mesajului. Exemple: 'Salut, Eco, ce mai faci?', "
|
||||
"'Eco, adauga pe agenda de maine sa sun la Bianca', "
|
||||
"'Echo Core, vreau sa-mi reamintesti diseara'. "
|
||||
"Comenzi voce recunoscute: schimba vocea pe M1, M2, M3, M4, M5, "
|
||||
"F1, F2, F3, F4, F5. Exemple: vorbeste cu vocea M5, voce F3, "
|
||||
"treci pe vocea F1."
|
||||
),
|
||||
hotwords="Eco Echo Core Marius Bianca",
|
||||
condition_on_previous_text=False,
|
||||
)
|
||||
text_parts: list[str] = []
|
||||
@@ -677,6 +708,16 @@ class EchoVoiceSink(AudioSink):
|
||||
text = " ".join(text_parts).strip()
|
||||
if not text:
|
||||
return
|
||||
_append_stt_log({
|
||||
"ts": time.time(),
|
||||
"channel_id": self.session.voice_channel_id,
|
||||
"user_id": int(user_id),
|
||||
"text": text,
|
||||
"no_speech_prob": round(worst_no_speech, 3),
|
||||
"audio_duration_s": round(audio_duration_s, 3),
|
||||
"stt_latency_s": round(time.monotonic() - t_start, 3),
|
||||
"model": "small",
|
||||
})
|
||||
self._schedule_segment_done(user_id, text, worst_no_speech)
|
||||
except Exception as e: # noqa: BLE001
|
||||
log.warning("Whisper transcribe failed: %s", e)
|
||||
|
||||
Reference in New Issue
Block a user