feat(voice): unify Discord voice↔text session (squash of voice/text-unify)
Voice utterances and text messages on the same Discord channel now share one Claude session, and Echo's voice replies are mirrored back into the text channel. Replaces the old voice:<id> session-key split. Changes: - src/adapters/_text_chunks.py: new leaf module for split_message (used by both discord_bot and voice pipeline) - src/router.py: drop voice: prefix from session_key; add [voice] marker; strip leading [speaker:/[voice] tokens from user input (anti-jailbreak); remove dead double-clear of voice: key - src/claude_session.py: include personality/VOICE_MODE.md unconditionally (rules become per-turn-aware via [speaker:] prefix instead of session flag) - src/voice/pipeline.py: VoiceSession splits text_channel_id + voice_channel_id; resolve text channel per-send (no stale refs); mirror Echo's reply text into the text channel after route_message returns - src/adapters/discord_voice.py: /voice join passes both channel ids - src/adapters/discord_bot.py: import split_message from leaf module - personality/VOICE_MODE.md: rewrite as per-turn dynamic rules; add synthesis instructions for text turns after voice turns Tests: - tests/test_router.py: 4 new cases (plain channel_id, anti-jailbreak, text-adapter regression, no-double-clear) - tests/test_pipeline_mirror.py: new — Echo reply mirror chunking, empty guard, mirror_enabled=False, send-raises resilience - tests/test_voice_session_channel_ids.py: new — split-attr contract + metrics payload schema - tests/test_voice_session_cleanup.py: update for new kwargs Plan: /home/moltbot/.claude/plans/vreau-ca-tot-textul-greedy-rivest.md Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -128,9 +128,9 @@ class VoiceSession:
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
channel_id: int,
|
||||
text_channel_id: int,
|
||||
voice_channel_id: int,
|
||||
guild_id: int,
|
||||
text_channel: Any,
|
||||
voice_client: Any,
|
||||
bot: Any,
|
||||
ttsq: Any,
|
||||
@@ -141,9 +141,9 @@ class VoiceSession:
|
||||
loop: Optional[asyncio.AbstractEventLoop] = None,
|
||||
router_route_message: Optional[Callable] = None,
|
||||
):
|
||||
self.channel_id = int(channel_id)
|
||||
self.text_channel_id = int(text_channel_id)
|
||||
self.voice_channel_id = int(voice_channel_id)
|
||||
self.guild_id = int(guild_id)
|
||||
self.text_channel = text_channel
|
||||
self.voice_client = voice_client
|
||||
self.bot = bot
|
||||
self.ttsq = ttsq
|
||||
@@ -256,6 +256,29 @@ class VoiceSession:
|
||||
|
||||
# ----- segment completion (scheduled from sink) -----
|
||||
|
||||
async def _resolve_text_channel(self) -> Any:
    """Look up the channel object for ``self.text_channel_id`` via the bot.

    The lookup is performed on every call instead of reusing a stored
    channel object, so a reference obtained before a websocket resume
    (which can invalidate cached objects) is never reused.

    Order of attempts:
      1. ``bot.get_channel(id)`` -- used when it returns a non-``None`` value.
      2. ``bot.fetch_channel(id)`` -- awaited when it hands back a coroutine,
         otherwise its return value is passed through unchanged.

    Returns:
        The resolved channel, or ``None`` when there is no bot, neither
        lookup is available / produces a channel, or a lookup raises
        (the exception is logged as a warning and swallowed).
    """
    client = self.bot
    if client is None:
        return None

    try:
        # First attempt: whatever get_channel returns for this id.
        lookup = getattr(client, "get_channel", None)
        if callable(lookup):
            found = lookup(self.text_channel_id)
            if found is not None:
                return found

        # Fallback: fetch_channel, which may or may not be a coroutine function.
        fetcher = getattr(client, "fetch_channel", None)
        if not callable(fetcher):
            return None
        pending = fetcher(self.text_channel_id)
        if not asyncio.iscoroutine(pending):
            return pending
        return await pending
    except Exception as e:  # noqa: BLE001
        # Best-effort: a failed resolve must not break the voice pipeline.
        log.warning("voice text_channel resolve failed: %s", e)
        return None
|
||||
|
||||
async def on_segment_done(
|
||||
self,
|
||||
speaker_id: int,
|
||||
@@ -281,10 +304,11 @@ class VoiceSession:
|
||||
await self._handle_voice_change(speaker_name, text, new_voice)
|
||||
return
|
||||
|
||||
# 1. Mirror to text channel (one Unicode 🎤 — exception per plan).
|
||||
if self.mirror_enabled and self.text_channel is not None:
|
||||
# 1. Mirror user utterance to text channel.
|
||||
text_channel = await self._resolve_text_channel() if self.mirror_enabled else None
|
||||
if self.mirror_enabled and text_channel is not None:
|
||||
try:
|
||||
send = getattr(self.text_channel, "send", None)
|
||||
send = getattr(text_channel, "send", None)
|
||||
if callable(send):
|
||||
coro = send(f"\U0001f3a4 {speaker_name}: \"{text}\"")
|
||||
if asyncio.iscoroutine(coro):
|
||||
@@ -321,19 +345,39 @@ class VoiceSession:
|
||||
|
||||
# Dispatch to Claude. send_message is sync subprocess, run on
|
||||
# a worker thread so the loop stays responsive for mirror/TTS.
|
||||
response_text = ""
|
||||
try:
|
||||
await asyncio.to_thread(
|
||||
result = await asyncio.to_thread(
|
||||
self._route_message,
|
||||
str(self.channel_id),
|
||||
str(self.text_channel_id),
|
||||
str(speaker_id),
|
||||
text,
|
||||
None, # model
|
||||
voice_stream_callback, # on_text
|
||||
"discord-voice", # adapter_name
|
||||
)
|
||||
if isinstance(result, tuple) and result:
|
||||
response_text = result[0] or ""
|
||||
except Exception as e: # noqa: BLE001
|
||||
log.error("route_message voice path failed: %s", e)
|
||||
|
||||
# 3. Mirror Echo's reply back into the text channel so voice + text
|
||||
# stay aligned. Resolved per-send to avoid stale refs after reconnect.
|
||||
if self.mirror_enabled and response_text and response_text.strip():
|
||||
reply_channel = await self._resolve_text_channel()
|
||||
if reply_channel is not None:
|
||||
from src.adapters._text_chunks import split_message
|
||||
try:
|
||||
for chunk in split_message(response_text):
|
||||
send = getattr(reply_channel, "send", None)
|
||||
if not callable(send):
|
||||
break
|
||||
coro = send(chunk)
|
||||
if asyncio.iscoroutine(coro):
|
||||
await coro
|
||||
except Exception as e: # noqa: BLE001
|
||||
log.warning("voice echo-reply mirror send failed: %s", e)
|
||||
|
||||
async def _handle_voice_change(
|
||||
self, speaker_name: str, original_text: str, new_voice: str,
|
||||
) -> None:
|
||||
@@ -354,18 +398,20 @@ class VoiceSession:
|
||||
except Exception as e: # noqa: BLE001
|
||||
log.warning("voice default persist failed: %s", e)
|
||||
# 3. Mirror what was heard + show the swap in the text channel.
|
||||
if self.mirror_enabled and self.text_channel is not None:
|
||||
try:
|
||||
send = getattr(self.text_channel, "send", None)
|
||||
if callable(send):
|
||||
coro = send(
|
||||
f"\U0001f3a4 {speaker_name}: \"{original_text}\"\n"
|
||||
f"\U0001f50a Voce → **{new_voice}**"
|
||||
)
|
||||
if asyncio.iscoroutine(coro):
|
||||
await coro
|
||||
except Exception as e: # noqa: BLE001
|
||||
log.warning("voice mirror send failed: %s", e)
|
||||
if self.mirror_enabled:
|
||||
text_channel = await self._resolve_text_channel()
|
||||
if text_channel is not None:
|
||||
try:
|
||||
send = getattr(text_channel, "send", None)
|
||||
if callable(send):
|
||||
coro = send(
|
||||
f"\U0001f3a4 {speaker_name}: \"{original_text}\"\n"
|
||||
f"\U0001f50a Voce → **{new_voice}**"
|
||||
)
|
||||
if asyncio.iscoroutine(coro):
|
||||
await coro
|
||||
except Exception as e: # noqa: BLE001
|
||||
log.warning("voice mirror send failed: %s", e)
|
||||
# 4. Verbal acknowledgment in the NEW voice.
|
||||
try:
|
||||
self.ttsq.push_text(f"Vocea {new_voice}.")
|
||||
@@ -391,8 +437,18 @@ class VoiceSession:
|
||||
return str(speaker_id)
|
||||
|
||||
def _log_metric(self, payload: dict) -> None:
|
||||
"""Append a structured event to ``logs/voice_metrics.jsonl``."""
|
||||
event = {"ts": time.time(), "channel_id": self.channel_id, **payload}
|
||||
"""Append a structured event to ``logs/voice_metrics.jsonl``.
|
||||
|
||||
``claude_session_key`` is the channel id used to key the unified
|
||||
Claude session (text channel where the user invoked /voice join);
|
||||
``voice_channel_id`` is the actual Discord voice channel id.
|
||||
"""
|
||||
event = {
|
||||
"ts": time.time(),
|
||||
"claude_session_key": str(self.text_channel_id),
|
||||
"voice_channel_id": self.voice_channel_id,
|
||||
**payload,
|
||||
}
|
||||
try:
|
||||
LOGS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
with open(VOICE_METRICS_PATH, "a", buffering=1, encoding="utf-8") as f:
|
||||
|
||||
Reference in New Issue
Block a user