stage-9: heartbeat system with periodic checks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
MoltBot Service
2026-02-13 16:40:39 +00:00
parent 24a4d87f8c
commit 0bc4b8cb3e
6 changed files with 527 additions and 1 deletions

10
cli.py
View File

@@ -405,6 +405,12 @@ def _cron_disable(name: str):
print(f"Job '{name}' not found.")
def cmd_heartbeat(args):
    """Run the heartbeat checks once and print the summary to stdout.

    Args:
        args: Parsed CLI namespace; not read by this command.
    """
    # Imported at call time so the heartbeat module is only loaded when
    # this subcommand is actually invoked.
    from src.heartbeat import run_heartbeat

    summary = run_heartbeat()
    print(summary)
def cmd_secrets(args):
"""Handle secrets subcommand."""
if args.secrets_action == "set":
@@ -509,6 +515,9 @@ def main():
secrets_sub.add_parser("test", help="Check required secrets")
# heartbeat
sub.add_parser("heartbeat", help="Run heartbeat health checks")
# cron
cron_parser = sub.add_parser("cron", help="Manage scheduled jobs")
cron_sub = cron_parser.add_subparsers(dest="cron_action")
@@ -554,6 +563,7 @@ def main():
cmd_channel(a) if a.channel_action else (channel_parser.print_help() or sys.exit(0))
),
"send": cmd_send,
"heartbeat": cmd_heartbeat,
"cron": lambda a: (
cmd_cron(a) if a.cron_action else (cron_parser.print_help() or sys.exit(0))
),

View File

@@ -10,5 +10,6 @@
"2026-02-02": "15:00 UTC - Email OK (nimic nou). Cron jobs funcționale toată ziua.",
"2026-02-03": "12:00 UTC - Calendar: sesiune 15:00 alertată. Emailuri răspuns rapoarte în inbox (deja read).",
"2026-02-04": "06:00 UTC - Toate emailurile deja citite. KB index la zi. Upcoming: morning-report 08:30."
}
},
"last_run": "2026-02-13T16:23:07.411969+00:00"
}

View File

@@ -123,6 +123,7 @@ def create_bot(config: Config) -> discord.Client:
"`/model <choice>` — Change model for this channel's session",
"`/logs [n]` — Show last N log lines (default 10)",
"`/restart` — Restart the bot process (owner only)",
"`/heartbeat` — Run heartbeat health checks",
"",
"**Cron Jobs**",
"`/cron list` — List all scheduled jobs",
@@ -400,6 +401,18 @@ def create_bot(config: Config) -> discord.Client:
tree.add_command(cron_group)
@tree.command(name="heartbeat", description="Run heartbeat health checks")
async def heartbeat_cmd(interaction: discord.Interaction) -> None:
    """Slash command: run the heartbeat checks and reply privately.

    The interaction is deferred first, the synchronous ``run_heartbeat`` is
    executed in a worker thread, and the summary (or the error text) is sent
    as an ephemeral follow-up.
    """
    from src.heartbeat import run_heartbeat

    # Discord rejects message content longer than 2000 characters. Without
    # truncation a long summary would make the send fail and be reported to
    # the user as a "Heartbeat error" instead of the actual findings.
    max_len = 2000

    await interaction.response.defer(ephemeral=True)
    try:
        result = await asyncio.to_thread(run_heartbeat)
        await interaction.followup.send(result[:max_len], ephemeral=True)
    except Exception as e:
        await interaction.followup.send(
            f"Heartbeat error: {e}"[:max_len], ephemeral=True
        )
@tree.command(name="channels", description="List registered channels")
async def channels(interaction: discord.Interaction) -> None:
ch_map = config.get("channels", {})

163
src/heartbeat.py Normal file
View File

@@ -0,0 +1,163 @@
"""Echo Core heartbeat — periodic health checks."""
import json
import logging
import subprocess
from datetime import datetime, timezone
from pathlib import Path
log = logging.getLogger(__name__)
PROJECT_ROOT = Path(__file__).resolve().parent.parent
STATE_FILE = PROJECT_ROOT / "memory" / "heartbeat-state.json"
TOOLS_DIR = PROJECT_ROOT / "tools"
def run_heartbeat(quiet_hours: tuple[int, int] = (23, 8)) -> str:
    """Run every heartbeat check and return a one-line summary.

    All four checks (email, calendar, KB index, git) run on every call, and
    the state file's "last_run" is refreshed with the current UTC timestamp
    regardless of the outcome.

    Args:
        quiet_hours: ``(start, end)`` hours compared against the local
            wall-clock hour. ``start > end`` means the window wraps past
            midnight.

    Returns:
        "HEARTBEAT_OK" when no check reported anything, or when the call
        falls inside quiet hours (all findings are suppressed then).
        Otherwise the findings joined with " | ".
    """
    timestamp = datetime.now(timezone.utc)
    # Quiet hours follow the machine's local clock, not UTC.
    suppress = _is_quiet_hour(datetime.now().hour, quiet_hours)

    state = _load_state()

    # Executed in this order; each check returns a message or None.
    checks = (
        lambda: _check_email(state),
        lambda: _check_calendar(state),
        _check_kb_index,
        _check_git,
    )
    findings = [message for message in (check() for check in checks) if message]

    state["last_run"] = timestamp.isoformat()
    _save_state(state)

    if suppress or not findings:
        return "HEARTBEAT_OK"
    return " | ".join(findings)
def _is_quiet_hour(hour: int, quiet_hours: tuple[int, int]) -> bool:
"""Check if current hour is in quiet range. Handles overnight (23-08)."""
start, end = quiet_hours
if start > end: # overnight
return hour >= start or hour < end
return start <= hour < end
def _check_email(state: dict) -> str | None:
    """Check for new emails via tools/email_check.py.

    Args:
        state: Heartbeat state dict; not read by this check.

    Returns:
        "Email: <output>" when the script exits 0 and prints something other
        than "0". None in every other case: script missing, non-zero exit,
        empty or "0" output, or the subprocess call raised.
    """
    script = TOOLS_DIR / "email_check.py"
    if not script.exists():
        return None
    try:
        result = subprocess.run(
            ["python3", str(script)],
            capture_output=True, text=True, timeout=30,
            cwd=str(PROJECT_ROOT),
        )
    except Exception as e:
        # Best-effort check: a broken tool must not take the heartbeat down.
        log.warning("Email check failed: %s", e)
        return None

    if result.returncode != 0:
        # Previously ignored silently; surface it so a broken script is visible.
        log.warning(
            "Email check exited with code %s: %s",
            result.returncode, result.stderr,
        )
        return None

    output = result.stdout.strip()
    if output and output != "0":
        return f"Email: {output}"
    return None
def _check_calendar(state: dict) -> str | None:
    """Check upcoming calendar events via tools/calendar_check.py.

    The script is invoked with the single argument "soon".

    Args:
        state: Heartbeat state dict; not read by this check.

    Returns:
        "Calendar: <output>" when the script exits 0 and prints anything.
        None in every other case: script missing, non-zero exit, empty
        output, or the subprocess call raised.
    """
    script = TOOLS_DIR / "calendar_check.py"
    if not script.exists():
        return None
    try:
        result = subprocess.run(
            ["python3", str(script), "soon"],
            capture_output=True, text=True, timeout=30,
            cwd=str(PROJECT_ROOT),
        )
    except Exception as e:
        # Best-effort check: a broken tool must not take the heartbeat down.
        log.warning("Calendar check failed: %s", e)
        return None

    if result.returncode != 0:
        # Previously ignored silently; surface it so a broken script is visible.
        log.warning(
            "Calendar check exited with code %s: %s",
            result.returncode, result.stderr,
        )
        return None

    output = result.stdout.strip()
    if output:
        return f"Calendar: {output}"
    return None
def _check_kb_index() -> str | None:
    """Check if any .md files in memory/kb/ are newer than index.json.

    Returns:
        "KB: index missing" when index.json cannot be found,
        "KB: <n> files need reindex" when n markdown files (searched
        recursively) have a modification time later than the index,
        otherwise None.
    """
    kb_dir = PROJECT_ROOT / "memory" / "kb"
    index_file = kb_dir / "index.json"

    # stat() directly instead of exists()+stat(): avoids a race where the
    # index disappears between the two calls.
    try:
        index_mtime = index_file.stat().st_mtime
    except (FileNotFoundError, NotADirectoryError):
        return "KB: index missing"

    newer = 0
    for md in kb_dir.rglob("*.md"):
        try:
            if md.stat().st_mtime > index_mtime:
                newer += 1
        except FileNotFoundError:
            # File was removed while we were scanning; nothing to reindex.
            continue

    if newer:
        return f"KB: {newer} files need reindex"
    return None
def _check_git() -> str | None:
    """Check for uncommitted files in project.

    Runs ``git status --porcelain`` in the project root.

    Returns:
        "Git: <n> uncommitted" where n is the number of non-blank status
        lines, or None when the tree is clean, git exits non-zero, or the
        subprocess call raised.
    """
    try:
        result = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True, text=True, timeout=10,
            cwd=str(PROJECT_ROOT),
        )
    except Exception as e:
        # Log like the other checks do instead of swallowing silently.
        log.warning("Git check failed: %s", e)
        return None

    if result.returncode != 0:
        log.debug("git status exited with code %s", result.returncode)
        return None

    changed = [line for line in result.stdout.splitlines() if line.strip()]
    if changed:
        return f"Git: {len(changed)} uncommitted"
    return None
def _load_state() -> dict:
    """Load heartbeat state from JSON file.

    Returns:
        The parsed state dict, or the default
        ``{"last_run": None, "checks": {}}`` when the file is missing,
        unreadable, not valid UTF-8/JSON, or does not contain a JSON object.
    """
    default = {"last_run": None, "checks": {}}
    try:
        state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # First run: no state yet.
        return default
    except (ValueError, OSError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (the latter was previously uncaught and crashed the heartbeat).
        log.warning("Heartbeat state unreadable, using defaults: %s", exc)
        return default

    if not isinstance(state, dict):
        # Callers assign keys on the result; a list/str/number would crash.
        log.warning("Heartbeat state is not a JSON object, using defaults")
        return default
    return state
def _save_state(state: dict) -> None:
    """Save heartbeat state to JSON file.

    The parent directory is created if needed. The payload is written to a
    sibling ".tmp" file and then moved over the real file, so a reader (or a
    crash mid-write) never observes a truncated state file.

    Args:
        state: JSON-serializable state dict.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state, indent=2, ensure_ascii=False) + "\n"

    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp_file.write_text(payload, encoding="utf-8")
    # Path.replace overwrites the destination in a single rename step.
    tmp_file.replace(STATE_FILE)

View File

@@ -64,6 +64,33 @@ def main():
scheduler = Scheduler(send_callback=_send_to_channel, config=config)
client.scheduler = scheduler # type: ignore[attr-defined]
# Heartbeat: register as periodic job if enabled
hb_config = config.get("heartbeat", {})
if hb_config.get("enabled"):
from src.heartbeat import run_heartbeat
interval_min = hb_config.get("interval_minutes", 30)
async def _heartbeat_tick() -> None:
    """Run heartbeat and log result.

    ``run_heartbeat`` is synchronous, so it is executed in a worker thread.
    Failures are logged and never propagate to the scheduler.
    """
    try:
        result = await asyncio.to_thread(run_heartbeat)
        logger.info("Heartbeat: %s", result)
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Heartbeat failed: %s", exc)
from apscheduler.triggers.interval import IntervalTrigger
scheduler._scheduler.add_job(
_heartbeat_tick,
trigger=IntervalTrigger(minutes=interval_min),
id="__heartbeat__",
max_instances=1,
)
logger.info(
"Heartbeat registered (every %d min)", interval_min
)
# PID file
PID_FILE.write_text(str(os.getpid()))

312
tests/test_heartbeat.py Normal file
View File

@@ -0,0 +1,312 @@
"""Tests for src/heartbeat.py — Periodic health checks."""
import json
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from src.heartbeat import (
_check_calendar,
_check_email,
_check_git,
_check_kb_index,
_is_quiet_hour,
_load_state,
_save_state,
run_heartbeat,
)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def tmp_env(tmp_path, monkeypatch):
    """Point the heartbeat module's path constants at a throwaway tree.

    Creates project/, project/tools/ and project/memory/ under ``tmp_path``
    and patches PROJECT_ROOT, STATE_FILE and TOOLS_DIR in ``src.heartbeat``.
    Returns the created paths keyed by "root", "tools", "memory" and
    "state_file".
    """
    project_dir = tmp_path / "project"
    tools_dir = project_dir / "tools"
    memory_dir = project_dir / "memory"
    for directory in (project_dir, tools_dir, memory_dir):
        directory.mkdir()
    state_path = memory_dir / "heartbeat-state.json"

    overrides = {
        "src.heartbeat.PROJECT_ROOT": project_dir,
        "src.heartbeat.STATE_FILE": state_path,
        "src.heartbeat.TOOLS_DIR": tools_dir,
    }
    for target, value in overrides.items():
        monkeypatch.setattr(target, value)

    return {
        "root": project_dir,
        "tools": tools_dir,
        "memory": memory_dir,
        "state_file": state_path,
    }
# ---------------------------------------------------------------------------
# _is_quiet_hour
# ---------------------------------------------------------------------------
class TestIsQuietHour:
    """Quiet-hour detection for overnight and same-day windows."""

    @pytest.mark.parametrize(
        ("hour", "expected"),
        [
            (23, True),   # at the start, before midnight
            (3, True),    # after midnight
            (12, False),  # outside the window
            (8, False),   # hour == end is NOT quiet (end is exclusive)
        ],
    )
    def test_overnight_range(self, hour, expected):
        assert _is_quiet_hour(hour, (23, 8)) is expected

    @pytest.mark.parametrize(
        ("hour", "expected"),
        [
            (12, True),   # inside
            (9, True),    # at the start (inclusive)
            (17, False),  # at the end (exclusive)
            (20, False),  # outside
        ],
    )
    def test_daytime_range(self, hour, expected):
        assert _is_quiet_hour(hour, (9, 17)) is expected
# ---------------------------------------------------------------------------
# _check_email
# ---------------------------------------------------------------------------
class TestCheckEmail:
    """Test email check via tools/email_check.py."""

    @staticmethod
    def _install_script(env):
        """Create a placeholder email_check.py so the existence guard passes."""
        script = env["tools"] / "email_check.py"
        script.write_text("pass")
        return script

    def test_no_script(self, tmp_env):
        """Returns None when email_check.py does not exist."""
        assert _check_email({}) is None

    def test_with_output(self, tmp_env):
        """Returns formatted email string when script outputs something."""
        self._install_script(tmp_env)
        mock_result = MagicMock(returncode=0, stdout="3 new messages\n")
        with patch("src.heartbeat.subprocess.run", return_value=mock_result):
            assert _check_email({}) == "Email: 3 new messages"

    def test_zero_output(self, tmp_env):
        """Returns None when script outputs '0' (no new mail)."""
        self._install_script(tmp_env)
        mock_result = MagicMock(returncode=0, stdout="0\n")
        with patch("src.heartbeat.subprocess.run", return_value=mock_result):
            assert _check_email({}) is None

    def test_empty_output(self, tmp_env):
        """Returns None when script outputs empty string."""
        self._install_script(tmp_env)
        mock_result = MagicMock(returncode=0, stdout="\n")
        with patch("src.heartbeat.subprocess.run", return_value=mock_result):
            assert _check_email({}) is None

    def test_nonzero_returncode(self, tmp_env):
        """Returns None when script exits with error."""
        self._install_script(tmp_env)
        mock_result = MagicMock(returncode=1, stdout="error")
        with patch("src.heartbeat.subprocess.run", return_value=mock_result):
            assert _check_email({}) is None

    def test_subprocess_exception(self, tmp_env):
        """Returns None when subprocess.run raises (spawn failure or timeout)."""
        import subprocess

        self._install_script(tmp_env)
        # subprocess.run signals a timeout with TimeoutExpired, not the
        # builtin TimeoutError, so exercise the exception it really raises.
        failures = (
            OSError("spawn failed"),
            subprocess.TimeoutExpired(cmd="python3", timeout=30),
        )
        for failure in failures:
            with patch("src.heartbeat.subprocess.run", side_effect=failure):
                assert _check_email({}) is None
# ---------------------------------------------------------------------------
# _check_calendar
# ---------------------------------------------------------------------------
class TestCheckCalendar:
    """Test calendar check via tools/calendar_check.py."""

    @staticmethod
    def _install_script(env):
        """Create a placeholder calendar_check.py so the existence guard passes."""
        script = env["tools"] / "calendar_check.py"
        script.write_text("pass")
        return script

    def test_no_script(self, tmp_env):
        """Returns None when calendar_check.py does not exist."""
        assert _check_calendar({}) is None

    def test_with_events(self, tmp_env):
        """Returns formatted calendar string when script outputs events."""
        self._install_script(tmp_env)
        mock_result = MagicMock(returncode=0, stdout="Meeting at 3pm\n")
        with patch("src.heartbeat.subprocess.run", return_value=mock_result):
            assert _check_calendar({}) == "Calendar: Meeting at 3pm"

    def test_empty_output(self, tmp_env):
        """Returns None when no upcoming events."""
        self._install_script(tmp_env)
        mock_result = MagicMock(returncode=0, stdout="\n")
        with patch("src.heartbeat.subprocess.run", return_value=mock_result):
            assert _check_calendar({}) is None

    def test_nonzero_returncode(self, tmp_env):
        """Returns None when script exits with error, even if it printed text."""
        self._install_script(tmp_env)
        mock_result = MagicMock(returncode=1, stdout="error")
        with patch("src.heartbeat.subprocess.run", return_value=mock_result):
            assert _check_calendar({}) is None

    def test_subprocess_exception(self, tmp_env):
        """Returns None when subprocess raises."""
        self._install_script(tmp_env)
        with patch("src.heartbeat.subprocess.run", side_effect=OSError("fail")):
            assert _check_calendar({}) is None
# ---------------------------------------------------------------------------
# _check_kb_index
# ---------------------------------------------------------------------------
class TestCheckKbIndex:
    """Test KB index freshness check."""

    # Fixed, well-separated timestamps. Setting mtimes explicitly keeps the
    # tests deterministic on filesystems with coarse timestamp granularity,
    # where a 50 ms sleep is not enough to order two writes.
    OLDER = 1_000_000_000
    NEWER = 1_000_000_100

    @staticmethod
    def _write(path, content, mtime):
        """Write ``content`` to ``path`` and pin its access/modify time."""
        import os

        path.write_text(content)
        os.utime(path, (mtime, mtime))

    def test_missing_index(self, tmp_env):
        """Returns warning when index.json does not exist."""
        assert _check_kb_index() == "KB: index missing"

    def test_up_to_date(self, tmp_env):
        """Returns None when all .md files are older than index."""
        kb_dir = tmp_env["root"] / "memory" / "kb"
        kb_dir.mkdir(parents=True)
        self._write(kb_dir / "notes.md", "old notes", self.OLDER)
        self._write(kb_dir / "index.json", "{}", self.NEWER)
        assert _check_kb_index() is None

    def test_needs_reindex(self, tmp_env):
        """Returns reindex warning when .md files are newer than index."""
        kb_dir = tmp_env["root"] / "memory" / "kb"
        kb_dir.mkdir(parents=True)
        self._write(kb_dir / "index.json", "{}", self.OLDER)
        self._write(kb_dir / "a.md", "new", self.NEWER)
        self._write(kb_dir / "b.md", "also new", self.NEWER)
        assert _check_kb_index() == "KB: 2 files need reindex"
# ---------------------------------------------------------------------------
# _check_git
# ---------------------------------------------------------------------------
class TestCheckGit:
    """Behaviour of the git working-tree check with subprocess.run stubbed."""

    def test_clean(self, tmp_env):
        """A blank porcelain listing means nothing to report."""
        completed = MagicMock(returncode=0, stdout="\n")
        with patch("src.heartbeat.subprocess.run", return_value=completed):
            outcome = _check_git()
        assert outcome is None

    def test_dirty(self, tmp_env):
        """Each non-blank porcelain line counts as one uncommitted entry."""
        porcelain = " M file1.py\n?? file2.py\n M file3.py\n"
        completed = MagicMock(returncode=0, stdout=porcelain)
        with patch("src.heartbeat.subprocess.run", return_value=completed):
            outcome = _check_git()
        assert outcome == "Git: 3 uncommitted"

    def test_subprocess_exception(self, tmp_env):
        """A failure to launch git is reported as None, not raised."""
        with patch("src.heartbeat.subprocess.run", side_effect=OSError):
            outcome = _check_git()
        assert outcome is None
# ---------------------------------------------------------------------------
# _load_state / _save_state
# ---------------------------------------------------------------------------
class TestState:
    """Loading and saving of the heartbeat state file."""

    # What _load_state is expected to fall back to.
    DEFAULT_STATE = {"last_run": None, "checks": {}}

    def test_load_missing_file(self, tmp_env):
        """With no state file on disk the default state comes back."""
        assert _load_state() == self.DEFAULT_STATE

    def test_round_trip(self, tmp_env):
        """Whatever is saved is read back unchanged."""
        saved = {"last_run": "2025-01-01T00:00:00", "checks": {"email": True}}
        _save_state(saved)
        assert _load_state() == saved

    def test_load_corrupt_json(self, tmp_env):
        """Unparseable file content falls back to the default state."""
        tmp_env["state_file"].write_text("not valid json {{{")
        assert _load_state() == self.DEFAULT_STATE

    def test_save_creates_parent_dir(self, tmp_path, monkeypatch):
        """Saving creates any missing parent directories."""
        target = tmp_path / "deep" / "nested" / "state.json"
        monkeypatch.setattr("src.heartbeat.STATE_FILE", target)
        _save_state({"last_run": None, "checks": {}})
        assert target.exists()
# ---------------------------------------------------------------------------
# run_heartbeat (integration)
# ---------------------------------------------------------------------------
class TestRunHeartbeat:
    """End-to-end behaviour of run_heartbeat with the individual checks stubbed."""

    def test_all_ok(self, tmp_env):
        """No findings from any check yields HEARTBEAT_OK."""
        with (
            patch("src.heartbeat._check_email", return_value=None),
            patch("src.heartbeat._check_calendar", return_value=None),
            patch("src.heartbeat._check_kb_index", return_value=None),
            patch("src.heartbeat._check_git", return_value=None),
        ):
            summary = run_heartbeat()
        assert summary == "HEARTBEAT_OK"

    def test_with_results(self, tmp_env):
        """Outside quiet hours, findings are joined in check order."""
        with (
            patch("src.heartbeat._check_email", return_value="Email: 2 new"),
            patch("src.heartbeat._check_calendar", return_value=None),
            patch("src.heartbeat._check_kb_index", return_value="KB: 1 files need reindex"),
            patch("src.heartbeat._check_git", return_value=None),
            patch("src.heartbeat._is_quiet_hour", return_value=False),
        ):
            summary = run_heartbeat()
        assert summary == "Email: 2 new | KB: 1 files need reindex"

    def test_quiet_hours_suppression(self, tmp_env):
        """Inside quiet hours, findings are replaced by HEARTBEAT_OK."""
        with (
            patch("src.heartbeat._check_email", return_value="Email: 5 new"),
            patch("src.heartbeat._check_calendar", return_value="Calendar: meeting"),
            patch("src.heartbeat._check_kb_index", return_value=None),
            patch("src.heartbeat._check_git", return_value="Git: 2 uncommitted"),
            patch("src.heartbeat._is_quiet_hour", return_value=True),
        ):
            summary = run_heartbeat()
        assert summary == "HEARTBEAT_OK"

    def test_saves_state_after_run(self, tmp_env):
        """A run leaves a state file whose last_run is populated."""
        with (
            patch("src.heartbeat._check_email", return_value=None),
            patch("src.heartbeat._check_calendar", return_value=None),
            patch("src.heartbeat._check_kb_index", return_value=None),
            patch("src.heartbeat._check_git", return_value=None),
        ):
            run_heartbeat()
        persisted = json.loads(tmp_env["state_file"].read_text())
        assert "last_run" in persisted
        assert persisted["last_run"] is not None