Beehiiv redirects requests for non-existent newsletters to /?404=... with HTTP 302. With follow_redirects=True, the final 200 response was misread as "newsletter exists". Fix: disable redirect following so that only a direct HTTP 200 counts as a real newsletter. Also reset the state back to last_sent=13 (the real value). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
139 lines
4.8 KiB
Python
139 lines
4.8 KiB
Python
"""Newsletter Cercetași checker — detects new editions and sends WhatsApp summaries."""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import subprocess
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Awaitable, Callable
|
|
|
|
import httpx
|
|
|
|
from src.claude_session import CLAUDE_BIN, PROJECT_ROOT, _safe_env, build_system_prompt
|
|
|
|
# Module-level logger used by every function in this file.
log = logging.getLogger("echo-core.newsletter-cercetasi")

# URL template of one edition; formatted with the edition number and the year.
NEWSLETTER_BASE_URL = "https://cercetaiis-newsletter.beehiiv.com/p/newsletter-{n}-din-{year}"

# JSON file remembering which edition was sent last.
STATE_FILE = PROJECT_ROOT / "cron" / "newsletter-cercetasi-state.json"

# Markdown instructions that are appended to the summary prompt.
KB_PROMPT_FILE = (
    PROJECT_ROOT
    / "memory"
    / "kb"
    / "projects"
    / "grup-sprijin"
    / "prompt-newsletter-cercetasi.md"
)

# Maximum run time of the Claude CLI, in seconds.
CLAUDE_TIMEOUT = 5 * 60
|
|
|
|
|
|
def _read_state() -> dict:
    """Load the persisted checker state, falling back to a fresh one.

    Returns:
        A dict holding at least an integer ``last_sent`` and a ``year``.
        A fresh state (``last_sent`` 0, current UTC year) is returned when the
        state file is missing, cannot be decoded, or does not contain a usable
        state object.
    """
    fresh = {"last_sent": 0, "year": datetime.now(timezone.utc).year}

    try:
        state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # Expected on the very first run: nothing has been sent yet.
        return fresh
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        log.warning("State file %s is unreadable, starting fresh: %s", STATE_FILE, exc)
        return fresh

    # Valid JSON is not necessarily a valid state: callers index "last_sent"
    # and do arithmetic on it, so reject anything that would crash them.
    if not isinstance(state, dict) or not isinstance(state.get("last_sent"), int):
        log.warning("State file %s has an unexpected shape, starting fresh", STATE_FILE)
        return fresh

    state.setdefault("year", fresh["year"])
    return state
|
|
|
|
|
|
def _write_state(state: dict) -> None:
    """Persist *state* to ``STATE_FILE`` as pretty-printed UTF-8 JSON.

    The JSON is first written to a sibling temporary file which is then moved
    over the real file, so an interrupted write cannot leave a truncated state
    file behind.

    Raises:
        OSError: if the directory or file cannot be written.
        TypeError: if *state* is not JSON-serializable.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state, indent=2, ensure_ascii=False) + "\n"

    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    # ensure_ascii=False may emit non-ASCII characters, so the encoding is
    # pinned instead of depending on the process locale.
    tmp_file.write_text(payload, encoding="utf-8")
    tmp_file.replace(STATE_FILE)
|
|
|
|
|
|
async def _newsletter_exists(n: int, year: int) -> bool:
    """Tell whether edition ``n`` of ``year`` has been published.

    Beehiiv answers a request for a non-existent edition with a 302 redirect
    from ``/p/newsletter-N-din-YEAR`` to ``/?404=...``, so redirects are
    deliberately not followed: only a direct HTTP 200 counts as "exists".
    Any failure of the request is logged at debug level and reported as
    ``False``.
    """
    target = NEWSLETTER_BASE_URL.format(n=n, year=year)

    try:
        async with httpx.AsyncClient(follow_redirects=False) as http:
            response = await http.get(target, timeout=10)
    except Exception as exc:
        log.debug("Newsletter #%d/%d check failed: %s", n, year, exc)
        return False

    return response.status_code == 200
|
|
|
|
|
|
def _generate_summary(n: int, year: int) -> str | None:
    """Generate the summary of newsletter ``n``/``year`` with the Claude CLI.

    The prompt is the newsletter URL followed by the instructions read from
    ``KB_PROMPT_FILE``. The CLI is run synchronously with JSON output and only
    the ``WebFetch`` tool allowed; this call blocks for up to
    ``CLAUDE_TIMEOUT`` seconds.

    Returns:
        The stripped summary text, or ``None`` when the prompt file is
        missing, the CLI fails or times out, or its output contains no usable
        ``result`` string. Every failure is logged; nothing is raised.
    """
    url = NEWSLETTER_BASE_URL.format(n=n, year=year)

    try:
        # The prompt contains Romanian text, so do not rely on the locale.
        kb_prompt = KB_PROMPT_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        log.error("KB prompt file not found: %s", KB_PROMPT_FILE)
        return None

    prompt = (
        f"Newsletter-ul Cercetașilor #{n}/{year} este disponibil la: {url}\n\n"
        "Urmează instrucțiunile de mai jos pentru a genera rezumatul:\n\n"
        f"{kb_prompt}"
    )

    cmd = [
        CLAUDE_BIN, "-p", prompt,
        "--model", "sonnet",
        "--output-format", "json",
        "--allowedTools", "WebFetch",
    ]

    # The system prompt is optional: run without it when its source is missing.
    try:
        cmd += ["--system-prompt", build_system_prompt()]
    except FileNotFoundError as exc:
        log.warning("System prompt unavailable, running Claude CLI without it: %s", exc)

    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            timeout=CLAUDE_TIMEOUT,
            env=_safe_env(),
            cwd=PROJECT_ROOT,
        )
    except subprocess.TimeoutExpired:
        log.error("Claude CLI timed out for newsletter #%d", n)
        return None
    except Exception as exc:
        # Best-effort boundary: a missing binary, bad environment or
        # undecodable output must not crash the checker.
        log.error("Failed to generate newsletter summary: %s", exc)
        return None

    if proc.returncode != 0:
        log.error("Claude CLI error (exit %d): %s", proc.returncode, proc.stderr[:300])
        return None

    try:
        data = json.loads(proc.stdout)
    except json.JSONDecodeError as exc:
        log.error("Failed to generate newsletter summary: %s", exc)
        return None

    result = data.get("result") if isinstance(data, dict) else None
    if not isinstance(result, str):
        log.error("Claude CLI output has no text result for newsletter #%d", n)
        return None

    # NOTE(review): the CLI's JSON result object is expected to carry an
    # "is_error" flag — confirm against the CLI reference. When the key is
    # absent this check is a no-op; when set, the "result" text is an error
    # message and must not be forwarded as a summary.
    if data.get("is_error"):
        log.error("Claude CLI reported an error for newsletter #%d: %s", n, result[:300])
        return None

    return result.strip() or None
|
|
|
|
|
|
|
|
async def check_and_send(
    config,
    send_callback: Callable[[str, str], Awaitable[object]],
) -> None:
    """Check for the next newsletter edition and send its summary.

    Looks for edition ``last_sent + 1`` of the current UTC year (the counter
    restarts at 1 when the stored year differs from the current one). When
    the edition exists, a summary is generated in a worker thread and handed
    to *send_callback*; the state is advanced only after a successful send.

    Args:
        config: Settings object; only ``config.get(key, default)`` is used,
            to look up ``"newsletter_cercetasi.channel"``.
        send_callback: Awaited as ``send_callback(channel, summary)``.

    Failures of the callback or of the state write are logged, not raised.
    """
    state = _read_state()
    current_year = datetime.now(timezone.utc).year

    # New year → reset counter
    stored_year = state.get("year", current_year)
    if stored_year != current_year:
        log.info("New year detected (%d → %d), resetting newsletter counter", stored_year, current_year)
        state = {"last_sent": 0, "year": current_year}

    # Tolerate a state dict without "last_sent" instead of raising KeyError.
    next_n = state.get("last_sent", 0) + 1
    log.info("Checking for Cercetasi newsletter #%d/%d...", next_n, current_year)

    if not await _newsletter_exists(next_n, current_year):
        log.info("Newsletter #%d/%d not yet available", next_n, current_year)
        return

    log.info("Newsletter #%d/%d found! Generating summary...", next_n, current_year)

    # The summary generator blocks on a subprocess, so keep it off the loop.
    summary = await asyncio.to_thread(_generate_summary, next_n, current_year)
    if not summary:
        log.error("Failed to generate summary for newsletter #%d/%d", next_n, current_year)
        return

    channel = config.get("newsletter_cercetasi.channel", "echo-core")
    try:
        await send_callback(channel, summary)
    except Exception as e:
        # State is left untouched, so the same edition is tried again later.
        log.error("Send failed for newsletter #%d/%d — will retry next check: %s", next_n, current_year, e)
        return

    log.info("Newsletter #%d/%d summary sent to channel '%s'", next_n, current_year, channel)

    state["last_sent"] = next_n
    state["year"] = current_year
    state["last_sent_at"] = datetime.now(timezone.utc).isoformat()
    try:
        _write_state(state)
    except Exception as e:
        # The send already happened; report the real problem rather than a
        # misleading "send failed".
        log.error(
            "Newsletter #%d/%d was sent but the state could not be saved — it may be sent again: %s",
            next_n, current_year, e,
        )
|