Files
echo-core/src/newsletter_cercetasi.py
Marius Mutu 9d447b9ff1 fix(newsletter): use follow_redirects=False to avoid false positive on 404 redirect
Beehiiv redirects non-existent newsletters to /?404=... with HTTP 302.
With follow_redirects=True, the final 200 was misread as "newsletter exists".
Fix: disable redirect following so only a direct HTTP 200 = newsletter real.
Also reset state back to last_sent=13 (real).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 17:06:00 +00:00

139 lines
4.8 KiB
Python

"""Newsletter Cercetași checker — detects new editions and sends WhatsApp summaries."""
import asyncio
import json
import logging
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from typing import Awaitable, Callable
import httpx
from src.claude_session import CLAUDE_BIN, PROJECT_ROOT, _safe_env, build_system_prompt
# Module-level logger used by every function in this file.
log = logging.getLogger("echo-core.newsletter-cercetasi")

# URL template for a single edition; formatted with the edition number ``n``
# and the ``year`` before it is probed or handed to the summarizer.
NEWSLETTER_BASE_URL = "https://cercetaiis-newsletter.beehiiv.com/p/newsletter-{n}-din-{year}"

# JSON file holding the checker state: ``last_sent``, ``year`` and, once an
# edition has been sent, ``last_sent_at``.
STATE_FILE = PROJECT_ROOT / "cron" / "newsletter-cercetasi-state.json"

# Instruction text that is appended to the prompt given to the Claude CLI
# when a summary is generated.
KB_PROMPT_FILE = (
    PROJECT_ROOT / "memory" / "kb" / "projects" / "grup-sprijin" / "prompt-newsletter-cercetasi.md"
)

# Timeout handed to ``subprocess.run`` for the Claude CLI call (seconds).
CLAUDE_TIMEOUT = 300
def _read_state() -> dict:
    """Load the persisted checker state from ``STATE_FILE``.

    Returns:
        A dict that always contains ``"last_sent"`` and ``"year"``. The
        defaults (``last_sent=0`` and the current UTC year) are returned when
        the file is missing, cannot be decoded, is not valid JSON, or does not
        hold a JSON object. Keys missing from an otherwise valid file are
        filled in from the same defaults; any extra keys are kept.
    """
    defaults = {"last_sent": 0, "year": datetime.now(timezone.utc).year}
    try:
        loaded = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    except (FileNotFoundError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # i.e. any flavour of "the file exists but is corrupt".
        return defaults
    if not isinstance(loaded, dict):
        # Valid JSON of the wrong shape (list, number, null, ...) would break
        # callers that index the state by key.
        return defaults
    return {**defaults, **loaded}
def _write_state(state: dict) -> None:
    """Persist *state* to ``STATE_FILE`` as pretty-printed UTF-8 JSON.

    The payload is first written to a sibling ``*.tmp`` file and then moved
    over ``STATE_FILE``. An interrupted write therefore cannot leave a
    truncated state file behind, which ``_read_state`` would interpret as
    "nothing sent yet".

    Args:
        state: JSON-serializable mapping to store.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state, indent=2, ensure_ascii=False) + "\n"
    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    # ensure_ascii=False may emit non-ASCII characters, so pin the encoding
    # instead of relying on the process locale.
    tmp_file.write_text(payload, encoding="utf-8")
    tmp_file.replace(STATE_FILE)
async def _newsletter_exists(n: int, year: int) -> bool:
    """Probe the edition URL and report whether newsletter #{n}/{year} is published.

    Beehiiv answers a request for a missing edition with a 302 redirect to
    ``/?404=...``, so redirects are deliberately not followed: only a direct
    HTTP 200 on the edition URL counts as "exists". Any error raised while
    making the request is logged at debug level and reported as ``False``.
    """
    target = NEWSLETTER_BASE_URL.format(n=n, year=year)
    try:
        async with httpx.AsyncClient(follow_redirects=False) as http:
            response = await http.get(target, timeout=10)
            found = response.status_code == 200
        return found
    except Exception as exc:
        log.debug("Newsletter #%d/%d check failed: %s", n, year, exc)
        return False
def _generate_summary(n: int, year: int) -> str | None:
"""Run Claude CLI to generate summary for newsletter #{n}/{year}. Returns text or None."""
url = NEWSLETTER_BASE_URL.format(n=n, year=year)
try:
kb_prompt = KB_PROMPT_FILE.read_text()
except FileNotFoundError:
log.error("KB prompt file not found: %s", KB_PROMPT_FILE)
return None
prompt = (
f"Newsletter-ul Cercetașilor #{n}/{year} este disponibil la: {url}\n\n"
f"Urmează instrucțiunile de mai jos pentru a genera rezumatul:\n\n"
f"{kb_prompt}"
)
cmd = [
CLAUDE_BIN, "-p", prompt,
"--model", "sonnet",
"--output-format", "json",
"--allowedTools", "WebFetch",
]
try:
system_prompt = build_system_prompt()
cmd += ["--system-prompt", system_prompt]
except FileNotFoundError:
pass
try:
proc = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=CLAUDE_TIMEOUT,
env=_safe_env(),
cwd=PROJECT_ROOT,
)
if proc.returncode != 0:
log.error("Claude CLI error (exit %d): %s", proc.returncode, proc.stderr[:300])
return None
data = json.loads(proc.stdout)
return data.get("result", "").strip() or None
except subprocess.TimeoutExpired:
log.error("Claude CLI timed out for newsletter #%d", n)
return None
except (json.JSONDecodeError, Exception) as e:
log.error("Failed to generate newsletter summary: %s", e)
return None
async def check_and_send(
    config,
    send_callback: Callable[[str, str], Awaitable[object]],
) -> None:
    """Check for the next newsletter edition and, if published, send its summary.

    The next edition is ``last_sent + 1`` for the current UTC year; the
    counter restarts at 1 when the stored year differs from the current one.
    State is only advanced after the summary has been handed to
    ``send_callback`` without error, so a failed send is retried on the next
    check.

    Args:
        config: Object whose ``get(key, default)`` is queried for
            ``"newsletter_cercetasi.channel"`` (default ``"echo-core"``).
        send_callback: Awaitable callable invoked as
            ``send_callback(channel, summary)``.
    """
    state = _read_state()
    current_year = datetime.now(timezone.utc).year

    # New year → reset counter
    stored_year = state.get("year", current_year)
    if stored_year != current_year:
        log.info(
            "New year detected (%s -> %s), resetting newsletter counter",
            stored_year,
            current_year,
        )
        state = {"last_sent": 0, "year": current_year}

    # Tolerate a state that lacks "last_sent" instead of raising KeyError.
    next_n = state.get("last_sent", 0) + 1
    log.info("Checking for Cercetasi newsletter #%d/%d...", next_n, current_year)

    if not await _newsletter_exists(next_n, current_year):
        log.info("Newsletter #%d/%d not yet available", next_n, current_year)
        return

    log.info("Newsletter #%d/%d found! Generating summary...", next_n, current_year)
    # The summary generator blocks on a subprocess, so run it off the event loop.
    summary = await asyncio.to_thread(_generate_summary, next_n, current_year)
    if not summary:
        log.error("Failed to generate summary for newsletter #%d/%d", next_n, current_year)
        return

    channel = config.get("newsletter_cercetasi.channel", "echo-core")
    try:
        await send_callback(channel, summary)
    except Exception as e:
        log.error("Send failed for newsletter #%d/%d — will retry next check: %s", next_n, current_year, e)
        return

    state["last_sent"] = next_n
    state["year"] = current_year
    state["last_sent_at"] = datetime.now(timezone.utc).isoformat()
    try:
        _write_state(state)
    except Exception as e:
        # The message is already out; report the persistence failure as such
        # rather than as a failed send.
        log.error(
            "Newsletter #%d/%d was sent but state could not be saved — it may be re-sent on the next check: %s",
            next_n,
            current_year,
            e,
        )
        return
    log.info("Newsletter #%d/%d summary sent to channel '%s'", next_n, current_year, channel)