#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ review_queue.py — CLI for the needs_review lifecycle (plan §5c). Rows land in the queue when dedup leaves a borderline pair separate, or when a legacy `.doc` source was converted imperfectly. Each row has a stable content key; a decision written here is stored in data/review_decisions.json (git tracked) and re-applied by build_database.py on every rebuild, so the queue never resurfaces a resolved row. Commands: python scripts/review_queue.py list python scripts/review_queue.py resolve """ from __future__ import annotations import argparse import json import sqlite3 import sys from pathlib import Path from typing import Optional SCRIPT_DIR = Path(__file__).resolve().parent REPO_ROOT = SCRIPT_DIR.parent for _p in (str(SCRIPT_DIR), str(REPO_ROOT)): if _p not in sys.path: sys.path.insert(0, _p) from import_common import content_key, normalize_name # noqa: E402 VALID_DECISIONS = ("merge", "keep-separate", "drop") # -------------------------------------------------------------------------- # review_decisions.json # -------------------------------------------------------------------------- def load_decisions(path: Path) -> dict: if path.is_file(): try: data = json.loads(path.read_text(encoding="utf-8")) if isinstance(data, dict): return data except (json.JSONDecodeError, OSError): pass return {} def save_decisions(decisions: dict, path: Path) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text( json.dumps(decisions, indent=2, ensure_ascii=False, sort_keys=True), encoding="utf-8", ) # -------------------------------------------------------------------------- # queue # -------------------------------------------------------------------------- def list_queue(db_path: Path) -> list[dict]: """Return every needs_review row in the current DB, with its content key.""" if not db_path.is_file(): return [] conn = sqlite3.connect(db_path) conn.row_factory = sqlite3.Row try: rows = conn.execute( "SELECT name, normalized_name, language, description " "FROM activities WHERE needs_review = 1 ORDER BY normalized_name" ).fetchall() except sqlite3.OperationalError: return [] finally: conn.close() out = [] for row in rows: norm = row["normalized_name"] or normalize_name(row["name"]) key = content_key(norm, row["language"], row["description"] or "") out.append({ "id": key, "name": row["name"], "language": row["language"], "description": row["description"] or "", }) return out def resolve(decisions_path: Path, content_id: str, decision: str) -> dict: """Record a decision for a content key in review_decisions.json.""" if decision not in VALID_DECISIONS: raise ValueError( f"invalid decision {decision!r}; expected one of {VALID_DECISIONS}" ) decisions = load_decisions(decisions_path) decisions[content_id] = {"decision": decision} save_decisions(decisions, decisions_path) return decisions # -------------------------------------------------------------------------- # CLI # -------------------------------------------------------------------------- def main(argv: Optional[list[str]] = None) -> int: parser = argparse.ArgumentParser(description="needs_review queue CLI") parser.add_argument("--db", default="data/activities.db") parser.add_argument("--decisions", default="data/review_decisions.json") sub = parser.add_subparsers(dest="command", required=True) sub.add_parser("list", help="list rows currently flagged needs_review") p_resolve = sub.add_parser("resolve", help="record a decision for a row") p_resolve.add_argument("id", help="content id from `list`") p_resolve.add_argument("decision", choices=VALID_DECISIONS) args = parser.parse_args(argv) if args.command == "list": rows = list_queue(Path(args.db)) if not rows: print("review queue is empty.") return 0 print(f"{len(rows)} row(s) need review:\n") for r in rows: desc = r["description"][:80].replace("\n", " ") print(f" id : {r['id']}") print(f" name : {r['name']} [{r['language']}]") print(f" desc : {desc}") print(f" -> review_queue.py resolve {r['id']} ") print() return 0 if args.command == "resolve": resolve(Path(args.decisions), args.id, args.decision) print(f"recorded: {args.id} -> {args.decision}") print(f"written to {args.decisions} (applied on next build_database --rebuild)") return 0 return 1 if __name__ == "__main__": raise SystemExit(main())