Faza 1 complete: bilingual+enrichment plumbing, UI/filters, frozen DB

Extraction finished (575/588 chunks; 6 content-filter-blocked, 7 awaiting re-extraction). DB rebuilt and frozen at 9418 activities — content_keys are now stable for the enrichment overlay.

Part A (plumbing + UI):
- database.py: name_ro/description_ro/rules_ro/variations_ro, indoor_outdoor, space_needed, estimated_fields, source_id/source_ids/chunk_key columns; FTS5 indexes the 4 *_ro columns across CREATE + all 3 triggers; new equality filters + category counts for both axes.
- activity.py: new fields + bilingual display helpers (get_display_*, is_estimated, axis displays).
- config_taxonomy.py: INDOOR_OUTDOOR/SPACE_NEEDED enums + normalizers (None on unrecognised, no fabrication).
- search.py / routes.py / config.py / templates / css: new dropdowns, RO-primary rendering with "(estimat)" markers and collapsible original text, and a /source/<id> download route shipped DARK behind SOURCE_DOWNLOAD_ENABLED (copyright opt-in).
- build_database.py: source_id/chunk_key in dict_to_activity; merge_cluster unions source_ids without touching enrichment fields.

Part B (enrichment pipeline, built not yet run):
- build_database.py: load_enrichment + apply_enrichment (post-dedup, keyed on content_key) + --enrichment CLI + stated-vs-estimated QA.
- run_enrichment.py (resumable, --source/--limit pilot scoping, --collect), ENRICHMENT_PROMPT.md.

Repair: scripts/repair_extractions.py fixes the subagents' systematic unescaped-ASCII-quote bug with a faithful char-scanner (escapes, never truncates) + schema validation + a strictly-more-text guard. json_repair was tried first, truncated silently, and is NOT used. build_database has no repair dependency.

Tests: tests/test_enrichment.py added; 99 pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 18:10:13 +00:00
parent 46d9592a55
commit bcfb6841eb
18 changed files with 1579 additions and 167 deletions
--- a/tests/test_enrichment.py
+++ b/tests/test_enrichment.py
@@ -0,0 +1,231 @@
+"""
+Tests for the enrichment overlay (plan Part B) and the new filter axes /
+bilingual display helpers (plan Part A).
+
+Covers:
+  * config_taxonomy.normalize_indoor_outdoor / normalize_space_needed
+  * build_database.apply_enrichment keying, field application, estimated tally
+  * DatabaseManager indoor_outdoor / space_needed equality filters
+  * FTS5 indexing of the *_ro columns
+  * Activity bilingual display helpers
+"""
+
+import os
+import sys
+
+import pytest
+
+# Put the parent of this tests/ directory on sys.path so the ``app.*``
+# imports below resolve regardless of the directory pytest is started from.
+PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+if PROJECT_ROOT not in sys.path:
+    sys.path.insert(0, PROJECT_ROOT)
+# Likewise for <project root>/scripts; presumably ``import_common`` and
+# ``build_database`` (imported below) live there — verify.
+SCRIPTS = os.path.join(PROJECT_ROOT, "scripts")
+if SCRIPTS not in sys.path:
+    sys.path.insert(0, SCRIPTS)
+
+from app.models.activity import Activity  # noqa: E402
+from app.models.database import DatabaseManager  # noqa: E402
+from app.config_taxonomy import (  # noqa: E402
+    normalize_indoor_outdoor,
+    normalize_space_needed,
+)
+from import_common import content_key, normalize_name  # noqa: E402
+from build_database import apply_enrichment  # noqa: E402
+
+
+# --------------------------------------------------------------------------
+# taxonomy normalizers
+# --------------------------------------------------------------------------
+@pytest.mark.parametrize("raw,expected", [
+    ("indoor", "indoor"),
+    ("Outdoor", "outdoor"),
+    ("either", "either"),
+    ("interior", "indoor"),
+    ("aer liber", "outdoor"),
+    ("both", "either"),
+    ("", None),
+    ("nonsense", None),
+    (None, None),
+])
+def test_normalize_indoor_outdoor(raw, expected):
+    assert normalize_indoor_outdoor(raw) == expected
+
+
+@pytest.mark.parametrize("raw,expected", [
+    ("mic", "mic"),
+    ("MEDIU", "mediu"),
+    ("mare", "mare"),
+    ("small", "mic"),
+    ("large", "mare"),
+    ("", None),
+    ("huge", None),
+    (None, None),
+])
+def test_normalize_space_needed(raw, expected):
+    assert normalize_space_needed(raw) == expected
+
+
+# --------------------------------------------------------------------------
+# apply_enrichment
+# --------------------------------------------------------------------------
+def _activity(name="Joc de test", description="O descriere de test.", language="ro"):
+    return Activity(
+        name=name, description=description, category="team-building",
+        content_type="joc", source_file="t.txt", language=language,
+    )
+
+
+def _key_for(act: Activity) -> str:
+    return content_key(
+        act.normalized_name or normalize_name(act.name),
+        act.language,
+        act.description or "",
+    )
+
+
+def test_apply_enrichment_matches_and_applies_fields():
+    act = _activity()
+    key = _key_for(act)
+    enrichment = {
+        key: {
+            "name_ro": "Joc de test (RO)",
+            "description_ro": "Descriere îmbogățită în română.",
+            "indoor_outdoor": "outdoor",
+            "space_needed": "mediu",
+            "participants_min": 4,
+            "participants_max": 12,
+            "estimated_fields": ["space_needed", "participants_min", "participants_max"],
+        }
+    }
+    stats = apply_enrichment([act], enrichment)
+
+    assert act.name_ro == "Joc de test (RO)"
+    assert act.description_ro == "Descriere îmbogățită în română."
+    assert act.indoor_outdoor == "outdoor"
+    assert act.space_needed == "mediu"
+    assert act.participants_min == 4 and act.participants_max == 12
+    assert set(act.estimated_fields) == {"space_needed", "participants_min", "participants_max"}
+
+    assert stats["entries"] == 1
+    assert stats["matched"] == 1
+    assert stats["orphaned"] == 0
+    # indoor_outdoor stated, space_needed estimated
+    assert stats["fields_stated"].get("indoor_outdoor") == 1
+    assert stats["fields_estimated"].get("space_needed") == 1
+
+
+def test_apply_enrichment_orphan_entry_counted():
+    act = _activity()
+    enrichment = {"deadbeef" * 5: {"name_ro": "nu se potrivește"}}
+    stats = apply_enrichment([act], enrichment)
+    assert stats["matched"] == 0
+    assert stats["orphaned"] == 1
+    assert act.name_ro is None  # untouched
+
+
+def test_apply_enrichment_absent_fields_leave_value_untouched():
+    act = _activity()
+    act.participants_min = 5
+    key = _key_for(act)
+    # entry only translates name; participants must be preserved
+    apply_enrichment([act], {key: {"name_ro": "Tradus"}})
+    assert act.participants_min == 5
+    assert act.name_ro == "Tradus"
+
+
+def test_apply_enrichment_drops_unrecognised_enum():
+    act = _activity()
+    key = _key_for(act)
+    apply_enrichment([act], {key: {"indoor_outdoor": "spaceship", "space_needed": "mic"}})
+    assert act.indoor_outdoor is None       # unrecognised → dropped
+    assert act.space_needed == "mic"
+
+
+# --------------------------------------------------------------------------
+# DB equality filters + FTS on *_ro
+# --------------------------------------------------------------------------
+@pytest.fixture
+def db(tmp_path):
+    return DatabaseManager(str(tmp_path / "enrich.db"))
+
+
+def _insert(db, **overrides):
+    base = dict(
+        name="Activitate", description="desc", category="camp-outdoor",
+        content_type="joc", source_file="t.txt", language="ro",
+    )
+    base.update(overrides)
+    return db.insert_activity(Activity(**base))
+
+
+def test_indoor_outdoor_equality_filter(db):
+    _insert(db, name="In casa", indoor_outdoor="indoor")
+    _insert(db, name="Afara", indoor_outdoor="outdoor")
+    res = db.search_activities(indoor_outdoor="outdoor")
+    assert len(res) == 1
+    assert res[0]["name"] == "Afara"
+
+
+def test_space_needed_equality_filter(db):
+    _insert(db, name="Mic", space_needed="mic")
+    _insert(db, name="Mare", space_needed="mare")
+    res = db.search_activities(space_needed="mare")
+    assert len(res) == 1
+    assert res[0]["name"] == "Mare"
+
+
+def test_fts_indexes_name_ro(db):
+    _insert(db, name="Treasure Hunt", name_ro="Vânătoarea de comori")
+    # term only present in the Romanian twin
+    res = db.search_activities(search_text="comori")
+    assert len(res) == 1
+    assert res[0]["name"] == "Treasure Hunt"
+
+
+def test_fts_indexes_description_ro(db):
+    _insert(db, name="Game", description="english desc",
+            description_ro="o activitate de cooperare")
+    res = db.search_activities(search_text="cooperare")
+    assert len(res) == 1
+
+
+def test_ro_columns_round_trip(db):
+    aid = _insert(
+        db, name="X", name_ro="X-ro", description_ro="d-ro",
+        rules_ro="r-ro", variations_ro="v-ro",
+        indoor_outdoor="either", space_needed="mediu",
+        estimated_fields=["duration_min"], source_id="src1",
+        source_ids=["src1", "src2"], chunk_key="src1.part01",
+    )
+    row = db.get_activity_by_id(aid)
+    loaded = Activity.from_dict(row)
+    assert loaded.name_ro == "X-ro"
+    assert loaded.indoor_outdoor == "either"
+    assert loaded.space_needed == "mediu"
+    assert loaded.estimated_fields == ["duration_min"]
+    assert loaded.source_ids == ["src1", "src2"]
+    assert loaded.chunk_key == "src1.part01"
+
+
+# --------------------------------------------------------------------------
+# display helpers
+# --------------------------------------------------------------------------
+def test_display_helpers_prefer_ro_with_fallback():
+    act = _activity(name="Original", description="Original desc")
+    assert act.get_display_name() == "Original"          # no translation yet
+    assert act.get_display_description() == "Original desc"
+    act.name_ro = "Tradus"
+    act.description_ro = "Descriere tradusă"
+    assert act.get_display_name() == "Tradus"
+    assert act.get_display_description() == "Descriere tradusă"
+    assert act.has_translation() is True
+
+
+def test_is_estimated_and_axis_displays():
+    act = _activity()
+    act.indoor_outdoor = "outdoor"
+    act.space_needed = "mare"
+    act.estimated_fields = ["space_needed"]
+    assert act.get_indoor_outdoor_display() == "Exterior"
+    assert act.get_space_needed_display() == "Spațiu mare"
+    assert act.is_estimated("space_needed") is True
+    assert act.is_estimated("indoor_outdoor") is False