Faza 1 complete: bilingual+enrichment plumbing, UI/filters, frozen DB

Extraction finished for 575 of 588 chunks (6 blocked by the content filter,
7 awaiting re-extraction). The DB was rebuilt and frozen at 9418 activities,
so content_keys are now stable for the enrichment overlay.

Part A (plumbing + UI):
- database.py: new columns name_ro/description_ro/rules_ro/variations_ro,
  indoor_outdoor, space_needed, estimated_fields, and
  source_id/source_ids/chunk_key; the FTS5 table (CREATE) and all 3 sync
  triggers (insert/delete/update) now index the 4 *_ro columns; new equality
  filters + category counts for both axes (indoor_outdoor, space_needed).
- activity.py: new fields + bilingual display helpers (get_display_*,
  is_estimated, axis displays).
- config_taxonomy.py: INDOOR_OUTDOOR/SPACE_NEEDED enums + normalizers
  (None on unrecognised, no fabrication).
- search.py / routes.py / config.py / templates / css: new dropdowns,
  RO-primary rendering with "(estimat)" markers and collapsible original
  text, and a /source/<id> download route shipped dark, i.e. disabled unless
  SOURCE_DOWNLOAD_ENABLED is set (copyright opt-in).
- build_database.py: source_id/chunk_key in dict_to_activity; merge_cluster
  unions source_ids without touching enrichment fields.

Part B (enrichment pipeline; built, not yet run):
- build_database.py: load_enrichment + apply_enrichment (post-dedup, keyed on
  content_key) + --enrichment CLI + stated-vs-estimated QA.
- run_enrichment.py (resumable, --source/--limit pilot scoping, --collect),
  ENRICHMENT_PROMPT.md.

Repair: scripts/repair_extractions.py fixes the subagents' systematic
unescaped-ASCII-quote bug using a faithful char-scanner (it escapes and never
truncates), followed by schema validation and a strictly-more-text guard.
json_repair was tried first but truncated silently, so it is NOT used.
build_database has no repair dependency.

Tests: tests/test_enrichment.py added; 99 pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-05-29 18:10:13 +00:00
parent 46d9592a55
commit bcfb6841eb
18 changed files with 1579 additions and 167 deletions

View File

@@ -72,6 +72,18 @@ class DatabaseManager:
extraction_confidence TEXT,
needs_review INTEGER DEFAULT 0,
-- Enrichment overlay (bilingual + inferred filters; Part B)
name_ro TEXT,
description_ro TEXT,
rules_ro TEXT,
variations_ro TEXT,
indoor_outdoor TEXT,
space_needed TEXT,
estimated_fields TEXT,
source_id TEXT,
source_ids TEXT,
chunk_key TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
@@ -82,6 +94,7 @@ class DatabaseManager:
CREATE VIRTUAL TABLE IF NOT EXISTS activities_fts USING fts5(
name, description, rules, variations, keywords,
materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro,
content='activities',
content_rowid='id'
)
@@ -106,6 +119,8 @@ class DatabaseManager:
"CREATE INDEX IF NOT EXISTS idx_activities_participants ON activities(participants_min, participants_max)",
"CREATE INDEX IF NOT EXISTS idx_activities_duration ON activities(duration_min, duration_max)",
"CREATE INDEX IF NOT EXISTS idx_activities_normalized_name ON activities(normalized_name)",
"CREATE INDEX IF NOT EXISTS idx_activities_indoor_outdoor ON activities(indoor_outdoor)",
"CREATE INDEX IF NOT EXISTS idx_activities_space_needed ON activities(space_needed)",
"CREATE INDEX IF NOT EXISTS idx_categories_type ON categories(type)"
]
@@ -117,9 +132,11 @@ class DatabaseManager:
CREATE TRIGGER IF NOT EXISTS activities_fts_insert AFTER INSERT ON activities
BEGIN
INSERT INTO activities_fts(rowid, name, description, rules, variations,
keywords, materials_list, skills_developed)
keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES (new.id, new.name, new.description, new.rules, new.variations,
new.keywords, new.materials_list, new.skills_developed);
new.keywords, new.materials_list, new.skills_developed,
new.name_ro, new.description_ro, new.rules_ro, new.variations_ro);
END
""")
@@ -127,9 +144,11 @@ class DatabaseManager:
CREATE TRIGGER IF NOT EXISTS activities_fts_delete AFTER DELETE ON activities
BEGIN
INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
variations, keywords, materials_list, skills_developed)
variations, keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES ('delete', old.id, old.name, old.description, old.rules,
old.variations, old.keywords, old.materials_list, old.skills_developed);
old.variations, old.keywords, old.materials_list, old.skills_developed,
old.name_ro, old.description_ro, old.rules_ro, old.variations_ro);
END
""")
@@ -137,13 +156,17 @@ class DatabaseManager:
CREATE TRIGGER IF NOT EXISTS activities_fts_update AFTER UPDATE ON activities
BEGIN
INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
variations, keywords, materials_list, skills_developed)
variations, keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES ('delete', old.id, old.name, old.description, old.rules,
old.variations, old.keywords, old.materials_list, old.skills_developed);
old.variations, old.keywords, old.materials_list, old.skills_developed,
old.name_ro, old.description_ro, old.rules_ro, old.variations_ro);
INSERT INTO activities_fts(rowid, name, description, rules, variations,
keywords, materials_list, skills_developed)
keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES (new.id, new.name, new.description, new.rules, new.variations,
new.keywords, new.materials_list, new.skills_developed);
new.keywords, new.materials_list, new.skills_developed,
new.name_ro, new.description_ro, new.rules_ro, new.variations_ro);
END
""")
@@ -210,6 +233,10 @@ class DatabaseManager:
('duration', activity.get_duration_display()),
('materials', activity.get_materials_display()),
('difficulty', activity.difficulty_level),
# Enrichment axes — slugs stored as value; UI maps to RO via
# DISPLAY_NAMES. Without these the new dropdowns would be empty.
('indoor_outdoor', activity.indoor_outdoor),
('space_needed', activity.space_needed),
]
for cat_type, cat_value in categories_to_update:
@@ -236,6 +263,8 @@ class DatabaseManager:
duration_max: Optional[int] = None,
materials_category: Optional[str] = None,
difficulty_level: Optional[str] = None,
indoor_outdoor: Optional[str] = None,
space_needed: Optional[str] = None,
limit: int = 100) -> List[Dict[str, Any]]:
"""Enhanced search with FTS5 and filters"""
@@ -293,7 +322,15 @@ class DatabaseManager:
if difficulty_level:
base_query += " AND difficulty_level = ?"
params.append(difficulty_level)
if indoor_outdoor:
base_query += " AND indoor_outdoor = ?"
params.append(indoor_outdoor)
if space_needed:
base_query += " AND space_needed = ?"
params.append(space_needed)
# Add ordering and limit
query = f"{base_query} {order_clause} LIMIT ?"
params.append(limit)