Faza 1 complete: bilingual+enrichment plumbing, UI/filters, frozen DB
Extraction finished (575/588 chunks; 6 content-filter-blocked, 7 await re-extraction). DB rebuilt and frozen at 9418 activities — content_keys are now stable for the enrichment overlay.

Part A (plumbing + UI):
- database.py: name_ro/description_ro/rules_ro/variations_ro, indoor_outdoor, space_needed, estimated_fields, source_id/source_ids/chunk_key columns; FTS5 indexes the 4 *_ro columns across CREATE + all 3 triggers; new equality filters + category counts for both axes.
- activity.py: new fields + bilingual display helpers (get_display_*, is_estimated, axis displays).
- config_taxonomy.py: INDOOR_OUTDOOR/SPACE_NEEDED enums + normalizers (None on unrecognised, no fabrication).
- search.py / routes.py / config.py / templates / css: new dropdowns, RO-primary rendering with "(estimat)" markers and collapsible original text, and a /source/<id> download route shipped DARK behind SOURCE_DOWNLOAD_ENABLED (copyright opt-in).
- build_database.py: source_id/chunk_key in dict_to_activity; merge_cluster unions source_ids without touching enrichment fields.

Part B (enrichment pipeline, built not yet run):
- build_database.py: load_enrichment + apply_enrichment (post-dedup, keyed on content_key) + --enrichment CLI + stated-vs-estimated QA.
- run_enrichment.py (resumable, --source/--limit pilot scoping, --collect), ENRICHMENT_PROMPT.md.

Repair: scripts/repair_extractions.py fixes the subagents' systematic unescaped-ASCII-quote bug with a faithful char-scanner (escapes, never truncates) + schema validation + a strictly-more-text guard. json_repair was tried first, truncated silently, and is NOT used. build_database has no repair dependency.

Tests: tests/test_enrichment.py added; 99 pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -72,6 +72,18 @@ class DatabaseManager:
|
||||
extraction_confidence TEXT,
|
||||
needs_review INTEGER DEFAULT 0,
|
||||
|
||||
-- Enrichment overlay (bilingual + inferred filters; Part B)
|
||||
name_ro TEXT,
|
||||
description_ro TEXT,
|
||||
rules_ro TEXT,
|
||||
variations_ro TEXT,
|
||||
indoor_outdoor TEXT,
|
||||
space_needed TEXT,
|
||||
estimated_fields TEXT,
|
||||
source_id TEXT,
|
||||
source_ids TEXT,
|
||||
chunk_key TEXT,
|
||||
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
@@ -82,6 +94,7 @@ class DatabaseManager:
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS activities_fts USING fts5(
|
||||
name, description, rules, variations, keywords,
|
||||
materials_list, skills_developed,
|
||||
name_ro, description_ro, rules_ro, variations_ro,
|
||||
content='activities',
|
||||
content_rowid='id'
|
||||
)
|
||||
@@ -106,6 +119,8 @@ class DatabaseManager:
|
||||
"CREATE INDEX IF NOT EXISTS idx_activities_participants ON activities(participants_min, participants_max)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_activities_duration ON activities(duration_min, duration_max)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_activities_normalized_name ON activities(normalized_name)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_activities_indoor_outdoor ON activities(indoor_outdoor)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_activities_space_needed ON activities(space_needed)",
|
||||
"CREATE INDEX IF NOT EXISTS idx_categories_type ON categories(type)"
|
||||
]
|
||||
|
||||
@@ -117,9 +132,11 @@ class DatabaseManager:
|
||||
CREATE TRIGGER IF NOT EXISTS activities_fts_insert AFTER INSERT ON activities
|
||||
BEGIN
|
||||
INSERT INTO activities_fts(rowid, name, description, rules, variations,
|
||||
keywords, materials_list, skills_developed)
|
||||
keywords, materials_list, skills_developed,
|
||||
name_ro, description_ro, rules_ro, variations_ro)
|
||||
VALUES (new.id, new.name, new.description, new.rules, new.variations,
|
||||
new.keywords, new.materials_list, new.skills_developed);
|
||||
new.keywords, new.materials_list, new.skills_developed,
|
||||
new.name_ro, new.description_ro, new.rules_ro, new.variations_ro);
|
||||
END
|
||||
""")
|
||||
|
||||
@@ -127,9 +144,11 @@ class DatabaseManager:
|
||||
CREATE TRIGGER IF NOT EXISTS activities_fts_delete AFTER DELETE ON activities
|
||||
BEGIN
|
||||
INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
|
||||
variations, keywords, materials_list, skills_developed)
|
||||
variations, keywords, materials_list, skills_developed,
|
||||
name_ro, description_ro, rules_ro, variations_ro)
|
||||
VALUES ('delete', old.id, old.name, old.description, old.rules,
|
||||
old.variations, old.keywords, old.materials_list, old.skills_developed);
|
||||
old.variations, old.keywords, old.materials_list, old.skills_developed,
|
||||
old.name_ro, old.description_ro, old.rules_ro, old.variations_ro);
|
||||
END
|
||||
""")
|
||||
|
||||
@@ -137,13 +156,17 @@ class DatabaseManager:
|
||||
CREATE TRIGGER IF NOT EXISTS activities_fts_update AFTER UPDATE ON activities
|
||||
BEGIN
|
||||
INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
|
||||
variations, keywords, materials_list, skills_developed)
|
||||
variations, keywords, materials_list, skills_developed,
|
||||
name_ro, description_ro, rules_ro, variations_ro)
|
||||
VALUES ('delete', old.id, old.name, old.description, old.rules,
|
||||
old.variations, old.keywords, old.materials_list, old.skills_developed);
|
||||
old.variations, old.keywords, old.materials_list, old.skills_developed,
|
||||
old.name_ro, old.description_ro, old.rules_ro, old.variations_ro);
|
||||
INSERT INTO activities_fts(rowid, name, description, rules, variations,
|
||||
keywords, materials_list, skills_developed)
|
||||
keywords, materials_list, skills_developed,
|
||||
name_ro, description_ro, rules_ro, variations_ro)
|
||||
VALUES (new.id, new.name, new.description, new.rules, new.variations,
|
||||
new.keywords, new.materials_list, new.skills_developed);
|
||||
new.keywords, new.materials_list, new.skills_developed,
|
||||
new.name_ro, new.description_ro, new.rules_ro, new.variations_ro);
|
||||
END
|
||||
""")
|
||||
|
||||
@@ -210,6 +233,10 @@ class DatabaseManager:
|
||||
('duration', activity.get_duration_display()),
|
||||
('materials', activity.get_materials_display()),
|
||||
('difficulty', activity.difficulty_level),
|
||||
# Enrichment axes — slugs stored as value; UI maps to RO via
|
||||
# DISPLAY_NAMES. Without these the new dropdowns would be empty.
|
||||
('indoor_outdoor', activity.indoor_outdoor),
|
||||
('space_needed', activity.space_needed),
|
||||
]
|
||||
|
||||
for cat_type, cat_value in categories_to_update:
|
||||
@@ -236,6 +263,8 @@ class DatabaseManager:
|
||||
duration_max: Optional[int] = None,
|
||||
materials_category: Optional[str] = None,
|
||||
difficulty_level: Optional[str] = None,
|
||||
indoor_outdoor: Optional[str] = None,
|
||||
space_needed: Optional[str] = None,
|
||||
limit: int = 100) -> List[Dict[str, Any]]:
|
||||
"""Enhanced search with FTS5 and filters"""
|
||||
|
||||
@@ -293,7 +322,15 @@ class DatabaseManager:
|
||||
if difficulty_level:
|
||||
base_query += " AND difficulty_level = ?"
|
||||
params.append(difficulty_level)
|
||||
|
||||
|
||||
if indoor_outdoor:
|
||||
base_query += " AND indoor_outdoor = ?"
|
||||
params.append(indoor_outdoor)
|
||||
|
||||
if space_needed:
|
||||
base_query += " AND space_needed = ?"
|
||||
params.append(space_needed)
|
||||
|
||||
# Add ordering and limit
|
||||
query = f"{base_query} {order_clause} LIMIT ?"
|
||||
params.append(limit)
|
||||
|
||||
Reference in New Issue
Block a user