Files
game-library/app/models/database.py
Claude Agent bcfb6841eb Faza 1 complete: bilingual+enrichment plumbing, UI/filters, frozen DB
Extraction finished (575/588 chunks; 6 content-filter-blocked, 7 await
re-extraction). DB rebuilt and frozen at 9418 activities — content_keys
are now stable for the enrichment overlay.

Part A (plumbing + UI):
- database.py: name_ro/description_ro/rules_ro/variations_ro, indoor_outdoor,
  space_needed, estimated_fields, source_id/source_ids/chunk_key columns;
  FTS5 indexes the 4 *_ro columns across CREATE + all 3 triggers; new equality
  filters + category counts for both axes.
- activity.py: new fields + bilingual display helpers (get_display_*,
  is_estimated, axis displays).
- config_taxonomy.py: INDOOR_OUTDOOR/SPACE_NEEDED enums + normalizers
  (None on unrecognised, no fabrication).
- search.py / routes.py / config.py / templates / css: new dropdowns,
  RO-primary rendering with "(estimat)" markers and collapsible original
  text, and a /source/<id> download route shipped DARK behind
  SOURCE_DOWNLOAD_ENABLED (copyright opt-in).
- build_database.py: source_id/chunk_key in dict_to_activity; merge_cluster
  unions source_ids without touching enrichment fields.

Part B (enrichment pipeline, built not yet run):
- build_database.py: load_enrichment + apply_enrichment (post-dedup, keyed on
  content_key) + --enrichment CLI + stated-vs-estimated QA.
- run_enrichment.py (resumable, --source/--limit pilot scoping, --collect),
  ENRICHMENT_PROMPT.md.

Repair: scripts/repair_extractions.py fixes the subagents' systematic
unescaped-ASCII-quote bug with a faithful char-scanner (escapes, never
truncates) + schema validation + a strictly-more-text guard. json_repair was
tried first, truncated silently, and is NOT used. build_database has no repair
dependency.

Tests: tests/test_enrichment.py added; 99 pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 18:10:13 +00:00

410 lines
18 KiB
Python

"""
Database manager for INDEX-SISTEM-JOCURI v2.0
Implements SQLite with FTS5 for full-text search
"""
import sqlite3
import json
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from app.models.activity import Activity
class DatabaseManager:
"""Enhanced database manager with FTS5 support"""
def __init__(self, db_path: str):
"""Initialize database manager"""
self.db_path = Path(db_path)
self.db_path.parent.mkdir(parents=True, exist_ok=True)
self._init_database()
def _get_connection(self) -> sqlite3.Connection:
"""Get database connection with row factory"""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row
# Enable FTS5
conn.execute("PRAGMA table_info=sqlite_master")
return conn
def _init_database(self):
"""Initialize database with v2.0 schema"""
with self._get_connection() as conn:
# Main activities table
# NOTE: schema is rebuilt from scratch (plan §6) — no in-place
# migration. The old DB is deleted and recreated by build_database.
conn.execute("""
CREATE TABLE IF NOT EXISTS activities (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
description TEXT NOT NULL,
rules TEXT,
variations TEXT,
category TEXT NOT NULL,
subcategory TEXT,
content_type TEXT,
source_file TEXT NOT NULL,
source_files TEXT,
page_reference TEXT,
source_excerpt TEXT,
-- Structured parameters
age_group_min INTEGER,
age_group_max INTEGER,
participants_min INTEGER,
participants_max INTEGER,
duration_min INTEGER,
duration_max INTEGER,
-- Categories for filtering
materials_category TEXT,
materials_list TEXT,
skills_developed TEXT,
difficulty_level TEXT,
-- Metadata
keywords TEXT,
tags TEXT,
popularity_score INTEGER DEFAULT 0,
-- Extraction / language metadata
language TEXT,
normalized_name TEXT,
extraction_confidence TEXT,
needs_review INTEGER DEFAULT 0,
-- Enrichment overlay (bilingual + inferred filters; Part B)
name_ro TEXT,
description_ro TEXT,
rules_ro TEXT,
variations_ro TEXT,
indoor_outdoor TEXT,
space_needed TEXT,
estimated_fields TEXT,
source_id TEXT,
source_ids TEXT,
chunk_key TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# FTS5 virtual table for search
conn.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS activities_fts USING fts5(
name, description, rules, variations, keywords,
materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro,
content='activities',
content_rowid='id'
)
""")
# Categories table for dynamic filters
conn.execute("""
CREATE TABLE IF NOT EXISTS categories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
type TEXT NOT NULL,
value TEXT NOT NULL,
display_name TEXT,
usage_count INTEGER DEFAULT 0,
UNIQUE(type, value)
)
""")
# Create indexes for performance
indexes = [
"CREATE INDEX IF NOT EXISTS idx_activities_category ON activities(category)",
"CREATE INDEX IF NOT EXISTS idx_activities_age ON activities(age_group_min, age_group_max)",
"CREATE INDEX IF NOT EXISTS idx_activities_participants ON activities(participants_min, participants_max)",
"CREATE INDEX IF NOT EXISTS idx_activities_duration ON activities(duration_min, duration_max)",
"CREATE INDEX IF NOT EXISTS idx_activities_normalized_name ON activities(normalized_name)",
"CREATE INDEX IF NOT EXISTS idx_activities_indoor_outdoor ON activities(indoor_outdoor)",
"CREATE INDEX IF NOT EXISTS idx_activities_space_needed ON activities(space_needed)",
"CREATE INDEX IF NOT EXISTS idx_categories_type ON categories(type)"
]
for index_sql in indexes:
conn.execute(index_sql)
# Triggers to keep FTS in sync
conn.execute("""
CREATE TRIGGER IF NOT EXISTS activities_fts_insert AFTER INSERT ON activities
BEGIN
INSERT INTO activities_fts(rowid, name, description, rules, variations,
keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES (new.id, new.name, new.description, new.rules, new.variations,
new.keywords, new.materials_list, new.skills_developed,
new.name_ro, new.description_ro, new.rules_ro, new.variations_ro);
END
""")
conn.execute("""
CREATE TRIGGER IF NOT EXISTS activities_fts_delete AFTER DELETE ON activities
BEGIN
INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
variations, keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES ('delete', old.id, old.name, old.description, old.rules,
old.variations, old.keywords, old.materials_list, old.skills_developed,
old.name_ro, old.description_ro, old.rules_ro, old.variations_ro);
END
""")
conn.execute("""
CREATE TRIGGER IF NOT EXISTS activities_fts_update AFTER UPDATE ON activities
BEGIN
INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
variations, keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES ('delete', old.id, old.name, old.description, old.rules,
old.variations, old.keywords, old.materials_list, old.skills_developed,
old.name_ro, old.description_ro, old.rules_ro, old.variations_ro);
INSERT INTO activities_fts(rowid, name, description, rules, variations,
keywords, materials_list, skills_developed,
name_ro, description_ro, rules_ro, variations_ro)
VALUES (new.id, new.name, new.description, new.rules, new.variations,
new.keywords, new.materials_list, new.skills_developed,
new.name_ro, new.description_ro, new.rules_ro, new.variations_ro);
END
""")
conn.commit()
def insert_activity(self, activity: Activity) -> int:
"""Insert new activity and return ID"""
with self._get_connection() as conn:
data = activity.to_dict()
columns = ', '.join(data.keys())
placeholders = ', '.join(['?' for _ in data])
values = list(data.values())
cursor = conn.execute(
f"INSERT INTO activities ({columns}) VALUES ({placeholders})",
values
)
activity_id = cursor.lastrowid
# Update category counts
self._update_category_counts(conn, activity)
conn.commit()
return activity_id
def bulk_insert_activities(self, activities: List[Activity]) -> int:
"""Bulk insert activities for better performance"""
if not activities:
return 0
with self._get_connection() as conn:
data_list = [activity.to_dict() for activity in activities]
if not data_list:
return 0
columns = ', '.join(data_list[0].keys())
placeholders = ', '.join(['?' for _ in data_list[0]])
values_list = [list(data.values()) for data in data_list]
conn.executemany(
f"INSERT INTO activities ({columns}) VALUES ({placeholders})",
values_list
)
# Update category counts
for activity in activities:
self._update_category_counts(conn, activity)
conn.commit()
return len(activities)
def _update_category_counts(self, conn: sqlite3.Connection, activity: Activity):
"""Update category usage counts"""
categories_to_update = [
('category', activity.category),
('content_type', activity.content_type),
('language', activity.language),
('age_group', activity.get_age_range_display()),
('participants', activity.get_participants_display()),
('duration', activity.get_duration_display()),
('materials', activity.get_materials_display()),
('difficulty', activity.difficulty_level),
# Enrichment axes — slugs stored as value; UI maps to RO via
# DISPLAY_NAMES. Without these the new dropdowns would be empty.
('indoor_outdoor', activity.indoor_outdoor),
('space_needed', activity.space_needed),
]
for cat_type, cat_value in categories_to_update:
if cat_value and cat_value.strip():
conn.execute("""
INSERT OR IGNORE INTO categories (type, value, display_name, usage_count)
VALUES (?, ?, ?, 0)
""", (cat_type, cat_value, cat_value))
conn.execute("""
UPDATE categories
SET usage_count = usage_count + 1
WHERE type = ? AND value = ?
""", (cat_type, cat_value))
def search_activities(self,
search_text: Optional[str] = None,
category: Optional[str] = None,
age_group_min: Optional[int] = None,
age_group_max: Optional[int] = None,
participants_min: Optional[int] = None,
participants_max: Optional[int] = None,
duration_min: Optional[int] = None,
duration_max: Optional[int] = None,
materials_category: Optional[str] = None,
difficulty_level: Optional[str] = None,
indoor_outdoor: Optional[str] = None,
space_needed: Optional[str] = None,
limit: int = 100) -> List[Dict[str, Any]]:
"""Enhanced search with FTS5 and filters"""
with self._get_connection() as conn:
if search_text and search_text.strip():
# Use FTS5 for text search
base_query = """
SELECT a.*,
activities_fts.rank as search_rank
FROM activities a
JOIN activities_fts ON a.id = activities_fts.rowid
WHERE activities_fts MATCH ?
"""
params = [search_text.strip()]
order_clause = "ORDER BY search_rank, a.popularity_score DESC"
else:
# Regular query without FTS
base_query = "SELECT * FROM activities WHERE 1=1"
params = []
order_clause = "ORDER BY popularity_score DESC, name ASC"
# Add filters
if category:
base_query += " AND category LIKE ?"
params.append(f"%{category}%")
if age_group_min is not None:
base_query += " AND (age_group_min IS NULL OR age_group_min <= ?)"
params.append(age_group_min)
if age_group_max is not None:
base_query += " AND (age_group_max IS NULL OR age_group_max >= ?)"
params.append(age_group_max)
if participants_min is not None:
base_query += " AND (participants_min IS NULL OR participants_min <= ?)"
params.append(participants_min)
if participants_max is not None:
base_query += " AND (participants_max IS NULL OR participants_max >= ?)"
params.append(participants_max)
if duration_min is not None:
base_query += " AND (duration_min IS NULL OR duration_min >= ?)"
params.append(duration_min)
if duration_max is not None:
base_query += " AND (duration_max IS NULL OR duration_max <= ?)"
params.append(duration_max)
if materials_category:
base_query += " AND materials_category LIKE ?"
params.append(f"%{materials_category}%")
if difficulty_level:
base_query += " AND difficulty_level = ?"
params.append(difficulty_level)
if indoor_outdoor:
base_query += " AND indoor_outdoor = ?"
params.append(indoor_outdoor)
if space_needed:
base_query += " AND space_needed = ?"
params.append(space_needed)
# Add ordering and limit
query = f"{base_query} {order_clause} LIMIT ?"
params.append(limit)
cursor = conn.execute(query, params)
return [dict(row) for row in cursor.fetchall()]
def get_activity_by_id(self, activity_id: int) -> Optional[Dict[str, Any]]:
"""Get single activity by ID"""
with self._get_connection() as conn:
cursor = conn.execute("SELECT * FROM activities WHERE id = ?", (activity_id,))
row = cursor.fetchone()
return dict(row) if row else None
def get_filter_options(self) -> Dict[str, List[str]]:
"""Get dynamic filter options from categories table"""
with self._get_connection() as conn:
cursor = conn.execute("""
SELECT type, value, usage_count
FROM categories
WHERE usage_count > 0
ORDER BY type, usage_count DESC, value ASC
""")
options = {}
for row in cursor.fetchall():
cat_type, value, count = row
if cat_type not in options:
options[cat_type] = []
options[cat_type].append(value)
return options
def get_statistics(self) -> Dict[str, Any]:
"""Get database statistics"""
with self._get_connection() as conn:
# Total activities
cursor = conn.execute("SELECT COUNT(*) FROM activities")
total_activities = cursor.fetchone()[0]
# Activities by category
cursor = conn.execute("""
SELECT category, COUNT(*) as count
FROM activities
GROUP BY category
ORDER BY count DESC
""")
categories = dict(cursor.fetchall())
# Database size
cursor = conn.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
size_row = cursor.fetchone()
db_size = size_row[0] if size_row else 0
return {
'total_activities': total_activities,
'categories': categories,
'database_size_bytes': db_size,
'database_path': str(self.db_path)
}
def clear_database(self):
"""Clear all data from database"""
with self._get_connection() as conn:
# Deleting from activities fires the delete trigger, which removes
# the matching FTS rows. The explicit 'delete-all' command then
# guarantees the external-content FTS index is fully cleared.
conn.execute("DELETE FROM activities")
conn.execute("INSERT INTO activities_fts(activities_fts) VALUES('delete-all')")
conn.execute("DELETE FROM categories")
conn.commit()
def rebuild_fts_index(self):
"""Rebuild FTS5 index"""
with self._get_connection() as conn:
conn.execute("INSERT INTO activities_fts(activities_fts) VALUES('rebuild')")
conn.commit()