Rebuild extraction pipeline infrastructure (Faza 0 prep)

Implements the approved plan to replace the broken regex/index-master extraction with an LLM-subagent pipeline. Four parallel lanes: Lane A — scripts/extract_common.py (PDF/docx/doc/pptx/html/zip, no max_pages truncation), normalize_sources.py, chunk_sources.py (~20pg chunks + overlap, manifest registry), activity_schema.json. Lane B — app/config_taxonomy.py (16 fixed category slugs), schema rebuilt from scratch in app/models/ with content_type, language, source_files, source_excerpt, normalized_name, extraction_confidence, needs_review; FTS5 + 3 triggers extended with materials_list and skills_developed. Lane C — build_database.py (--rebuild, atomic swap, schema + fuzzy source_excerpt validation, dedup with needs_review band), validate_extractions.py, review_queue.py, new run_extraction.py orchestrator, SUBAGENT_PROMPT.md. Lane D — search.py content_type/language filters (default search excludes non-game content), E7 schema-compat audit; fixed a NULL keywords AttributeError in _boost_search_relevance. Removes 8 orphaned/dead scripts and app/services/parser.py + indexer.py. Adds tests/ (70 passing, 1 skipped — libreoffice absent). Note: Lane D made one additive edit to app/models/database.py (_update_category_counts) to surface content_type/language in get_filter_options, outside its nominal lane boundary but after Lane B completed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 17:43:38 +00:00
parent e0080edf85
commit 66ae831c36
37 changed files with 4101 additions and 1881 deletions
--- a/app/config_taxonomy.py
+++ b/app/config_taxonomy.py
@@ -0,0 +1,230 @@
+"""
+Controlled category taxonomy for game-library.
+
+Single source of truth for activity categories. The DB stores the *slug*;
+the UI displays the Romanian name. `category` (thematic domain) and
+`content_type` (form of the content) are INDEPENDENT axes — see plan §2.
+"""
+
+import unicodedata
+import re
+from typing import Dict, List
+
+# --- Categories (thematic domain) --------------------------------------------
+# slug -> Romanian display name. ~16 fixed slugs; `altele` is the mandatory
+# fallback and MUST always be present.
+CATEGORIES: Dict[str, str] = {
+    "jocuri-cercetasesti": "Jocuri cercetășești",
+    "team-building": "Team-building",
+    "icebreakers": "Icebreakers / spargerea gheții",
+    "camp-outdoor": "Tabără și activități în aer liber",
+    "wide-games": "Wide games / jocuri de teren",
+    "orientare": "Orientare",
+    "prim-ajutor": "Prim ajutor",
+    "escape-room-puzzle": "Escape room și puzzle",
+    "creative-stem": "Creativitate și STEM",
+    "sports-active": "Sport și activități fizice",
+    "cantece-ceremonii": "Cântece și ceremonii",
+    "retete": "Rețete",
+    "supravietuire": "Supraviețuire",
+    "integrare-incluziune": "Integrare și incluziune",
+    "conflict-empatie": "Conflict și empatie",
+    "altele": "Altele",
+}
+
+# Mandatory fallback slug.
+FALLBACK_CATEGORY = "altele"
+
+# Ordered list of valid slugs.
+CATEGORY_SLUGS: List[str] = list(CATEGORIES.keys())
+
+# --- Content type (form of the content) --------------------------------------
+# Independent axis from `category`. The UI default search excludes the
+# non-game content types (see plan §6).
+CONTENT_TYPES: Dict[str, str] = {
+    "joc": "Joc",
+    "activitate": "Activitate",
+    "reteta": "Rețetă",
+    "cantec": "Cântec",
+    "ceremonie": "Ceremonie",
+}
+
+CONTENT_TYPE_SLUGS: List[str] = list(CONTENT_TYPES.keys())
+
+# Content types considered "non-game" — excluded from the default UI search.
+NON_GAME_CONTENT_TYPES: List[str] = ["reteta", "cantec", "ceremonie"]
+
+DEFAULT_CONTENT_TYPE = "activitate"
+
+# --- Aliases -----------------------------------------------------------------
+# Map of normalized arbitrary strings -> canonical slug. Keys are already
+# diacritic-stripped, lowercased and hyphenated (see _slugify). This catches
+# legacy / messy values from the old DB and common English/Romanian variants.
+_CATEGORY_ALIASES: Dict[str, str] = {
+    # legacy junk
+    "general-activity": "altele",
+    "general": "altele",
+    "educational": "creative-stem",
+    "d": "altele",
+    "a": "altele",
+    "b": "altele",
+    "c": "altele",
+    # scouting
+    "cercetasie": "jocuri-cercetasesti",
+    "cercetasesti": "jocuri-cercetasesti",
+    "scout": "jocuri-cercetasesti",
+    "scouting": "jocuri-cercetasesti",
+    "scout-games": "jocuri-cercetasesti",
+    "jocuri-cercetasesti": "jocuri-cercetasesti",
+    # team building
+    "teambuilding": "team-building",
+    "team": "team-building",
+    "cooperare": "team-building",
+    # icebreakers
+    "icebreaker": "icebreakers",
+    "spargerea-ghetii": "icebreakers",
+    "cunoastere": "icebreakers",
+    "energizers": "icebreakers",
+    "energizer": "icebreakers",
+    # camp / outdoor
+    "camp": "camp-outdoor",
+    "tabara": "camp-outdoor",
+    "outdoor": "camp-outdoor",
+    "aer-liber": "camp-outdoor",
+    # wide games
+    "wide-game": "wide-games",
+    "jocuri-de-teren": "wide-games",
+    "joc-de-teren": "wide-games",
+    "big-games": "wide-games",
+    # orientare
+    "orienteering": "orientare",
+    "navigatie": "orientare",
+    # prim ajutor
+    "first-aid": "prim-ajutor",
+    "primul-ajutor": "prim-ajutor",
+    # escape room / puzzle
+    "escape-room": "escape-room-puzzle",
+    "escaperoom": "escape-room-puzzle",
+    "puzzle": "escape-room-puzzle",
+    "puzzles": "escape-room-puzzle",
+    "ghicitori": "escape-room-puzzle",
+    # creative / stem
+    "creative": "creative-stem",
+    "creativitate": "creative-stem",
+    "stem": "creative-stem",
+    "arts-and-crafts": "creative-stem",
+    "craft": "creative-stem",
+    "crafts": "creative-stem",
+    "stiinta": "creative-stem",
+    # sports
+    "sport": "sports-active",
+    "sports": "sports-active",
+    "sportive": "sports-active",
+    "active": "sports-active",
+    "miscare": "sports-active",
+    "physical": "sports-active",
+    # songs / ceremonies
+    "cantece": "cantece-ceremonii",
+    "cantec": "cantece-ceremonii",
+    "songs": "cantece-ceremonii",
+    "ceremonii": "cantece-ceremonii",
+    "ceremonie": "cantece-ceremonii",
+    "ceremony": "cantece-ceremonii",
+    # recipes
+    "reteta": "retete",
+    "recipe": "retete",
+    "recipes": "retete",
+    "cooking": "retete",
+    "gatit": "retete",
+    # survival
+    "survival": "supravietuire",
+    "supravietuire": "supravietuire",
+    # inclusion
+    "integrare": "integrare-incluziune",
+    "incluziune": "integrare-incluziune",
+    "inclusion": "integrare-incluziune",
+    # conflict / empathy
+    "conflict": "conflict-empatie",
+    "empatie": "conflict-empatie",
+    "empathy": "conflict-empatie",
+    "rezolvarea-conflictelor": "conflict-empatie",
+    # fallback
+    "altele": "altele",
+    "other": "altele",
+    "others": "altele",
+    "misc": "altele",
+}
+
+
+def _slugify(value: str) -> str:
+    """Lowercase, strip diacritics, collapse non-alphanumerics to hyphens."""
+    if not value:
+        return ""
+    # Decompose accents (ă -> a, ș -> s, ț -> t, etc.)
+    decomposed = unicodedata.normalize("NFKD", value)
+    ascii_str = "".join(c for c in decomposed if not unicodedata.combining(c))
+    ascii_str = ascii_str.lower().strip()
+    ascii_str = re.sub(r"[^a-z0-9]+", "-", ascii_str)
+    return ascii_str.strip("-")
+
+
+def normalize_category(value: str) -> str:
+    """Map an arbitrary string to a valid category slug.
+
+    Returns one of CATEGORY_SLUGS, falling back to `altele` for anything
+    unrecognised or empty.
+    """
+    if not value:
+        return FALLBACK_CATEGORY
+    slug = _slugify(str(value))
+    if not slug:
+        return FALLBACK_CATEGORY
+    # Exact slug match.
+    if slug in CATEGORIES:
+        return slug
+    # Alias match.
+    if slug in _CATEGORY_ALIASES:
+        return _CATEGORY_ALIASES[slug]
+    return FALLBACK_CATEGORY
+
+
+def normalize_content_type(value: str) -> str:
+    """Map an arbitrary string to a valid content_type slug.
+
+    Returns one of CONTENT_TYPE_SLUGS, falling back to `activitate`.
+    """
+    if not value:
+        return DEFAULT_CONTENT_TYPE
+    slug = _slugify(str(value))
+    if slug in CONTENT_TYPES:
+        return slug
+    # Light alias handling for plural / English forms.
+    aliases = {
+        "jocuri": "joc",
+        "game": "joc",
+        "games": "joc",
+        "activitati": "activitate",
+        "activity": "activitate",
+        "retete": "reteta",
+        "recipe": "reteta",
+        "cantece": "cantec",
+        "song": "cantec",
+        "ceremonii": "ceremonie",
+        "ceremony": "ceremonie",
+    }
+    return aliases.get(slug, DEFAULT_CONTENT_TYPE)
+
+
+def is_valid_category(slug: str) -> bool:
+    """True if `slug` is a valid category slug."""
+    return slug in CATEGORIES
+
+
+def category_display_name(slug: str) -> str:
+    """Romanian display name for a slug (fallback to the slug itself)."""
+    return CATEGORIES.get(slug, slug)
+
+
+def content_type_display_name(slug: str) -> str:
+    """Romanian display name for a content_type slug."""
+    return CONTENT_TYPES.get(slug, slug)
--- a/app/models/activity.py
+++ b/app/models/activity.py
@@ -5,6 +5,22 @@ Activity data model for INDEX-SISTEM-JOCURI v2.0
 from dataclasses import dataclass, field
 from typing import List, Optional, Dict, Any
 import json
+import re
+import unicodedata
+
+
+def normalize_name(name: str) -> str:
+    """Diacritic-free, lowercased, whitespace-collapsed form of a name.
+
+    Used as the exact-match key for dedup grouping (see plan §4).
+    """
+    if not name:
+        return ""
+    decomposed = unicodedata.normalize("NFKD", name)
+    ascii_str = "".join(c for c in decomposed if not unicodedata.combining(c))
+    ascii_str = ascii_str.lower().strip()
+    ascii_str = re.sub(r"\s+", " ", ascii_str)
+    return ascii_str

@dataclass
 class Activity:
@@ -19,10 +35,19 @@ class Activity:
    # Categories
    category: str = ""
    subcategory: Optional[str] = None
-    
+    # content_type is an axis INDEPENDENT of category:
+    # one of joc/activitate/reteta/cantec/ceremonie (see config_taxonomy).
+    content_type: Optional[str] = None
+
    # Source information
    source_file: str = ""
    page_reference: Optional[str] = None
+    # source_files: JSON-encoded list of every source the activity was seen in.
+    # `source_file` (singular) stays as the primary/original source; build_database
+    # (Lane C) accumulates the full list here on dedup-merge.
+    source_files: List[str] = field(default_factory=list)
+    # Short verbatim quote from the source — anti-hallucination anchor.
+    source_excerpt: Optional[str] = None
    
    # Age and participants
    age_group_min: Optional[int] = None
@@ -44,11 +69,22 @@ class Activity:
    keywords: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    popularity_score: int = 0
-    
+
+    # Extraction / language metadata
+    language: Optional[str] = None          # 'ro' / 'en'
+    normalized_name: Optional[str] = None   # dedup key; auto-derived from name
+    extraction_confidence: Optional[str] = None  # 'high' / 'med' / 'low'
+    needs_review: int = 0
+
    # Database fields
    id: Optional[int] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
+
+    def __post_init__(self):
+        """Derive normalized_name from name when not explicitly provided."""
+        if not self.normalized_name:
+            self.normalized_name = normalize_name(self.name)
    
    def to_dict(self) -> Dict[str, Any]:
        """Convert activity to dictionary for database storage"""
@@ -59,8 +95,11 @@ class Activity:
            'variations': self.variations,
            'category': self.category,
            'subcategory': self.subcategory,
+            'content_type': self.content_type,
            'source_file': self.source_file,
+            'source_files': json.dumps(self.source_files) if self.source_files else None,
            'page_reference': self.page_reference,
+            'source_excerpt': self.source_excerpt,
            'age_group_min': self.age_group_min,
            'age_group_max': self.age_group_max,
            'participants_min': self.participants_min,
@@ -73,7 +112,11 @@ class Activity:
            'difficulty_level': self.difficulty_level,
            'keywords': self.keywords,
            'tags': json.dumps(self.tags) if self.tags else None,
-            'popularity_score': self.popularity_score
+            'popularity_score': self.popularity_score,
+            'language': self.language,
+            'normalized_name': self.normalized_name or normalize_name(self.name),
+            'extraction_confidence': self.extraction_confidence,
+            'needs_review': self.needs_review,
        }
    
    @classmethod
@@ -86,7 +129,17 @@ class Activity:
                tags = json.loads(data['tags'])
            except (json.JSONDecodeError, TypeError):
                tags = []
-        
+
+        # source_files may arrive as a JSON string (DB) or a list (extraction)
+        source_files = data.get('source_files')
+        if isinstance(source_files, str):
+            try:
+                source_files = json.loads(source_files)
+            except (json.JSONDecodeError, TypeError):
+                source_files = []
+        elif source_files is None:
+            source_files = []
+
        return cls(
            id=data.get('id'),
            name=data.get('name', ''),
@@ -95,8 +148,11 @@ class Activity:
            variations=data.get('variations'),
            category=data.get('category', ''),
            subcategory=data.get('subcategory'),
+            content_type=data.get('content_type'),
            source_file=data.get('source_file', ''),
+            source_files=source_files,
            page_reference=data.get('page_reference'),
+            source_excerpt=data.get('source_excerpt'),
            age_group_min=data.get('age_group_min'),
            age_group_max=data.get('age_group_max'),
            participants_min=data.get('participants_min'),
@@ -110,6 +166,10 @@ class Activity:
            keywords=data.get('keywords'),
            tags=tags,
            popularity_score=data.get('popularity_score', 0),
+            language=data.get('language'),
+            normalized_name=data.get('normalized_name'),
+            extraction_confidence=data.get('extraction_confidence'),
+            needs_review=data.get('needs_review', 0) or 0,
            created_at=data.get('created_at'),
            updated_at=data.get('updated_at')
        )
--- a/app/models/database.py
+++ b/app/models/database.py
@@ -30,6 +30,8 @@ class DatabaseManager:
        """Initialize database with v2.0 schema"""
        with self._get_connection() as conn:
            # Main activities table
+            # NOTE: schema is rebuilt from scratch (plan §6) — no in-place
+            # migration. The old DB is deleted and recreated by build_database.
            conn.execute("""
                CREATE TABLE IF NOT EXISTS activities (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -39,9 +41,12 @@ class DatabaseManager:
                    variations TEXT,
                    category TEXT NOT NULL,
                    subcategory TEXT,
+                    content_type TEXT,
                    source_file TEXT NOT NULL,
+                    source_files TEXT,
                    page_reference TEXT,
-                    
+                    source_excerpt TEXT,
+
                    -- Structured parameters
                    age_group_min INTEGER,
                    age_group_max INTEGER,
@@ -49,26 +54,34 @@ class DatabaseManager:
                    participants_max INTEGER,
                    duration_min INTEGER,
                    duration_max INTEGER,
-                    
+
                    -- Categories for filtering
                    materials_category TEXT,
                    materials_list TEXT,
                    skills_developed TEXT,
                    difficulty_level TEXT,
-                    
+
                    -- Metadata
                    keywords TEXT,
                    tags TEXT,
                    popularity_score INTEGER DEFAULT 0,
+
+                    -- Extraction / language metadata
+                    language TEXT,
+                    normalized_name TEXT,
+                    extraction_confidence TEXT,
+                    needs_review INTEGER DEFAULT 0,
+
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)
-            
+
            # FTS5 virtual table for search
            conn.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS activities_fts USING fts5(
                    name, description, rules, variations, keywords,
+                    materials_list, skills_developed,
                    content='activities',
                    content_rowid='id'
                )
@@ -92,6 +105,7 @@ class DatabaseManager:
                "CREATE INDEX IF NOT EXISTS idx_activities_age ON activities(age_group_min, age_group_max)",
                "CREATE INDEX IF NOT EXISTS idx_activities_participants ON activities(participants_min, participants_max)",
                "CREATE INDEX IF NOT EXISTS idx_activities_duration ON activities(duration_min, duration_max)",
+                "CREATE INDEX IF NOT EXISTS idx_activities_normalized_name ON activities(normalized_name)",
                "CREATE INDEX IF NOT EXISTS idx_categories_type ON categories(type)"
            ]
            
@@ -102,24 +116,34 @@ class DatabaseManager:
            conn.execute("""
                CREATE TRIGGER IF NOT EXISTS activities_fts_insert AFTER INSERT ON activities
                BEGIN
-                    INSERT INTO activities_fts(rowid, name, description, rules, variations, keywords)
-                    VALUES (new.id, new.name, new.description, new.rules, new.variations, new.keywords);
+                    INSERT INTO activities_fts(rowid, name, description, rules, variations,
+                                               keywords, materials_list, skills_developed)
+                    VALUES (new.id, new.name, new.description, new.rules, new.variations,
+                            new.keywords, new.materials_list, new.skills_developed);
                END
            """)
-            
+
            conn.execute("""
                CREATE TRIGGER IF NOT EXISTS activities_fts_delete AFTER DELETE ON activities
                BEGIN
-                    DELETE FROM activities_fts WHERE rowid = old.id;
+                    INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
+                                               variations, keywords, materials_list, skills_developed)
+                    VALUES ('delete', old.id, old.name, old.description, old.rules,
+                            old.variations, old.keywords, old.materials_list, old.skills_developed);
                END
            """)
-            
+
            conn.execute("""
                CREATE TRIGGER IF NOT EXISTS activities_fts_update AFTER UPDATE ON activities
                BEGIN
-                    DELETE FROM activities_fts WHERE rowid = old.id;
-                    INSERT INTO activities_fts(rowid, name, description, rules, variations, keywords)
-                    VALUES (new.id, new.name, new.description, new.rules, new.variations, new.keywords);
+                    INSERT INTO activities_fts(activities_fts, rowid, name, description, rules,
+                                               variations, keywords, materials_list, skills_developed)
+                    VALUES ('delete', old.id, old.name, old.description, old.rules,
+                            old.variations, old.keywords, old.materials_list, old.skills_developed);
+                    INSERT INTO activities_fts(rowid, name, description, rules, variations,
+                                               keywords, materials_list, skills_developed)
+                    VALUES (new.id, new.name, new.description, new.rules, new.variations,
+                            new.keywords, new.materials_list, new.skills_developed);
                END
            """)
            
@@ -179,6 +203,8 @@ class DatabaseManager:
        """Update category usage counts"""
        categories_to_update = [
            ('category', activity.category),
+            ('content_type', activity.content_type),
+            ('language', activity.language),
            ('age_group', activity.get_age_range_display()),
            ('participants', activity.get_participants_display()),
            ('duration', activity.get_duration_display()),
@@ -332,8 +358,11 @@ class DatabaseManager:
    def clear_database(self):
        """Clear all data from database"""
        with self._get_connection() as conn:
+            # Deleting from activities fires the delete trigger, which removes
+            # the matching FTS rows. The explicit 'delete-all' command then
+            # guarantees the external-content FTS index is fully cleared.
            conn.execute("DELETE FROM activities")
-            conn.execute("DELETE FROM activities_fts")
+            conn.execute("INSERT INTO activities_fts(activities_fts) VALUES('delete-all')")
            conn.execute("DELETE FROM categories")
            conn.commit()
    
--- a/app/services/__init__.py
+++ b/app/services/__init__.py
@@ -2,8 +2,6 @@
 Services for INDEX-SISTEM-JOCURI v2.0
 """

-from .parser import IndexMasterParser
-from .indexer import ActivityIndexer
 from .search import SearchService

-__all__ = ['IndexMasterParser', 'ActivityIndexer', 'SearchService']
+__all__ = ['SearchService']
--- a/app/services/indexer.py
+++ b/app/services/indexer.py
@@ -1,248 +0,0 @@
-"""
-Activity indexer service for INDEX-SISTEM-JOCURI v2.0
-Coordinates parsing and database indexing
-"""
-
-from typing import List, Dict, Any
-from pathlib import Path
-from app.models.database import DatabaseManager
-from app.models.activity import Activity
-from app.services.parser import IndexMasterParser
-import time
-
-class ActivityIndexer:
-    """Service for indexing activities from INDEX_MASTER into database"""
-    
-    def __init__(self, db_manager: DatabaseManager, index_master_path: str):
-        """Initialize indexer with database manager and INDEX_MASTER path"""
-        self.db = db_manager
-        self.parser = IndexMasterParser(index_master_path)
-        self.indexing_stats = {}
-    
-    def index_all_activities(self, clear_existing: bool = False) -> Dict[str, Any]:
-        """Index all activities from INDEX_MASTER into database"""
-        
-        print("🚀 Starting activity indexing process...")
-        start_time = time.time()
-        
-        # Clear existing data if requested
-        if clear_existing:
-            print("🗑️  Clearing existing database...")
-            self.db.clear_database()
-        
-        # Parse activities from INDEX_MASTER
-        print("📖 Parsing INDEX_MASTER file...")
-        activities = self.parser.parse_all_categories()
-        
-        if not activities:
-            print("❌ No activities were parsed!")
-            return {'success': False, 'error': 'No activities parsed'}
-        
-        # Filter valid activities
-        valid_activities = []
-        for activity in activities:
-            if self.parser.validate_activity_completeness(activity):
-                valid_activities.append(activity)
-            else:
-                print(f"⚠️  Skipping incomplete activity: {activity.name[:50]}...")
-        
-        print(f"✅ Validated {len(valid_activities)} activities out of {len(activities)} parsed")
-        
-        if len(valid_activities) < 100:
-            print(f"⚠️  Warning: Only {len(valid_activities)} valid activities found. Expected 500+")
-        
-        # Bulk insert into database
-        print("💾 Inserting activities into database...")
-        try:
-            inserted_count = self.db.bulk_insert_activities(valid_activities)
-            
-            # Rebuild FTS index for optimal search performance
-            print("🔍 Rebuilding search index...")
-            self.db.rebuild_fts_index()
-            
-            end_time = time.time()
-            indexing_time = end_time - start_time
-            
-            # Generate final statistics (with error handling)
-            try:
-                stats = self._generate_indexing_stats(valid_activities, indexing_time)
-                stats['inserted_count'] = inserted_count
-                stats['success'] = True
-            except Exception as e:
-                print(f"⚠️  Error generating statistics: {e}")
-                stats = {
-                    'success': True,
-                    'inserted_count': inserted_count,
-                    'indexing_time_seconds': indexing_time,
-                    'error': f'Stats generation failed: {str(e)}'
-                }
-            
-            print(f"✅ Indexing complete! {inserted_count} activities indexed in {indexing_time:.2f}s")
-            
-            # Verify database state (with error handling)
-            try:
-                db_stats = self.db.get_statistics()
-                print(f"📊 Database now contains {db_stats['total_activities']} activities")
-            except Exception as e:
-                print(f"⚠️  Error getting database statistics: {e}")
-                print(f"📊 Database insertion completed, statistics unavailable")
-            
-            return stats
-            
-        except Exception as e:
-            print(f"❌ Error during database insertion: {e}")
-            return {'success': False, 'error': str(e)}
-    
-    def index_specific_category(self, category_code: str) -> Dict[str, Any]:
-        """Index activities from a specific category only"""
-        
-        print(f"🎯 Indexing specific category: {category_code}")
-        
-        # Load content and parse specific category
-        if not self.parser.load_content():
-            return {'success': False, 'error': 'Could not load INDEX_MASTER'}
-        
-        category_name = self.parser.category_mapping.get(category_code)
-        if not category_name:
-            return {'success': False, 'error': f'Unknown category code: {category_code}'}
-        
-        activities = self.parser.parse_category_section(category_code, category_name)
-        
-        if not activities:
-            return {'success': False, 'error': f'No activities found in category {category_code}'}
-        
-        # Filter valid activities
-        valid_activities = [a for a in activities if self.parser.validate_activity_completeness(a)]
-        
-        try:
-            inserted_count = self.db.bulk_insert_activities(valid_activities)
-            return {
-                'success': True,
-                'category': category_name,
-                'inserted_count': inserted_count,
-                'total_parsed': len(activities),
-                'valid_activities': len(valid_activities)
-            }
-        except Exception as e:
-            return {'success': False, 'error': str(e)}
-    
-    def _generate_indexing_stats(self, activities: List[Activity], indexing_time: float) -> Dict[str, Any]:
-        """Generate comprehensive indexing statistics"""
-        
-        # Get parser statistics
-        parser_stats = self.parser.get_parsing_statistics()
-        
-        # Calculate additional metrics
-        categories = {}
-        age_ranges = {}
-        durations = {}
-        materials = {}
-        
-        for activity in activities:
-            # Category breakdown
-            if activity.category in categories:
-                categories[activity.category] += 1
-            else:
-                categories[activity.category] = 1
-            
-            # Age range analysis (with safety check)
-            try:
-                age_key = activity.get_age_range_display() or "nespecificat"
-                age_ranges[age_key] = age_ranges.get(age_key, 0) + 1
-            except Exception as e:
-                print(f"Warning: Error getting age range for activity {activity.name}: {e}")
-                age_ranges["nespecificat"] = age_ranges.get("nespecificat", 0) + 1
-            
-            # Duration analysis (with safety check)
-            try:
-                duration_key = activity.get_duration_display() or "nespecificat"
-                durations[duration_key] = durations.get(duration_key, 0) + 1
-            except Exception as e:
-                print(f"Warning: Error getting duration for activity {activity.name}: {e}")
-                durations["nespecificat"] = durations.get("nespecificat", 0) + 1
-            
-            # Materials analysis (with safety check)
-            try:
-                materials_key = activity.get_materials_display() or "nespecificat"
-                materials[materials_key] = materials.get(materials_key, 0) + 1
-            except Exception as e:
-                print(f"Warning: Error getting materials for activity {activity.name}: {e}")
-                materials["nespecificat"] = materials.get("nespecificat", 0) + 1
-        
-        return {
-            'indexing_time_seconds': indexing_time,
-            'parsing_stats': parser_stats,
-            'distribution': {
-                'categories': categories,
-                'age_ranges': age_ranges,
-                'durations': durations,
-                'materials': materials
-            },
-            'quality_metrics': {
-                'completion_rate': parser_stats.get('completion_rate', 0),
-                'average_description_length': parser_stats.get('average_description_length', 0),
-                'activities_with_metadata': sum(1 for a in activities if a.age_group_min or a.participants_min or a.duration_min)
-            }
-        }
-    
-    def verify_indexing_quality(self) -> Dict[str, Any]:
-        """Verify the quality of indexed data"""
-        
-        try:
-            # Get database statistics
-            db_stats = self.db.get_statistics()
-            
-            # Check for minimum activity count
-            total_activities = db_stats['total_activities']
-            meets_minimum = total_activities >= 500
-            
-            # Check category distribution
-            categories = db_stats.get('categories', {})
-            category_coverage = len(categories)
-            
-            # Sample some activities to check quality
-            sample_activities = self.db.search_activities(limit=10)
-            
-            quality_issues = []
-            for activity in sample_activities:
-                if not activity.get('description') or len(activity['description']) < 10:
-                    quality_issues.append(f"Activity {activity.get('name', 'Unknown')} has insufficient description")
-                
-                if not activity.get('category'):
-                    quality_issues.append(f"Activity {activity.get('name', 'Unknown')} missing category")
-            
-            return {
-                'total_activities': total_activities,
-                'meets_minimum_requirement': meets_minimum,
-                'minimum_target': 500,
-                'category_coverage': category_coverage,
-                'expected_categories': len(self.parser.category_mapping),
-                'quality_issues': quality_issues,
-                'quality_score': max(0, 100 - len(quality_issues) * 10),
-                'database_stats': db_stats
-            }
-            
-        except Exception as e:
-            return {'error': str(e), 'quality_score': 0}
-    
-    def get_indexing_progress(self) -> Dict[str, Any]:
-        """Get current indexing progress and status"""
-        try:
-            db_stats = self.db.get_statistics()
-            
-            # Calculate progress towards 500+ activities goal
-            total_activities = db_stats['total_activities']
-            target_activities = 500
-            progress_percentage = min(100, (total_activities / target_activities) * 100)
-            
-            return {
-                'current_activities': total_activities,
-                'target_activities': target_activities,
-                'progress_percentage': progress_percentage,
-                'status': 'completed' if total_activities >= target_activities else 'in_progress',
-                'categories_indexed': list(db_stats.get('categories', {}).keys()),
-                'database_size_mb': db_stats.get('database_size_bytes', 0) / (1024 * 1024)
-            }
-            
-        except Exception as e:
-            return {'error': str(e), 'status': 'error'}
--- a/app/services/parser.py
+++ b/app/services/parser.py
@@ -1,340 +0,0 @@
-"""
-Advanced parser for INDEX_MASTER_JOCURI_ACTIVITATI.md
-Extracts 500+ individual activities with full details
-"""
-
-import re
-from pathlib import Path
-from typing import List, Dict, Optional, Tuple
-from app.models.activity import Activity
-
-class IndexMasterParser:
-    """Advanced parser for extracting real activities from INDEX_MASTER"""
-    
-    def __init__(self, index_file_path: str):
-        """Initialize parser with INDEX_MASTER file path"""
-        self.index_file_path = Path(index_file_path)
-        self.content = ""
-        self.activities = []
-        
-        # Category mapping for main sections (exact match from file)
-        self.category_mapping = {
-            '[A]': 'JOCURI CERCETĂȘEȘTI ȘI SCOUT',
-            '[B]': 'TEAM BUILDING ȘI COMUNICARE',
-            '[C]': 'CAMPING ȘI ACTIVITĂȚI EXTERIOR', 
-            '[D]': 'ESCAPE ROOM ȘI PUZZLE-URI',
-            '[E]': 'ORIENTARE ȘI BUSOLE',
-            '[F]': 'PRIMUL AJUTOR ȘI SIGURANȚA',
-            '[G]': 'ACTIVITĂȚI EDUCAȚIONALE',
-            '[H]': 'RESURSE SPECIALE'
-        }
-    
-    def load_content(self) -> bool:
-        """Load and validate INDEX_MASTER content"""
-        try:
-            if not self.index_file_path.exists():
-                print(f"❌ INDEX_MASTER file not found: {self.index_file_path}")
-                return False
-            
-            with open(self.index_file_path, 'r', encoding='utf-8') as f:
-                self.content = f.read()
-            
-            if len(self.content) < 1000:  # Sanity check
-                print(f"⚠️  INDEX_MASTER file seems too small: {len(self.content)} chars")
-                return False
-            
-            print(f"✅ Loaded INDEX_MASTER: {len(self.content)} characters")
-            return True
-            
-        except Exception as e:
-            print(f"❌ Error loading INDEX_MASTER: {e}")
-            return False
-    
-    def parse_all_categories(self) -> List[Activity]:
-        """Parse all categories and extract individual activities"""
-        if not self.load_content():
-            return []
-        
-        print("🔍 Starting comprehensive parsing of INDEX_MASTER...")
-        
-        # Parse each main category
-        for category_code, category_name in self.category_mapping.items():
-            print(f"\n📂 Processing category {category_code}: {category_name}")
-            category_activities = self.parse_category_section(category_code, category_name)
-            self.activities.extend(category_activities)
-            print(f"   ✅ Extracted {len(category_activities)} activities")
-        
-        print(f"\n🎯 Total activities extracted: {len(self.activities)}")
-        return self.activities
-    
-    def parse_category_section(self, category_code: str, category_name: str) -> List[Activity]:
-        """Parse a specific category section"""
-        activities = []
-        
-        # Find the category section - exact pattern match
-        # Look for the actual section, not the table of contents
-        pattern = rf"^## {re.escape(category_code)} {re.escape(category_name)}\s*$"
-        matches = list(re.finditer(pattern, self.content, re.MULTILINE | re.IGNORECASE))
-        
-        if not matches:
-            print(f"   ⚠️  Category section not found: {category_code}")
-            return activities
-        
-        # Take the last match (should be the actual section, not TOC)
-        match = matches[-1]
-        print(f"   📍 Found section at position {match.start()}")
-        
-        # Extract content until next main category or end
-        start_pos = match.end()
-        
-        # Find next main category (look for complete header)
-        next_category_pattern = r"^## \[[A-H]\] [A-ZĂÂÎȘȚ]"
-        next_match = re.search(next_category_pattern, self.content[start_pos:], re.MULTILINE)
-        
-        if next_match:
-            end_pos = start_pos + next_match.start()
-            section_content = self.content[start_pos:end_pos]
-        else:
-            section_content = self.content[start_pos:]
-        
-        # Parse subsections within the category
-        activities.extend(self._parse_subsections(section_content, category_name))
-        
-        return activities
-    
-    def _parse_subsections(self, section_content: str, category_name: str) -> List[Activity]:
-        """Parse subsections within a category"""
-        activities = []
-        
-        # Find all subsections (### markers)
-        subsection_pattern = r"^### (.+?)$"
-        subsections = re.finditer(subsection_pattern, section_content, re.MULTILINE)
-        
-        subsection_list = list(subsections)
-        
-        for i, subsection in enumerate(subsection_list):
-            subsection_title = subsection.group(1).strip()
-            subsection_start = subsection.end()
-            
-            # Find end of subsection
-            if i + 1 < len(subsection_list):
-                subsection_end = subsection_list[i + 1].start()
-            else:
-                subsection_end = len(section_content)
-            
-            subsection_text = section_content[subsection_start:subsection_end]
-            
-            # Parse individual games in this subsection
-            subsection_activities = self._parse_games_in_subsection(
-                subsection_text, category_name, subsection_title
-            )
-            activities.extend(subsection_activities)
-        
-        return activities
-    
-    def _parse_games_in_subsection(self, subsection_text: str, category_name: str, subsection_title: str) -> List[Activity]:
-        """Parse individual games within a subsection"""
-        activities = []
-        
-        # Look for "Exemple de jocuri:" sections
-        examples_pattern = r"\*\*Exemple de jocuri:\*\*\s*\n(.*?)(?=\n\*\*|$)"
-        examples_matches = re.finditer(examples_pattern, subsection_text, re.DOTALL)
-        
-        for examples_match in examples_matches:
-            examples_text = examples_match.group(1)
-            
-            # Extract individual games (numbered list)
-            game_pattern = r"^(\d+)\.\s*\*\*(.+?)\*\*\s*-\s*(.+?)$"
-            games = re.finditer(game_pattern, examples_text, re.MULTILINE)
-            
-            for game_match in games:
-                game_number = game_match.group(1)
-                game_name = game_match.group(2).strip()
-                game_description = game_match.group(3).strip()
-                
-                # Extract metadata from subsection
-                metadata = self._extract_subsection_metadata(subsection_text)
-                
-                # Create activity
-                activity = Activity(
-                    name=game_name,
-                    description=game_description,
-                    category=category_name,
-                    subcategory=subsection_title,
-                    source_file=f"INDEX_MASTER_JOCURI_ACTIVITATI.md",
-                    page_reference=f"{category_name} > {subsection_title} > #{game_number}",
-                    **metadata
-                )
-                
-                activities.append(activity)
-        
-        # Also extract from direct activity descriptions without "Exemple de jocuri"
-        activities.extend(self._parse_direct_activities(subsection_text, category_name, subsection_title))
-        
-        return activities
-    
-    def _extract_subsection_metadata(self, subsection_text: str) -> Dict:
-        """Extract metadata from subsection text"""
-        metadata = {}
-        
-        # Extract participants info
-        participants_pattern = r"\*\*Participanți:\*\*\s*(.+?)(?:\n|\*\*)"
-        participants_match = re.search(participants_pattern, subsection_text)
-        if participants_match:
-            participants_text = participants_match.group(1).strip()
-            participants = self._parse_participants(participants_text)
-            metadata.update(participants)
-        
-        # Extract duration
-        duration_pattern = r"\*\*Durata:\*\*\s*(.+?)(?:\n|\*\*)"
-        duration_match = re.search(duration_pattern, subsection_text)
-        if duration_match:
-            duration_text = duration_match.group(1).strip()
-            duration = self._parse_duration(duration_text)
-            metadata.update(duration)
-        
-        # Extract materials
-        materials_pattern = r"\*\*Materiale:\*\*\s*(.+?)(?:\n|\*\*)"
-        materials_match = re.search(materials_pattern, subsection_text)
-        if materials_match:
-            materials_text = materials_match.group(1).strip()
-            metadata['materials_list'] = materials_text
-            metadata['materials_category'] = self._categorize_materials(materials_text)
-        
-        # Extract keywords
-        keywords_pattern = r"\*\*Cuvinte cheie:\*\*\s*(.+?)(?:\n|\*\*)"
-        keywords_match = re.search(keywords_pattern, subsection_text)
-        if keywords_match:
-            metadata['keywords'] = keywords_match.group(1).strip()
-        
-        return metadata
-    
-    def _parse_participants(self, participants_text: str) -> Dict:
-        """Parse participants information"""
-        result = {}
-        
-        # Look for number ranges like "8-30 copii" or "5-15 persoane"
-        range_pattern = r"(\d+)-(\d+)"
-        range_match = re.search(range_pattern, participants_text)
-        
-        if range_match:
-            result['participants_min'] = int(range_match.group(1))
-            result['participants_max'] = int(range_match.group(2))
-        else:
-            # Look for single numbers
-            number_pattern = r"(\d+)\+"
-            number_match = re.search(number_pattern, participants_text)
-            if number_match:
-                result['participants_min'] = int(number_match.group(1))
-        
-        # Extract age information
-        age_pattern = r"(\d+)-(\d+)\s*ani"
-        age_match = re.search(age_pattern, participants_text)
-        if age_match:
-            result['age_group_min'] = int(age_match.group(1))
-            result['age_group_max'] = int(age_match.group(2))
-        
-        return result
-    
-    def _parse_duration(self, duration_text: str) -> Dict:
-        """Parse duration information"""
-        result = {}
-        
-        # Look for time ranges like "5-20 minute" or "15-30min"
-        range_pattern = r"(\d+)-(\d+)\s*(?:minute|min)"
-        range_match = re.search(range_pattern, duration_text)
-        
-        if range_match:
-            result['duration_min'] = int(range_match.group(1))
-            result['duration_max'] = int(range_match.group(2))
-        else:
-            # Look for single duration
-            single_pattern = r"(\d+)\+?\s*(?:minute|min)"
-            single_match = re.search(single_pattern, duration_text)
-            if single_match:
-                result['duration_min'] = int(single_match.group(1))
-        
-        return result
-    
-    def _categorize_materials(self, materials_text: str) -> str:
-        """Categorize materials into simple categories"""
-        materials_lower = materials_text.lower()
-        
-        if any(word in materials_lower for word in ['fără', 'nu necesare', 'nimic', 'minime']):
-            return 'Fără materiale'
-        elif any(word in materials_lower for word in ['hârtie', 'creion', 'marker', 'simple']):
-            return 'Materiale simple'
-        elif any(word in materials_lower for word in ['computer', 'proiector', 'echipament', 'complexe']):
-            return 'Materiale complexe'
-        else:
-            return 'Materiale variate'
-    
-    def _parse_direct_activities(self, subsection_text: str, category_name: str, subsection_title: str) -> List[Activity]:
-        """Parse activities that are described directly without 'Exemple de jocuri' section"""
-        activities = []
-        
-        # Look for activity descriptions in sections that don't have "Exemple de jocuri"
-        if "**Exemple de jocuri:**" not in subsection_text:
-            # Try to extract from file descriptions
-            file_pattern = r"\*\*Fișier:\*\*\s*`([^`]+)`.*?\*\*(.+?)\*\*"
-            file_matches = re.finditer(file_pattern, subsection_text, re.DOTALL)
-            
-            for file_match in file_matches:
-                file_name = file_match.group(1)
-                description_part = file_match.group(2)
-                
-                # Create a general activity for this file
-                activity = Activity(
-                    name=f"Activități din {file_name}",
-                    description=f"Colecție de activități din fișierul {file_name}. {description_part[:200]}...",
-                    category=category_name,
-                    subcategory=subsection_title,
-                    source_file=file_name,
-                    page_reference=f"{category_name} > {subsection_title}",
-                    **self._extract_subsection_metadata(subsection_text)
-                )
-                
-                activities.append(activity)
-        
-        return activities
-    
-    def validate_activity_completeness(self, activity: Activity) -> bool:
-        """Validate that an activity has all necessary fields"""
-        required_fields = ['name', 'description', 'category', 'source_file']
-        
-        for field in required_fields:
-            if not getattr(activity, field) or not getattr(activity, field).strip():
-                return False
-        
-        # Check minimum description length
-        if len(activity.description) < 10:
-            return False
-        
-        return True
-    
-    def get_parsing_statistics(self) -> Dict:
-        """Get statistics about the parsing process"""
-        if not self.activities:
-            return {'total_activities': 0}
-        
-        category_counts = {}
-        valid_activities = 0
-        
-        for activity in self.activities:
-            # Count by category
-            if activity.category in category_counts:
-                category_counts[activity.category] += 1
-            else:
-                category_counts[activity.category] = 1
-            
-            # Count valid activities
-            if self.validate_activity_completeness(activity):
-                valid_activities += 1
-        
-        return {
-            'total_activities': len(self.activities),
-            'valid_activities': valid_activities,
-            'completion_rate': (valid_activities / len(self.activities)) * 100 if self.activities else 0,
-            'category_breakdown': category_counts,
-            'average_description_length': sum(len(a.description) for a in self.activities) / len(self.activities) if self.activities else 0
-        }
--- a/app/services/search.py
+++ b/app/services/search.py
@@ -5,8 +5,19 @@ Enhanced search with FTS5 and intelligent filtering

 from typing import List, Dict, Any, Optional
 from app.models.database import DatabaseManager
+from app.config_taxonomy import NON_GAME_CONTENT_TYPES
 import re

+# Category slugs that are themselves "non-game" — selecting one of these as a
+# category filter also lifts the default non-game content_type exclusion.
+NON_GAME_CATEGORIES = {"retete", "cantece-ceremonii"}
+
+# When a Python-side post-filter is active the DB LIMIT is applied *before*
+# filtering, so we over-fetch to still satisfy the caller's `limit`.
+_OVERSCAN_FACTOR = 5
+_OVERSCAN_CAP = 2000
+
+
 class SearchService:
    """Enhanced search service with intelligent query processing"""
    
@@ -24,22 +35,72 @@ class SearchService:
        
        if filters is None:
            filters = {}
-        
+
        # Process and normalize search text
        processed_search = self._process_search_text(search_text)
-        
+
        # Map web filters to database fields
        db_filters = self._map_filters_to_db_fields(filters)
-        
+
+        # content_type and language are filtered in Python: the DB layer does
+        # not expose them as query parameters. The DEFAULT search excludes the
+        # non-game content types (rețete / cântece / ceremonii) — they surface
+        # only when the user explicitly filters that content_type, or picks a
+        # non-game category. See plan §6.
+        content_type, exclude_non_game = self._resolve_content_type_filter(filters)
+        language = (filters.get('language') or '').strip().lower() or None
+        post_filtering = bool(content_type or exclude_non_game or language)
+
+        # Over-fetch when post-filtering so the final list can still reach `limit`.
+        fetch_limit = min(limit * _OVERSCAN_FACTOR, _OVERSCAN_CAP) if post_filtering else limit
+
        # Perform database search
        results = self.db.search_activities(
            search_text=processed_search,
            **db_filters,
-            limit=limit
+            limit=fetch_limit
        )
-        
-        # Post-process results for relevance and ranking
-        return self._post_process_results(results, processed_search, filters)
+
+        # Apply content_type / language post-filters
+        results = self._apply_content_type_filter(results, content_type, exclude_non_game)
+        if language:
+            results = [r for r in results
+                       if (r.get('language') or '').strip().lower() == language]
+
+        # Post-process results for relevance and ranking, then honour `limit`
+        results = self._post_process_results(results, processed_search, filters)
+        return results[:limit]
+
+    def _resolve_content_type_filter(self, filters: Dict[str, str]):
+        """Work out which content_type post-filter applies to this search.
+
+        Returns a tuple (explicit_content_type | None, exclude_non_game: bool):
+        - explicit `content_type` filter given → (that value, False);
+        - `category` filter naming a non-game category → (None, False);
+        - anything else → (None, True): the default search hides non-game types.
+        """
+        explicit_type = (filters.get('content_type') or '').strip()
+        if explicit_type:
+            return explicit_type, False
+        chosen_category = (filters.get('category') or '').strip()
+        # Hiding non-game content is the default; it is lifted only when the
+        # caller filtered on one of the non-game categories.
+        return None, chosen_category not in NON_GAME_CATEGORIES
+
+    def _apply_content_type_filter(self,
+                                   results: List[Dict[str, Any]],
+                                   content_type: Optional[str],
+                                   exclude_non_game: bool) -> List[Dict[str, Any]]:
+        """Keep rows matching an explicit content_type, else drop non-game rows."""
+        if not content_type and not exclude_non_game:
+            return results  # no content_type post-filter requested
+
+        def keep(row_type: str) -> bool:
+            # An explicit type wins; otherwise only the known non-game types
+            # are rejected, so NULL/unknown types survive the default search.
+            return (row_type == content_type if content_type
+                    else row_type not in NON_GAME_CONTENT_TYPES)
+        return [row for row in results if keep(row.get('content_type') or '')]
    
    def _process_search_text(self, search_text: Optional[str]) -> Optional[str]:
        """Process and enhance search text for better FTS5 results"""
@@ -83,10 +144,16 @@ class SearchService:
            if not filter_value or not filter_value.strip():
                continue
            
+            # content_type / language are NOT database query params — they are
+            # applied as Python post-filters in search_activities(). Skip them
+            # here so they never reach DatabaseManager.search_activities().
+            if filter_key in ('content_type', 'language'):
+                continue
+
            # Map filter types to database fields
            if filter_key == 'category':
                db_filters['category'] = filter_value
-            
+
            elif filter_key == 'age_group':
                # Parse age range (e.g., "5-8 ani", "12+ ani")
                age_match = re.search(r'(\d+)(?:-(\d+))?\s*ani?', filter_value)
@@ -177,21 +244,22 @@ class SearchService:
            boost_score = 0
            
            # Check name matches (highest priority)
-            name_lower = result.get('name', '').lower()
+            # NB: use `or ''` — nullable columns come back as None, not ''.
+            name_lower = (result.get('name') or '').lower()
            for term in search_terms:
                if term in name_lower:
                    boost_score += 10
                    if name_lower.startswith(term):
                        boost_score += 5  # Extra boost for name starts with term
-            
+
            # Check description matches
-            desc_lower = result.get('description', '').lower()
+            desc_lower = (result.get('description') or '').lower()
            for term in search_terms:
                if term in desc_lower:
                    boost_score += 3
-            
+
            # Check keywords matches
-            keywords_lower = result.get('keywords', '').lower()
+            keywords_lower = (result.get('keywords') or '').lower()
            for term in search_terms:
                if term in keywords_lower:
                    boost_score += 5
@@ -280,11 +348,14 @@ class SearchService:
            return []
        
        try:
-            # Search for activities that match the partial query
+            # Search for activities that match the partial query.
+            # Over-fetch then drop non-game content types so autocomplete
+            # mirrors the default search (no rețete / cântece / ceremonii).
            results = self.db.search_activities(
                search_text=f'"{partial_query}"',
-                limit=limit * 2
+                limit=limit * 6
            )
+            results = self._apply_content_type_filter(results, None, True)
            
            suggestions = []
            seen = set()
--- a/app/templates/activity.html
+++ b/app/templates/activity.html
@@ -15,7 +15,13 @@
    <header class="activity-detail-header">
        <div class="activity-title-section">
            <h1 class="activity-detail-title">{{ activity.name }}</h1>
-            <span class="activity-category-badge">{{ activity.category }}</span>
+            <span class="activity-category-badge">{{ display_names.get(activity.category, activity.category) }}</span>
+            {% if activity.content_type %}
+            <span class="activity-content-type-badge">{{ display_names.get(activity.content_type, activity.content_type) }}</span>
+            {% endif %}
+            {% if activity.needs_review %}
+            <span class="activity-badge needs-review" title="Această activitate necesită verificare">⚠ De verificat</span>
+            {% endif %}
        </div>
        
        {% if activity.subcategory %}
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -36,7 +36,31 @@
                    <select name="category" id="category" class="filter-select">
                        <option value="">Toate categoriile</option>
                        {% for category in filters.category %}
-                        <option value="{{ category }}">{{ category }}</option>
+                        <option value="{{ category }}">{{ display_names.get(category, category) }}</option>
+                        {% endfor %}
+                    </select>
+                </div>
+                {% endif %}
+
+                {% if filters.content_type %}
+                <div class="filter-group">
+                    <label for="content_type" class="filter-label">Tip conținut</label>
+                    <select name="content_type" id="content_type" class="filter-select">
+                        <option value="">Doar jocuri și activități</option>
+                        {% for content_type in filters.content_type %}
+                        <option value="{{ content_type }}">{{ display_names.get(content_type, content_type) }}</option>
+                        {% endfor %}
+                    </select>
+                </div>
+                {% endif %}
+
+                {% if filters.language %}
+                <div class="filter-group">
+                    <label for="language" class="filter-label">Limbă</label>
+                    <select name="language" id="language" class="filter-select">
+                        <option value="">Toate limbile</option>
+                        {% for language in filters.language %}
+                        <option value="{{ language }}">{{ display_names.get(language, language) }}</option>
                        {% endfor %}
                    </select>
                </div>
--- a/app/templates/results.html
+++ b/app/templates/results.html
@@ -24,7 +24,29 @@
                <option value="">Toate categoriile</option>
                {% for category in filters.category %}
                <option value="{{ category }}" {% if applied_filters.category == category %}selected{% endif %}>
-                    {{ category }}
+                    {{ display_names.get(category, category) }}
+                </option>
+                {% endfor %}
+            </select>
+            {% endif %}
+
+            {% if filters.content_type %}
+            <select name="content_type" class="filter-select compact">
+                <option value="">Doar jocuri și activități</option>
+                {% for content_type in filters.content_type %}
+                <option value="{{ content_type }}" {% if applied_filters.content_type == content_type %}selected{% endif %}>
+                    {{ display_names.get(content_type, content_type) }}
+                </option>
+                {% endfor %}
+            </select>
+            {% endif %}
+
+            {% if filters.language %}
+            <select name="language" class="filter-select compact">
+                <option value="">Toate limbile</option>
+                {% for language in filters.language %}
+                <option value="{{ language }}" {% if applied_filters.language == language %}selected{% endif %}>
+                    {{ display_names.get(language, language) }}
                </option>
                {% endfor %}
            </select>
@@ -109,7 +131,10 @@
                        {{ activity.name }}
                    </a>
                </h3>
-                <span class="activity-category">{{ activity.category }}</span>
+                <span class="activity-category">{{ display_names.get(activity.category, activity.category) }}</span>
+                {% if activity.needs_review %}
+                <span class="activity-badge needs-review" title="Această activitate necesită verificare">⚠ De verificat</span>
+                {% endif %}
            </header>

            <div class="activity-content">
--- a/app/web/routes.py
+++ b/app/web/routes.py
@@ -7,11 +7,17 @@ from flask import Blueprint, request, render_template, jsonify, current_app
 from app.models.database import DatabaseManager
 from app.models.activity import Activity
 from app.services.search import SearchService
+from app.config_taxonomy import CATEGORIES, CONTENT_TYPES
 import os
 from pathlib import Path

 bp = Blueprint('main', __name__)

+# Slug -> Romanian display name for UI labels. Category and content_type slugs
+# never collide; language codes share the map (on a clash the later dict wins).
+LANGUAGE_NAMES = {'ro': 'Română', 'en': 'Engleză'}
+DISPLAY_NAMES = {**CATEGORIES, **CONTENT_TYPES, **LANGUAGE_NAMES}
+
 # Initialize database manager (will be configured in application factory)
 def get_db_manager():
    """Get database manager instance"""
@@ -36,15 +42,17 @@ def index():
        # Get database statistics for the interface
        stats = db.get_statistics()
        
-        return render_template('index.html', 
+        return render_template('index.html',
                             filters=filter_options,
+                             display_names=DISPLAY_NAMES,
                             stats=stats)
-    
+
    except Exception as e:
        print(f"Error loading main page: {e}")
        # Fallback with empty filters
-        return render_template('index.html', 
+        return render_template('index.html',
                             filters={},
+                             display_names=DISPLAY_NAMES,
                             stats={'total_activities': 0})

@bp.route('/search', methods=['GET', 'POST'])
@@ -82,8 +90,9 @@ def search():
                             search_query=search_query,
                             applied_filters=filters,
                             filters=filter_options,
+                             display_names=DISPLAY_NAMES,
                             results_count=len(activities))
-    
+
    except Exception as e:
        print(f"Search error: {e}")
        return render_template('results.html',
@@ -91,6 +100,7 @@ def search():
                             search_query='',
                             applied_filters={},
                             filters={},
+                             display_names=DISPLAY_NAMES,
                             results_count=0,
                             error=str(e))

@@ -121,6 +131,7 @@ def activity_detail(activity_id):
        
        return render_template('activity.html',
                             activity=activity,
+                             display_names=DISPLAY_NAMES,
                             similar_activities=similar_activities)
    
    except Exception as e: