Rebuild extraction pipeline infrastructure (Faza 0 prep)

Implements the approved plan to replace the broken regex/index-master
extraction with an LLM-subagent pipeline. Four parallel lanes:

Lane A — scripts/extract_common.py (PDF/docx/doc/pptx/html/zip, no
  max_pages truncation), normalize_sources.py, chunk_sources.py
  (~20-page chunks with overlap, manifest registry), activity_schema.json.
Lane B — app/config_taxonomy.py (16 fixed category slugs), schema
  rebuilt from scratch in app/models/ with content_type, language,
  source_files, source_excerpt, normalized_name, extraction_confidence,
  needs_review; FTS5 + 3 triggers extended with materials_list and
  skills_developed.
Lane C — build_database.py (--rebuild, atomic swap, schema + fuzzy
  source_excerpt validation, dedup with needs_review band),
  validate_extractions.py, review_queue.py, new run_extraction.py
  orchestrator, SUBAGENT_PROMPT.md.
Lane D — search.py content_type/language filters (default search
  excludes non-game content), E7 schema-compat audit; fixed a NULL
  keywords AttributeError in _boost_search_relevance.

Removes 8 orphaned/dead scripts and app/services/parser.py +
indexer.py. Adds tests/ (70 passing, 1 skipped — libreoffice absent).

Note: Lane D made one additive edit to app/models/database.py
(_update_category_counts) to surface content_type/language in
get_filter_options, outside its nominal lane boundary but after
Lane B completed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Claude Agent
2026-05-19 17:43:38 +00:00
parent e0080edf85
commit 66ae831c36
37 changed files with 4101 additions and 1881 deletions

View File

@@ -2,8 +2,6 @@
Services for INDEX-SISTEM-JOCURI v2.0
"""
from .parser import IndexMasterParser
from .indexer import ActivityIndexer
from .search import SearchService
__all__ = ['IndexMasterParser', 'ActivityIndexer', 'SearchService']
__all__ = ['SearchService']

View File

@@ -1,248 +0,0 @@
"""
Activity indexer service for INDEX-SISTEM-JOCURI v2.0
Coordinates parsing and database indexing
"""
from typing import List, Dict, Any
from pathlib import Path
from app.models.database import DatabaseManager
from app.models.activity import Activity
from app.services.parser import IndexMasterParser
import time
class ActivityIndexer:
"""Service for indexing activities from INDEX_MASTER into database"""
def __init__(self, db_manager: DatabaseManager, index_master_path: str):
    """Wire the indexer to its database manager and INDEX_MASTER parser.

    Args:
        db_manager: Database manager, stored as ``self.db``.
        index_master_path: Path passed on to ``IndexMasterParser``.
    """
    self.db, self.indexing_stats = db_manager, {}
    self.parser = IndexMasterParser(index_master_path)
def index_all_activities(self, clear_existing: bool = False) -> Dict[str, Any]:
    """Index all activities from INDEX_MASTER into the database.

    Parsing and validation run before any destructive step, so a missing
    or unparsable INDEX_MASTER no longer leaves the database wiped when
    ``clear_existing`` is set.

    Args:
        clear_existing: When true, clear the database right before the
            bulk insert (only reached once parsing produced activities).

    Returns:
        A dict with a ``success`` flag. On success it also carries
        ``inserted_count`` and the output of ``_generate_indexing_stats``
        (or a reduced dict with an ``error`` note if statistics generation
        failed). On failure it carries an ``error`` message.
    """
    print("🚀 Starting activity indexing process...")
    start_time = time.time()

    # Parse activities from INDEX_MASTER
    print("📖 Parsing INDEX_MASTER file...")
    activities = self.parser.parse_all_categories()
    if not activities:
        print("❌ No activities were parsed!")
        return {'success': False, 'error': 'No activities parsed'}

    # Keep only activities the parser considers complete
    valid_activities = []
    for activity in activities:
        if self.parser.validate_activity_completeness(activity):
            valid_activities.append(activity)
        else:
            print(f"⚠️ Skipping incomplete activity: {activity.name[:50]}...")
    print(f"✅ Validated {len(valid_activities)} activities out of {len(activities)} parsed")
    if len(valid_activities) < 100:
        print(f"⚠️ Warning: Only {len(valid_activities)} valid activities found. Expected 500+")

    # Clear only now: there is something to insert, so a parse failure
    # can never cost the existing data.
    if clear_existing:
        print("🗑️ Clearing existing database...")
        self.db.clear_database()

    # Bulk insert into database
    print("💾 Inserting activities into database...")
    try:
        inserted_count = self.db.bulk_insert_activities(valid_activities)

        # Rebuild FTS index after the bulk insert
        print("🔍 Rebuilding search index...")
        self.db.rebuild_fts_index()
        indexing_time = time.time() - start_time

        # Statistics are best-effort: a failure here must not fail the run
        try:
            stats = self._generate_indexing_stats(valid_activities, indexing_time)
            stats['inserted_count'] = inserted_count
            stats['success'] = True
        except Exception as e:
            print(f"⚠️ Error generating statistics: {e}")
            stats = {
                'success': True,
                'inserted_count': inserted_count,
                'indexing_time_seconds': indexing_time,
                'error': f'Stats generation failed: {str(e)}'
            }
        print(f"✅ Indexing complete! {inserted_count} activities indexed in {indexing_time:.2f}s")

        # Database verification is best-effort as well
        try:
            db_stats = self.db.get_statistics()
            print(f"📊 Database now contains {db_stats['total_activities']} activities")
        except Exception as e:
            print(f"⚠️ Error getting database statistics: {e}")
            print("📊 Database insertion completed, statistics unavailable")
        return stats
    except Exception as e:
        print(f"❌ Error during database insertion: {e}")
        return {'success': False, 'error': str(e)}
def index_specific_category(self, category_code: str) -> Dict[str, Any]:
    """Index the activities of a single INDEX_MASTER category.

    Args:
        category_code: Key into the parser's ``category_mapping``.

    Returns:
        On success a dict with ``success``, ``category``,
        ``inserted_count``, ``total_parsed`` and ``valid_activities``;
        otherwise ``{'success': False, 'error': <message>}``.
    """
    def failure(message: str) -> Dict[str, Any]:
        return {'success': False, 'error': message}

    print(f"🎯 Indexing specific category: {category_code}")

    parser = self.parser
    if not parser.load_content():
        return failure('Could not load INDEX_MASTER')

    category_name = parser.category_mapping.get(category_code)
    if not category_name:
        return failure(f'Unknown category code: {category_code}')

    parsed = parser.parse_category_section(category_code, category_name)
    if not parsed:
        return failure(f'No activities found in category {category_code}')

    # Only complete activities are inserted
    accepted = []
    for candidate in parsed:
        if parser.validate_activity_completeness(candidate):
            accepted.append(candidate)

    try:
        inserted = self.db.bulk_insert_activities(accepted)
    except Exception as exc:
        return failure(str(exc))

    return {
        'success': True,
        'category': category_name,
        'inserted_count': inserted,
        'total_parsed': len(parsed),
        'valid_activities': len(accepted)
    }
def _generate_indexing_stats(self, activities: List[Activity], indexing_time: float) -> Dict[str, Any]:
"""Generate comprehensive indexing statistics"""
# Get parser statistics
parser_stats = self.parser.get_parsing_statistics()
# Calculate additional metrics
categories = {}
age_ranges = {}
durations = {}
materials = {}
for activity in activities:
# Category breakdown
if activity.category in categories:
categories[activity.category] += 1
else:
categories[activity.category] = 1
# Age range analysis (with safety check)
try:
age_key = activity.get_age_range_display() or "nespecificat"
age_ranges[age_key] = age_ranges.get(age_key, 0) + 1
except Exception as e:
print(f"Warning: Error getting age range for activity {activity.name}: {e}")
age_ranges["nespecificat"] = age_ranges.get("nespecificat", 0) + 1
# Duration analysis (with safety check)
try:
duration_key = activity.get_duration_display() or "nespecificat"
durations[duration_key] = durations.get(duration_key, 0) + 1
except Exception as e:
print(f"Warning: Error getting duration for activity {activity.name}: {e}")
durations["nespecificat"] = durations.get("nespecificat", 0) + 1
# Materials analysis (with safety check)
try:
materials_key = activity.get_materials_display() or "nespecificat"
materials[materials_key] = materials.get(materials_key, 0) + 1
except Exception as e:
print(f"Warning: Error getting materials for activity {activity.name}: {e}")
materials["nespecificat"] = materials.get("nespecificat", 0) + 1
return {
'indexing_time_seconds': indexing_time,
'parsing_stats': parser_stats,
'distribution': {
'categories': categories,
'age_ranges': age_ranges,
'durations': durations,
'materials': materials
},
'quality_metrics': {
'completion_rate': parser_stats.get('completion_rate', 0),
'average_description_length': parser_stats.get('average_description_length', 0),
'activities_with_metadata': sum(1 for a in activities if a.age_group_min or a.participants_min or a.duration_min)
}
}
def verify_indexing_quality(self) -> Dict[str, Any]:
    """Spot-check the indexed data and summarise its quality.

    Reads the database statistics, samples up to 10 activities and records
    an issue for each sampled row with a description shorter than 10
    characters or with no category.

    Returns:
        A report dict (totals, category coverage, issues, a
        ``quality_score`` that loses 10 points per issue, floored at 0),
        or ``{'error': ..., 'quality_score': 0}`` if anything raised.
    """
    minimum_target = 500
    try:
        db_stats = self.db.get_statistics()
        total = db_stats['total_activities']
        coverage = len(db_stats.get('categories', {}))

        issues = []
        for row in self.db.search_activities(limit=10):
            description = row.get('description')
            if not description or len(row['description']) < 10:
                issues.append(f"Activity {row.get('name', 'Unknown')} has insufficient description")
            if not row.get('category'):
                issues.append(f"Activity {row.get('name', 'Unknown')} missing category")

        report = {
            'total_activities': total,
            'meets_minimum_requirement': total >= minimum_target,
            'minimum_target': minimum_target,
            'category_coverage': coverage,
            'expected_categories': len(self.parser.category_mapping),
            'quality_issues': issues,
            'quality_score': max(0, 100 - len(issues) * 10),
            'database_stats': db_stats
        }
        return report
    except Exception as exc:
        return {'error': str(exc), 'quality_score': 0}
def get_indexing_progress(self) -> Dict[str, Any]:
    """Report progress towards the 500-activity target.

    Returns:
        A dict with the current and target counts, a percentage capped at
        100, a ``status`` of ``'completed'`` or ``'in_progress'``, the
        indexed category names and the database size in MB; or
        ``{'error': ..., 'status': 'error'}`` if anything raised.
    """
    target = 500
    bytes_per_mb = 1024 * 1024
    try:
        db_stats = self.db.get_statistics()
        current = db_stats['total_activities']
        if current >= target:
            status = 'completed'
        else:
            status = 'in_progress'
        return {
            'current_activities': current,
            'target_activities': target,
            'progress_percentage': min(100, (current / target) * 100),
            'status': status,
            'categories_indexed': list(db_stats.get('categories', {}).keys()),
            'database_size_mb': db_stats.get('database_size_bytes', 0) / bytes_per_mb
        }
    except Exception as exc:
        return {'error': str(exc), 'status': 'error'}

View File

@@ -1,340 +0,0 @@
"""
Advanced parser for INDEX_MASTER_JOCURI_ACTIVITATI.md
Extracts 500+ individual activities with full details
"""
import re
from pathlib import Path
from typing import List, Dict, Optional, Tuple
from app.models.activity import Activity
class IndexMasterParser:
"""Advanced parser for extracting real activities from INDEX_MASTER"""
def __init__(self, index_file_path: str):
"""Initialize parser with INDEX_MASTER file path"""
self.index_file_path = Path(index_file_path)
self.content = ""
self.activities = []
# Category mapping for main sections (exact match from file)
self.category_mapping = {
'[A]': 'JOCURI CERCETĂȘEȘTI ȘI SCOUT',
'[B]': 'TEAM BUILDING ȘI COMUNICARE',
'[C]': 'CAMPING ȘI ACTIVITĂȚI EXTERIOR',
'[D]': 'ESCAPE ROOM ȘI PUZZLE-URI',
'[E]': 'ORIENTARE ȘI BUSOLE',
'[F]': 'PRIMUL AJUTOR ȘI SIGURANȚA',
'[G]': 'ACTIVITĂȚI EDUCAȚIONALE',
'[H]': 'RESURSE SPECIALE'
}
def load_content(self) -> bool:
    """Read the INDEX_MASTER file into ``self.content``.

    Returns:
        True when the file exists, was read as UTF-8 and holds at least
        1000 characters; False otherwise (including on any exception).
        A file that is too small is still stored in ``self.content``.
    """
    try:
        source = self.index_file_path
        if not source.exists():
            print(f"❌ INDEX_MASTER file not found: {self.index_file_path}")
            return False

        self.content = source.read_text(encoding='utf-8')
        size = len(self.content)
        if size < 1000:  # Sanity check
            print(f"⚠️ INDEX_MASTER file seems too small: {size} chars")
            return False

        print(f"✅ Loaded INDEX_MASTER: {size} characters")
        return True
    except Exception as exc:
        print(f"❌ Error loading INDEX_MASTER: {exc}")
        return False
def parse_all_categories(self) -> List[Activity]:
    """Parse every category in ``category_mapping`` into activities.

    The accumulator ``self.activities`` is reset on every call, so calling
    this method more than once no longer duplicates the results.

    Returns:
        The list stored in ``self.activities``; empty when the file could
        not be loaded.
    """
    # Start from a clean slate: results must not pile up across calls.
    self.activities = []
    if not self.load_content():
        return []

    print("🔍 Starting comprehensive parsing of INDEX_MASTER...")

    # Parse each main category
    for category_code, category_name in self.category_mapping.items():
        print(f"\n📂 Processing category {category_code}: {category_name}")
        category_activities = self.parse_category_section(category_code, category_name)
        self.activities.extend(category_activities)
        print(f" ✅ Extracted {len(category_activities)} activities")

    print(f"\n🎯 Total activities extracted: {len(self.activities)}")
    return self.activities
def parse_category_section(self, category_code: str, category_name: str) -> List[Activity]:
    """Extract the activities of one "## [X] TITLE" section.

    The last matching header is used, since an earlier match is expected
    to be the table of contents. The section runs up to the next
    "## [A-H] ..." header or to the end of the content.

    Args:
        category_code: Section code, e.g. ``'[A]'``.
        category_name: Section title expected after the code.

    Returns:
        A new list with the activities found by ``_parse_subsections``;
        empty when the header is not present.
    """
    header_re = rf"^## {re.escape(category_code)} {re.escape(category_name)}\s*$"
    header = None
    for header in re.finditer(header_re, self.content, re.MULTILINE | re.IGNORECASE):
        pass  # keep only the final occurrence

    if header is None:
        print(f" ⚠️ Category section not found: {category_code}")
        return []
    print(f" 📍 Found section at position {header.start()}")

    # Everything after the header, cut at the next main category if any
    remainder = self.content[header.end():]
    following = re.search(r"^## \[[A-H]\] [A-ZĂÂÎȘȚ]", remainder, re.MULTILINE)
    section_content = remainder[:following.start()] if following else remainder

    return list(self._parse_subsections(section_content, category_name))
def _parse_subsections(self, section_content: str, category_name: str) -> List[Activity]:
"""Parse subsections within a category"""
activities = []
# Find all subsections (### markers)
subsection_pattern = r"^### (.+?)$"
subsections = re.finditer(subsection_pattern, section_content, re.MULTILINE)
subsection_list = list(subsections)
for i, subsection in enumerate(subsection_list):
subsection_title = subsection.group(1).strip()
subsection_start = subsection.end()
# Find end of subsection
if i + 1 < len(subsection_list):
subsection_end = subsection_list[i + 1].start()
else:
subsection_end = len(section_content)
subsection_text = section_content[subsection_start:subsection_end]
# Parse individual games in this subsection
subsection_activities = self._parse_games_in_subsection(
subsection_text, category_name, subsection_title
)
activities.extend(subsection_activities)
return activities
def _parse_games_in_subsection(self, subsection_text: str, category_name: str, subsection_title: str) -> List[Activity]:
"""Parse individual games within a subsection"""
activities = []
# Look for "Exemple de jocuri:" sections
examples_pattern = r"\*\*Exemple de jocuri:\*\*\s*\n(.*?)(?=\n\*\*|$)"
examples_matches = re.finditer(examples_pattern, subsection_text, re.DOTALL)
for examples_match in examples_matches:
examples_text = examples_match.group(1)
# Extract individual games (numbered list)
game_pattern = r"^(\d+)\.\s*\*\*(.+?)\*\*\s*-\s*(.+?)$"
games = re.finditer(game_pattern, examples_text, re.MULTILINE)
for game_match in games:
game_number = game_match.group(1)
game_name = game_match.group(2).strip()
game_description = game_match.group(3).strip()
# Extract metadata from subsection
metadata = self._extract_subsection_metadata(subsection_text)
# Create activity
activity = Activity(
name=game_name,
description=game_description,
category=category_name,
subcategory=subsection_title,
source_file=f"INDEX_MASTER_JOCURI_ACTIVITATI.md",
page_reference=f"{category_name} > {subsection_title} > #{game_number}",
**metadata
)
activities.append(activity)
# Also extract from direct activity descriptions without "Exemple de jocuri"
activities.extend(self._parse_direct_activities(subsection_text, category_name, subsection_title))
return activities
def _extract_subsection_metadata(self, subsection_text: str) -> Dict:
"""Extract metadata from subsection text"""
metadata = {}
# Extract participants info
participants_pattern = r"\*\*Participanți:\*\*\s*(.+?)(?:\n|\*\*)"
participants_match = re.search(participants_pattern, subsection_text)
if participants_match:
participants_text = participants_match.group(1).strip()
participants = self._parse_participants(participants_text)
metadata.update(participants)
# Extract duration
duration_pattern = r"\*\*Durata:\*\*\s*(.+?)(?:\n|\*\*)"
duration_match = re.search(duration_pattern, subsection_text)
if duration_match:
duration_text = duration_match.group(1).strip()
duration = self._parse_duration(duration_text)
metadata.update(duration)
# Extract materials
materials_pattern = r"\*\*Materiale:\*\*\s*(.+?)(?:\n|\*\*)"
materials_match = re.search(materials_pattern, subsection_text)
if materials_match:
materials_text = materials_match.group(1).strip()
metadata['materials_list'] = materials_text
metadata['materials_category'] = self._categorize_materials(materials_text)
# Extract keywords
keywords_pattern = r"\*\*Cuvinte cheie:\*\*\s*(.+?)(?:\n|\*\*)"
keywords_match = re.search(keywords_pattern, subsection_text)
if keywords_match:
metadata['keywords'] = keywords_match.group(1).strip()
return metadata
def _parse_participants(self, participants_text: str) -> Dict:
"""Parse participants information"""
result = {}
# Look for number ranges like "8-30 copii" or "5-15 persoane"
range_pattern = r"(\d+)-(\d+)"
range_match = re.search(range_pattern, participants_text)
if range_match:
result['participants_min'] = int(range_match.group(1))
result['participants_max'] = int(range_match.group(2))
else:
# Look for single numbers
number_pattern = r"(\d+)\+"
number_match = re.search(number_pattern, participants_text)
if number_match:
result['participants_min'] = int(number_match.group(1))
# Extract age information
age_pattern = r"(\d+)-(\d+)\s*ani"
age_match = re.search(age_pattern, participants_text)
if age_match:
result['age_group_min'] = int(age_match.group(1))
result['age_group_max'] = int(age_match.group(2))
return result
def _parse_duration(self, duration_text: str) -> Dict:
"""Parse duration information"""
result = {}
# Look for time ranges like "5-20 minute" or "15-30min"
range_pattern = r"(\d+)-(\d+)\s*(?:minute|min)"
range_match = re.search(range_pattern, duration_text)
if range_match:
result['duration_min'] = int(range_match.group(1))
result['duration_max'] = int(range_match.group(2))
else:
# Look for single duration
single_pattern = r"(\d+)\+?\s*(?:minute|min)"
single_match = re.search(single_pattern, duration_text)
if single_match:
result['duration_min'] = int(single_match.group(1))
return result
def _categorize_materials(self, materials_text: str) -> str:
"""Categorize materials into simple categories"""
materials_lower = materials_text.lower()
if any(word in materials_lower for word in ['fără', 'nu necesare', 'nimic', 'minime']):
return 'Fără materiale'
elif any(word in materials_lower for word in ['hârtie', 'creion', 'marker', 'simple']):
return 'Materiale simple'
elif any(word in materials_lower for word in ['computer', 'proiector', 'echipament', 'complexe']):
return 'Materiale complexe'
else:
return 'Materiale variate'
def _parse_direct_activities(self, subsection_text: str, category_name: str, subsection_title: str) -> List[Activity]:
"""Parse activities that are described directly without 'Exemple de jocuri' section"""
activities = []
# Look for activity descriptions in sections that don't have "Exemple de jocuri"
if "**Exemple de jocuri:**" not in subsection_text:
# Try to extract from file descriptions
file_pattern = r"\*\*Fișier:\*\*\s*`([^`]+)`.*?\*\*(.+?)\*\*"
file_matches = re.finditer(file_pattern, subsection_text, re.DOTALL)
for file_match in file_matches:
file_name = file_match.group(1)
description_part = file_match.group(2)
# Create a general activity for this file
activity = Activity(
name=f"Activități din {file_name}",
description=f"Colecție de activități din fișierul {file_name}. {description_part[:200]}...",
category=category_name,
subcategory=subsection_title,
source_file=file_name,
page_reference=f"{category_name} > {subsection_title}",
**self._extract_subsection_metadata(subsection_text)
)
activities.append(activity)
return activities
def validate_activity_completeness(self, activity: Activity) -> bool:
    """Tell whether an activity has every field needed for indexing.

    Args:
        activity: Object exposing ``name``, ``description``, ``category``
            and ``source_file`` attributes.

    Returns:
        True when all four fields are non-empty after stripping and the
        description is at least 10 characters long.
    """
    for field in ('name', 'description', 'category', 'source_file'):
        value = getattr(activity, field)
        if not (value and value.strip()):
            return False

    # Minimum description length
    return not len(activity.description) < 10
def get_parsing_statistics(self) -> Dict:
    """Summarise the activities gathered in ``self.activities``.

    Returns:
        ``{'total_activities': 0}`` when nothing was parsed; otherwise a
        dict with the total and valid counts, the completion rate as a
        percentage, a per-category breakdown and the mean description
        length.
    """
    parsed = self.activities
    if not parsed:
        return {'total_activities': 0}

    total = len(parsed)
    per_category = {}
    complete = 0
    for item in parsed:
        per_category[item.category] = per_category.get(item.category, 0) + 1
        if self.validate_activity_completeness(item):
            complete += 1

    description_chars = sum(len(item.description) for item in parsed)
    return {
        'total_activities': total,
        'valid_activities': complete,
        'completion_rate': (complete / total) * 100,
        'category_breakdown': per_category,
        'average_description_length': description_chars / total
    }

View File

@@ -5,8 +5,19 @@ Enhanced search with FTS5 and intelligent filtering
from typing import List, Dict, Any, Optional
from app.models.database import DatabaseManager
from app.config_taxonomy import NON_GAME_CONTENT_TYPES
import re
# Category slugs that are themselves "non-game" — selecting one of these as a
# category filter also lifts the default non-game content_type exclusion.
NON_GAME_CATEGORIES = {"retete", "cantece-ceremonii"}
# When a Python-side post-filter is active the DB LIMIT is applied *before*
# filtering, so we over-fetch to still satisfy the caller's `limit`.
_OVERSCAN_FACTOR = 5
_OVERSCAN_CAP = 2000
class SearchService:
"""Enhanced search service with intelligent query processing"""
@@ -24,22 +35,72 @@ class SearchService:
if filters is None:
filters = {}
# Process and normalize search text
processed_search = self._process_search_text(search_text)
# Map web filters to database fields
db_filters = self._map_filters_to_db_fields(filters)
# content_type and language are filtered in Python: the DB layer does
# not expose them as query parameters. The DEFAULT search excludes the
# non-game content types (rețete / cântece / ceremonii) — they surface
# only when the user explicitly filters that content_type, or picks a
# non-game category. See plan §6.
content_type, exclude_non_game = self._resolve_content_type_filter(filters)
language = (filters.get('language') or '').strip().lower() or None
post_filtering = bool(content_type or exclude_non_game or language)
# Over-fetch when post-filtering so the final list can still reach `limit`.
fetch_limit = min(limit * _OVERSCAN_FACTOR, _OVERSCAN_CAP) if post_filtering else limit
# Perform database search
results = self.db.search_activities(
search_text=processed_search,
**db_filters,
limit=limit
limit=fetch_limit
)
# Post-process results for relevance and ranking
return self._post_process_results(results, processed_search, filters)
# Apply content_type / language post-filters
results = self._apply_content_type_filter(results, content_type, exclude_non_game)
if language:
results = [r for r in results
if (r.get('language') or '').strip().lower() == language]
# Post-process results for relevance and ranking, then honour `limit`
results = self._post_process_results(results, processed_search, filters)
return results[:limit]
def _resolve_content_type_filter(self, filters: Dict[str, str]):
"""Determine the content_type post-filter.
Returns (explicit_content_type | None, exclude_non_game: bool):
- an explicit `content_type` filter → that value, no exclusion;
- a `category` filter on a non-game category → no exclusion;
- otherwise → default search, exclude non-game content types.
"""
content_type = (filters.get('content_type') or '').strip()
if content_type:
return content_type, False
category = (filters.get('category') or '').strip()
if category in NON_GAME_CATEGORIES:
return None, False
return None, True
def _apply_content_type_filter(self,
results: List[Dict[str, Any]],
content_type: Optional[str],
exclude_non_game: bool) -> List[Dict[str, Any]]:
"""Filter results by content_type (explicit include vs default exclude)."""
if content_type:
return [r for r in results
if (r.get('content_type') or '') == content_type]
if exclude_non_game:
# Rows with NULL/unknown content_type are kept — only the known
# non-game types are dropped from the default search.
return [r for r in results
if (r.get('content_type') or '') not in NON_GAME_CONTENT_TYPES]
return results
def _process_search_text(self, search_text: Optional[str]) -> Optional[str]:
"""Process and enhance search text for better FTS5 results"""
@@ -83,10 +144,16 @@ class SearchService:
if not filter_value or not filter_value.strip():
continue
# content_type / language are NOT database query params — they are
# applied as Python post-filters in search_activities(). Skip them
# here so they never reach DatabaseManager.search_activities().
if filter_key in ('content_type', 'language'):
continue
# Map filter types to database fields
if filter_key == 'category':
db_filters['category'] = filter_value
elif filter_key == 'age_group':
# Parse age range (e.g., "5-8 ani", "12+ ani")
age_match = re.search(r'(\d+)(?:-(\d+))?\s*ani?', filter_value)
@@ -177,21 +244,22 @@ class SearchService:
boost_score = 0
# Check name matches (highest priority)
name_lower = result.get('name', '').lower()
# NB: use `or ''` — nullable columns come back as None, not ''.
name_lower = (result.get('name') or '').lower()
for term in search_terms:
if term in name_lower:
boost_score += 10
if name_lower.startswith(term):
boost_score += 5 # Extra boost for name starts with term
# Check description matches
desc_lower = result.get('description', '').lower()
desc_lower = (result.get('description') or '').lower()
for term in search_terms:
if term in desc_lower:
boost_score += 3
# Check keywords matches
keywords_lower = result.get('keywords', '').lower()
keywords_lower = (result.get('keywords') or '').lower()
for term in search_terms:
if term in keywords_lower:
boost_score += 5
@@ -280,11 +348,14 @@ class SearchService:
return []
try:
# Search for activities that match the partial query
# Search for activities that match the partial query.
# Over-fetch then drop non-game content types so autocomplete
# mirrors the default search (no rețete / cântece / ceremonii).
results = self.db.search_activities(
search_text=f'"{partial_query}"',
limit=limit * 2
limit=limit * 6
)
results = self._apply_content_type_filter(results, None, True)
suggestions = []
seen = set()