""" Activity data model for INDEX-SISTEM-JOCURI v2.0 """ from dataclasses import dataclass, field from typing import List, Optional, Dict, Any import json import re import unicodedata def normalize_name(name: str) -> str: """Diacritic-free, lowercased, whitespace-collapsed form of a name. Used as the exact-match key for dedup grouping (see plan §4). """ if not name: return "" decomposed = unicodedata.normalize("NFKD", name) ascii_str = "".join(c for c in decomposed if not unicodedata.combining(c)) ascii_str = ascii_str.lower().strip() ascii_str = re.sub(r"\s+", " ", ascii_str) return ascii_str @dataclass class Activity: """Activity data model with comprehensive fields""" # Basic information name: str description: str rules: Optional[str] = None variations: Optional[str] = None # Categories category: str = "" subcategory: Optional[str] = None # content_type is an axis INDEPENDENT of category: # one of joc/activitate/reteta/cantec/ceremonie (see config_taxonomy). content_type: Optional[str] = None # Source information source_file: str = "" page_reference: Optional[str] = None # source_files: JSON-encoded list of every source the activity was seen in. # `source_file` (singular) stays as the primary/original source; build_database # (Lane C) accumulates the full list here on dedup-merge. source_files: List[str] = field(default_factory=list) # Short verbatim quote from the source — anti-hallucination anchor. source_excerpt: Optional[str] = None # Age and participants age_group_min: Optional[int] = None age_group_max: Optional[int] = None participants_min: Optional[int] = None participants_max: Optional[int] = None # Duration duration_min: Optional[int] = None # minutes duration_max: Optional[int] = None # minutes # Materials and setup materials_category: Optional[str] = None materials_list: Optional[str] = None skills_developed: Optional[str] = None difficulty_level: Optional[str] = None # Search and metadata keywords: Optional[str] = None tags: List[str] = field(default_factory=list) popularity_score: int = 0 # Extraction / language metadata language: Optional[str] = None # 'ro' / 'en' normalized_name: Optional[str] = None # dedup key; auto-derived from name extraction_confidence: Optional[str] = None # 'high' / 'med' / 'low' needs_review: int = 0 # Database fields id: Optional[int] = None created_at: Optional[str] = None updated_at: Optional[str] = None def __post_init__(self): """Derive normalized_name from name when not explicitly provided.""" if not self.normalized_name: self.normalized_name = normalize_name(self.name) def to_dict(self) -> Dict[str, Any]: """Convert activity to dictionary for database storage""" return { 'name': self.name, 'description': self.description, 'rules': self.rules, 'variations': self.variations, 'category': self.category, 'subcategory': self.subcategory, 'content_type': self.content_type, 'source_file': self.source_file, 'source_files': json.dumps(self.source_files) if self.source_files else None, 'page_reference': self.page_reference, 'source_excerpt': self.source_excerpt, 'age_group_min': self.age_group_min, 'age_group_max': self.age_group_max, 'participants_min': self.participants_min, 'participants_max': self.participants_max, 'duration_min': self.duration_min, 'duration_max': self.duration_max, 'materials_category': self.materials_category, 'materials_list': self.materials_list, 'skills_developed': self.skills_developed, 'difficulty_level': self.difficulty_level, 'keywords': self.keywords, 'tags': json.dumps(self.tags) if self.tags else None, 'popularity_score': self.popularity_score, 'language': self.language, 'normalized_name': self.normalized_name or normalize_name(self.name), 'extraction_confidence': self.extraction_confidence, 'needs_review': self.needs_review, } @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Activity': """Create activity from dictionary""" # Parse tags from JSON if present tags = [] if data.get('tags'): try: tags = json.loads(data['tags']) except (json.JSONDecodeError, TypeError): tags = [] # source_files may arrive as a JSON string (DB) or a list (extraction) source_files = data.get('source_files') if isinstance(source_files, str): try: source_files = json.loads(source_files) except (json.JSONDecodeError, TypeError): source_files = [] elif source_files is None: source_files = [] return cls( id=data.get('id'), name=data.get('name', ''), description=data.get('description', ''), rules=data.get('rules'), variations=data.get('variations'), category=data.get('category', ''), subcategory=data.get('subcategory'), content_type=data.get('content_type'), source_file=data.get('source_file', ''), source_files=source_files, page_reference=data.get('page_reference'), source_excerpt=data.get('source_excerpt'), age_group_min=data.get('age_group_min'), age_group_max=data.get('age_group_max'), participants_min=data.get('participants_min'), participants_max=data.get('participants_max'), duration_min=data.get('duration_min'), duration_max=data.get('duration_max'), materials_category=data.get('materials_category'), materials_list=data.get('materials_list'), skills_developed=data.get('skills_developed'), difficulty_level=data.get('difficulty_level'), keywords=data.get('keywords'), tags=tags, popularity_score=data.get('popularity_score', 0), language=data.get('language'), normalized_name=data.get('normalized_name'), extraction_confidence=data.get('extraction_confidence'), needs_review=data.get('needs_review', 0) or 0, created_at=data.get('created_at'), updated_at=data.get('updated_at') ) def get_age_range_display(self) -> str: """Get formatted age range for display""" if self.age_group_min and self.age_group_max: return f"{self.age_group_min}-{self.age_group_max} ani" elif self.age_group_min: return f"{self.age_group_min}+ ani" elif self.age_group_max: return f"până la {self.age_group_max} ani" return "toate vârstele" def get_participants_display(self) -> str: """Get formatted participants range for display""" if self.participants_min and self.participants_max: return f"{self.participants_min}-{self.participants_max} persoane" elif self.participants_min: return f"{self.participants_min}+ persoane" elif self.participants_max: return f"până la {self.participants_max} persoane" return "orice număr" def get_duration_display(self) -> str: """Get formatted duration for display""" if self.duration_min and self.duration_max: return f"{self.duration_min}-{self.duration_max} minute" elif self.duration_min: return f"{self.duration_min}+ minute" elif self.duration_max: return f"până la {self.duration_max} minute" return "durată variabilă" def get_materials_display(self) -> str: """Get formatted materials for display""" if self.materials_category: return self.materials_category elif self.materials_list: return self.materials_list[:100] + "..." if len(self.materials_list) > 100 else self.materials_list return "nu specificate"