"""Controlled category taxonomy for game-library.

Single source of truth for activity categories. The DB stores the *slug*;
the UI displays the Romanian name.

`category` (thematic domain) and `content_type` (form of the content) are
INDEPENDENT axes — see plan §2.

Every ``normalize_*`` function accepts, in this order of precedence:

1. a canonical slug,
2. a known alias (legacy DB values, English/Romanian variants),
3. the Romanian display label of a slug (so a value copied from the UI
   round-trips to the slug it came from).

Matching is done on the ``_slugify``'d form of the input, so case,
diacritics and punctuation differences are ignored.
"""

import re
import unicodedata
from typing import Dict, List, Optional

# --- Categories (thematic domain) --------------------------------------------
# slug -> Romanian display name. ~16 fixed slugs; `altele` is the mandatory
# fallback and MUST always be present.
CATEGORIES: Dict[str, str] = {
    "jocuri-cercetasesti": "Jocuri cercetășești",
    "team-building": "Team-building",
    "icebreakers": "Icebreakers / spargerea gheții",
    "camp-outdoor": "Tabără și activități în aer liber",
    "wide-games": "Wide games / jocuri de teren",
    "orientare": "Orientare",
    "prim-ajutor": "Prim ajutor",
    "escape-room-puzzle": "Escape room și puzzle",
    "creative-stem": "Creativitate și STEM",
    "sports-active": "Sport și activități fizice",
    "cantece-ceremonii": "Cântece și ceremonii",
    "retete": "Rețete",
    "supravietuire": "Supraviețuire",
    "integrare-incluziune": "Integrare și incluziune",
    "conflict-empatie": "Conflict și empatie",
    "altele": "Altele",
}

# Mandatory fallback slug.
FALLBACK_CATEGORY = "altele"

# Ordered list of valid slugs.
CATEGORY_SLUGS: List[str] = list(CATEGORIES.keys())

# --- Content type (form of the content) --------------------------------------
# Independent axis from `category`. The UI default search excludes the
# non-game content types (see plan §6).
CONTENT_TYPES: Dict[str, str] = {
    "joc": "Joc",
    "activitate": "Activitate",
    "reteta": "Rețetă",
    "cantec": "Cântec",
    "ceremonie": "Ceremonie",
}

CONTENT_TYPE_SLUGS: List[str] = list(CONTENT_TYPES.keys())

# Content types considered "non-game" — excluded from the default UI search.
NON_GAME_CONTENT_TYPES: List[str] = ["reteta", "cantec", "ceremonie"]

DEFAULT_CONTENT_TYPE = "activitate"

# --- Indoor / outdoor (enrichment axis) --------------------------------------
# Where the activity is run. Inferred during enrichment when the source is
# silent — such inferences are flagged in `estimated_fields`. slug -> RO label.
INDOOR_OUTDOOR: Dict[str, str] = {
    "indoor": "Interior",
    "outdoor": "Exterior",
    "either": "Interior sau exterior",
}

# --- Space needed (enrichment axis) ------------------------------------------
# Rough footprint the activity requires. slug -> RO label.
SPACE_NEEDED: Dict[str, str] = {
    "mic": "Spațiu mic",
    "mediu": "Spațiu mediu",
    "mare": "Spațiu mare",
}

# --- Aliases -----------------------------------------------------------------
# Map of normalized arbitrary strings -> canonical slug. Keys are already
# diacritic-stripped, lowercased and hyphenated (see _slugify). This catches
# legacy / messy values from the old DB and common English/Romanian variants.
_CATEGORY_ALIASES: Dict[str, str] = {
    # legacy junk
    "general-activity": "altele",
    "general": "altele",
    "educational": "creative-stem",
    "d": "altele",
    "a": "altele",
    "b": "altele",
    "c": "altele",
    # scouting
    "cercetasie": "jocuri-cercetasesti",
    "cercetasesti": "jocuri-cercetasesti",
    "scout": "jocuri-cercetasesti",
    "scouting": "jocuri-cercetasesti",
    "scout-games": "jocuri-cercetasesti",
    "jocuri-cercetasesti": "jocuri-cercetasesti",
    # team building
    "teambuilding": "team-building",
    "team": "team-building",
    "cooperare": "team-building",
    # icebreakers
    "icebreaker": "icebreakers",
    "spargerea-ghetii": "icebreakers",
    "cunoastere": "icebreakers",
    "energizers": "icebreakers",
    "energizer": "icebreakers",
    # camp / outdoor
    "camp": "camp-outdoor",
    "tabara": "camp-outdoor",
    "outdoor": "camp-outdoor",
    "aer-liber": "camp-outdoor",
    # wide games
    "wide-game": "wide-games",
    "jocuri-de-teren": "wide-games",
    "joc-de-teren": "wide-games",
    "big-games": "wide-games",
    # orientare
    "orienteering": "orientare",
    "navigatie": "orientare",
    # prim ajutor
    "first-aid": "prim-ajutor",
    "primul-ajutor": "prim-ajutor",
    # escape room / puzzle
    "escape-room": "escape-room-puzzle",
    "escaperoom": "escape-room-puzzle",
    "puzzle": "escape-room-puzzle",
    "puzzles": "escape-room-puzzle",
    "ghicitori": "escape-room-puzzle",
    # creative / stem
    "creative": "creative-stem",
    "creativitate": "creative-stem",
    "stem": "creative-stem",
    "arts-and-crafts": "creative-stem",
    "craft": "creative-stem",
    "crafts": "creative-stem",
    "stiinta": "creative-stem",
    # sports
    "sport": "sports-active",
    "sports": "sports-active",
    "sportive": "sports-active",
    "active": "sports-active",
    "miscare": "sports-active",
    "physical": "sports-active",
    # songs / ceremonies
    "cantece": "cantece-ceremonii",
    "cantec": "cantece-ceremonii",
    "songs": "cantece-ceremonii",
    "ceremonii": "cantece-ceremonii",
    "ceremonie": "cantece-ceremonii",
    "ceremony": "cantece-ceremonii",
    # recipes
    "reteta": "retete",
    "recipe": "retete",
    "recipes": "retete",
    "cooking": "retete",
    "gatit": "retete",
    # survival
    "survival": "supravietuire",
    "supravietuire": "supravietuire",
    # inclusion
    "integrare": "integrare-incluziune",
    "incluziune": "integrare-incluziune",
    "inclusion": "integrare-incluziune",
    # conflict / empathy
    "conflict": "conflict-empatie",
    "empatie": "conflict-empatie",
    "empathy": "conflict-empatie",
    "rezolvarea-conflictelor": "conflict-empatie",
    # fallback
    "altele": "altele",
    "other": "altele",
    "others": "altele",
    "misc": "altele",
}

# Light alias handling for plural / English forms of the content types.
# Kept at module level like the other alias tables so it is built once.
_CONTENT_TYPE_ALIASES: Dict[str, str] = {
    "jocuri": "joc",
    "game": "joc",
    "games": "joc",
    "activitati": "activitate",
    "activity": "activitate",
    "activities": "activitate",
    "retete": "reteta",
    "recipe": "reteta",
    "recipes": "reteta",
    "cantece": "cantec",
    "song": "cantec",
    "songs": "cantec",
    "ceremonii": "ceremonie",
    "ceremony": "ceremonie",
    "ceremonies": "ceremonie",
}

# Aliases for robustness against LLM output variation. Keys are _slugify'd.
_INDOOR_OUTDOOR_ALIASES: Dict[str, str] = {
    "interior": "indoor",
    "inside": "indoor",
    "in": "indoor",
    "exterior": "outdoor",
    "outside": "outdoor",
    "out": "outdoor",
    "aer-liber": "outdoor",
    "both": "either",
    "any": "either",
    "ambele": "either",
    "interior-exterior": "either",
    "indoor-outdoor": "either",
}

_SPACE_NEEDED_ALIASES: Dict[str, str] = {
    "small": "mic",
    "redus": "mic",
    "putin": "mic",
    "medium": "mediu",
    "moderat": "mediu",
    "large": "mare",
    "big": "mare",
    "mult": "mare",
    "spatiu-mic": "mic",
    "spatiu-mediu": "mediu",
    "spatiu-mare": "mare",
}

# --- Helpers -----------------------------------------------------------------
# Any run of characters outside [a-z0-9] becomes a single hyphen.
_NON_ALNUM_RE = re.compile(r"[^a-z0-9]+")


def _slugify(value: str) -> str:
    """Lowercase, strip diacritics, collapse non-alphanumerics to hyphens.

    Returns "" for an empty input or one with no ASCII letters/digits left
    after normalisation.
    """
    if not value:
        return ""
    # Decompose accents (ă -> a + breve, ș -> s + comma, ...) and drop the
    # combining marks so only the base letters remain.
    decomposed = unicodedata.normalize("NFKD", value)
    base_chars = "".join(c for c in decomposed if not unicodedata.combining(c))
    hyphenated = _NON_ALNUM_RE.sub("-", base_chars.lower().strip())
    return hyphenated.strip("-")


def _label_index(labels: Dict[str, str]) -> Dict[str, str]:
    """Invert a slug -> display-label map into slugified-label -> slug."""
    return {_slugify(label): slug for slug, label in labels.items()}


def _resolve(
    value: Optional[str],
    canonical: Dict[str, str],
    aliases: Dict[str, str],
    label_index: Dict[str, str],
) -> Optional[str]:
    """Resolve `value` to a slug of `canonical`, or None if unrecognised.

    Tries, in order: exact slug, alias, display label. Non-string truthy
    values are passed through ``str()`` before slugifying.
    """
    if not value:
        return None
    slug = _slugify(str(value))
    if not slug:
        return None
    if slug in canonical:
        return slug
    if slug in aliases:
        return aliases[slug]
    return label_index.get(slug)


# Reverse lookups from the Romanian display labels, so that a label shown by
# the UI (e.g. "Icebreakers / spargerea gheții") maps back to its slug.
_CATEGORY_BY_LABEL: Dict[str, str] = _label_index(CATEGORIES)
_CONTENT_TYPE_BY_LABEL: Dict[str, str] = _label_index(CONTENT_TYPES)
_INDOOR_OUTDOOR_BY_LABEL: Dict[str, str] = _label_index(INDOOR_OUTDOOR)
_SPACE_NEEDED_BY_LABEL: Dict[str, str] = _label_index(SPACE_NEEDED)


# --- Normalisers -------------------------------------------------------------
def normalize_category(value: Optional[str]) -> str:
    """Map an arbitrary string to a valid category slug.

    Returns one of CATEGORY_SLUGS, falling back to `altele` for anything
    unrecognised or empty.
    """
    resolved = _resolve(value, CATEGORIES, _CATEGORY_ALIASES, _CATEGORY_BY_LABEL)
    return FALLBACK_CATEGORY if resolved is None else resolved


def normalize_content_type(value: Optional[str]) -> str:
    """Map an arbitrary string to a valid content_type slug.

    Returns one of CONTENT_TYPE_SLUGS, falling back to `activitate` for
    anything unrecognised or empty.
    """
    resolved = _resolve(
        value, CONTENT_TYPES, _CONTENT_TYPE_ALIASES, _CONTENT_TYPE_BY_LABEL
    )
    return DEFAULT_CONTENT_TYPE if resolved is None else resolved


def normalize_indoor_outdoor(value: Optional[str]) -> Optional[str]:
    """Map an arbitrary string to an indoor_outdoor slug, or None.

    Unlike categories, this has NO mandatory fallback: an unrecognised or
    empty value yields None (field simply absent), so we never fabricate a
    location the enrichment did not assert.
    """
    return _resolve(
        value, INDOOR_OUTDOOR, _INDOOR_OUTDOOR_ALIASES, _INDOOR_OUTDOOR_BY_LABEL
    )


def normalize_space_needed(value: Optional[str]) -> Optional[str]:
    """Map an arbitrary string to a space_needed slug, or None (no fallback)."""
    return _resolve(
        value, SPACE_NEEDED, _SPACE_NEEDED_ALIASES, _SPACE_NEEDED_BY_LABEL
    )


# --- Display names / validation ----------------------------------------------
def indoor_outdoor_display_name(slug: str) -> str:
    """RO display name for an indoor_outdoor slug (the slug itself if unknown)."""
    return INDOOR_OUTDOOR.get(slug, slug)


def space_needed_display_name(slug: str) -> str:
    """RO display name for a space_needed slug (the slug itself if unknown)."""
    return SPACE_NEEDED.get(slug, slug)


def is_valid_category(slug: str) -> bool:
    """True if `slug` is exactly one of the canonical category slugs."""
    return slug in CATEGORIES


def category_display_name(slug: str) -> str:
    """Romanian display name for a category slug (the slug itself if unknown)."""
    return CATEGORIES.get(slug, slug)


def content_type_display_name(slug: str) -> str:
    """Romanian display name for a content_type slug (the slug itself if unknown)."""
    return CONTENT_TYPES.get(slug, slug)