Files
game-library/scripts/index_data.py
Marius Mutu 4f83b8e73c Complete v2.0 transformation: Production-ready Flask application
Major Changes:
- Migrated from prototype to production architecture
- Implemented modular Flask app with models/services/web layers
- Added Docker containerization with docker-compose
- Switched to Pipenv for dependency management
- Built advanced parser extracting 63 real activities from INDEX_MASTER
- Implemented SQLite FTS5 full-text search
- Created minimalist, responsive web interface
- Added comprehensive documentation and deployment guides

Technical Improvements:
- Clean separation of concerns (models, services, web)
- Enhanced database schema with FTS5 indexing
- Dynamic filters populated from real data
- Production-ready configuration management
- Security best practices implementation
- Health monitoring and API endpoints

Removed Legacy Files:
- Old src/ directory structure
- Static requirements.txt (replaced by Pipfile)
- Test and debug files
- Temporary cache files

Current Status:
- 63 activities indexed across 8 categories
- Full-text search operational
- Docker deployment ready
- Production documentation complete

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-11 00:23:47 +03:00

200 lines
7.0 KiB
Python

#!/usr/bin/env python3
"""
Data indexing script for INDEX-SISTEM-JOCURI v2.0
Extracts activities from INDEX_MASTER and populates database
"""
import sys
import os
from pathlib import Path
# Add the project root (the parent of scripts/) to sys.path so the `app` package can be imported
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.models.database import DatabaseManager
from app.services.indexer import ActivityIndexer
from app.config import Config
import argparse
import time
def main(argv=None):
    """Parse command-line options and run the requested indexing operation.

    Operations are selected in priority order: ``--stats``, ``--category``,
    ``--verify``, and otherwise a full indexing run (optionally preceded by
    a database clear when ``--clear`` is given).

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. Defaults to ``None`` so that a plain
            ``main()`` call keeps reading the real command line.

    Returns:
        Process exit code: the value returned by the selected operation,
        or 1 when the INDEX_MASTER file is missing or a service cannot be
        initialised.
    """
    parser = argparse.ArgumentParser(description='Index activities from INDEX_MASTER')
    parser.add_argument('--clear', action='store_true', help='Clear existing database before indexing')
    parser.add_argument('--category', help='Index specific category only (e.g., [A], [B], etc.)')
    parser.add_argument('--verify', action='store_true', help='Verify indexing quality after completion')
    parser.add_argument('--stats', action='store_true', help='Show database statistics only')
    args = parser.parse_args(argv)

    # Setup paths
    Config.ensure_directories()

    # Database path: DATABASE_URL may be a bare path or a sqlite:/// URL.
    db_path = os.environ.get('DATABASE_URL', str(Config.DATA_DIR / 'activities.db'))
    sqlite_prefix = 'sqlite:///'
    if db_path.startswith(sqlite_prefix):
        db_path = db_path[len(sqlite_prefix):]

    # INDEX_MASTER path
    index_master_path = os.environ.get('INDEX_MASTER_FILE', str(Config.INDEX_MASTER_FILE))

    print("🎯 INDEX-SISTEM-JOCURI v2.0 - Data Indexing")
    print("=" * 50)
    print(f"Database: {db_path}")
    print(f"INDEX_MASTER: {index_master_path}")
    print("=" * 50)

    # Statistics are read through the DatabaseManager alone, so this path is
    # handled before the INDEX_MASTER check: a missing source file must not
    # prevent inspecting an existing database.
    if args.stats:
        try:
            db_manager = DatabaseManager(db_path)
        except Exception as e:
            print(f"❌ Error initializing services: {e}")
            return 1
        return show_statistics(db_manager)

    # Every remaining operation goes through the indexer, which is built
    # from the INDEX_MASTER file.
    if not Path(index_master_path).exists():
        print(f"❌ INDEX_MASTER file not found: {index_master_path}")
        print(" Please ensure the file is mounted in the container or available locally")
        return 1

    # Initialize services
    try:
        db_manager = DatabaseManager(db_path)
        indexer = ActivityIndexer(db_manager, index_master_path)
    except Exception as e:
        print(f"❌ Error initializing services: {e}")
        return 1

    # Handle different operations
    if args.category:
        return index_category(indexer, args.category)
    if args.verify:
        return verify_indexing(indexer)

    # Default: full indexing
    return full_indexing(indexer, args.clear)
def full_indexing(indexer: ActivityIndexer, clear_existing: bool, min_activities: int = 500) -> int:
    """Index every activity and print a summary of the run.

    Args:
        indexer: Object whose ``index_all_activities(clear_existing=...)``
            returns a result mapping. The keys read here are ``success``,
            ``error``, ``inserted_count``, ``indexing_time_seconds``,
            ``parsing_stats`` and ``distribution``.
        clear_existing: Forwarded to the indexer unchanged.
        min_activities: Number of inserted activities regarded as the
            target. Falling short only produces a warning; it does not
            change the exit code.

    Returns:
        0 when the indexer reports success, 1 when it reports failure or
        any exception is raised while indexing or reporting.
    """
    print("🚀 Starting full indexing process...")
    try:
        # Perform indexing
        result = indexer.index_all_activities(clear_existing=clear_existing)
        if not result.get('success'):
            print(f"❌ Indexing failed: {result.get('error', 'Unknown error')}")
            return 1

        inserted = result.get('inserted_count') or 0

        # Print results
        print("\n📊 INDEXING RESULTS")
        print("=" * 30)
        print(f"✅ Activities inserted: {inserted}")
        print(f"⏱️ Indexing time: {result.get('indexing_time_seconds', 0):.2f}s")

        # `or {}` keeps an explicit None from breaking the report of an
        # otherwise successful run.
        parsing_stats = result.get('parsing_stats') or {}
        print(f"📈 Completion rate: {parsing_stats.get('completion_rate', 0):.1f}%")
        print(f"📝 Avg description length: {parsing_stats.get('average_description_length', 0):.0f} chars")

        # Category breakdown
        distribution = result.get('distribution') or {}
        categories = distribution.get('categories') or {}
        print("\n📂 CATEGORY BREAKDOWN:")
        for category, count in categories.items():
            print(f" {category}: {count} activities")

        # Quality check
        if inserted >= min_activities:
            print(f"\n🎯 SUCCESS: Target of {min_activities}+ activities achieved!")
        else:
            print(f"\n⚠️ Warning: Only {inserted} activities indexed (target: {min_activities}+)")
        return 0
    except Exception as e:
        # Top-level CLI boundary: report the failure and turn it into an exit code.
        print(f"❌ Error during indexing: {e}")
        return 1
def index_category(indexer: ActivityIndexer, category_code: str) -> int:
    """Index one category and print how many activities were processed.

    Args:
        indexer: Object whose ``index_specific_category(category_code)``
            returns a result mapping. The keys read here are ``success``,
            ``error``, ``category``, ``inserted_count``, ``total_parsed``
            and ``valid_activities``.
        category_code: Category identifier passed to the indexer unchanged.

    Returns:
        0 when the indexer reports success, 1 when it reports failure or
        raises.
    """
    print(f"🎯 Indexing category: {category_code}")
    try:
        outcome = indexer.index_specific_category(category_code)

        if outcome.get('success'):
            print(f"✅ Category '{outcome.get('category')}' indexed successfully")
            print(f" Inserted: {outcome.get('inserted_count')} activities")
            print(f" Parsed: {outcome.get('total_parsed')} total")
            print(f" Valid: {outcome.get('valid_activities')} valid")
            return 0

        # The indexer ran but reported a failure of its own.
        print(f"❌ Category indexing failed: {outcome.get('error', 'Unknown error')}")
        return 1
    except Exception as exc:
        print(f"❌ Error during category indexing: {exc}")
        return 1
def verify_indexing(indexer: ActivityIndexer) -> int:
    """Print the indexer's quality report and turn it into an exit code.

    Args:
        indexer: Object whose ``verify_indexing_quality()`` returns a
            result mapping. The keys read here are ``error``,
            ``total_activities``, ``meets_minimum_requirement``,
            ``category_coverage``, ``expected_categories``,
            ``quality_score`` and ``quality_issues``.

    Returns:
        0 when the reported quality score is at least 80; 1 when it is
        lower, when the result contains an ``error`` key, or when an
        exception is raised.
    """
    print("🔍 Verifying indexing quality...")
    try:
        result = indexer.verify_indexing_quality()
        if 'error' in result:
            print(f"❌ Verification error: {result['error']}")
            return 1

        # Both branches of this conditional used to be empty strings, so the
        # line never showed whether the minimum was met.
        meets_minimum = '✅' if result.get('meets_minimum_requirement') else '❌'

        print("\n📊 QUALITY VERIFICATION")
        print("=" * 30)
        print(f"Total activities: {result.get('total_activities', 0)}")
        print(f"Meets minimum (500+): {meets_minimum}")
        print(f"Category coverage: {result.get('category_coverage', 0)}/{result.get('expected_categories', 8)}")
        print(f"Quality score: {result.get('quality_score', 0)}/100")

        quality_issues = result.get('quality_issues', [])
        if quality_issues:
            print("\n⚠️ Quality Issues:")
            for issue in quality_issues[:5]:  # Show first 5 issues
                # Indented to line up with the "... and N more" line below.
                print(f"   - {issue}")
            if len(quality_issues) > 5:
                print(f" ... and {len(quality_issues) - 5} more issues")
        else:
            print("\n✅ No quality issues detected")

        return 0 if result.get('quality_score', 0) >= 80 else 1
    except Exception as e:
        # Top-level CLI boundary: report the failure and turn it into an exit code.
        print(f"❌ Error during verification: {e}")
        return 1
def show_statistics(db_manager: DatabaseManager) -> int:
    """Print a short report built from ``db_manager.get_statistics()``.

    Args:
        db_manager: Object whose ``get_statistics()`` returns a mapping.
            The keys read here are ``total_activities``,
            ``database_size_bytes``, ``database_path`` and ``categories``.

    Returns:
        0 after the report is printed, 1 if fetching or formatting the
        statistics raises.
    """
    print("📊 Database Statistics")
    print("=" * 25)
    try:
        report = db_manager.get_statistics()

        total = report.get('total_activities', 0)
        print(f"Total activities: {total}")

        # The size is stored in bytes and shown in KB.
        size_kb = report.get('database_size_bytes', 0) / 1024
        print(f"Database size: {size_kb:.1f} KB")

        location = report.get('database_path', 'Unknown')
        print(f"Database path: {location}")

        per_category = report.get('categories', {})
        if not per_category:
            return 0

        print("\nCategories:")
        for name, amount in per_category.items():
            print(f" {name}: {amount}")
        return 0
    except Exception as exc:
        print(f"❌ Error getting statistics: {exc}")
        return 1
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())