Refactor extraction system and reorganize project structure

- Remove obsolete documentation files (DEPLOYMENT.md, PLAN_IMPLEMENTARE_S8_DETALIAT.md, README.md)
- Add comprehensive extraction pipeline with multiple format support (PDF, HTML, text)
- Implement Claude-based activity extraction with structured templates
- Update dependencies and Docker configuration
- Reorganize scripts directory with modular extraction components
- Move example documentation to appropriate location

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-09-11 23:32:37 +03:00
parent 1b6b7e06ad
commit a19ddf0b71
119 changed files with 91074 additions and 1859 deletions

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""
Import activities extracted by Claude from JSON files
"""
import json
import sqlite3
from pathlib import Path
from datetime import datetime
class ClaudeActivityImporter:
    """Import Claude-extracted activities from JSON files into SQLite.

    Each JSON file is expected to be an object with an ``activities`` list
    (one dict per activity, keyed by column name) and an optional
    ``source_file`` string. Rows are inserted into the ``activities`` table
    of the database at ``db_path``.
    """

    def __init__(self, db_path='data/activities.db'):
        """Remember the database path and make sure the JSON directory exists.

        Args:
            db_path: Path of the SQLite database holding the ``activities``
                table.
        """
        self.db_path = db_path
        self.json_dir = Path('scripts/extracted_activities')
        # parents=True: do not crash when ``scripts/`` itself is missing.
        self.json_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _quote_identifier(name):
        """Return *name* as a double-quoted SQL identifier."""
        return '"' + name.replace('"', '""') + '"'

    @staticmethod
    def _table_columns(cursor):
        """Map lower-cased column name -> real column name of ``activities``."""
        cursor.execute("PRAGMA table_info(activities)")
        # Each table_info row is (cid, name, type, notnull, dflt_value, pk).
        return {row[1].lower(): row[1] for row in cursor.fetchall()}

    def import_json_file(self, json_path):
        """Import activities from a single JSON file.

        An activity is skipped when a row with the same ``name`` and
        ``source_file`` already exists. An activity that cannot be inserted
        (not a dict, unknown column, SQLite error) is reported on stdout and
        skipped; the rest of the file is still processed.

        Args:
            json_path: ``str`` or ``Path`` of the JSON file to read.

        Returns:
            Number of activities inserted from this file.
        """
        json_path = Path(json_path)
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        source_file = data.get('source_file', str(json_path))
        # ``or []`` also covers an explicit ``"activities": null``.
        activities = data.get('activities') or []

        imported = 0
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            # Column names cannot be bound as SQL parameters, so JSON keys are
            # checked against the real schema before being placed in the query.
            known_columns = self._table_columns(cursor)

            for activity in activities:
                try:
                    if not isinstance(activity, dict):
                        raise TypeError(
                            f"expected an object, got {type(activity).__name__}"
                        )

                    # Add source file and timestamp without mutating the input.
                    row = {
                        **activity,
                        'source_file': source_file,
                        'created_at': datetime.now().isoformat(),
                    }

                    unknown = [k for k in row if k.lower() not in known_columns]
                    if unknown:
                        raise ValueError(
                            f"unknown column(s): {', '.join(unknown)}"
                        )

                    # Check for duplicate
                    cursor.execute(
                        "SELECT id FROM activities WHERE name = ? AND source_file = ?",
                        (row.get('name'), source_file)
                    )
                    if cursor.fetchone():
                        continue

                    # Quoting keeps reserved-word column names valid.
                    columns = ', '.join(
                        self._quote_identifier(known_columns[k.lower()])
                        for k in row
                    )
                    placeholders = ', '.join('?' for _ in row)
                    cursor.execute(
                        f"INSERT INTO activities ({columns}) VALUES ({placeholders})",
                        list(row.values())
                    )
                    imported += 1
                except Exception as e:
                    # Deliberate best effort: one bad record must not abort
                    # the import of the remaining activities.
                    print(f"Error importing activity: {e}")

            conn.commit()
        finally:
            # Always release the connection, even if commit or setup fails.
            conn.close()

        print(f"Imported {imported} activities from {json_path.name}")
        return imported

    def import_all_json_files(self):
        """Import all JSON files from the extracted_activities directory.

        Returns:
            Total number of activities inserted across all files.
        """
        # Sorted so the processing order (and the log output) is deterministic.
        json_files = sorted(self.json_dir.glob("*.json"))

        if not json_files:
            print("No JSON files found in extracted_activities directory")
            return 0

        total_imported = 0
        for json_file in json_files:
            total_imported += self.import_json_file(json_file)

        print(f"\nTotal imported: {total_imported} activities from {len(json_files)} files")
        return total_imported
if __name__ == "__main__":
    # Script entry point: import every JSON file found in the default
    # extracted_activities directory into the default database.
    ClaudeActivityImporter().import_all_json_files()