- Remove obsolete documentation files (DEPLOYMENT.md, PLAN_IMPLEMENTARE_S8_DETALIAT.md, README.md) - Add comprehensive extraction pipeline with multiple format support (PDF, HTML, text) - Implement Claude-based activity extraction with structured templates - Update dependencies and Docker configuration - Reorganize scripts directory with modular extraction components - Move example documentation to appropriate location 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
78 lines
2.7 KiB
Python
78 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Import activities extracted by Claude from JSON files
|
|
"""
|
|
|
|
import json
|
|
import sqlite3
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
class ClaudeActivityImporter:
    """Import Claude-extracted activities from JSON files into SQLite.

    Each JSON file is read as an object with an ``activities`` list and an
    optional ``source_file`` string. Every activity is a mapping whose keys
    are used as column names of the ``activities`` table.
    """

    def __init__(self, db_path='data/activities.db'):
        """Store the database path and make sure the JSON directory exists.

        Args:
            db_path: Path of the SQLite database containing ``activities``.
        """
        self.db_path = db_path
        self.json_dir = Path('scripts/extracted_activities')
        # parents=True: do not fail when the ``scripts`` directory is missing.
        self.json_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _quote_identifier(name):
        """Return *name* as a double-quoted, escaped SQL identifier."""
        return '"' + str(name).replace('"', '""') + '"'

    def import_json_file(self, json_path):
        """Import activities from a single JSON file.

        An activity is skipped when a row with the same ``name`` and
        ``source_file`` already exists. A failure on one activity is printed
        and does not stop the remaining activities from being imported.

        Args:
            json_path: ``str`` or ``Path`` of the JSON file to read.

        Returns:
            The number of activities inserted.
        """
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        source_file = data.get('source_file', str(json_path))
        activities = data.get('activities', [])

        imported = 0
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            for activity in activities:
                # Deliberately broad: one malformed activity (wrong type,
                # unknown column, unbindable value) must not abort the batch.
                try:
                    # Add source file and timestamp
                    activity['source_file'] = source_file
                    activity['created_at'] = datetime.now().isoformat()

                    # Check for duplicate
                    cursor.execute(
                        "SELECT id FROM activities WHERE name = ? AND source_file = ?",
                        (activity.get('name'), source_file)
                    )
                    if cursor.fetchone():
                        continue

                    # Column names come from the JSON file, so quote them:
                    # this keeps reserved words usable and blocks injection.
                    columns = ', '.join(
                        self._quote_identifier(key) for key in activity
                    )
                    placeholders = ', '.join(['?'] * len(activity))
                    query = f"INSERT INTO activities ({columns}) VALUES ({placeholders})"
                    cursor.execute(query, list(activity.values()))
                    imported += 1
                except Exception as e:
                    print(f"Error importing activity: {e}")

            conn.commit()
        finally:
            # Always release the connection, even on an unexpected error.
            conn.close()

        # Path(...) so that plain string paths are accepted as well.
        print(f"Imported {imported} activities from {Path(json_path).name}")
        return imported

    def import_all_json_files(self):
        """Import every ``*.json`` file found in ``self.json_dir``.

        Returns:
            The total number of activities inserted across all files.
        """
        # Sorted so the import order does not depend on the filesystem.
        json_files = sorted(self.json_dir.glob("*.json"))

        if not json_files:
            print("No JSON files found in extracted_activities directory")
            return 0

        total_imported = sum(
            self.import_json_file(json_file) for json_file in json_files
        )

        print(f"\nTotal imported: {total_imported} activities from {len(json_files)} files")
        return total_imported
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build an importer with the default database path
    # and import every JSON file from its default directory.
    ClaudeActivityImporter().import_all_json_files()