- Remove obsolete documentation files (DEPLOYMENT.md, PLAN_IMPLEMENTARE_S8_DETALIAT.md, README.md) - Add comprehensive extraction pipeline with multiple format support (PDF, HTML, text) - Implement Claude-based activity extraction with structured templates - Update dependencies and Docker configuration - Reorganize scripts directory with modular extraction components - Move example documentation to appropriate location 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
50 lines
1.4 KiB
Python
50 lines
1.4 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Main extraction orchestrator
|
|
Runs the entire extraction process
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
from unified_processor import UnifiedProcessor
|
|
from import_claude_activities import ClaudeActivityImporter
|
|
|
|
def main():
    """Run the activity-extraction workflow end to end.

    The workflow has three steps:

    1. Build a ``UnifiedProcessor`` and call ``process_automated_formats()``.
    2. Print instructions for the manual Claude step and ask the operator
       on stdin whether it has been completed.
    3. If the operator confirms, build a ``ClaudeActivityImporter`` and call
       ``import_all_json_files()``.

    A closing banner is printed whether or not step 3 ran.

    Returns:
        None. All results are reported through stdout and through the
        side effects of the processor/importer objects.
    """
    banner = "=" * 60
    rule = "-" * 40

    print(banner)
    print("ACTIVITY EXTRACTION SYSTEM")
    print("Strategy S8: Hybrid Claude + Scripts")
    print(banner)

    # Step 1: Run automated extraction
    print("\nSTEP 1: Automated Extraction")
    print(rule)
    processor = UnifiedProcessor()
    processor.process_automated_formats()

    # Step 2: Wait for Claude processing
    print("\n" + banner)
    print("STEP 2: Manual Claude Processing Required")
    print(rule)
    print("Please process PDF/DOC files with Claude using the template.")
    print("Files are listed in: pdf_doc_for_claude.txt")
    print("Save extracted activities as JSON in: scripts/extracted_activities/")
    print(banner)

    try:
        response = input("\nHave you completed Claude processing? (y/n): ")
    except EOFError:
        # stdin is closed or not interactive (e.g. run from cron or a pipe):
        # treat it as "not confirmed" instead of crashing after step 1
        # has already done its work.
        response = ""

    # Tolerate surrounding whitespace and the spelled-out "yes" so that a
    # stray space does not silently skip the import step.
    confirmed = response.strip().lower() in ("y", "yes")

    if confirmed:
        # Step 3: Import Claude-extracted activities
        print("\nSTEP 3: Importing Claude-extracted activities")
        print(rule)
        importer = ClaudeActivityImporter()
        importer.import_all_json_files()

    print("\n" + banner)
    print("EXTRACTION COMPLETE!")
    print(banner)
|
|
|
|
# Script entry point: run the workflow only when this file is executed
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    main()