Refactor extraction system and reorganize project structure
- Remove obsolete documentation files (DEPLOYMENT.md, PLAN_IMPLEMENTARE_S8_DETALIAT.md, README.md)
- Add comprehensive extraction pipeline with multiple format support (PDF, HTML, text)
- Implement Claude-based activity extraction with structured templates
- Update dependencies and Docker configuration
- Reorganize scripts directory with modular extraction components
- Move example documentation to appropriate location

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
50
scripts/run_extraction.py
Normal file
50
scripts/run_extraction.py
Normal file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Main extraction orchestrator
|
||||
Runs the entire extraction process
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from unified_processor import UnifiedProcessor
|
||||
from import_claude_activities import ClaudeActivityImporter
|
||||
|
||||
def main():
    """Drive the extraction workflow interactively from the console.

    Prints a title banner, runs ``process_automated_formats()`` on a fresh
    ``UnifiedProcessor``, prints the instructions for the manual Claude
    step, and then asks on stdin whether that step is finished. When the
    answer, compared case-insensitively, is exactly ``y``, a
    ``ClaudeActivityImporter`` is created and ``import_all_json_files()``
    is called on it. The closing banner is printed afterwards.
    """
    # NOTE(review): the source this was reconstructed from had lost its
    # indentation; the closing banner is assumed to sit outside the
    # confirmation branch -- confirm against the repository copy.
    heavy_rule = "=" * 60
    light_rule = "-" * 40

    # Title banner.
    for banner_line in (
        heavy_rule,
        "ACTIVITY EXTRACTION SYSTEM",
        "Strategy S8: Hybrid Claude + Scripts",
        heavy_rule,
    ):
        print(banner_line)

    # Step 1: automated extraction.
    print("\nSTEP 1: Automated Extraction")
    print(light_rule)
    UnifiedProcessor().process_automated_formats()

    # Step 2: hand over to the operator for the manual Claude pass.
    for instruction_line in (
        "\n" + heavy_rule,
        "STEP 2: Manual Claude Processing Required",
        light_rule,
        "Please process PDF/DOC files with Claude using the template.",
        "Files are listed in: pdf_doc_for_claude.txt",
        "Save extracted activities as JSON in: scripts/extracted_activities/",
        heavy_rule,
    ):
        print(instruction_line)

    answer = input("\nHave you completed Claude processing? (y/n): ")
    confirmed = answer.lower() == 'y'

    if confirmed:
        # Step 3: import what the operator saved as JSON.
        print("\nSTEP 3: Importing Claude-extracted activities")
        print(light_rule)
        ClaudeActivityImporter().import_all_json_files()

    # Closing banner.
    for closing_line in ("\n" + heavy_rule, "EXTRACTION COMPLETE!", heavy_rule):
        print(closing_line)
|
||||
|
||||
# Start the orchestrator only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user