#!/usr/bin/env python3
"""
Simple API server for Echo Task Board.
Handles YouTube summarization requests.
"""

import json
import subprocess
import sys
import re
import os
from http.server import HTTPServer, SimpleHTTPRequestHandler
from urllib.parse import parse_qs, urlparse
from datetime import datetime, timezone as dt_timezone
from zoneinfo import ZoneInfo
from pathlib import Path

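# Example requests (a sketch; assumes the server is running locally on its
# default port 8080, and VIDEO_ID is a placeholder):
#   curl http://localhost:8080/api/status
#   curl 'http://localhost:8080/api/files?path=kb&action=list'
#   curl -X POST http://localhost:8080/api/youtube \
#     -H 'Content-Type: application/json' \
#     -d '{"url": "https://youtu.be/VIDEO_ID"}'
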
BASE_DIR = Path(__file__).parent.parent
TOOLS_DIR = BASE_DIR / 'tools'
NOTES_DIR = BASE_DIR / 'kb' / 'youtube'
KANBAN_DIR = BASE_DIR / 'dashboard'


class TaskBoardHandler(SimpleHTTPRequestHandler):

    def do_POST(self):
        if self.path == '/api/youtube':
            self.handle_youtube()
        elif self.path == '/api/files':
            self.handle_files_post()
        elif self.path == '/api/refresh-index':
            self.handle_refresh_index()
        else:
            self.send_error(404)

    def handle_refresh_index(self):
        """Regenerate kb/index.json."""
        try:
            script = TOOLS_DIR / 'update_notes_index.py'
            result = subprocess.run(
                [sys.executable, str(script)],
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode == 0:
                # Parse the script's output for stats
                output = result.stdout
                total_match = re.search(r'with (\d+) notes', output)
                total = int(total_match.group(1)) if total_match else 0

                self.send_json({
                    'success': True,
                    'message': f'Index regenerated with {total} notes',
                    'total': total,
                    'output': output
                })
            else:
                self.send_json({
                    'success': False,
                    'error': result.stderr or 'Unknown error'
                }, 500)
        except subprocess.TimeoutExpired:
            self.send_json({'success': False, 'error': 'Timeout'}, 500)
        except Exception as e:
            self.send_json({'success': False, 'error': str(e)}, 500)

    def handle_files_post(self):
        """Save file content posted as JSON: {"path": ..., "content": ...}."""
        try:
            content_length = int(self.headers['Content-Length'])
            post_data = self.rfile.read(content_length).decode('utf-8')
            data = json.loads(post_data)

            path = data.get('path', '')
            content = data.get('content', '')

            workspace = Path('/home/moltbot/clawd')
            target = (workspace / path).resolve()

            # Path-traversal guard: the resolved target must stay inside the
            # workspace (a plain startswith() check would also accept sibling
            # directories like /home/moltbot/clawd2).
            if not target.is_relative_to(workspace):
                self.send_json({'error': 'Access denied'}, 403)
                return

            # Create parent dirs if needed
            target.parent.mkdir(parents=True, exist_ok=True)

            # Write file
            target.write_text(content, encoding='utf-8')

            self.send_json({
                'status': 'saved',
                'path': path,
                'size': len(content)
            })
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

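    # Example save request for handle_files_post above (a sketch; the path and
    # content values are illustrative):
    #   curl -X POST http://localhost:8080/api/files \
    #     -H 'Content-Type: application/json' \
    #     -d '{"path": "kb/notes/example.md", "content": "# Hello"}'
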
    def do_GET(self):
        # Route on the path without the query string
        route = urlparse(self.path).path
        if route == '/api/status':
            self.send_json({'status': 'ok', 'time': datetime.now().isoformat()})
        elif route == '/api/git':
            self.handle_git_status()
        elif route == '/api/agents':
            self.handle_agents_status()
        elif route == '/api/cron':
            self.handle_cron_status()
        elif route == '/api/activity':
            self.handle_activity()
        elif route == '/api/files':
            self.handle_files_get()
        elif route == '/api/diff':
            self.handle_git_diff()
        elif route.startswith('/api/'):
            self.send_error(404)
        else:
            # Serve static files (from KANBAN_DIR, the cwd set in __main__)
            super().do_GET()

    def handle_git_status(self):
        """Get git status for the dashboard."""
        try:
            workspace = Path('/home/moltbot/clawd')

            # Current branch
            branch = subprocess.run(
                ['git', 'branch', '--show-current'],
                cwd=workspace, capture_output=True, text=True, timeout=5
            ).stdout.strip()

            # Last commit: short hash | subject | relative date
            last_commit = subprocess.run(
                ['git', 'log', '-1', '--format=%h|%s|%cr'],
                cwd=workspace, capture_output=True, text=True, timeout=5
            ).stdout.strip()

            commit_parts = last_commit.split('|') if last_commit else ['', '', '']

            # Uncommitted files
            status_output = subprocess.run(
                ['git', 'status', '--short'],
                cwd=workspace, capture_output=True, text=True, timeout=5
            ).stdout.strip()

            uncommitted = status_output.split('\n') if status_output else []
            uncommitted = [f for f in uncommitted if f.strip()]

            # Diff stats if there are uncommitted files (staged first, then unstaged)
            diff_stat = ''
            if uncommitted:
                diff_stat = subprocess.run(
                    ['git', 'diff', '--stat', '--cached'],
                    cwd=workspace, capture_output=True, text=True, timeout=5
                ).stdout.strip()
                if not diff_stat:
                    diff_stat = subprocess.run(
                        ['git', 'diff', '--stat'],
                        cwd=workspace, capture_output=True, text=True, timeout=5
                    ).stdout.strip()

            # Parse porcelain "XY path" lines into a structured format
            uncommitted_parsed = []
            for line in uncommitted:
                if len(line) >= 3:
                    status = line[:2].strip()
                    filepath = line[3:].strip()
                    uncommitted_parsed.append({'status': status, 'path': filepath})

            self.send_json({
                'branch': branch,
                'lastCommit': {
                    'hash': commit_parts[0] if len(commit_parts) > 0 else '',
                    'message': commit_parts[1] if len(commit_parts) > 1 else '',
                    'time': commit_parts[2] if len(commit_parts) > 2 else ''
                },
                'uncommitted': uncommitted,
                'uncommittedParsed': uncommitted_parsed,
                'uncommittedCount': len(uncommitted),
                'diffStat': diff_stat,
                'clean': len(uncommitted) == 0
            })
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

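    # Illustrative /api/git response from handle_git_status above (values made up):
    #   {"branch": "main",
    #    "lastCommit": {"hash": "a1b2c3d", "message": "Fix dashboard", "time": "2 hours ago"},
    #    "uncommitted": [" M dashboard/app.js"],
    #    "uncommittedParsed": [{"status": "M", "path": "dashboard/app.js"}],
    #    "uncommittedCount": 1, "diffStat": "...", "clean": false}
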
    def handle_git_diff(self):
        """Get git diff for a specific file."""
        parsed = urlparse(self.path)
        params = parse_qs(parsed.query)

        filepath = params.get('path', [''])[0]

        if not filepath:
            self.send_json({'error': 'path required'}, 400)
            return

        try:
            workspace = Path('/home/moltbot/clawd')

            # Security check: keep the resolved path inside the workspace
            target = (workspace / filepath).resolve()
            if not target.is_relative_to(workspace):
                self.send_json({'error': 'Access denied'}, 403)
                return

            # Get diff (try staged first, then unstaged)
            diff = subprocess.run(
                ['git', 'diff', '--cached', '--', filepath],
                cwd=workspace, capture_output=True, text=True, timeout=10
            ).stdout

            if not diff:
                diff = subprocess.run(
                    ['git', 'diff', '--', filepath],
                    cwd=workspace, capture_output=True, text=True, timeout=10
                ).stdout

            # If there is still no diff, the file may be untracked - show full content
            if not diff:
                status = subprocess.run(
                    ['git', 'status', '--short', '--', filepath],
                    cwd=workspace, capture_output=True, text=True, timeout=5
                ).stdout.strip()

                if status.startswith('??'):
                    # Untracked file - render it as all-new lines
                    if target.exists():
                        content = target.read_text(encoding='utf-8', errors='replace')[:50000]
                        diff = f"+++ b/{filepath}\n" + '\n'.join(f'+{line}' for line in content.split('\n'))

            self.send_json({
                'path': filepath,
                'diff': diff or 'No changes',
                'hasDiff': bool(diff)
            })
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

    def handle_agents_status(self):
        """Get agents status - fast version that reads session files directly."""
        try:
            # Known agents
            agents_config = [
                {'id': 'echo', 'name': 'Echo', 'emoji': '🌀'},
                {'id': 'echo-work', 'name': 'Work', 'emoji': '⚡'},
                {'id': 'echo-health', 'name': 'Health', 'emoji': '❤️'},
                {'id': 'echo-growth', 'name': 'Growth', 'emoji': '🪜'},
                {'id': 'echo-sprijin', 'name': 'Sprijin', 'emoji': '⭕'},
                {'id': 'echo-scout', 'name': 'Scout', 'emoji': '⚜️'},
            ]

            # Check active sessions by reading session files directly (fast)
            active_agents = set()
            sessions_base = Path.home() / '.clawdbot' / 'agents'

            if sessions_base.exists():
                for agent_dir in sessions_base.iterdir():
                    if not agent_dir.is_dir():
                        continue
                    sessions_file = agent_dir / 'sessions' / 'sessions.json'
                    if not sessions_file.exists():
                        continue
                    try:
                        data = json.loads(sessions_file.read_text())
                        # sessions.json is an object keyed by session id
                        now = datetime.now().timestamp() * 1000
                        for key, sess in data.items():
                            if isinstance(sess, dict):
                                last_active = sess.get('updatedAt', 0)
                                if now - last_active < 30 * 60 * 1000:  # active in the last 30 min
                                    active_agents.add(agent_dir.name)
                                    break
                    except (json.JSONDecodeError, OSError):
                        pass

            # Build response
            agents = []
            for cfg in agents_config:
                agents.append({
                    'id': cfg['id'],
                    'name': cfg['name'],
                    'emoji': cfg['emoji'],
                    'active': cfg['id'] in active_agents
                })

            self.send_json({'agents': agents})
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

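    # Assumed shape of the sessions.json read by handle_agents_status above
    # (inferred from the fields it touches; only 'updatedAt' in epoch
    # milliseconds matters here, the value shown is illustrative):
    #   {"<session-key>": {"updatedAt": 1735000000000, ...}, ...}
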
    def handle_cron_status(self):
        """Get cron jobs status from ~/.clawdbot/cron/jobs.json."""
        try:
            jobs_file = Path.home() / '.clawdbot' / 'cron' / 'jobs.json'

            if not jobs_file.exists():
                self.send_json({'jobs': [], 'error': 'No jobs file found'})
                return

            data = json.loads(jobs_file.read_text())
            all_jobs = data.get('jobs', [])

            # Keep only enabled jobs and format them for the dashboard
            today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
            today_start_ms = today_start.timestamp() * 1000

            jobs = []
            for job in all_jobs:
                if not job.get('enabled', False):
                    continue

                # Parse the cron expression to get a display time
                schedule = job.get('schedule', {})
                expr = schedule.get('expr', '')

                # Simple cron parsing for display - convert UTC to Bucharest
                parts = expr.split()
                if len(parts) >= 2:
                    minute = parts[0]
                    hour = parts[1]
                    if minute.isdigit() and (hour.isdigit() or '-' in hour):
                        # Hour ranges like "7-17": show the first hour
                        if '-' in hour:
                            hour = hour.split('-')[0]
                        # Convert UTC to Bucharest (UTC+2 in winter, UTC+3 in summer)
                        try:
                            bucharest = ZoneInfo('Europe/Bucharest')
                            # Build a UTC datetime for today at the scheduled time
                            utc_dt = datetime.now(dt_timezone.utc).replace(
                                hour=int(hour), minute=int(minute), second=0, microsecond=0
                            )
                            local_dt = utc_dt.astimezone(bucharest)
                            time_str = f"{local_dt.hour:02d}:{local_dt.minute:02d}"
                        except Exception:
                            time_str = f"{int(hour):02d}:{int(minute):02d}"
                    else:
                        time_str = expr[:15]
                else:
                    time_str = expr[:15]

                # Did the job run today?
                state = job.get('state', {})
                last_run = state.get('lastRunAtMs', 0)
                ran_today = last_run >= today_start_ms
                last_status = state.get('lastStatus', 'unknown')

                jobs.append({
                    'id': job.get('id'),
                    'name': job.get('name'),
                    'agentId': job.get('agentId'),
                    'time': time_str,
                    'schedule': expr,
                    'ranToday': ran_today,
                    'lastStatus': last_status if ran_today else None,
                    'lastRunAtMs': last_run,
                    'nextRunAtMs': state.get('nextRunAtMs')
                })

            # Sort by display time
            jobs.sort(key=lambda j: j['time'])

            self.send_json({
                'jobs': jobs,
                'total': len(jobs),
                'ranToday': sum(1 for j in jobs if j['ranToday'])
            })
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

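    # Assumed shape of the jobs.json read by handle_cron_status above
    # (inferred from the fields it reads; values are illustrative):
    #   {"jobs": [{"id": "...", "name": "...", "agentId": "...", "enabled": true,
    #              "schedule": {"expr": "0 7 * * *"},
    #              "state": {"lastRunAtMs": 0, "lastStatus": "ok", "nextRunAtMs": 0}}]}
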
    def handle_activity(self):
        """Aggregate activity from multiple sources: cron jobs, git commits, file changes."""
        try:
            activities = []
            bucharest = ZoneInfo('Europe/Bucharest')
            workspace = Path('/home/moltbot/clawd')

            # 1. Cron jobs that ran today
            try:
                result = subprocess.run(
                    ['clawdbot', 'cron', 'list', '--json'],
                    capture_output=True, text=True, timeout=10
                )
                if result.returncode == 0:
                    cron_data = json.loads(result.stdout)
                    today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
                    today_start_ms = today_start.timestamp() * 1000

                    for job in cron_data.get('jobs', []):
                        state = job.get('state', {})
                        last_run = state.get('lastRunAtMs', 0)
                        if last_run >= today_start_ms:
                            run_time = datetime.fromtimestamp(last_run / 1000, tz=dt_timezone.utc)
                            local_time = run_time.astimezone(bucharest)
                            activities.append({
                                'type': 'cron',
                                'icon': 'clock',
                                'text': f"Job: {job.get('name', 'unknown')}",
                                'agent': job.get('agentId', 'echo'),
                                'time': local_time.strftime('%H:%M'),
                                'timestamp': last_run,
                                'status': state.get('lastStatus', 'ok')
                            })
            except Exception:
                pass

            # 2. Git commits (last 24h)
            try:
                result = subprocess.run(
                    ['git', 'log', '--oneline', '--since=24 hours ago', '--format=%H|%s|%at'],
                    cwd=workspace, capture_output=True, text=True, timeout=10
                )
                if result.returncode == 0:
                    for line in result.stdout.strip().split('\n'):
                        if '|' in line:
                            parts = line.split('|')
                            if len(parts) >= 3:
                                commit_hash, message, timestamp = parts[0], parts[1], int(parts[2])
                                commit_time = datetime.fromtimestamp(timestamp, tz=dt_timezone.utc)
                                local_time = commit_time.astimezone(bucharest)
                                activities.append({
                                    'type': 'git',
                                    'icon': 'git-commit',
                                    'text': message[:60] + ('...' if len(message) > 60 else ''),
                                    'agent': 'git',
                                    'time': local_time.strftime('%H:%M'),
                                    'timestamp': timestamp * 1000,
                                    'commitHash': commit_hash[:8]
                                })
            except Exception:
                pass

            # 2b. Uncommitted files in git
            try:
                result = subprocess.run(
                    ['git', 'status', '--short'],
                    cwd=workspace, capture_output=True, text=True, timeout=10
                )
                if result.returncode == 0 and result.stdout.strip():
                    for line in result.stdout.strip().split('\n'):
                        if len(line) >= 4:
                            # Git status porcelain format: "XY filename" (XY = 2 status chars)
                            # Handles both "M " and " M" forms
                            status = line[:2]
                            # The filepath follows the status chars and any spaces
                            filepath = line[2:].lstrip()
                            if not filepath:
                                continue
                            status_clean = status.strip()
                            status_labels = {'M': 'modified', 'A': 'added', 'D': 'deleted', '??': 'new', 'R': 'renamed'}
                            status_label = status_labels.get(status_clean, status_clean)
                            activities.append({
                                'type': 'git-file',
                                'icon': 'file-diff',
                                'text': filepath,
                                'agent': f"git ({status_label})",
                                'time': 'now',
                                'timestamp': int(datetime.now().timestamp() * 1000),
                                'path': filepath,
                                'gitStatus': status_clean
                            })
            except Exception:
                pass

            # 3. Recently modified files in kb/ (last 24h)
            try:
                kb_dir = workspace / 'kb'
                cutoff = datetime.now().timestamp() - (24 * 3600)
                for md_file in kb_dir.rglob('*.md'):
                    stat = md_file.stat()
                    if stat.st_mtime > cutoff:
                        file_time = datetime.fromtimestamp(stat.st_mtime, tz=dt_timezone.utc)
                        local_time = file_time.astimezone(bucharest)
                        rel_path = md_file.relative_to(workspace)
                        activities.append({
                            'type': 'file',
                            'icon': 'file-text',
                            'text': f"File: {md_file.name}",
                            'agent': str(rel_path.parent),
                            'time': local_time.strftime('%H:%M'),
                            'timestamp': int(stat.st_mtime * 1000),
                            'path': str(rel_path)
                        })
            except Exception:
                pass

            # 4. Tasks from tasks.json (last 7 days)
            try:
                tasks_file = workspace / 'dashboard' / 'tasks.json'
                if tasks_file.exists():
                    tasks_data = json.loads(tasks_file.read_text())
                    for col in tasks_data.get('columns', []):
                        for task in col.get('tasks', []):
                            ts_str = task.get('completed') or task.get('created', '')
                            if ts_str:
                                try:
                                    ts = datetime.fromisoformat(ts_str.replace('Z', '+00:00'))
                                    if ts.timestamp() > (datetime.now().timestamp() - 7 * 24 * 3600):
                                        local_time = ts.astimezone(bucharest)
                                        activities.append({
                                            'type': 'task',
                                            'icon': 'check-circle' if task.get('completed') else 'circle',
                                            'text': task.get('title', ''),
                                            'agent': task.get('agent', 'Echo'),
                                            'time': local_time.strftime('%d %b %H:%M'),
                                            'timestamp': int(ts.timestamp() * 1000),
                                            'status': 'done' if task.get('completed') else col['id']
                                        })
                                except ValueError:
                                    pass
            except Exception:
                pass

            # Newest first, capped at 30 items
            activities.sort(key=lambda x: x.get('timestamp', 0), reverse=True)
            activities = activities[:30]

            self.send_json({
                'activities': activities,
                'total': len(activities)
            })
        except Exception as e:
            self.send_json({'error': str(e)}, 500)

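    # Assumed shape of the tasks.json read by handle_activity above (inferred
    # from the fields it reads; timestamps are ISO 8601 strings, values are
    # illustrative):
    #   {"columns": [{"id": "in-progress",
    #                 "tasks": [{"title": "...", "agent": "Echo",
    #                            "created": "2025-01-01T10:00:00Z",
    #                            "completed": null}]}]}
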
    def handle_files_get(self):
        """List a directory or return file content."""
        parsed = urlparse(self.path)
        params = parse_qs(parsed.query)

        path = params.get('path', [''])[0]
        action = params.get('action', ['list'])[0]

        # Security: only allow access within the workspace
        workspace = Path('/home/moltbot/clawd')
        try:
            target = (workspace / path).resolve()
            if not target.is_relative_to(workspace):
                self.send_json({'error': 'Access denied'}, 403)
                return
        except (OSError, ValueError):
            self.send_json({'error': 'Invalid path'}, 400)
            return

        if action == 'list':
            if not target.exists():
                self.send_json({'error': 'Path not found'}, 404)
                return

            if target.is_file():
                # Return file content
                try:
                    content = target.read_text(encoding='utf-8', errors='replace')
                    self.send_json({
                        'type': 'file',
                        'path': path,
                        'name': target.name,
                        'content': content[:100000],  # limit to 100 KB
                        'size': target.stat().st_size,
                        'truncated': target.stat().st_size > 100000
                    })
                except Exception as e:
                    self.send_json({'error': str(e)}, 500)
            else:
                # List directory
                items = []
                try:
                    for item in sorted(target.iterdir()):
                        stat = item.stat()
                        items.append({
                            'name': item.name,
                            'type': 'dir' if item.is_dir() else 'file',
                            'size': stat.st_size if item.is_file() else None,
                            'mtime': stat.st_mtime,
                            'path': str(item.relative_to(workspace))
                        })
                    self.send_json({
                        'type': 'dir',
                        'path': path,
                        'items': items
                    })
                except Exception as e:
                    self.send_json({'error': str(e)}, 500)
        else:
            self.send_json({'error': 'Unknown action'}, 400)

    def handle_youtube(self):
        try:
            content_length = int(self.headers['Content-Length'])
            post_data = self.rfile.read(content_length).decode('utf-8')
            data = json.loads(post_data)

            url = data.get('url', '').strip()

            if not url or ('youtube.com' not in url and 'youtu.be' not in url):
                self.send_json({'error': 'Invalid YouTube URL'}, 400)
                return

            # Process synchronously (simpler, avoids fork issues)
            try:
                print(f"Processing YouTube URL: {url}")
                result = process_youtube(url)
                print(f"Processing result: {result}")
                # process_youtube returns None when it could not fetch
                # video info or subtitles - report that as an error
                if not result:
                    self.send_json({
                        'status': 'error',
                        'message': 'Could not create the note (no video info or subtitles).'
                    }, 500)
                    return
                self.send_json({
                    'status': 'done',
                    'message': 'Note created! Refresh the Notes page.'
                })
            except Exception as e:
                import traceback
                print(f"YouTube processing error: {e}")
                traceback.print_exc()
                self.send_json({
                    'status': 'error',
                    'message': f'Error: {str(e)}'
                }, 500)

        except Exception as e:
            self.send_json({'error': str(e)}, 500)

    def send_json(self, data, code=200):
        self.send_response(code)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Cache-Control', 'no-cache, no-store, must-revalidate')
        self.send_header('Pragma', 'no-cache')
        self.send_header('Expires', '0')
        self.end_headers()
        self.wfile.write(json.dumps(data).encode())

    def do_OPTIONS(self):
        self.send_response(200)
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')
        self.end_headers()


def process_youtube(url):
    """Download subtitles, build a note, save it, and update the index and kanban."""
    # yt-dlp binary used for both metadata and subtitle download
    yt_dlp = os.path.expanduser('~/.local/bin/yt-dlp')

    # Get title and metadata
    result = subprocess.run(
        [yt_dlp, '--dump-json', '--no-download', url],
        capture_output=True, text=True, timeout=30
    )

    if result.returncode != 0:
        print(f"Failed to get video info: {result.stderr}")
        return

    info = json.loads(result.stdout)
    title = info.get('title', 'Unknown')
    duration = int(info.get('duration') or 0)

    # Download subtitles into a clean temp dir
    temp_dir = Path('/tmp/yt_subs')
    temp_dir.mkdir(exist_ok=True)

    for f in temp_dir.glob('*'):
        f.unlink()

    subprocess.run([
        yt_dlp, '--write-auto-subs', '--sub-langs', 'en',
        '--skip-download', '--sub-format', 'vtt',
        '-o', str(temp_dir / '%(id)s'),
        url
    ], capture_output=True, timeout=120)

    # Find and read the subtitle file
    transcript = None
    for sub_file in temp_dir.glob('*.vtt'):
        content = sub_file.read_text(encoding='utf-8', errors='replace')
        transcript = clean_vtt(content)
        break

    if not transcript:
        print("No subtitles found")
        return

    # Build the note filename: YYYY-MM-DD_slugified-title.md
    date_str = datetime.now().strftime('%Y-%m-%d')
    slug = re.sub(r'[^\w\s-]', '', title.lower())[:50].strip().replace(' ', '-')
    filename = f"{date_str}_{slug}.md"

    # Create a simple note (no AI summary yet - just the transcript)
    note_content = f"""# {title}

**Video:** {url}
**Duration:** {duration // 60}:{duration % 60:02d}
**Saved:** {date_str}
**Tags:** #youtube #to-summarize

---

## Transcript

{transcript[:15000]}

---

*Note: The summary will be added by Echo.*
"""

    # Save the note
    NOTES_DIR.mkdir(parents=True, exist_ok=True)
    note_path = NOTES_DIR / filename
    note_path.write_text(note_content, encoding='utf-8')

    # Update the notes index
    subprocess.run([
        sys.executable, str(TOOLS_DIR / 'update_notes_index.py')
    ], capture_output=True)

    # Add a task to the kanban board
    subprocess.run([
        sys.executable, str(KANBAN_DIR / 'update_task.py'),
        'add', 'in-progress', f'Summarize: {title[:30]}...', url, 'medium'
    ], capture_output=True)

    print(f"Created note: {filename}")
    return filename


def clean_vtt(content):
    """Convert VTT subtitles to plain text: strip headers, timestamps, markup
    tags, and duplicate cue lines (auto-generated subs repeat each line)."""
    lines = []
    seen = set()

    for line in content.split('\n'):
        if any([
            line.startswith('WEBVTT'),
            line.startswith('Kind:'),
            line.startswith('Language:'),
            '-->' in line,
            line.strip().startswith('<'),
            not line.strip(),
            re.match(r'^\d+$', line.strip())
        ]):
            continue

        clean = re.sub(r'<[^>]+>', '', line).strip()
        if clean and clean not in seen:
            seen.add(clean)
            lines.append(clean)

    return ' '.join(lines)

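# Worked example for clean_vtt (illustrative input):
#   clean_vtt("WEBVTT\nKind: captions\n\n00:01.000 --> 00:03.000\nhello world\nhello world\n")
# drops the header, timestamp, and duplicate cue line, returning:
#   'hello world'
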
if __name__ == '__main__':
    port = 8080
    # Serve static files from the dashboard directory
    os.chdir(KANBAN_DIR)

    print(f"Starting Echo Task Board API on port {port}")
    httpd = HTTPServer(('0.0.0.0', port), TaskBoardHandler)
    httpd.serve_forever()