#!/usr/bin/env python3
"""
Content Discovery - automatic content search based on interests.
Runs at night and prepares notes for the morning report.

Usage: python3 content_discovery.py [--dry-run]
"""
import os
import json
import re
from datetime import datetime, timedelta
from pathlib import Path

# Workspace layout: this script lives one directory below the workspace root,
# so the root is two levels up from this file.
WORKSPACE = Path(__file__).parent.parent
MEMORY_DIR = WORKSPACE / "memory"
INSIGHTS_DIR = WORKSPACE / "kb" / "insights"
USER_MD = WORKSPACE / "USER.md"  # NOTE(review): not used in this file — presumably read elsewhere

# Base interests (fallback when little recent activity is found)
BASE_INTERESTS = [
    "NLP Sleight of Mouth patterns",
    "comunicare nonviolentă Marshall Rosenberg",
    "James Clear atomic habits productivity",
    "Monica Ion mindset antreprenor",
    "dezvoltare personală coaching",
    "Rumi quotes wisdom philosophy",
    "stoicism practical philosophy",
    "noua medicină germanică",
    "post negru fasting benefits",
    "80/20 principle productivity",
    "leadership entrepreneurship",
]


def get_recent_files(directory: Path, days: int = 3) -> list:
    """Return the .md files in *directory* modified in the last *days* days,
    newest first. A non-existent directory yields an empty list."""
    cutoff = (datetime.now() - timedelta(days=days)).timestamp()
    files = []
    if directory.exists():
        files = [f for f in directory.glob("*.md") if f.stat().st_mtime > cutoff]
    return sorted(files, key=lambda x: x.stat().st_mtime, reverse=True)


def extract_topics_from_file(filepath: Path) -> list:
    """Extract candidate topic strings from a markdown file.

    Sources, in order: section headers (# / ##), YouTube video titles
    (top-level #), @tags, and short bold terms. Returns a de-duplicated
    list in first-seen order.
    """
    topics = []
    try:
        content = filepath.read_text(encoding='utf-8')
        # Extract from headers (# or ##)
        headers = re.findall(r'^##?\s+(.+)$', content, re.MULTILINE)
        topics.extend(headers[:5])
        # Extract YouTube video titles (top-level # headers)
        yt_titles = re.findall(r'^#\s+(.+)$', content, re.MULTILINE)
        topics.extend(yt_titles[:3])
        # Extract @tags
        tags = re.findall(r'@(\w+)', content)
        topics.extend(tags[:5])
        # Extract bold terms, skipping very long ones
        bold = re.findall(r'\*\*([^*]+)\*\*', content)
        topics.extend([b for b in bold if len(b) < 50][:5])
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole scan.
        print(f" Warning: Could not read {filepath}: {e}")
    # FIX: dict.fromkeys preserves first-seen order; list(set(...)) made the
    # downstream [:N] topic selection nondeterministic between runs.
    return list(dict.fromkeys(topics))


def _scan_directory(directory: Path, label: str) -> list:
    """Print *label*, scan *directory* for recent files, and collect topics."""
    collected = []
    print(label)
    for f in get_recent_files(directory, days=3):
        topics = extract_topics_from_file(f)
        collected.extend(topics)
        print(f" {f.name}: {len(topics)} topics")
    return collected


def get_recent_topics() -> list:
    """Analyze recent memory, insights and YouTube notes to find current interests."""
    recent_topics = []
    recent_topics += _scan_directory(MEMORY_DIR, "Scanning recent memory...")
    recent_topics += _scan_directory(INSIGHTS_DIR, "Scanning recent insights...")
    recent_topics += _scan_directory(WORKSPACE / "kb" / "youtube",
                                     "Scanning recent YouTube notes...")
    # Order-preserving de-duplication (see extract_topics_from_file).
    return list(dict.fromkeys(recent_topics))


def build_search_queries(recent_topics: list, base_interests: list) -> list:
    """Build up to five search queries: ~60% from recent topics, ~40% from
    base interests rotated by day of year.

    Returns a list of dicts with keys "query", "source" ("recent"/"base")
    and "topic".
    """
    queries = []

    # Keep only reasonably sized topic strings.
    recent_clean = [t for t in recent_topics if 3 < len(t) < 100][:10]

    # Up to 3 queries from recent activity (60% share when available).
    for topic in recent_clean[:3]:
        queries.append({
            "query": f"{topic} YouTube tutorial",
            "source": "recent",
            "topic": topic,
        })

    # Up to 2 queries from base interests, rotated daily so the same
    # interests are not searched every night.
    # FIX: guard against an empty interest list (modulo by zero).
    if base_interests:
        offset = datetime.now().timetuple().tm_yday % len(base_interests)
        rotated_base = base_interests[offset:] + base_interests[:offset]
        for interest in rotated_base[:2]:
            queries.append({
                "query": interest,
                "source": "base",
                "topic": interest,
            })

    return queries[:5]  # Max 5 queries


def save_discovery_plan(queries: list) -> Path:
    """Save the discovery plan as JSON for the agent to execute; return its path."""
    plan = {
        "generated_at": datetime.now().isoformat(),
        "queries": queries,
        "status": "pending",
        "results": [],
    }
    # Use the shared MEMORY_DIR constant instead of rebuilding the path.
    plan_file = MEMORY_DIR / "content-discovery-plan.json"
    with open(plan_file, 'w', encoding='utf-8') as f:
        json.dump(plan, f, indent=2, ensure_ascii=False)
    print(f"\n✅ Plan saved to {plan_file}")
    return plan_file


def main(dry_run: bool = False):
    """Entry point: scan recent notes, build queries, and (unless *dry_run*)
    persist the plan for the agent to execute."""
    print("=" * 50)
    print("🔍 Content Discovery - Pregătire căutare")
    print(f" Data: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print("=" * 50)

    # 1. Gather topics from recent activity
    recent_topics = get_recent_topics()
    print(f"\n📌 Topics recente găsite: {len(recent_topics)}")
    if recent_topics:
        print(f" Exemple: {recent_topics[:5]}")

    # 2. Build search queries
    queries = build_search_queries(recent_topics, BASE_INTERESTS)
    print(f"\n🔎 Queries generate: {len(queries)}")
    for i, q in enumerate(queries, 1):
        print(f" {i}. [{q['source']}] {q['query'][:60]}...")

    if dry_run:
        print("\n⚠️ DRY RUN - nu salvez planul")
        return

    # 3. Save plan for agent execution (path is printed by the saver).
    save_discovery_plan(queries)
    print("\n📋 Următorul pas:")
    print(" Agentul va citi planul și va executa căutările")
    print(" Rezultatele vor fi în morning report")


if __name__ == "__main__":
    import sys
    dry_run = "--dry-run" in sys.argv
    main(dry_run)