#!/usr/bin/env python3
"""Generate index.json for the notes collection from .md files.

Extracts title, date, tags, and agent domains (@work, @health, etc.)
from each note. Scans ALL configured subdirectories under notes/
(youtube, retete, etc.).
"""
import os
import re
import json
from pathlib import Path

NOTES_ROOT = Path(__file__).parent.parent / "notes"
INDEX_FILE = NOTES_ROOT / "index.json"

# Subdirectories to scan (add new categories here)
SCAN_DIRS = ['youtube', 'retete']

# Agent domains recognized in tag lines
VALID_DOMAINS = ['work', 'health', 'growth', 'sprijin', 'scout']


def extract_metadata(filepath):
    """Extract metadata from one markdown note.

    Args:
        filepath: Path to a note file, ideally named ``YYYY-MM-DD_slug.md``.

    Returns:
        dict with keys ``file``, ``title``, ``date``, ``tags``, ``domains``,
        ``video`` and ``tldr``. Missing pieces fall back to empty values
        (or the filename stem for the title).
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Title: first ATX heading line ("# ..."); fall back to the file stem.
    title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    title = title_match.group(1) if title_match else filepath.stem

    # Tags line: "**Tags:** ..." (bold) or a plain "Tags: ..." line.
    tags = []
    domains = []
    tags_match = re.search(r'\*\*Tags?:\*\*\s*(.+)$|^Tags?:\s*(.+)$',
                           content, re.MULTILINE | re.IGNORECASE)
    if tags_match:
        # Exactly one alternative matched; pick whichever group captured.
        tags_str = tags_match.group(1) or tags_match.group(2)

        # Domains (@work, @health, ...), restricted to the known list.
        domain_matches = re.findall(r'@(\w+)', tags_str)
        domains = [d for d in domain_matches if d in VALID_DOMAINS]

        # Regular #tags — exclude names that collide with domains.
        all_tags = re.findall(r'#([\w-]+)', tags_str)
        tags = [t for t in all_tags if t not in VALID_DOMAINS]

    # Date from the filename (YYYY-MM-DD_slug.md); empty string when absent.
    date_match = re.match(r'(\d{4}-\d{2}-\d{2})_', filepath.name)
    date = date_match.group(1) if date_match else ""

    # Video URL from a "**Video:**" or "**Link:**" line.
    video_match = re.search(r'\*\*(?:Video|Link):\*\*\s*(https?://[^\s]+)', content)
    video_url = video_match.group(1) if video_match else ""

    # TL;DR section body (up to the next heading or horizontal rule),
    # truncated to 200 characters with an "..." marker when clipped.
    tldr_match = re.search(r'##\s*📋?\s*TL;DR\s*\n+(.+?)(?=\n##|\n---|\Z)',
                           content, re.DOTALL)
    tldr = ""
    if tldr_match:
        full_tldr = tldr_match.group(1).strip()
        tldr = full_tldr[:200]
        if len(full_tldr) > 200:
            tldr += "..."

    return {
        "file": filepath.name,
        "title": title,
        "date": date,
        "tags": tags,
        "domains": domains,
        "video": video_url,
        "tldr": tldr
    }


def generate_index():
    """Build index.json from every .md note in every scanned subdirectory.

    Side effects: writes ``INDEX_FILE`` and prints progress to stdout.

    Returns:
        The dict that was serialized to index.json (notes plus stats).
    """
    notes = []

    # Per-domain note counts
    domain_stats = {d: 0 for d in VALID_DOMAINS}
    # Per-category (subdirectory) note counts
    category_stats = {}

    for subdir in SCAN_DIRS:
        notes_dir = NOTES_ROOT / subdir
        if not notes_dir.exists():
            print(f"  (skipping {subdir}/ - not found)")
            continue

        print(f"Scanning notes/{subdir}/...")
        category_stats[subdir] = 0

        for filepath in sorted(notes_dir.glob("*.md"), reverse=True):
            try:
                metadata = extract_metadata(filepath)
                # Record the category (subdirectory) and make the file
                # path category-relative so the index can locate it.
                metadata['category'] = subdir
                metadata['file'] = f"{subdir}/{filepath.name}"
                notes.append(metadata)

                # Update stats
                category_stats[subdir] += 1
                for d in metadata['domains']:
                    domain_stats[d] += 1

                domains_str = ' '.join([f'@{d}' for d in metadata['domains']]) if metadata['domains'] else ''
                print(f"  + {metadata['title'][:40]}... {domains_str}")
            except Exception as e:
                # Best-effort: a single bad note must not abort indexing.
                print(f"  ! Error processing {filepath.name}: {e}")

    # Sort by date, newest first
    notes.sort(key=lambda x: x['date'], reverse=True)

    # Global metadata wrapper
    output = {
        "notes": notes,
        "stats": {
            "total": len(notes),
            "by_domain": domain_stats,
            "by_category": category_stats
        },
        "domains": VALID_DOMAINS,
        "categories": SCAN_DIRS
    }

    with open(INDEX_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Generated {INDEX_FILE} with {len(notes)} notes")
    print(f"   Domains: {domain_stats}")
    print(f"   Categories: {category_stats}")

    return output


if __name__ == "__main__":
    generate_index()