- Move and reorganize project folders - Update paths in TOOLS.md - Sync agent configurations - 79 files updated
290 lines · 10 KiB · Python
#!/usr/bin/env python3
"""
Generates index.json for the KB from the .md files.
Scans: kb/, memory/, conversations/
Extracts the title, date, tags, and domains (@work, @health, etc.)
"""

import re
import json
from pathlib import Path
from datetime import datetime

BASE_DIR = Path(__file__).parent.parent
KB_ROOT = BASE_DIR / "kb"
MEMORY_DIR = BASE_DIR / "memory"
CONVERSATIONS_DIR = BASE_DIR / "conversations"
INDEX_FILE = KB_ROOT / "index.json"

# Agent domains
VALID_DOMAINS = ['work', 'health', 'growth', 'sprijin', 'scout']

# Special types (for grup-sprijin etc.)
VALID_TYPES = ['exercitiu', 'meditatie', 'reflectie', 'intrebare', 'fisa', 'project', 'memory', 'conversation', 'coaching']

# Cache for rules files, keyed by directory path
_rules_cache = {}

def load_rules(filepath):
    """Loads the rules from a .rules.json in the file's directory or its parents."""
    dir_path = filepath.parent

    # Check cache
    if str(dir_path) in _rules_cache:
        return _rules_cache[str(dir_path)]

    # Look for .rules.json in the current dir and its parents (up to kb/)
    rules = {
        "defaultDomains": [],
        "defaultTypes": [],
        "defaultTags": [],
        "inferTypeFromFilename": False,
        "filenameTypeMap": {}
    }

    # Collect rules from all levels (child rules override parent)
    rules_chain = []
    current = dir_path
    while current == KB_ROOT or KB_ROOT in current.parents:
        rules_file = current / ".rules.json"
        if rules_file.exists():
            try:
                with open(rules_file, 'r', encoding='utf-8') as f:
                    rules_chain.insert(0, json.load(f))  # Parent first
            except (OSError, ValueError):
                pass  # Skip unreadable or invalid rules files
        current = current.parent

    # Merge rules (child overrides parent; lists are merged)
    for r in rules_chain:
        for key in rules:
            if key in r:
                if isinstance(rules[key], list):
                    # Extend lists (don't override)
                    rules[key] = list(set(rules[key] + r[key]))
                else:
                    rules[key] = r[key]

    _rules_cache[str(dir_path)] = rules
    return rules
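
# Illustrative .rules.json (the key names match the defaults in load_rules above;
# the values and the location are made up):
#
#   kb/projects/grup-sprijin/.rules.json
#   {
#     "defaultDomains": ["sprijin"],
#     "defaultTypes": ["fisa"],
#     "defaultTags": ["grup-sprijin"],
#     "inferTypeFromFilename": true,
#     "filenameTypeMap": {"meditatie": "meditatie", "exercitiu": "exercitiu"}
#   }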

def extract_metadata(filepath, category, subcategory=None):
    """Extracts metadata from a markdown file."""
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract the title (first line starting with #)
    title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    title = title_match.group(1) if title_match else filepath.stem

    # Extract tags (the line with **Tags:** or Tags:)
    tags = []
    domains = []
    types = []
    tags_match = re.search(r'\*\*Tags?:\*\*\s*(.+)$|^Tags?:\s*(.+)$', content, re.MULTILINE | re.IGNORECASE)
    if tags_match:
        tags_str = tags_match.group(1) or tags_match.group(2)

        # Extract domains (@work, @health, etc.)
        domain_matches = re.findall(r'@(\w+)', tags_str)
        domains = [d for d in domain_matches if d in VALID_DOMAINS]
        types = [d for d in domain_matches if d in VALID_TYPES]

        # Extract regular tags (#tag)
        all_tags = re.findall(r'#([\w-]+)', tags_str)
        tags = [t for t in all_tags if t not in VALID_DOMAINS and t not in VALID_TYPES]

    # Apply rules from .rules.json (if present)
    rules = load_rules(filepath)

    # Add default domains (if not already present)
    for d in rules.get("defaultDomains", []):
        if d not in domains:
            domains.append(d)

    # Add default types
    for t in rules.get("defaultTypes", []):
        if t not in types:
            types.append(t)

    # Add default tags
    for t in rules.get("defaultTags", []):
        if t not in tags:
            tags.append(t)

    # Infer the type from the filename (if configured)
    if rules.get("inferTypeFromFilename"):
        filename_lower = filepath.stem.lower()
        for pattern, type_name in rules.get("filenameTypeMap", {}).items():
            if pattern in filename_lower and type_name not in types:
                types.append(type_name)
                break

    # Extract the date from the filename (YYYY-MM-DD_slug.md or YYYY-MM-DD.md)
    date_match = re.match(r'(\d{4}-\d{2}-\d{2})', filepath.name)
    date = date_match.group(1) if date_match else ""

    # For files without a date in the name, use mtime
    if not date:
        mtime = filepath.stat().st_mtime
        date = datetime.fromtimestamp(mtime).strftime('%Y-%m-%d')

    # Extract the video URL
    video_match = re.search(r'\*\*(?:Video|Link):\*\*\s*(https?://[^\s]+)', content)
    video_url = video_match.group(1) if video_match else ""

    # Extract the TL;DR or the first ~200 characters of content
    tldr = ""
    tldr_match = re.search(r'##\s*📋?\s*TL;DR\s*\n+(.+?)(?=\n##|\n---|\Z)', content, re.DOTALL)
    if tldr_match:
        tldr = tldr_match.group(1).strip()[:200]
    else:
        # Fallback: the first paragraph after the title
        para_match = re.search(r'^#.+\n+(.+?)(?=\n\n|\n#|\Z)', content, re.DOTALL)
        if para_match:
            tldr = para_match.group(1).strip()[:200]
    if len(tldr) >= 200:
        tldr += "..."

    # Build the relative path for the web (served from dashboard/)
    # The dashboard has symlinks: notes-data -> ../kb, memory -> ../memory, conversations -> ../conversations
    rel_path = str(filepath.relative_to(BASE_DIR))
    # Turn kb/... into notes-data/... for the web
    if rel_path.startswith('kb/'):
        rel_path = 'notes-data/' + rel_path[3:]

    return {
        "file": rel_path,
        "title": title,
        "date": date,
        "tags": tags,
        "domains": domains,
        "types": types,
        "category": category,
        "project": subcategory,  # first level under projects/ (grup-sprijin, vending-master)
        "subdir": None,  # set in scan_directory for deeper levels
        "video": video_url,
        "tldr": tldr
    }
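
# Illustrative entry returned by extract_metadata for the example note at the top
# of the file, when called from the recursive scan (keys match the dict above,
# values are made up):
#
#   {
#     "file": "notes-data/projects/grup-sprijin/2025-01-15_intalnire-saptamanala.md",
#     "title": "Întâlnire săptămânală",
#     "date": "2025-01-15",
#     "tags": ["intalnire", "planificare"],
#     "domains": ["sprijin", "work"],
#     "types": ["fisa"],
#     "category": "projects",
#     "project": "grup-sprijin",
#     "subdir": None,
#     "video": "",
#     "tldr": "..."
#   }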

def scan_directory(dir_path, category, subcategory=None, recursive=False):
    """Scans a directory for .md files."""
    notes = []

    if not dir_path.exists():
        return notes

    # Defaults for the special categories (memory/, conversations/)
    category_defaults = {
        "memory": {"types": ["memory"], "domains": []},
        "conversations": {"types": ["conversation"], "domains": []}
    }

    if recursive:
        # Scan recursively
        for filepath in dir_path.rglob("*.md"):
            if filepath.name.startswith('.') or 'template' in filepath.name.lower():
                continue
            try:
                # Determine project and subdir from the path
                # Ex: projects/grup-sprijin/biblioteca/file.md
                #     -> project = grup-sprijin, subdir = biblioteca
                rel_to_dir = filepath.relative_to(dir_path)
                parts = rel_to_dir.parts[:-1]  # exclude the filename

                project = parts[0] if len(parts) > 0 else None
                subdir = parts[1] if len(parts) > 1 else None

                metadata = extract_metadata(filepath, category, project)
                metadata['subdir'] = subdir
                notes.append(metadata)
            except Exception as e:
                print(f"  ! Error processing {filepath}: {e}")
    else:
        # Scan only the files in this directory (no subdirectories)
        for filepath in sorted(dir_path.glob("*.md"), reverse=True):
            if filepath.name.startswith('.') or 'template' in filepath.name.lower():
                continue
            try:
                metadata = extract_metadata(filepath, category, subcategory)
                # Apply defaults for the special category
                if category in category_defaults:
                    defaults = category_defaults[category]
                    for t in defaults.get("types", []):
                        if t not in metadata["types"]:
                            metadata["types"].append(t)
                    for d in defaults.get("domains", []):
                        if d not in metadata["domains"]:
                            metadata["domains"].append(d)
                notes.append(metadata)
            except Exception as e:
                print(f"  ! Error processing {filepath}: {e}")

    return notes
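
# Illustrative calls, mirroring how generate_index uses this function below
# (the "projects" subdirectory name is just an example):
#   scan_directory(KB_ROOT / "projects", "projects", recursive=True)  # recursive kb/ category
#   scan_directory(MEMORY_DIR, "memory")                              # flat special directory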

def generate_index():
    """Generates index.json from all sources."""
    all_notes = []

    # Stats
    domain_stats = {d: 0 for d in VALID_DOMAINS}
    category_stats = {}

    # Scan ALL subdirectories of kb/ recursively
    print("Scanning kb/ (all subdirectories)...")
    for subdir in sorted(KB_ROOT.iterdir()):
        if subdir.is_dir() and not subdir.name.startswith('.'):
            category = subdir.name
            print(f"  [{category}]")
            notes = scan_directory(subdir, category, recursive=True)
            all_notes.extend(notes)
            category_stats[category] = len(notes)
            for n in notes:
                print(f"    + {n['title'][:42]}...")
                for d in n['domains']:
                    domain_stats[d] += 1

    # Scan memory/
    print("Scanning memory/...")
    memory_notes = scan_directory(MEMORY_DIR, "memory")
    all_notes.extend(memory_notes)
    category_stats["memory"] = len(memory_notes)
    for n in memory_notes:
        print(f"  + {n['title'][:45]}...")

    # Scan conversations/
    print("Scanning conversations/...")
    conv_notes = scan_directory(CONVERSATIONS_DIR, "conversations")
    all_notes.extend(conv_notes)
    category_stats["conversations"] = len(conv_notes)
    for n in conv_notes:
        print(f"  + {n['title'][:45]}...")

    # Sort by date, descending
    all_notes.sort(key=lambda x: x['date'], reverse=True)

    # Add the global metadata
    output = {
        "notes": all_notes,
        "stats": {
            "total": len(all_notes),
            "by_domain": domain_stats,
            "by_category": category_stats
        },
        "domains": VALID_DOMAINS,
        "types": VALID_TYPES,
        "categories": list(category_stats.keys())
    }

    with open(INDEX_FILE, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Generated {INDEX_FILE} with {len(all_notes)} notes")
    print(f"   Categories: {category_stats}")
    return output

if __name__ == "__main__":
    generate_index()
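
# To rebuild the index manually, run this script with Python 3 (the exact path of
# the script in the repo is not shown here, so the one below is illustrative):
#   python3 scripts/generate_index.py
# The output is written to kb/index.json (INDEX_FILE above).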