feat(memory): hybrid retrieval — navigation index.md + RAG hardening

Expose a navigation layer to the agent and harden RAG, after analyzing the
OKF note and testing on the real KB.

- memory_search.search(): dedupe best-chunk-per-file (a relevant note can no
  longer be buried by another file's chunks) + keyword fallback tagged
  degraded:True when Ollama is unreachable (no more hard crash).
- update_notes_index.py: emit per-folder index.md + root router; prune empty
  folders; fix latent subcategory->project bug.
- Exclude generated index.md from RAG rglob (reindex/incremental) + indexer
  scans + heartbeat freshness check (prevents self-pollution / reindex thrash).
- CLAUDE.md: reframe memory as hybrid (navigation first, RAG for fuzzy recall).
- Delete stale orphan kb/youtube/index.json; correct the OKF source note.
- Tests: dedup, keyword fallback, index.md exclusion. Plan + review in docs/.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-27 17:52:27 +00:00
parent 6e9dfd137c
commit 5c9748ffb4
23 changed files with 1526 additions and 164 deletions

View File

@@ -186,6 +186,8 @@ def scan_directory(dir_path, category, subcategory=None, recursive=False):
for filepath in dir_path.rglob("*.md"):
if filepath.name.startswith('.') or 'template' in filepath.name.lower():
continue
if filepath.name == 'index.md': # generated nav file, not a note
continue
try:
# Determină project și subdir din path
# Ex: projects/grup-sprijin/biblioteca/file.md
@@ -206,6 +208,8 @@ def scan_directory(dir_path, category, subcategory=None, recursive=False):
for filepath in sorted(dir_path.glob("*.md"), reverse=True):
if filepath.name.startswith('.') or 'template' in filepath.name.lower():
continue
if filepath.name == 'index.md': # generated nav file, not a note
continue
try:
metadata = extract_metadata(filepath, category, subcategory)
# Aplică defaults pentru categoria specială
@@ -223,6 +227,81 @@ def scan_directory(dir_path, category, subcategory=None, recursive=False):
return notes
def _slim_tags(domains, tags):
parts = [f"@{d}" for d in domains] + [f"#{t}" for t in tags]
return " ".join(parts)
def write_folder_indexes():
    """Generate a slim index.md per KB folder plus a root router index.md.

    The output is agent-facing: for every note, its title, tags and a
    one-line description. It is meant to be cheap to read (one folder is
    roughly a few thousand tokens) and to work as a fallback when embeddings
    are unavailable.

    This function owns the full lifecycle of the generated files: a folder
    that no longer contains any notes has its stale index.md removed and is
    left out of the root router.

    Returns:
        A list of ``(folder_name, note_count)`` tuples, one per folder
        index.md that was written, in sorted folder order.
    """
    written = []
    for subdir in sorted(KB_ROOT.iterdir()):
        if not subdir.is_dir() or subdir.name.startswith('.'):
            continue

        notes = []
        for fp in sorted(subdir.rglob("*.md")):
            # Skip generated nav files, hidden files and templates.
            if fp.name == 'index.md' or fp.name.startswith('.') or 'template' in fp.name.lower():
                continue
            try:
                md = extract_metadata(fp, subdir.name)
            except Exception as e:
                # Best-effort: one unreadable note must not block the index.
                print(f" ! index.md skip {fp}: {e}")
                continue
            notes.append((
                # as_posix() keeps forward slashes on every platform, so the
                # Markdown link target stays valid on Windows too.
                fp.relative_to(subdir).as_posix(),
                # Fall back to the file stem, as the root section does, so a
                # note without a title cannot abort the whole run.
                md.get('title') or fp.stem,
                # "or" also covers keys that are present but set to None.
                md.get('domains') or [],
                md.get('tags') or [],
                md.get('tldr') or '',
            ))

        index_path = subdir / "index.md"
        if not notes:
            if index_path.exists():  # prune stale nav file for now-empty folder
                index_path.unlink()
            continue

        lines = [
            f"# Index — {subdir.name}/", "",
            f"> {len(notes)} note. Citește acest index întâi; deschide doar fișierele relevante.", "",
        ]
        for rel, title, domains, tags, tldr in notes:
            tagstr = _slim_tags(domains, tags)
            tagpart = f" `{tagstr}`" if tagstr.strip() else ""
            # Collapse whitespace so the description stays on one line, then
            # cap it at 140 characters.
            desc = re.sub(r'\s+', ' ', tldr).strip()[:140]
            lines.append(f"- **[{title}]({rel})**{tagpart}")
            if desc:
                lines.append(f" {desc}")
        index_path.write_text("\n".join(lines) + "\n", encoding='utf-8')
        written.append((subdir.name, len(notes)))

    # Root router: the agent reads this first, picks a folder, then reads
    # <folder>/index.md.
    root_lines = [
        "# Index — knowledge base (memory/kb)", "",
        "> Router. Alege folderul relevant, apoi citește `<folder>/index.md`.", "",
    ]
    for name, count in written:
        root_lines.append(f"- **[{name}/]({name}/index.md)** — {count} note")

    # Notes that sit directly in the KB root belong to no folder index.
    loose = [
        fp for fp in sorted(KB_ROOT.glob("*.md"))
        if fp.name != 'index.md' and not fp.name.startswith('.') and 'template' not in fp.name.lower()
    ]
    if loose:
        root_lines += ["", "## Note la rădăcină", ""]
        for fp in loose:
            try:
                title = extract_metadata(fp, "kb").get('title') or fp.stem
            except Exception:
                # Best-effort: still list the file, labelled by its stem.
                title = fp.stem
            root_lines.append(f"- **[{title}]({fp.name})**")
    (KB_ROOT / "index.md").write_text("\n".join(root_lines) + "\n", encoding='utf-8')

    total = sum(c for _, c in written)
    print(f"✅ Generated {len(written)} folder index.md files + root router ({total} notes)")
    return written
def generate_index():
"""Generează index.json din toate sursele"""
all_notes = []
@@ -241,8 +320,8 @@ def generate_index():
all_notes.extend(notes)
category_stats[category] = len(notes)
for n in notes:
sub = f"/{n['subcategory']}" if n.get('subcategory') else ""
print(f" + {n['title'][:42]}...")
sub = f"/{n['project']}" if n.get('project') else ""
print(f" + {n['title'][:42]}{sub}")
for d in n['domains']:
domain_stats[d] += 1
@@ -283,6 +362,9 @@ def generate_index():
print(f"\n✅ Generated {INDEX_FILE} with {len(all_notes)} notes")
print(f" Categories: {category_stats}")
# Agent-facing navigation layer (per-folder index.md + root router)
write_folder_indexes()
return output
if __name__ == "__main__":