feat(memory): hybrid retrieval — navigation index.md + RAG hardening
Expose a navigation layer to the agent and harden RAG, after analyzing the OKF note and testing on the real KB.

- memory_search.search(): dedupe best-chunk-per-file (a relevant note can no longer be buried by another file's chunks) + keyword fallback tagged degraded:True when Ollama is unreachable (no more hard crash).
- update_notes_index.py: emit per-folder index.md + root router; prune empty folders; fix latent subcategory->project bug.
- Exclude generated index.md from RAG rglob (reindex/incremental) + indexer scans + heartbeat freshness check (prevents self-pollution / reindex thrash).
- CLAUDE.md: reframe memory as hybrid (navigation first, RAG for fuzzy recall).
- Delete stale orphan kb/youtube/index.json; correct the OKF source note.
- Tests: dedup, keyword fallback, index.md exclusion.

Plan + review in docs/.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -186,6 +186,8 @@ def scan_directory(dir_path, category, subcategory=None, recursive=False):
|
||||
for filepath in dir_path.rglob("*.md"):
|
||||
if filepath.name.startswith('.') or 'template' in filepath.name.lower():
|
||||
continue
|
||||
if filepath.name == 'index.md': # generated nav file, not a note
|
||||
continue
|
||||
try:
|
||||
# Determină project și subdir din path
|
||||
# Ex: projects/grup-sprijin/biblioteca/file.md
|
||||
@@ -206,6 +208,8 @@ def scan_directory(dir_path, category, subcategory=None, recursive=False):
|
||||
for filepath in sorted(dir_path.glob("*.md"), reverse=True):
|
||||
if filepath.name.startswith('.') or 'template' in filepath.name.lower():
|
||||
continue
|
||||
if filepath.name == 'index.md': # generated nav file, not a note
|
||||
continue
|
||||
try:
|
||||
metadata = extract_metadata(filepath, category, subcategory)
|
||||
# Aplică defaults pentru categoria specială
|
||||
@@ -223,6 +227,81 @@ def scan_directory(dir_path, category, subcategory=None, recursive=False):
|
||||
|
||||
return notes
|
||||
|
||||
def _slim_tags(domains, tags):
|
||||
parts = [f"@{d}" for d in domains] + [f"#{t}" for t in tags]
|
||||
return " ".join(parts)
|
||||
|
||||
|
||||
def write_folder_indexes():
    """Generate a slim index.md per KB folder plus a root router index.md.

    Agent-facing: one entry per note with title, tags and a one-line
    description. Intended to be cheap to read (a folder is roughly a few
    thousand tokens) and to work as a fallback without embeddings.

    Owns the full lifecycle of the generated files:
      * every non-hidden directory directly under KB_ROOT that contains at
        least one note gets ``<folder>/index.md`` (notes are collected
        recursively);
      * a folder with no notes has its stale ``index.md`` deleted, if any;
      * ``KB_ROOT/index.md`` is rewritten to link every folder index that was
        written, followed by any loose notes sitting directly in KB_ROOT.

    Returns:
        A list of ``(folder_name, note_count)`` tuples, one per folder index
        written, in sorted folder order.
    """
    written = []
    for subdir in sorted(KB_ROOT.iterdir()):
        if not subdir.is_dir() or subdir.name.startswith('.'):
            continue

        notes = []
        for fp in sorted(subdir.rglob("*.md")):
            if not _is_indexable_note(fp):
                continue
            try:
                md = extract_metadata(fp, subdir.name)
            except Exception as e:
                # Best-effort: one unreadable note must not block the index.
                print(f" ! index.md skip {fp}: {e}")
                continue
            notes.append((
                # POSIX separators keep the Markdown link valid on any OS.
                fp.relative_to(subdir).as_posix(),
                # Fall back to the file stem so a title-less note cannot
                # abort the whole run (same fallback as the root-level notes).
                md.get('title') or fp.stem,
                md.get('domains') or [],
                md.get('tags') or [],
                md.get('tldr') or '',
            ))

        index_path = subdir / "index.md"
        if not notes:
            if index_path.exists():  # prune stale nav file for now-empty folder
                index_path.unlink()
            continue

        lines = [
            f"# Index — {subdir.name}/", "",
            f"> {len(notes)} note. Citește acest index întâi; deschide doar fișierele relevante.", "",
        ]
        for rel, title, domains, tags, tldr in notes:
            tagstr = _slim_tags(domains, tags)
            tagpart = f" `{tagstr}`" if tagstr.strip() else ""
            # Collapse all whitespace and cap the description at 140 chars.
            desc = re.sub(r'\s+', ' ', tldr).strip()[:140]
            lines.append(f"- **[{title}]({rel})**{tagpart}")
            if desc:
                lines.append(f" {desc}")
        index_path.write_text("\n".join(lines) + "\n", encoding='utf-8')
        written.append((subdir.name, len(notes)))

    # Root router: the agent reads this first, picks a folder, then reads
    # <folder>/index.md.
    root_lines = [
        "# Index — knowledge base (memory/kb)", "",
        "> Router. Alege folderul relevant, apoi citește `<folder>/index.md`.", "",
    ]
    root_lines.extend(
        f"- **[{name}/]({name}/index.md)** — {count} note"
        for name, count in written
    )

    loose = [fp for fp in sorted(KB_ROOT.glob("*.md")) if _is_indexable_note(fp)]
    if loose:
        root_lines += ["", "## Note la rădăcină", ""]
        for fp in loose:
            try:
                title = extract_metadata(fp, "kb")['title']
            except Exception:
                # Deliberate best-effort: still list the note, using the
                # file stem when its metadata cannot be read.
                title = fp.stem
            root_lines.append(f"- **[{title}]({fp.name})**")
    (KB_ROOT / "index.md").write_text("\n".join(root_lines) + "\n", encoding='utf-8')

    total = sum(count for _, count in written)
    print(f"✅ Generated {len(written)} folder index.md files + root router ({total} notes)")
    return written


def _is_indexable_note(fp):
    """Return True when *fp* is a real note: not a generated index.md, a dotfile, or a template."""
    name = fp.name
    return name != 'index.md' and not name.startswith('.') and 'template' not in name.lower()
|
||||
|
||||
|
||||
def generate_index():
|
||||
"""Generează index.json din toate sursele"""
|
||||
all_notes = []
|
||||
@@ -241,8 +320,8 @@ def generate_index():
|
||||
all_notes.extend(notes)
|
||||
category_stats[category] = len(notes)
|
||||
for n in notes:
|
||||
sub = f"/{n['subcategory']}" if n.get('subcategory') else ""
|
||||
print(f" + {n['title'][:42]}...")
|
||||
sub = f"/{n['project']}" if n.get('project') else ""
|
||||
print(f" + {n['title'][:42]}{sub}")
|
||||
for d in n['domains']:
|
||||
domain_stats[d] += 1
|
||||
|
||||
@@ -283,6 +362,9 @@ def generate_index():
|
||||
|
||||
print(f"\n✅ Generated {INDEX_FILE} with {len(all_notes)} notes")
|
||||
print(f" Categories: {category_stats}")
|
||||
|
||||
# Agent-facing navigation layer (per-folder index.md + root router)
|
||||
write_folder_indexes()
|
||||
return output
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user