feat(memory): hybrid retrieval — navigation index.md + RAG hardening

Expose a navigation layer to the agent and harden RAG, after analyzing the
OKF note and testing on the real KB.

- memory_search.search(): dedupe best-chunk-per-file (a relevant note can no
  longer be buried by another file's chunks) + keyword fallback tagged
  degraded:True when Ollama is unreachable (no more hard crash).
- update_notes_index.py: emit per-folder index.md + root router; prune empty
  folders; fix latent subcategory->project bug.
- Exclude generated index.md from RAG rglob (reindex/incremental) + indexer
  scans + heartbeat freshness check (prevents self-pollution / reindex thrash).
- CLAUDE.md: reframe memory as hybrid (navigation first, RAG for fuzzy recall).
- Delete stale orphan kb/youtube/index.json; correct the OKF source note.
- Tests: dedup, keyword fallback, index.md exclusion.

Plan + review in docs/.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 17:52:27 +00:00
parent 6e9dfd137c
commit 5c9748ffb4
23 changed files with 1526 additions and 164 deletions
--- a/src/heartbeat.py
+++ b/src/heartbeat.py
@@ -316,6 +316,10 @@ def _check_kb_index() -> str | None:

    newer = 0
    for md in kb_dir.rglob("*.md"):
+        # Skip generated nav files — they're written by the reindex itself, so
+        # comparing them against index.json mtime would cause perpetual reindex.
+        if md.name == "index.md":
+            continue
        if md.stat().st_mtime > index_mtime:
            newer += 1

--- a/src/memory_search.py
+++ b/src/memory_search.py
@@ -5,6 +5,7 @@ Uses Ollama all-minilm embeddings stored in SQLite for cosine similarity search.

 import logging
 import math
+import re
 import sqlite3
 import struct
 from datetime import datetime, timezone
@@ -62,6 +63,11 @@ def init_config(config=None) -> None:
 init_config()


+def _is_indexable(md_file: Path) -> bool:
+    """Skip generated navigation files so they aren't embedded as if they were notes."""
+    return md_file.name != "index.md"
+
+
 def get_db() -> sqlite3.Connection:
    """Get SQLite connection, create table if needed."""
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
@@ -211,6 +217,8 @@ def reindex() -> dict:
    files_count = 0
    chunks_count = 0
    for md_file in sorted(MEMORY_DIR.rglob("*.md")):
+        if not _is_indexable(md_file):
+            continue
        try:
            n = index_file(md_file)
            files_count += 1
@@ -242,6 +250,8 @@ def incremental_index() -> dict:
    files_indexed = 0
    chunks_total = 0
    for md_file in sorted(MEMORY_DIR.rglob("*.md")):
+        if not _is_indexable(md_file):
+            continue
        rel_path = str(md_file.relative_to(MEMORY_DIR))
        file_mtime = datetime.fromtimestamp(
            md_file.stat().st_mtime, tz=timezone.utc
@@ -264,9 +274,55 @@ def incremental_index() -> dict:
    return {"indexed": files_indexed, "chunks": chunks_total}


+def _keyword_fallback(query: str, top_k: int = 5) -> list[dict]:
+    """Keyword search over indexed chunks. Used when the embedding backend is down.
+
+    Returns the same shape as search() plus "degraded": True so callers can
+    tell the user that semantic recall was unavailable. Ranks best-chunk-per-file
+    by raw term-occurrence count.
+    """
+    terms = [t for t in re.findall(r"\w+", query.lower()) if len(t) > 2]
+
+    conn = get_db()
+    try:
+        rows = conn.execute("SELECT file_path, chunk_text FROM chunks").fetchall()
+    finally:
+        conn.close()
+
+    best: dict[str, dict] = {}
+    for file_path, chunk_text in rows:
+        low = chunk_text.lower()
+        hits = sum(low.count(t) for t in terms) if terms else 0
+        if hits == 0:
+            continue
+        cur = best.get(file_path)
+        if cur is None or hits > cur["score"]:
+            best[file_path] = {
+                "file": file_path,
+                "chunk": chunk_text,
+                "score": float(hits),
+                "degraded": True,
+            }
+
+    scored = sorted(best.values(), key=lambda x: x["score"], reverse=True)
+    return scored[:top_k]
+
+
 def search(query: str, top_k: int = 5) -> list[dict]:
-    """Search for query. Returns list of {"file": str, "chunk": str, "score": float}."""
-    query_embedding = get_embedding(query)
+    """Search for query. Returns list of {"file": str, "chunk": str, "score": float}.
+
+    Results are deduped to the best-scoring chunk per file, so a relevant note
+    can't be buried by another file contributing several chunks. If the embedding
+    backend (Ollama) is unreachable, falls back to keyword search and tags each
+    result with "degraded": True instead of raising.
+    """
+    try:
+        query_embedding = get_embedding(query)
+    except ConnectionError as e:
+        log.warning(
+            "Embedding backend unavailable (%s); falling back to keyword search", e
+        )
+        return _keyword_fallback(query, top_k)

    conn = get_db()
    try:
@@ -279,11 +335,13 @@ def search(query: str, top_k: int = 5) -> list[dict]:
    if not rows:
        return []

-    scored = []
+    best: dict[str, dict] = {}
    for file_path, chunk_text, emb_blob in rows:
        emb = deserialize_embedding(emb_blob)
        score = cosine_similarity(query_embedding, emb)
-        scored.append({"file": file_path, "chunk": chunk_text, "score": score})
+        cur = best.get(file_path)
+        if cur is None or score > cur["score"]:
+            best[file_path] = {"file": file_path, "chunk": chunk_text, "score": score}

-    scored.sort(key=lambda x: x["score"], reverse=True)
+    scored = sorted(best.values(), key=lambda x: x["score"], reverse=True)
    return scored[:top_k]