feat(memory): hybrid retrieval — navigation index.md + RAG hardening

Expose a navigation layer to the agent and harden RAG, after analyzing the
OKF note and testing on the real KB.

- memory_search.search(): dedupe best-chunk-per-file (a relevant note can no
  longer be buried by another file's chunks) + keyword fallback tagged
  degraded:True when Ollama is unreachable (no more hard crash).
- update_notes_index.py: emit per-folder index.md + root router; prune empty
  folders; fix latent subcategory->project bug.
- Exclude generated index.md from RAG rglob (reindex/incremental) + indexer
  scans + heartbeat freshness check (prevents self-pollution / reindex thrash).
- CLAUDE.md: reframe memory as hybrid (navigation first, RAG for fuzzy recall).
- Delete stale orphan kb/youtube/index.json; correct the OKF source note.
- Tests: dedup, keyword fallback, index.md exclusion. Plan + review in docs/.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-27 17:52:27 +00:00
parent 6e9dfd137c
commit 5c9748ffb4
23 changed files with 1526 additions and 164 deletions

View File

@@ -316,6 +316,10 @@ def _check_kb_index() -> str | None:
newer = 0
for md in kb_dir.rglob("*.md"):
# Skip generated nav files — they're written by the reindex itself, so
# comparing them against index.json mtime would cause perpetual reindex.
if md.name == "index.md":
continue
if md.stat().st_mtime > index_mtime:
newer += 1

View File

@@ -5,6 +5,7 @@ Uses Ollama all-minilm embeddings stored in SQLite for cosine similarity search.
import logging
import math
import re
import sqlite3
import struct
from datetime import datetime, timezone
@@ -62,6 +63,11 @@ def init_config(config=None) -> None:
init_config()
def _is_indexable(md_file: Path) -> bool:
"""Skip generated navigation files so they aren't embedded as if they were notes."""
return md_file.name != "index.md"
def get_db() -> sqlite3.Connection:
"""Get SQLite connection, create table if needed."""
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
@@ -211,6 +217,8 @@ def reindex() -> dict:
files_count = 0
chunks_count = 0
for md_file in sorted(MEMORY_DIR.rglob("*.md")):
if not _is_indexable(md_file):
continue
try:
n = index_file(md_file)
files_count += 1
@@ -242,6 +250,8 @@ def incremental_index() -> dict:
files_indexed = 0
chunks_total = 0
for md_file in sorted(MEMORY_DIR.rglob("*.md")):
if not _is_indexable(md_file):
continue
rel_path = str(md_file.relative_to(MEMORY_DIR))
file_mtime = datetime.fromtimestamp(
md_file.stat().st_mtime, tz=timezone.utc
@@ -264,9 +274,55 @@ def incremental_index() -> dict:
return {"indexed": files_indexed, "chunks": chunks_total}
def _keyword_fallback(query: str, top_k: int = 5) -> list[dict]:
"""Keyword search over indexed chunks. Used when the embedding backend is down.
Returns the same shape as search() plus "degraded": True so callers can
tell the user that semantic recall was unavailable. Ranks best-chunk-per-file
by raw term-occurrence count.
"""
terms = [t for t in re.findall(r"\w+", query.lower()) if len(t) > 2]
conn = get_db()
try:
rows = conn.execute("SELECT file_path, chunk_text FROM chunks").fetchall()
finally:
conn.close()
best: dict[str, dict] = {}
for file_path, chunk_text in rows:
low = chunk_text.lower()
hits = sum(low.count(t) for t in terms) if terms else 0
if hits == 0:
continue
cur = best.get(file_path)
if cur is None or hits > cur["score"]:
best[file_path] = {
"file": file_path,
"chunk": chunk_text,
"score": float(hits),
"degraded": True,
}
scored = sorted(best.values(), key=lambda x: x["score"], reverse=True)
return scored[:top_k]
def search(query: str, top_k: int = 5) -> list[dict]:
"""Search for query. Returns list of {"file": str, "chunk": str, "score": float}."""
query_embedding = get_embedding(query)
"""Search for query. Returns list of {"file": str, "chunk": str, "score": float}.
Results are deduped to the best-scoring chunk per file, so a relevant note
can't be buried by another file contributing several chunks. If the embedding
backend (Ollama) is unreachable, falls back to keyword search and tags each
result with "degraded": True instead of raising.
"""
try:
query_embedding = get_embedding(query)
except ConnectionError as e:
log.warning(
"Embedding backend unavailable (%s); falling back to keyword search", e
)
return _keyword_fallback(query, top_k)
conn = get_db()
try:
@@ -279,11 +335,13 @@ def search(query: str, top_k: int = 5) -> list[dict]:
if not rows:
return []
scored = []
best: dict[str, dict] = {}
for file_path, chunk_text, emb_blob in rows:
emb = deserialize_embedding(emb_blob)
score = cosine_similarity(query_embedding, emb)
scored.append({"file": file_path, "chunk": chunk_text, "score": score})
cur = best.get(file_path)
if cur is None or score > cur["score"]:
best[file_path] = {"file": file_path, "chunk": chunk_text, "score": score}
scored.sort(key=lambda x: x["score"], reverse=True)
scored = sorted(best.values(), key=lambda x: x["score"], reverse=True)
return scored[:top_k]