Files
echo-core/src/memory_search.py
Marius Mutu 5c9748ffb4 feat(memory): hybrid retrieval — navigation index.md + RAG hardening
Expose a navigation layer to the agent and harden RAG, after analyzing the
OKF note and testing on the real KB.

- memory_search.search(): dedupe best-chunk-per-file (a relevant note can no
  longer be buried by another file's chunks) + keyword fallback tagged
  degraded:True when Ollama is unreachable (no more hard crash).
- update_notes_index.py: emit per-folder index.md + root router; prune empty
  folders; fix latent subcategory->project bug.
- Exclude generated index.md from RAG rglob (reindex/incremental) + indexer
  scans + heartbeat freshness check (prevents self-pollution / reindex thrash).
- CLAUDE.md: reframe memory as hybrid (navigation first, RAG for fuzzy recall).
- Delete stale orphan kb/youtube/index.json; correct the OKF source note.
- Tests: dedup, keyword fallback, index.md exclusion. Plan + review in docs/.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 17:52:27 +00:00

348 lines
11 KiB
Python

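"""Echo Core memory search — semantic search over the Markdown notes under memory/.

Notes are found recursively (generated index.md files are skipped), split into
chunks, embedded with Ollama (all-minilm by default) and stored in SQLite;
queries are ranked by cosine similarity. When Ollama is unreachable, search
falls back to keyword matching over the stored chunks.
"""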
import logging
import math
import re
import sqlite3
import struct
from datetime import datetime, timezone
from pathlib import Path
import httpx
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Parent of the directory that contains this file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# SQLite database file holding the chunk texts and their embeddings.
DB_PATH = PROJECT_ROOT / "memory" / "echo.sqlite"
# Root directory that is scanned recursively for .md notes.
MEMORY_DIR = PROJECT_ROOT / "memory"
# Defaults. The Ollama values can be overridden through the "ollama" section
# of config.json or a config object (see init_config).
# NOTE(review): nothing visible in this file overrides the _CHUNK_* values
# from configuration — confirm whether a "memory" section is read elsewhere.
_OLLAMA_BASE_URL = "http://localhost:11434"
_OLLAMA_MODEL = "all-minilm"
_EMBEDDING_DIM = 384
# NOTE(review): _CHUNK_TARGET is not referenced by the chunking code visible
# here — confirm whether it is still needed.
_CHUNK_TARGET = 500
# Size limit (in characters) applied while merging paragraphs into chunks.
_CHUNK_MAX = 1000
# A trailing chunk shorter than this is merged into the previous chunk.
_CHUNK_MIN = 100
# Runtime config (populated by init_config)
OLLAMA_URL = f"{_OLLAMA_BASE_URL}/api/embeddings"
OLLAMA_MODEL = _OLLAMA_MODEL
EMBEDDING_DIM = _EMBEDDING_DIM
def init_config(config=None) -> None:
    """Load the Ollama settings into the module-level runtime config.

    Args:
        config: Optional object exposing ``get(dotted_key, default)``. When
            omitted, ``config.json`` in the project root is read instead; if
            that file does not exist the current settings are left untouched.

    An unreadable or malformed ``config.json`` is logged and ignored, so a bad
    config file can never make importing this module fail.
    """
    global OLLAMA_URL, OLLAMA_MODEL, EMBEDDING_DIM

    if config is None:
        config_file = PROJECT_ROOT / "config.json"
        if not config_file.exists():
            return
        import json

        try:
            with open(config_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            log.warning("Ignoring unreadable config %s: %s", config_file, exc)
            return

        # Tolerate a config whose top level or "ollama" entry is not an object.
        section = data.get("ollama") if isinstance(data, dict) else None
        if not isinstance(section, dict):
            section = {}
        base_url = section.get("url", _OLLAMA_BASE_URL)
        model = section.get("model", _OLLAMA_MODEL)
        dim = section.get("embedding_dim", _EMBEDDING_DIM)
    else:
        # Config object addressed with dotted keys.
        base_url = config.get("ollama.url", _OLLAMA_BASE_URL)
        model = config.get("ollama.model", _OLLAMA_MODEL)
        dim = config.get("ollama.embedding_dim", _EMBEDDING_DIM)

    if not isinstance(base_url, str):
        # e.g. "url": null — fall back instead of failing on .rstrip().
        base_url = _OLLAMA_BASE_URL

    OLLAMA_URL = f"{base_url.rstrip('/')}/api/embeddings"
    OLLAMA_MODEL = model
    EMBEDDING_DIM = dim
# Import-time side effect: apply config.json from the project root (when it
# exists) so the module works without an explicit init_config() call.
init_config()
def _is_indexable(md_file: Path) -> bool:
"""Skip generated navigation files so they aren't embedded as if they were notes."""
return md_file.name != "index.md"
def get_db() -> sqlite3.Connection:
    """Open the chunk database, creating the directory and schema on first use.

    Returns:
        An open connection to ``DB_PATH``; the caller is responsible for
        closing it.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(str(DB_PATH))

    schema_statements = (
        """CREATE TABLE IF NOT EXISTS chunks (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            file_path TEXT NOT NULL,
            chunk_index INTEGER NOT NULL,
            chunk_text TEXT NOT NULL,
            embedding BLOB NOT NULL,
            updated_at TEXT NOT NULL,
            UNIQUE(file_path, chunk_index)
        )""",
        "CREATE INDEX IF NOT EXISTS idx_file_path ON chunks(file_path)",
    )
    for statement in schema_statements:
        connection.execute(statement)

    connection.commit()
    return connection
def get_embedding(text: str) -> list[float]:
    """Request an embedding vector for *text* from Ollama.

    Returns:
        The embedding as a list of ``EMBEDDING_DIM`` floats.

    Raises:
        ConnectionError: Ollama could not be reached, the request timed out or
            failed in transit, or the API answered with an HTTP error status.
        ValueError: The returned vector does not have ``EMBEDDING_DIM`` entries.
    """
    try:
        resp = httpx.post(
            OLLAMA_URL,
            json={"model": OLLAMA_MODEL, "prompt": text},
            timeout=30.0,
        )
        resp.raise_for_status()
    except httpx.ConnectError as e:
        raise ConnectionError(
            f"Cannot connect to Ollama at {OLLAMA_URL}. Is Ollama running?"
        ) from e
    except httpx.TransportError as e:
        # Timeouts, dropped connections and protocol errors also mean the
        # backend is unavailable; surface them as ConnectionError so callers
        # that degrade gracefully on ConnectionError handle them too.
        raise ConnectionError(
            f"Ollama request to {OLLAMA_URL} failed: {type(e).__name__}"
        ) from e
    except httpx.HTTPStatusError as e:
        raise ConnectionError(
            f"Ollama API error: {e.response.status_code}"
        ) from e

    embedding = resp.json()["embedding"]
    if len(embedding) != EMBEDDING_DIM:
        raise ValueError(
            f"Expected {EMBEDDING_DIM} dimensions, got {len(embedding)}"
        )
    return embedding
def serialize_embedding(embedding: list[float]) -> bytes:
    """Encode a vector as consecutive 4-byte floats for storage in a BLOB."""
    count = len(embedding)
    layout = "%df" % count
    return struct.pack(layout, *embedding)
def deserialize_embedding(data: bytes) -> list[float]:
    """Decode a BLOB of consecutive 4-byte floats back into a list of floats."""
    float_count = len(data) // 4
    layout = "%df" % float_count
    values = struct.unpack(layout, data)
    return [*values]
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine of the angle between *a* and *b*.

    A zero-length vector on either side yields 0.0 instead of dividing by zero.
    """
    magnitude_a = math.sqrt(sum(v * v for v in a))
    magnitude_b = math.sqrt(sum(v * v for v in b))
    if not (magnitude_a and magnitude_b):
        return 0.0
    inner = sum(p * q for p, q in zip(a, b))
    return inner / (magnitude_a * magnitude_b)
def _split_oversized(part: str, limit: int) -> list[str]:
    """Cut *part* into pieces of at most *limit* characters, preferring whitespace."""
    limit = max(1, limit)  # guard against a non-positive limit looping forever
    pieces: list[str] = []
    remaining = part
    while len(remaining) > limit:
        # Prefer a line break, then a space, so words stay intact.
        cut = remaining.rfind("\n", 0, limit + 1)
        if cut <= 0:
            cut = remaining.rfind(" ", 0, limit + 1)
        if cut <= 0:
            cut = limit  # no whitespace inside the window: hard cut
        head = remaining[:cut].strip()
        if head:
            pieces.append(head)
        remaining = remaining[cut:].strip()
    if remaining:
        pieces.append(remaining)
    return pieces


def chunk_file(file_path: Path) -> list[str]:
    """Split a Markdown file into chunks for embedding.

    The text is cut at headers and blank lines. A single piece longer than
    ``_CHUNK_MAX`` characters is broken up at whitespace, and neighbouring
    pieces are then merged for as long as the merged chunk stays within
    ``_CHUNK_MAX``. A trailing chunk shorter than ``_CHUNK_MIN`` is appended
    to the previous chunk, which may push that one chunk slightly over the
    limit.

    Returns:
        The chunks in file order; an empty list for a blank file.
    """
    text = file_path.read_text(encoding="utf-8")
    if not text.strip():
        return []

    # Pass 1: cut at headers and blank lines (paragraph boundaries).
    raw_parts: list[str] = []
    current = ""
    for line in text.split("\n"):
        if line.startswith("#") and current.strip():
            # A header starts a new part and stays attached to what follows.
            raw_parts.append(current.strip())
            current = line + "\n"
        elif line.strip() == "" and current.strip():
            raw_parts.append(current.strip())
            current = ""
        else:
            current += line + "\n"
    if current.strip():
        raw_parts.append(current.strip())

    # Pass 2: no single part may exceed the limit on its own.
    parts = [
        piece
        for part in raw_parts
        for piece in _split_oversized(part, _CHUNK_MAX)
    ]

    # Pass 3: greedily merge neighbours while the result fits.
    separator = "\n\n"
    chunks: list[str] = []
    buffer = ""
    for part in parts:
        if not buffer:
            buffer = part
        elif len(buffer) + len(separator) + len(part) > _CHUNK_MAX:
            chunks.append(buffer)
            buffer = part
        else:
            buffer = buffer + separator + part

    if buffer:
        # Avoid emitting a tiny trailing chunk on its own.
        if len(buffer) < _CHUNK_MIN and chunks:
            chunks[-1] = chunks[-1] + separator + buffer
        else:
            chunks.append(buffer)
    return chunks
def index_file(file_path: Path) -> int:
    """(Re)index one file, replacing any chunks previously stored for it.

    All embeddings are computed before the database is touched, so the write
    transaction is not held open during the embedding requests and a failure
    there leaves the existing rows for the file untouched. A file that has
    become blank has its old chunks removed.

    Args:
        file_path: Path of a file located under ``MEMORY_DIR``.

    Returns:
        The number of chunks stored (0 for a blank file).

    Raises:
        ValueError: *file_path* is not inside ``MEMORY_DIR``.
        Exception: Whatever ``chunk_file`` or ``get_embedding`` raises.
    """
    rel_path = str(file_path.relative_to(MEMORY_DIR))
    chunks = chunk_file(file_path)
    embeddings = [get_embedding(chunk_text) for chunk_text in chunks]

    now = datetime.now(timezone.utc).isoformat()
    rows = [
        (rel_path, i, chunk_text, serialize_embedding(embedding), now)
        for i, (chunk_text, embedding) in enumerate(zip(chunks, embeddings))
    ]

    conn = get_db()
    try:
        # Delete even when there is nothing to insert, so a file that was
        # emptied does not keep serving its old content.
        conn.execute("DELETE FROM chunks WHERE file_path = ?", (rel_path,))
        conn.executemany(
            """INSERT INTO chunks (file_path, chunk_index, chunk_text, embedding, updated_at)
               VALUES (?, ?, ?, ?, ?)""",
            rows,
        )
        conn.commit()
    finally:
        conn.close()
    return len(rows)
def reindex() -> dict:
    """Rebuild the index for every indexable .md file under ``MEMORY_DIR``.

    Each file's chunks are replaced by ``index_file``. Afterwards, rows that
    belong to files which no longer exist, are no longer indexable, or are
    now blank are removed. A file whose indexing fails keeps its previous
    rows, so running this while the embedding backend is down cannot wipe the
    index.

    Returns:
        ``{"files": N, "chunks": M}`` — files indexed successfully and the
        total number of chunks stored for them.
    """
    files_count = 0
    chunks_count = 0
    keep: set[str] = set()  # file_path values whose rows must survive pruning

    for md_file in sorted(MEMORY_DIR.rglob("*.md")):
        if not _is_indexable(md_file):
            continue
        rel_path = str(md_file.relative_to(MEMORY_DIR))
        try:
            n = index_file(md_file)
        except Exception as e:
            # Keep whatever was indexed before rather than losing the note.
            keep.add(rel_path)
            log.warning("Failed to index %s: %s", md_file, e)
            continue
        files_count += 1
        chunks_count += n
        if n:
            keep.add(rel_path)
        log.info("Indexed %s (%d chunks)", md_file.name, n)

    conn = get_db()
    try:
        known = {
            row[0]
            for row in conn.execute("SELECT DISTINCT file_path FROM chunks")
        }
        stale = sorted(known - keep)
        conn.executemany(
            "DELETE FROM chunks WHERE file_path = ?",
            [(path,) for path in stale],
        )
        conn.commit()
    finally:
        conn.close()

    return {"files": files_count, "chunks": chunks_count}
def incremental_index() -> dict:
    """Index new or modified .md files and drop rows for files that are gone.

    A file is (re)indexed when it has no rows yet, or when its modification
    time is newer than the latest ``updated_at`` stored for it. Rows whose
    file no longer exists on disk as an indexable .md file are deleted so
    that search stops returning removed notes.

    Returns:
        ``{"indexed": N, "chunks": M}`` — files (re)indexed in this run and
        the total number of chunks stored for them.
    """
    conn = get_db()
    try:
        # Latest updated_at per file currently in the index.
        rows = conn.execute(
            "SELECT file_path, MAX(updated_at) FROM chunks GROUP BY file_path"
        ).fetchall()
    finally:
        conn.close()

    known_paths: set[str] = set()
    db_times: dict[str, datetime] = {}
    for rel_path, updated_at in rows:
        known_paths.add(rel_path)
        try:
            stamp = datetime.fromisoformat(updated_at)
        except (ValueError, TypeError):
            # Unparsable timestamp: treat the file as never indexed.
            continue
        if stamp.tzinfo is None:
            # Make the value offset-aware so it compares with the mtime below.
            stamp = stamp.replace(tzinfo=timezone.utc)
        db_times[rel_path] = stamp

    files_indexed = 0
    chunks_total = 0
    on_disk: set[str] = set()
    for md_file in sorted(MEMORY_DIR.rglob("*.md")):
        if not _is_indexable(md_file):
            continue
        rel_path = str(md_file.relative_to(MEMORY_DIR))
        on_disk.add(rel_path)
        try:
            # stat() is inside the try: the file may vanish during the scan.
            file_mtime = datetime.fromtimestamp(
                md_file.stat().st_mtime, tz=timezone.utc
            )
            db_time = db_times.get(rel_path)
            if db_time is not None and file_mtime <= db_time:
                continue
            n = index_file(md_file)
        except Exception as e:
            log.warning("Failed to index %s: %s", md_file, e)
            continue
        files_indexed += 1
        chunks_total += n
        log.info("Incremental indexed %s (%d chunks)", md_file.name, n)

    stale = sorted(known_paths - on_disk)
    if stale:
        conn = get_db()
        try:
            conn.executemany(
                "DELETE FROM chunks WHERE file_path = ?",
                [(path,) for path in stale],
            )
            conn.commit()
        finally:
            conn.close()
        log.info("Pruned %d stale file(s) from the index", len(stale))

    return {"indexed": files_indexed, "chunks": chunks_total}
def _keyword_fallback(query: str, top_k: int = 5) -> list[dict]:
    """Rank indexed chunks by keyword hits when no embedding is available.

    The query is lower-cased and split into word tokens; tokens of one or two
    characters are ignored. Every chunk is scored by the total number of
    occurrences of the remaining tokens, only the best chunk of each file is
    kept, and the *top_k* highest scores are returned.

    Each result has the same keys as a search() result plus
    "degraded": True, so callers can tell that semantic recall was
    unavailable.
    """
    words = re.findall(r"\w+", query.lower())
    terms = [word for word in words if len(word) > 2]

    conn = get_db()
    try:
        rows = conn.execute("SELECT file_path, chunk_text FROM chunks").fetchall()
    finally:
        conn.close()

    top_per_file: dict[str, dict] = {}
    for file_path, chunk_text in rows:
        haystack = chunk_text.lower()
        hits = 0
        for term in terms:
            hits += haystack.count(term)
        if not hits:
            continue
        previous = top_per_file.get(file_path)
        if previous is not None and hits <= previous["score"]:
            continue
        top_per_file[file_path] = {
            "file": file_path,
            "chunk": chunk_text,
            "score": float(hits),
            "degraded": True,
        }

    ranked = list(top_per_file.values())
    ranked.sort(key=lambda hit: hit["score"], reverse=True)
    return ranked[:top_k]
def search(query: str, top_k: int = 5) -> list[dict]:
    """Return the *top_k* indexed files most similar to *query*.

    Each result is ``{"file": str, "chunk": str, "score": float}``, where
    ``chunk`` is the best-scoring chunk of that file. Keeping one chunk per
    file means a relevant note cannot be buried by another file that
    contributes several chunks.

    If the embedding backend is unavailable (connection refused, timeout or
    any other transport failure), the function falls back to keyword search
    and every result additionally carries ``"degraded": True``.

    Raises:
        ValueError: The backend returned a vector of unexpected dimension.
    """
    try:
        query_embedding = get_embedding(query)
    except (ConnectionError, httpx.TransportError) as e:
        # httpx.TransportError covers timeouts and dropped connections, which
        # are just as much "backend unavailable" as a refused connection.
        log.warning(
            "Embedding backend unavailable (%s); falling back to keyword search", e
        )
        return _keyword_fallback(query, top_k)

    conn = get_db()
    try:
        rows = conn.execute(
            "SELECT file_path, chunk_text, embedding FROM chunks"
        ).fetchall()
    finally:
        conn.close()

    if not rows:
        return []

    # Best-scoring chunk per file.
    best: dict[str, dict] = {}
    for file_path, chunk_text, emb_blob in rows:
        score = cosine_similarity(
            query_embedding, deserialize_embedding(emb_blob)
        )
        cur = best.get(file_path)
        if cur is None or score > cur["score"]:
            best[file_path] = {
                "file": file_path,
                "chunk": chunk_text,
                "score": score,
            }

    scored = sorted(best.values(), key=lambda x: x["score"], reverse=True)
    return scored[:top_k]