#!/usr/bin/env python3
"""
Auto-Memory Hook for OpenClaw

Analyzes agent turns and stores valuable information in ChromaDB.

Usage:
    echo '{"user":"...","assistant":"...","agent_id":"case","session":"abc"}' | python3 auto-memory-hook.py
    python3 auto-memory-hook.py --user "msg" --assistant "resp" --agent-id case --session abc
"""
import sys, json, os, re, hashlib, logging
from datetime import datetime, timezone
from pathlib import Path

import requests
import chromadb

# --- Config ---
CHROMADB_HOST = os.environ.get("CHROMADB_HOST", "192.168.86.25")
CHROMADB_PORT = int(os.environ.get("CHROMADB_PORT", "8000"))
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.40:11434")
EMBED_MODEL = "nomic-embed-text"
LLM_MODEL = "qwen3:8b"
COLLECTION = "auto-memory"
DEDUP_THRESHOLD = 0.85  # cosine similarity; ChromaDB returns distances, so threshold = 1 - 0.85 = 0.15

LOG_DIR = Path(__file__).resolve().parent.parent / "logs"
LOG_FILE = LOG_DIR / "auto-memory.log"
LOG_DIR.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    filename=str(LOG_FILE),
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("auto-memory")


def ollama_available() -> bool:
    """Quick health check so we can fall back to heuristics when Ollama is down."""
    try:
        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=3)
        return r.status_code == 200
    except Exception:
        return False


def ollama_generate(prompt: str, timeout: float = 10) -> str:
    """Call Ollama for classification/extraction."""
    r = requests.post(
        f"{OLLAMA_URL}/api/generate",
        json={
            "model": LLM_MODEL,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.3, "num_predict": 512},
        },
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json().get("response", "")


def ollama_embed(text: str) -> list[float]:
    """Get embedding from Ollama."""
    r = requests.post(
        f"{OLLAMA_URL}/api/embed",
        json={"model": EMBED_MODEL, "input": text},
        timeout=10,
    )
    r.raise_for_status()
    data = r.json()
    # API returns {"embeddings": [[...]]}
    return data["embeddings"][0]


# --- Heuristic fallback ---
WORTH_PATTERNS = [
    r"(?i)(fix|fixed|resolved|solution|workaround)\b.*\b(by|with|using|was)\b",
    r"(?i)learned\s+that\b",
    r"(?i)(decided|decision|we('ll| will))\s+(to|go with)\b",
    r"(?i)(config|configured|set\s+up|installed)\b.*\b(on|at|for|to)\b",
    r"(?i)(password|secret|token|api[_-]?key|port|ip|url)\s*(is|=|:)\s*\S+",
    r"(?i)(error|bug|issue)\b.*\b(because|caused by|due to)\b",
    r"(?i)(important|remember|note that|fyi)\b",
    r"(?i)(created|deployed|migrated|upgraded)\b.*\b(service|server|vm|container|database)\b",
]


def heuristic_classify(user: str, assistant: str) -> tuple[bool, float]:
    """Fallback classification using regex patterns."""
    combined = f"{user}\n{assistant}"
    hits = sum(1 for p in WORTH_PATTERNS if re.search(p, combined))
    if hits == 0:
        return False, 0.0
    confidence = min(0.4 + hits * 0.15, 0.9)
    return True, confidence


def heuristic_extract(user: str, assistant: str) -> tuple[str, str]:
    """Fallback extraction: first three sentences of the assistant response + topic guess."""
    sentences = re.split(r'(?<=[.!?])\s+', assistant.strip())
    memory = " ".join(sentences[:3])[:500]  # cap at 500 chars
    # Guess topic
    topic = "general"
    topic_map = {
        "infrastructure": r"(?i)(server|vm|port|docker|nginx|systemd|network|ip|dns|proxy)",
        "code": r"(?i)(function|script|python|node|api|bug|error|code|git)",
        "config": r"(?i)(config|setting|env|variable|yaml|json|toml)",
        "decision": r"(?i)(decided|decision|chose|picked|go with)",
    }
    combined = f"{user}\n{assistant}"
    for t, pat in topic_map.items():
        if re.search(pat, combined):
            topic = t
            break
    return memory, topic
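
# A quick illustration of the fallback path (comments only, nothing here runs):
# one pattern hit yields confidence min(0.4 + 0.15, 0.9) ≈ 0.55; each extra hit
# adds 0.15 up to the 0.9 cap. The example input is hypothetical:
#
#   heuristic_classify("How did you fix it?",
#                      "Fixed the nginx timeout by raising proxy_read_timeout.")
#   # -> (True, ~0.55), via the "fix ... by" pattern
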
def llm_classify_and_extract(user: str, assistant: str) -> tuple[bool, float, str, str]:
    """Use Ollama LLM for classification and extraction."""
    prompt = f"""/no_think
You analyze agent conversations to decide if they contain information worth remembering long-term.

USER MESSAGE:
{user[:1500]}

ASSISTANT RESPONSE:
{assistant[:2000]}

Respond with EXACTLY this JSON (no other text):
{{"worth_remembering": true/false, "confidence": 0.0-1.0, "memory": "concise fact/solution/decision (1-2 sentences)", "topic": "one of: infrastructure, code, config, decision, troubleshooting, general"}}
"""
    raw = ollama_generate(prompt, timeout=8)
    # Extract JSON from response
    match = re.search(r'\{[^{}]*\}', raw, re.DOTALL)
    if not match:
        raise ValueError(f"No JSON in LLM response: {raw[:200]}")
    data = json.loads(match.group())
    return (
        bool(data.get("worth_remembering", False)),
        float(data.get("confidence", 0.5)),
        str(data.get("memory", "")),
        str(data.get("topic", "general")),
    )


def get_collection():
    client = chromadb.HttpClient(host=CHROMADB_HOST, port=CHROMADB_PORT)
    return client.get_or_create_collection(name=COLLECTION, metadata={"hnsw:space": "cosine"})


def check_duplicate(collection, embedding: list[float]) -> bool:
    """Check if a similar memory already exists. Returns True if duplicate."""
    try:
        results = collection.query(query_embeddings=[embedding], n_results=1)
        if results and results["distances"] and results["distances"][0]:
            # For cosine space, distance = 1 - similarity
            distance = results["distances"][0][0]
            similarity = 1 - distance
            if similarity > DEDUP_THRESHOLD:
                log.info(f"Duplicate found (similarity={similarity:.3f}), skipping")
                return True
    except Exception as e:
        log.warning(f"Dedup check failed: {e}")
    return False


def store_memory(collection, memory: str, embedding: list[float], metadata: dict):
    doc_id = hashlib.sha256(memory.encode()).hexdigest()[:16]
    collection.add(
        documents=[memory],
        embeddings=[embedding],
        metadatas=[metadata],
        ids=[doc_id],
    )
    log.info(f"Stored memory [{doc_id}]: {memory[:100]}...")


def process_turn(user: str, assistant: str, agent_id: str = "unknown", session: str = "unknown"):
    """Main pipeline."""
    # Skip trivial interactions
    if len(assistant) < 50:
        log.info("SKIP: response too short")
        return

    use_llm = ollama_available()

    # 1. Classify & Extract
    if use_llm:
        try:
            worth, confidence, memory, topic = llm_classify_and_extract(user, assistant)
        except Exception as e:
            log.warning(f"LLM failed ({e}), falling back to heuristics")
            use_llm = False

    if not use_llm:
        worth, confidence = heuristic_classify(user, assistant)
        if worth:
            memory, topic = heuristic_extract(user, assistant)
        else:
            memory, topic = "", "general"

    if not worth or not memory:
        log.info(f"SKIP: not worth remembering (confidence={confidence:.2f})")
        return

    log.info(f"CANDIDATE: topic={topic} confidence={confidence:.2f} memory={memory[:80]}...")

    # 2. Embed
    try:
        embedding = ollama_embed(memory)
    except Exception as e:
        log.error(f"Embedding failed: {e}")
        return
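
    # The embedding computed above is reused for both the dedup query and the
    # final write, so each candidate memory costs a single embed call.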
    # 3. Dedup & Store
    try:
        collection = get_collection()
        if check_duplicate(collection, embedding):
            return
        metadata = {
            "date": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
            "agent_id": agent_id,
            "topic": topic,
            "confidence": confidence,
            "source": f"session:{session}",
        }
        store_memory(collection, memory, embedding, metadata)
        print(json.dumps({"stored": True, "memory": memory, "topic": topic, "confidence": confidence}))
    except Exception as e:
        log.error(f"ChromaDB error: {e}")
        print(json.dumps({"stored": False, "error": str(e)}))


def main():
    import argparse

    parser = argparse.ArgumentParser(description="Auto-memory hook")
    parser.add_argument("--user", help="User message")
    parser.add_argument("--assistant", help="Assistant response")
    parser.add_argument("--agent-id", default="unknown")
    parser.add_argument("--session", default="unknown")
    args = parser.parse_args()

    if args.user and args.assistant:
        process_turn(args.user, args.assistant, args.agent_id, args.session)
    elif not sys.stdin.isatty():
        data = json.load(sys.stdin)
        process_turn(
            data.get("user", ""),
            data.get("assistant", ""),
            data.get("agent_id", "unknown"),
            data.get("session", "unknown"),
        )
    else:
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()
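
# --- Example: reading memories back (illustrative sketch, not part of the hook) ---
# Assuming the same ChromaDB instance and embedding model, a consumer could
# retrieve relevant memories like this (`question` is a hypothetical variable):
#
#   client = chromadb.HttpClient(host=CHROMADB_HOST, port=CHROMADB_PORT)
#   col = client.get_collection(COLLECTION)
#   hits = col.query(query_embeddings=[ollama_embed(question)], n_results=3)
#   for doc, meta in zip(hits["documents"][0], hits["metadatas"][0]):
#       print(meta["date"], meta["topic"], doc)
#
# The hook itself only writes; retrieval is left to the agent runtime.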