Full sync - all projects, memory, configs
This commit is contained in:
254
tools/auto-memory-hook.py
Executable file
254
tools/auto-memory-hook.py
Executable file
@ -0,0 +1,254 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Auto-Memory Hook for OpenClaw
|
||||
Analyzes agent turns and stores valuable information in ChromaDB.
|
||||
Usage:
|
||||
echo '{"user":"...","assistant":"...","agent_id":"case","session":"abc"}' | python3 auto-memory-hook.py
|
||||
python3 auto-memory-hook.py --user "msg" --assistant "resp" --agent-id case --session abc
|
||||
"""
|
||||
|
||||
import sys, json, os, re, time, uuid, hashlib, logging
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
import chromadb
|
||||
|
||||
# --- Config ---
# All endpoints are overridable via environment variables; defaults point at
# LAN hosts (ChromaDB and Ollama live on separate machines).
CHROMADB_HOST = os.environ.get("CHROMADB_HOST", "192.168.86.25")
CHROMADB_PORT = int(os.environ.get("CHROMADB_PORT", "8000"))
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.40:11434")
EMBED_MODEL = "nomic-embed-text"  # Ollama embedding model used for dedup vectors
LLM_MODEL = "qwen3:8b"  # Ollama chat model used for classify/extract
COLLECTION = "auto-memory"  # ChromaDB collection name
DEDUP_THRESHOLD = 0.85  # cosine similarity; ChromaDB returns distances, so threshold = 1 - 0.85 = 0.15
# Logs go to <repo-root>/logs/auto-memory.log (two levels up from this file).
LOG_DIR = Path(__file__).resolve().parent.parent / "logs"
LOG_FILE = LOG_DIR / "auto-memory.log"

# Module-import side effect: ensure the log directory exists and wire up
# file-based logging before any function runs.
LOG_DIR.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    filename=str(LOG_FILE),
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("auto-memory")
|
||||
|
||||
|
||||
def ollama_available() -> bool:
    """Probe the Ollama server; True only on an HTTP 200 from /api/tags.

    Any failure (connection error, timeout, bad URL) counts as unavailable.
    """
    try:
        resp = requests.get(f"{OLLAMA_URL}/api/tags", timeout=3)
    except Exception:
        return False
    return resp.status_code == 200
|
||||
|
||||
|
||||
def ollama_generate(prompt: str, timeout: float = 10) -> str:
    """Run a non-streaming completion against Ollama and return its text.

    Raises requests.HTTPError on a non-2xx response.
    """
    payload = {
        "model": LLM_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.3, "num_predict": 512},
    }
    resp = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=timeout)
    resp.raise_for_status()
    body = resp.json()
    return body.get("response", "")
|
||||
|
||||
|
||||
def ollama_embed(text: str) -> list[float]:
    """Embed *text* with the configured Ollama embedding model.

    Raises requests.HTTPError on a non-2xx response.
    """
    resp = requests.post(
        f"{OLLAMA_URL}/api/embed",
        json={"model": EMBED_MODEL, "input": text},
        timeout=10,
    )
    resp.raise_for_status()
    # /api/embed wraps results as {"embeddings": [[...]]}; one input, one vector.
    return resp.json()["embeddings"][0]
|
||||
|
||||
|
||||
# --- Heuristic fallback ---
# Regexes that signal a turn likely carries durable knowledge: fixes and
# workarounds, decisions, credentials/endpoints, root causes, deployments.
WORTH_PATTERNS = [
    r"(?i)(fix|fixed|resolved|solution|workaround)\b.*\b(by|with|using|was)\b",
    r"(?i)learned\s+that\b",
    r"(?i)(decided|decision|we('ll| will))\s+(to|go with)\b",
    r"(?i)(config|configured|set\s+up|installed)\b.*\b(on|at|for|to)\b",
    r"(?i)(password|secret|token|api[_-]?key|port|ip|url)\s*(is|=|:)\s*\S+",
    r"(?i)(error|bug|issue)\b.*\b(because|caused by|due to)\b",
    r"(?i)(important|remember|note that|fyi)\b",
    r"(?i)(created|deployed|migrated|upgraded)\b.*\b(service|server|vm|container|database)\b",
]


def heuristic_classify(user: str, assistant: str) -> tuple[bool, float]:
    """Regex-based fallback classifier used when the LLM is unreachable.

    Returns (worth_remembering, confidence); confidence scales with the
    number of matched signal patterns, capped at 0.9.
    """
    text = f"{user}\n{assistant}"
    matched = [pat for pat in WORTH_PATTERNS if re.search(pat, text)]
    if not matched:
        return False, 0.0
    # Base 0.4 plus 0.15 per matched signal.
    return True, min(0.4 + len(matched) * 0.15, 0.9)
|
||||
|
||||
|
||||
def heuristic_extract(user: str, assistant: str) -> tuple[str, str]:
    """Fallback extraction: first three sentences of the assistant response
    (capped at 500 chars) plus a regex-based topic guess.

    (The old docstring claimed two sentences; the code has always taken
    three — the docstring is corrected here, behavior is unchanged.)

    Returns:
        (memory, topic) where topic is one of "infrastructure", "code",
        "config", "decision", or "general".
    """
    # Split on terminal punctuation followed by whitespace.
    sentences = re.split(r'(?<=[.!?])\s+', assistant.strip())
    memory = " ".join(sentences[:3])[:500]

    # First matching category wins; dict order encodes priority.
    topic_map = {
        "infrastructure": r"(?i)(server|vm|port|docker|nginx|systemd|network|ip|dns|proxy)",
        "code": r"(?i)(function|script|python|node|api|bug|error|code|git)",
        "config": r"(?i)(config|setting|env|variable|yaml|json|toml)",
        "decision": r"(?i)(decided|decision|chose|picked|go with)",
    }
    combined = f"{user}\n{assistant}"
    topic = next(
        (t for t, pat in topic_map.items() if re.search(pat, combined)),
        "general",
    )
    return memory, topic
|
||||
|
||||
|
||||
def llm_classify_and_extract(user: str, assistant: str) -> tuple[bool, float, str, str]:
    """Classify a turn and extract a memory via the Ollama LLM.

    Returns (worth_remembering, confidence, memory, topic).

    Raises:
        ValueError: if no JSON object can be found in the model's reply.
    """
    prompt = f"""/no_think
You analyze agent conversations to decide if they contain information worth remembering long-term.

USER MESSAGE:
{user[:1500]}

ASSISTANT RESPONSE:
{assistant[:2000]}

Respond with EXACTLY this JSON (no other text):
{{"worth_remembering": true/false, "confidence": 0.0-1.0, "memory": "concise fact/solution/decision (1-2 sentences)", "topic": "one of: infrastructure, code, config, decision, troubleshooting, general"}}
"""
    raw = ollama_generate(prompt, timeout=8)

    # Pull the first flat brace-delimited object out of the (possibly chatty) reply.
    found = re.search(r'\{[^{}]*\}', raw, re.DOTALL)
    if found is None:
        raise ValueError(f"No JSON in LLM response: {raw[:200]}")
    parsed = json.loads(found.group())

    worth = bool(parsed.get("worth_remembering", False))
    confidence = float(parsed.get("confidence", 0.5))
    memory = str(parsed.get("memory", ""))
    topic = str(parsed.get("topic", "general"))
    return worth, confidence, memory, topic
|
||||
|
||||
|
||||
def get_collection():
    """Return the auto-memory ChromaDB collection (cosine space), creating it on first use."""
    http_client = chromadb.HttpClient(host=CHROMADB_HOST, port=CHROMADB_PORT)
    return http_client.get_or_create_collection(
        name=COLLECTION, metadata={"hnsw:space": "cosine"}
    )
|
||||
|
||||
|
||||
def check_duplicate(collection, embedding: list[float]) -> bool:
    """Return True when the nearest stored memory is similar enough to be a duplicate.

    Any query failure is logged as a warning and treated as "not a
    duplicate", so storage still proceeds on a flaky backend.
    """
    try:
        hits = collection.query(query_embeddings=[embedding], n_results=1)
        nearest = hits["distances"][0] if hits and hits["distances"] else []
        if nearest:
            # Cosine space: similarity = 1 - distance.
            similarity = 1 - nearest[0]
            if similarity > DEDUP_THRESHOLD:
                log.info(f"Duplicate found (similarity={similarity:.3f}), skipping")
                return True
    except Exception as e:
        log.warning(f"Dedup check failed: {e}")
    return False
|
||||
|
||||
|
||||
def store_memory(collection, memory: str, embedding: list[float], metadata: dict):
    """Persist one memory document in ChromaDB.

    The document id is the first 16 hex chars of the SHA-256 of the memory
    text, so identical text always maps to the same id.
    """
    digest = hashlib.sha256(memory.encode()).hexdigest()
    doc_id = digest[:16]
    collection.add(
        documents=[memory], embeddings=[embedding], metadatas=[metadata], ids=[doc_id]
    )
    log.info(f"Stored memory [{doc_id}]: {memory[:100]}...")
|
||||
|
||||
|
||||
def _classify_turn(user: str, assistant: str) -> tuple[bool, float, str, str]:
    """Classify one turn, preferring the LLM and falling back to regex heuristics.

    Returns (worth, confidence, memory, topic); memory is "" when the turn
    is not worth remembering.
    """
    if ollama_available():
        try:
            return llm_classify_and_extract(user, assistant)
        except Exception as e:
            log.warning(f"LLM failed ({e}), falling back to heuristics")
    worth, confidence = heuristic_classify(user, assistant)
    if worth:
        memory, topic = heuristic_extract(user, assistant)
    else:
        memory, topic = "", "general"
    return worth, confidence, memory, topic


def process_turn(user: str, assistant: str, agent_id: str = "unknown", session: str = "unknown"):
    """Main pipeline: classify -> extract -> embed -> dedup -> store.

    Prints a one-line JSON result to stdout when a store is attempted;
    short or unremarkable turns are only logged, not printed.

    Args:
        user: the user's message for this turn.
        assistant: the assistant's response.
        agent_id: identifier recorded in the stored metadata.
        session: session identifier recorded as "session:<id>".
    """
    # Skip trivial interactions outright.
    if len(assistant) < 50:
        log.info("SKIP: response too short")
        return

    # 1. Classify & extract (LLM with heuristic fallback).
    worth, confidence, memory, topic = _classify_turn(user, assistant)
    if not worth or not memory:
        log.info(f"SKIP: not worth remembering (confidence={confidence:.2f})")
        return

    log.info(f"CANDIDATE: topic={topic} confidence={confidence:.2f} memory={memory[:80]}...")

    # 2. Embed the distilled memory text (not the raw turn).
    try:
        embedding = ollama_embed(memory)
    except Exception as e:
        log.error(f"Embedding failed: {e}")
        return

    # 3. Dedup & store.
    try:
        collection = get_collection()
        if check_duplicate(collection, embedding):
            return
        metadata = {
            "date": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
            "agent_id": agent_id,
            "topic": topic,
            "confidence": confidence,
            "source": f"session:{session}",
        }
        store_memory(collection, memory, embedding, metadata)
        print(json.dumps({"stored": True, "memory": memory, "topic": topic, "confidence": confidence}))
    except Exception as e:
        log.error(f"ChromaDB error: {e}")
        print(json.dumps({"stored": False, "error": str(e)}))
|
||||
|
||||
|
||||
def main():
    """CLI entry point: accept a turn via flags, or a JSON object on stdin."""
    import argparse

    parser = argparse.ArgumentParser(description="Auto-memory hook")
    parser.add_argument("--user", help="User message")
    parser.add_argument("--assistant", help="Assistant response")
    parser.add_argument("--agent-id", default="unknown")
    parser.add_argument("--session", default="unknown")
    args = parser.parse_args()

    if args.user and args.assistant:
        # Flag mode: both messages supplied on the command line.
        process_turn(args.user, args.assistant, args.agent_id, args.session)
        return

    if not sys.stdin.isatty():
        # Pipe mode: a single JSON object on stdin.
        payload = json.load(sys.stdin)
        process_turn(
            payload.get("user", ""),
            payload.get("assistant", ""),
            payload.get("agent_id", "unknown"),
            payload.get("session", "unknown"),
        )
        return

    parser.print_help()
    sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user