#!/usr/bin/env python3
"""
Auto-Memory Hook for OpenClaw

Analyzes agent turns and stores valuable information in ChromaDB.

Usage:
    echo '{"user":"...","assistant":"...","agent_id":"case","session":"abc"}' | python3 auto-memory-hook.py
    python3 auto-memory-hook.py --user "msg" --assistant "resp" --agent-id case --session abc
"""
import sys, json, os, re, hashlib, logging
from datetime import datetime, timezone
from pathlib import Path

import requests
import chromadb

# --- Config ---
CHROMADB_HOST = os.environ.get("CHROMADB_HOST", "192.168.86.25")
CHROMADB_PORT = int(os.environ.get("CHROMADB_PORT", "8000"))
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://192.168.86.40:11434")
EMBED_MODEL = "nomic-embed-text"
LLM_MODEL = "qwen3:8b"
COLLECTION = "auto-memory"
DEDUP_THRESHOLD = 0.85  # cosine similarity; ChromaDB returns distances, so threshold = 1 - 0.85 = 0.15

LOG_DIR = Path(__file__).resolve().parent.parent / "logs"
LOG_FILE = LOG_DIR / "auto-memory.log"
LOG_DIR.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    filename=str(LOG_FILE),
    format="%(asctime)s %(levelname)s %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("auto-memory")


def ollama_available() -> bool:
    """Quick health check so we can fall back to heuristics when Ollama is down."""
    try:
        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=3)
        return r.status_code == 200
    except Exception:
        return False


def ollama_generate(prompt: str, timeout: float = 10) -> str:
    """Call Ollama for classification/extraction."""
    r = requests.post(
        f"{OLLAMA_URL}/api/generate",
        json={
            "model": LLM_MODEL,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.3, "num_predict": 512},
        },
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json().get("response", "")


def ollama_embed(text: str) -> list[float]:
    """Get embedding from Ollama."""
    r = requests.post(
        f"{OLLAMA_URL}/api/embed",
        json={"model": EMBED_MODEL, "input": text},
        timeout=10,
    )
    r.raise_for_status()
    data = r.json()
    # API returns {"embeddings": [[...]]}
    return data["embeddings"][0]


# --- Heuristic fallback ---
WORTH_PATTERNS = [
    r"(?i)(fix|fixed|resolved|solution|workaround)\b.*\b(by|with|using|was)\b",
    r"(?i)learned\s+that\b",
    r"(?i)(decided|decision|we('ll| will))\s+(to|go with)\b",
    r"(?i)(config|configured|set\s+up|installed)\b.*\b(on|at|for|to)\b",
    r"(?i)(password|secret|token|api[_-]?key|port|ip|url)\s*(is|=|:)\s*\S+",
    r"(?i)(error|bug|issue)\b.*\b(because|caused by|due to)\b",
    r"(?i)(important|remember|note that|fyi)\b",
    r"(?i)(created|deployed|migrated|upgraded)\b.*\b(service|server|vm|container|database)\b",
]


def heuristic_classify(user: str, assistant: str) -> tuple[bool, float]:
    """Fallback classification using regex patterns."""
    combined = f"{user}\n{assistant}"
    hits = sum(1 for p in WORTH_PATTERNS if re.search(p, combined))
    if hits == 0:
        return False, 0.0
    confidence = min(0.4 + hits * 0.15, 0.9)
    return True, confidence


def heuristic_extract(user: str, assistant: str) -> tuple[str, str]:
    """Fallback extraction: first three sentences of the assistant response + topic guess."""
    sentences = re.split(r'(?<=[.!?])\s+', assistant.strip())
    memory = " ".join(sentences[:3])[:500]  # cap at 500 chars
    # Guess topic
    topic = "general"
    topic_map = {
        "infrastructure": r"(?i)(server|vm|port|docker|nginx|systemd|network|ip|dns|proxy)",
        "code": r"(?i)(function|script|python|node|api|bug|error|code|git)",
        "config": r"(?i)(config|setting|env|variable|yaml|json|toml)",
        "decision": r"(?i)(decided|decision|chose|picked|go with)",
    }
    combined = f"{user}\n{assistant}"
    for t, pat in topic_map.items():
        if re.search(pat, combined):
            topic = t
            break
    return memory, topic
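
# A quick illustration of the fallback path (comments only, nothing here runs):
# one pattern hit yields confidence min(0.4 + 0.15, 0.9) ≈ 0.55; each extra hit
# adds 0.15 up to the 0.9 cap. The example input is hypothetical:
#
#   heuristic_classify("How did you fix it?",
#                      "Fixed the nginx timeout by raising proxy_read_timeout.")
#   # -> (True, ~0.55), via the "fix ... by" pattern
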
def llm_classify_and_extract(user: str, assistant: str) -> tuple[bool, float, str, str]:
    """Use Ollama LLM for classification and extraction."""
    prompt = f"""/no_think
You analyze agent conversations to decide if they contain information worth remembering long-term.

USER MESSAGE:
{user[:1500]}

ASSISTANT RESPONSE:
{assistant[:2000]}

Respond with EXACTLY this JSON (no other text):
{{"worth_remembering": true/false, "confidence": 0.0-1.0, "memory": "concise fact/solution/decision (1-2 sentences)", "topic": "one of: infrastructure, code, config, decision, troubleshooting, general"}}
"""
    raw = ollama_generate(prompt, timeout=8)
    # Extract JSON from response
    match = re.search(r'\{[^{}]*\}', raw, re.DOTALL)
    if not match:
        raise ValueError(f"No JSON in LLM response: {raw[:200]}")
    data = json.loads(match.group())
    return (
        bool(data.get("worth_remembering", False)),
        float(data.get("confidence", 0.5)),
        str(data.get("memory", "")),
        str(data.get("topic", "general")),
    )


def get_collection():
    client = chromadb.HttpClient(host=CHROMADB_HOST, port=CHROMADB_PORT)
    return client.get_or_create_collection(name=COLLECTION, metadata={"hnsw:space": "cosine"})


def check_duplicate(collection, embedding: list[float]) -> bool:
    """Check if a similar memory already exists. Returns True if duplicate."""
    try:
        results = collection.query(query_embeddings=[embedding], n_results=1)
        if results and results["distances"] and results["distances"][0]:
            # For cosine space, distance = 1 - similarity
            distance = results["distances"][0][0]
            similarity = 1 - distance
            if similarity > DEDUP_THRESHOLD:
                log.info(f"Duplicate found (similarity={similarity:.3f}), skipping")
                return True
    except Exception as e:
        log.warning(f"Dedup check failed: {e}")
    return False


def store_memory(collection, memory: str, embedding: list[float], metadata: dict):
    doc_id = hashlib.sha256(memory.encode()).hexdigest()[:16]
    collection.add(
        documents=[memory],
        embeddings=[embedding],
        metadatas=[metadata],
        ids=[doc_id],
    )
    log.info(f"Stored memory [{doc_id}]: {memory[:100]}...")


def process_turn(user: str, assistant: str, agent_id: str = "unknown", session: str = "unknown"):
    """Main pipeline."""
    # Skip trivial interactions
    if len(assistant) < 50:
        log.info("SKIP: response too short")
        return

    use_llm = ollama_available()

    # 1. Classify & Extract
    if use_llm:
        try:
            worth, confidence, memory, topic = llm_classify_and_extract(user, assistant)
        except Exception as e:
            log.warning(f"LLM failed ({e}), falling back to heuristics")
            use_llm = False

    if not use_llm:
        worth, confidence = heuristic_classify(user, assistant)
        if worth:
            memory, topic = heuristic_extract(user, assistant)
        else:
            memory, topic = "", "general"

    if not worth or not memory:
        log.info(f"SKIP: not worth remembering (confidence={confidence:.2f})")
        return

    log.info(f"CANDIDATE: topic={topic} confidence={confidence:.2f} memory={memory[:80]}...")

    # 2. Embed
    try:
        embedding = ollama_embed(memory)
    except Exception as e:
        log.error(f"Embedding failed: {e}")
        return
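
    # The embedding computed above is reused for both the dedup query and the
    # final write, so each candidate memory costs a single embed call.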
    # 3. Dedup & Store
    try:
        collection = get_collection()
        if check_duplicate(collection, embedding):
            return
        metadata = {
            "date": datetime.now(timezone.utc).strftime("%Y-%m-%d"),
            "agent_id": agent_id,
            "topic": topic,
            "confidence": confidence,
            "source": f"session:{session}",
        }
        store_memory(collection, memory, embedding, metadata)
        print(json.dumps({"stored": True, "memory": memory, "topic": topic, "confidence": confidence}))
    except Exception as e:
        log.error(f"ChromaDB error: {e}")
        print(json.dumps({"stored": False, "error": str(e)}))


def main():
    import argparse

    parser = argparse.ArgumentParser(description="Auto-memory hook")
    parser.add_argument("--user", help="User message")
    parser.add_argument("--assistant", help="Assistant response")
    parser.add_argument("--agent-id", default="unknown")
    parser.add_argument("--session", default="unknown")
    args = parser.parse_args()

    if args.user and args.assistant:
        process_turn(args.user, args.assistant, args.agent_id, args.session)
    elif not sys.stdin.isatty():
        data = json.load(sys.stdin)
        process_turn(
            data.get("user", ""),
            data.get("assistant", ""),
            data.get("agent_id", "unknown"),
            data.get("session", "unknown"),
        )
    else:
        parser.print_help()
        sys.exit(1)


if __name__ == "__main__":
    main()
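
# --- Example: reading memories back (illustrative sketch, not part of the hook) ---
# Assuming the same ChromaDB instance and embedding model, a consumer could
# retrieve relevant memories like this (`question` is a hypothetical variable):
#
#   client = chromadb.HttpClient(host=CHROMADB_HOST, port=CHROMADB_PORT)
#   col = client.get_collection(COLLECTION)
#   hits = col.query(query_embeddings=[ollama_embed(question)], n_results=3)
#   for doc, meta in zip(hits["documents"][0], hits["metadatas"][0]):
#       print(meta["date"], meta["topic"], doc)
#
# The hook itself only writes; retrieval is left to the agent runtime.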