diff --git a/.clawhub/lock.json b/.clawhub/lock.json new file mode 100644 index 0000000..2337539 --- /dev/null +++ b/.clawhub/lock.json @@ -0,0 +1,9 @@ +{ + "version": 1, + "skills": { + "chromadb-memory": { + "version": "1.0.0", + "installedAt": 1770520044828 + } + } +} diff --git a/MEMORY.md b/MEMORY.md index 77dbb7f..0bdbfa0 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -80,3 +80,32 @@ This is about having an inner life, not just responding. - Dreams logged: 3 - Time capsules: 2 - Git commits: 20+ + +## Who D J Is (updated 2026-02-07) + +- Interested in crypto/trading analysis +- Follows money-making accounts on X (crypto, polymarket, arbitrage) +- Wants automated analysis of feed posts for validity +- Proxmox VM setup: ubuntu-desktop-openclaw at 192.168.86.45 +- Ollama server at 192.168.86.137 (qwen3:8b, qwen3:30b, glm-4.7-flash, nomic-embed-text) +- ChromaDB LXC at 192.168.86.25:8000 + +## Infrastructure (updated 2026-02-07) + +- **ChromaDB:** http://192.168.86.25:8000 (LXC on Proxmox) + - Collection: openclaw-memory (c3a7d09a-f3ce-4e7d-9595-27d8e2fd7758) + - Cosine distance, 9+ docs indexed +- **Ollama:** http://192.168.86.137:11434 + - Models: qwen3:8b, qwen3:30b-a3b, glm-4.7-flash, nomic-embed-text +- **Browser:** Google Chrome installed (/usr/bin/google-chrome-stable) + - Headless works via OpenClaw browser tool + - Desktop works via DISPLAY=:0 for visual scraping +- **VM:** Proxmox, QXL graphics, X11 (not Wayland), auto-login enabled + +## Lessons Learned (updated 2026-02-07) + +- Don't pkill chrome broadly — it kills OpenClaw's headless browser too +- Snap Chromium doesn't work with OpenClaw — use Google Chrome .deb +- ChromaDB needs cosine distance for proper similarity scoring (not L2) +- X/Twitter cookies are encrypted at rest — browser automation is the way +- Sub-agents are great for parallel analysis tasks diff --git a/data/x-feed/20260207-225415/manifest.json b/data/x-feed/20260207-225415/manifest.json new file mode 100644 index 0000000..7964199 --- 
/dev/null +++ b/data/x-feed/20260207-225415/manifest.json @@ -0,0 +1,14 @@ +{ + "timestamp": "20260207-225415", + "pages": 8, + "files": [ + "page-1.png", + "page-2.png", + "page-3.png", + "page-4.png", + "page-5.png", + "page-6.png", + "page-7.png", + "page-8.png" + ] +} diff --git a/data/x-feed/20260207-225415/page-1.png b/data/x-feed/20260207-225415/page-1.png new file mode 100644 index 0000000..83ee329 Binary files /dev/null and b/data/x-feed/20260207-225415/page-1.png differ diff --git a/data/x-feed/20260207-225415/page-2.png b/data/x-feed/20260207-225415/page-2.png new file mode 100644 index 0000000..8d03c7f Binary files /dev/null and b/data/x-feed/20260207-225415/page-2.png differ diff --git a/data/x-feed/20260207-225415/page-3.png b/data/x-feed/20260207-225415/page-3.png new file mode 100644 index 0000000..b7862f0 Binary files /dev/null and b/data/x-feed/20260207-225415/page-3.png differ diff --git a/data/x-feed/20260207-225415/page-4.png b/data/x-feed/20260207-225415/page-4.png new file mode 100644 index 0000000..37a918b Binary files /dev/null and b/data/x-feed/20260207-225415/page-4.png differ diff --git a/data/x-feed/20260207-225415/page-5.png b/data/x-feed/20260207-225415/page-5.png new file mode 100644 index 0000000..79fd6e0 Binary files /dev/null and b/data/x-feed/20260207-225415/page-5.png differ diff --git a/data/x-feed/20260207-225415/page-6.png b/data/x-feed/20260207-225415/page-6.png new file mode 100644 index 0000000..250cbbe Binary files /dev/null and b/data/x-feed/20260207-225415/page-6.png differ diff --git a/data/x-feed/20260207-225415/page-7.png b/data/x-feed/20260207-225415/page-7.png new file mode 100644 index 0000000..8dce920 Binary files /dev/null and b/data/x-feed/20260207-225415/page-7.png differ diff --git a/data/x-feed/20260207-225415/page-8.png b/data/x-feed/20260207-225415/page-8.png new file mode 100644 index 0000000..5e838dd Binary files /dev/null and b/data/x-feed/20260207-225415/page-8.png differ diff --git 
a/memory/2026-02-07.md b/memory/2026-02-07.md new file mode 100644 index 0000000..975f892 --- /dev/null +++ b/memory/2026-02-07.md @@ -0,0 +1,49 @@ +# 2026-02-07 + +## Server Recovery +- Server was down for 7 days (01-31 to 02-07) +- D J got it back up, we recovered cleanly +- Time capsule from 01-31 opened on schedule + +## Updates Applied +- OpenClaw updated to 2026.2.6-3 +- Fixed Proxmox noVNC issue (Wayland → X11) +- Enabled auto-login for wdjones + +## New Infrastructure +- ChromaDB running on LXC at 192.168.86.25:8000 +- Ollama at 192.168.86.137:11434 (qwen3:8b, qwen3:30b, glm-4.7-flash, nomic-embed-text) +- chromadb-memory plugin live with auto-recall +- 9 documents indexed for semantic memory search + +## Browser Capability +- Installed Google Chrome for headless screenshots +- OpenClaw browser tool configured and working +- Can open URLs, screenshot, analyze with vision +- D J wants this used to visually verify web projects before delivery + +## Key Decisions +- D J wants local LLM (Qwen) as Claude fallback for cost/insurance +- Ollama setup for Qwen still pending (model routing config) +- Browser visual QA is standard workflow going forward + +## X Feed Analysis Project +- D J wants automated analysis of X/Twitter posts about money-making (crypto, trading, polymarket, arbitrage) +- Built x-feed-scraper.sh — scrolls X feed via xdotool, takes screenshots with ImageMagick +- Pipeline: scrape → screenshot → vision analysis → categorize → verdict (valid/expired/spam/sensationalized) +- Sub-agents run analysis in parallel (2 batches of 4 pages) +- Test run found 2 relevant posts out of ~15: one sensationalized crypto hype, one paid stock promo +- Chrome must be launched with --no-sandbox on this VM +- X cookies are encrypted at rest — browser automation is the reliable free path +- D J's X handle: logged in via desktop Chrome on the VM + +## Infrastructure Notes +- pkill chrome kills OpenClaw headless browser too — be careful, causes gateway disruption +- 
Desktop Chrome and OpenClaw headless Chrome are separate instances +- xdotool installed for keyboard/mouse automation +- ImageMagick `import` works for full-screen captures +- Chrome user data dir: /home/wdjones/.config/google-chrome + +## Session Context +- This session is near compaction +- Major accomplishments today: server recovery, OpenClaw update, Proxmox VNC fix, ChromaDB memory, browser capability, X feed analysis pipeline diff --git a/skills/chromadb-memory/.clawhub/origin.json b/skills/chromadb-memory/.clawhub/origin.json new file mode 100644 index 0000000..9e6455a --- /dev/null +++ b/skills/chromadb-memory/.clawhub/origin.json @@ -0,0 +1,7 @@ +{ + "version": 1, + "registry": "https://clawhub.ai", + "slug": "chromadb-memory", + "installedVersion": "1.0.0", + "installedAt": 1770520044828 +} diff --git a/skills/chromadb-memory/SKILL.md b/skills/chromadb-memory/SKILL.md new file mode 100644 index 0000000..2a2ed7b --- /dev/null +++ b/skills/chromadb-memory/SKILL.md @@ -0,0 +1,125 @@ +--- +name: chromadb-memory +description: Long-term memory via ChromaDB with local Ollama embeddings. Auto-recall injects relevant context every turn. No cloud APIs required — fully self-hosted. +version: 1.0.0 +author: matts +homepage: https://github.com/openclaw/openclaw +metadata: + openclaw: + emoji: "🧠" + requires: + bins: ["curl"] + category: "memory" +tags: + - memory + - chromadb + - ollama + - vector-search + - local + - self-hosted + - auto-recall +--- + +# ChromaDB Memory + +Long-term semantic memory backed by ChromaDB and local Ollama embeddings. Zero cloud dependencies. + +## What It Does + +- **Auto-recall**: Before every agent turn, queries ChromaDB with the user's message and injects relevant context automatically +- **`chromadb_search` tool**: Manual semantic search over your ChromaDB collection +- **100% local**: Ollama (nomic-embed-text) for embeddings, ChromaDB for vector storage + +## Prerequisites + +1. 
**ChromaDB** running (Docker recommended): + ```bash + docker run -d --name chromadb -p 8100:8000 chromadb/chroma:latest + ``` + +2. **Ollama** with an embedding model: + ```bash + ollama pull nomic-embed-text + ``` + +3. **Indexed documents** in ChromaDB. Use any ChromaDB-compatible indexer to populate your collection. + +## Install + +```bash +# 1. Copy the plugin extension +mkdir -p ~/.openclaw/extensions/chromadb-memory +cp {baseDir}/scripts/index.ts ~/.openclaw/extensions/chromadb-memory/ +cp {baseDir}/scripts/openclaw.plugin.json ~/.openclaw/extensions/chromadb-memory/ + +# 2. Get your collection ID +curl -s http://localhost:8100/api/v2/tenants/default_tenant/databases/default_database/collections | python3 -c "import json,sys; [print(f'{c[\"id\"]} {c[\"name\"]}') for c in json.load(sys.stdin)]" + +# 3. Add to your OpenClaw config (~/.openclaw/openclaw.json): +``` + +```json +{ + "plugins": { + "entries": { + "chromadb-memory": { + "enabled": true, + "config": { + "chromaUrl": "http://localhost:8100", + "collectionId": "YOUR_COLLECTION_ID", + "ollamaUrl": "http://localhost:11434", + "embeddingModel": "nomic-embed-text", + "autoRecall": true, + "autoRecallResults": 3, + "minScore": 0.5 + } + } + } + } +} +``` + +```bash +# 4. Restart the gateway +openclaw gateway restart +``` + +## Config Options + +| Option | Default | Description | +|--------|---------|-------------| +| `chromaUrl` | `http://localhost:8100` | ChromaDB server URL | +| `collectionId` | *required* | ChromaDB collection UUID | +| `ollamaUrl` | `http://localhost:11434` | Ollama API URL | +| `embeddingModel` | `nomic-embed-text` | Ollama embedding model | +| `autoRecall` | `true` | Auto-inject relevant memories each turn | +| `autoRecallResults` | `3` | Max auto-recall results per turn | +| `minScore` | `0.5` | Minimum similarity score (0-1) | + +## How It Works + +1. You send a message +2. Plugin embeds your message via Ollama (nomic-embed-text, 768 dimensions) +3. 
Queries ChromaDB for nearest neighbors +4. Results above `minScore` are prepended to the agent's context as a "Relevant context from long-term memory (ChromaDB)" block +5. Agent responds with relevant long-term context available + +## Token Cost + +Auto-recall adds ~275 tokens per turn worst case (3 results × ~300 chars + wrapper). Against a 200K+ context window, this is negligible. + +## Tuning + +- **Too noisy?** Raise `minScore` to 0.6 or 0.7 +- **Missing context?** Lower `minScore` to 0.4, increase `autoRecallResults` to 5 +- **Want manual only?** Set `autoRecall: false`, use `chromadb_search` tool + +## Architecture + +``` +User Message → Ollama (embed) → ChromaDB (query) → Context Injection + ↓ + Agent Response +``` + +No OpenAI. No cloud. Your memories stay on your hardware. diff --git a/skills/chromadb-memory/_meta.json b/skills/chromadb-memory/_meta.json new file mode 100644 index 0000000..eb48c7e --- /dev/null +++ b/skills/chromadb-memory/_meta.json @@ -0,0 +1,6 @@ +{ + "ownerId": "kn750b5e2m4q4mbk3bh8whh86580bsvg", + "slug": "chromadb-memory", + "version": "1.0.0", + "publishedAt": 1769911812377 +} \ No newline at end of file diff --git a/skills/chromadb-memory/manifest.json b/skills/chromadb-memory/manifest.json new file mode 100644 index 0000000..3a63046 --- /dev/null +++ b/skills/chromadb-memory/manifest.json @@ -0,0 +1,14 @@ +{ + "name": "chromadb-memory", + "version": "1.0.0", + "description": "Long-term memory via ChromaDB with local Ollama embeddings. Auto-recall injects relevant context every turn. 
No cloud APIs required.", + "author": "matts", + "license": "MIT", + "tags": ["memory", "chromadb", "ollama", "vector-search", "local", "self-hosted", "auto-recall"], + "files": [ + "SKILL.md", + "manifest.json", + "scripts/index.ts", + "scripts/openclaw.plugin.json" + ] +}
diff --git a/skills/chromadb-memory/scripts/index.ts b/skills/chromadb-memory/scripts/index.ts
new file mode 100644
index 0000000..094133f
--- /dev/null
+++ b/skills/chromadb-memory/scripts/index.ts
@@ -0,0 +1,361 @@
+/**
+ * ChromaDB Memory Plugin for OpenClaw
+ *
+ * Provides:
+ * 1. chromadb_search tool - manual semantic search over ChromaDB
+ * 2. Auto-recall - injects relevant memories before each agent turn
+ *
+ * Uses local Ollama (nomic-embed-text) for embeddings. No cloud APIs.
+ */
+
+// Use plain JSON Schema instead of typebox (not available in workspace context)
+type OpenClawPluginApi = any;
+
+// ============================================================================
+// Config
+// ============================================================================
+
+interface ChromaDBConfig {
+  chromaUrl: string;
+  collectionId: string;
+  ollamaUrl: string;
+  embeddingModel: string;
+  autoRecall: boolean;
+  autoRecallResults: number;
+  minScore: number;
+}
+
+/** Returns `value` if it is a non-empty string, otherwise `fallback`. */
+function stringOr(value: unknown, fallback: string): string {
+  return typeof value === "string" && value.length > 0 ? value : fallback;
+}
+
+/** Returns `value` floored to an integer if it is a finite number >= 1, otherwise `fallback`. */
+function positiveIntOr(value: unknown, fallback: number): number {
+  if (typeof value !== "number" || !Number.isFinite(value)) return fallback;
+  const n = Math.floor(value);
+  return n >= 1 ? n : fallback;
+}
+
+/** Drops trailing slashes so that `${base}/api/...` never contains "//". */
+function trimTrailingSlash(url: string): string {
+  return url.replace(/\/+$/, "");
+}
+
+/**
+ * Normalizes the raw plugin config and fills in defaults.
+ *
+ * @param raw - Plugin config as supplied by the host; may be null/undefined.
+ * @returns Config with every field populated. `collectionId` is "" when it
+ *   was not supplied as a non-empty string; the caller treats that as
+ *   "plugin disabled".
+ */
+function parseConfig(raw: unknown): ChromaDBConfig {
+  const cfg = (raw ?? {}) as Record<string, unknown>;
+  const minScore = cfg.minScore;
+  return {
+    chromaUrl: trimTrailingSlash(stringOr(cfg.chromaUrl, "http://localhost:8100")),
+    collectionId: stringOr(cfg.collectionId, ""),
+    ollamaUrl: trimTrailingSlash(stringOr(cfg.ollamaUrl, "http://localhost:11434")),
+    embeddingModel: stringOr(cfg.embeddingModel, "nomic-embed-text"),
+    autoRecall: cfg.autoRecall !== false,
+    autoRecallResults: positiveIntOr(cfg.autoRecallResults, 3),
+    // 0 is a legitimate threshold (accept every match), so fall back to the
+    // default only when the value is missing or not a finite number.
+    minScore: typeof minScore === "number" && Number.isFinite(minScore) ? minScore : 0.5,
+  };
+}
+
+// ============================================================================
+// Ollama Embeddings
+// ============================================================================
+
+/**
+ * Requests an embedding vector for `text` from Ollama.
+ *
+ * @param ollamaUrl - Base URL of the Ollama server (no trailing slash).
+ * @param model - Name of the embedding model to use.
+ * @param text - Text to embed.
+ * @returns The embedding vector.
+ * @throws Error if the HTTP request fails or the response has no embedding.
+ */
+async function getEmbedding(
+  ollamaUrl: string,
+  model: string,
+  text: string,
+): Promise<number[]> {
+  const resp = await fetch(`${ollamaUrl}/api/embeddings`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ model, prompt: text }),
+  });
+
+  if (!resp.ok) {
+    throw new Error(`Ollama embedding failed: ${resp.status} ${resp.statusText}`);
+  }
+
+  const data = (await resp.json()) as { embedding?: number[] };
+  if (!Array.isArray(data.embedding) || data.embedding.length === 0) {
+    // Fail here rather than sending an unusable vector on to ChromaDB.
+    throw new Error("Ollama embedding failed: response contained no embedding");
+  }
+  return data.embedding;
+}
+
+// ============================================================================
+// ChromaDB Client
+// ============================================================================
+
+interface ChromaResult {
+  source: string;
+  text: string;
+  distance: number;
+  score: number;
+  metadata: Record<string, unknown>;
+}
+
+const CHROMA_BASE = "/api/v2/tenants/default_tenant/databases/default_database/collections";
+
+/**
+ * Runs a nearest-neighbour query against one ChromaDB collection.
+ *
+ * @param chromaUrl - Base URL of the ChromaDB server (no trailing slash).
+ * @param collectionId - ID of the collection to query.
+ * @param embedding - Query vector.
+ * @param nResults - Maximum number of neighbours to request.
+ * @returns One entry per returned id, in the order ChromaDB returned them;
+ *   empty when the response contains no ids.
+ * @throws Error if the HTTP request fails.
+ */
+async function queryChromaDB(
+  chromaUrl: string,
+  collectionId: string,
+  embedding: number[],
+  nResults: number,
+): Promise<ChromaResult[]> {
+  const url = `${chromaUrl}${CHROMA_BASE}/${collectionId}/query`;
+
+  const resp = await fetch(url, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      query_embeddings: [embedding],
+      n_results: nResults,
+      include: ["documents", "metadatas", "distances"],
+    }),
+  });
+
+  if (!resp.ok) {
+    throw new Error(`ChromaDB query failed: ${resp.status} ${resp.statusText}`);
+  }
+
+  const data = (await resp.json()) as {
+    ids?: string[][];
+    documents?: (string | null)[][];
+    metadatas?: (Record<string, unknown> | null)[][];
+    distances?: number[][];
+  };
+
+  // Only one query embedding is sent, so only the first row is of interest.
+  // Any column missing from the response is treated as empty, not fatal.
+  const ids = data.ids?.[0] ?? [];
+  const documents = data.documents?.[0] ?? [];
+  const metadatas = data.metadatas?.[0] ?? [];
+  const distances = data.distances?.[0] ?? [];
+
+  return ids.map((_id, i) => {
+    const metadata = metadatas[i] ?? {};
+    const distance = distances[i];
+    return {
+      source: (metadata.source as string) || "unknown",
+      text: documents[i] || "",
+      distance,
+      // Treat the distance as cosine distance and convert it to a
+      // similarity. NOTE(review): cosine distance can exceed 1, in which
+      // case this is negative - confirm the collection uses cosine.
+      score: 1 - distance,
+      metadata,
+    };
+  });
+}
+
+// ============================================================================
+// Plugin
+// ============================================================================
+
+/**
+ * Plugin entry point: registers the chromadb_search tool, the optional
+ * auto-recall hook and a lifecycle service. Does nothing beyond logging a
+ * warning when no collectionId is configured.
+ */
+export default function register(api: OpenClawPluginApi) {
+  const cfg = parseConfig(api.pluginConfig);
+
+  if (!cfg.collectionId) {
+    api.logger.warn("chromadb-memory: No collectionId configured, plugin disabled");
+    return;
+  }
+
+  api.logger.info(
+    `chromadb-memory: registered (chroma: ${cfg.chromaUrl}, ollama: ${cfg.ollamaUrl}, model: ${cfg.embeddingModel})`,
+  );
+
+  // ========================================================================
+  // Tool: chromadb_search
+  // ========================================================================
+
+  api.registerTool({
+    name: "chromadb_search",
+    description:
+      "Search the ChromaDB long-term memory archive. Contains indexed memory files, session transcripts, and homelab documentation. Use when you need deep historical context or can't find something in memory_search.",
+    parameters: {
+      type: "object",
+      properties: {
+        query: { type: "string", description: "Semantic search query" },
+        limit: { type: "number", description: "Max results (default: 5)" },
+      },
+      required: ["query"],
+    },
+    async execute(_toolCallId: string, params: unknown) {
+      const { query, limit } = (params ?? {}) as {
+        query?: string;
+        limit?: number;
+      };
+
+      if (typeof query !== "string" || query.trim().length === 0) {
+        return {
+          content: [
+            { type: "text", text: "ChromaDB search error: query must be a non-empty string" },
+          ],
+          isError: true,
+        };
+      }
+
+      try {
+        const embedding = await getEmbedding(
+          cfg.ollamaUrl,
+          cfg.embeddingModel,
+          query,
+        );
+        const results = await queryChromaDB(
+          cfg.chromaUrl,
+          cfg.collectionId,
+          embedding,
+          // The limit comes from the model; fall back to 5 unless it is a
+          // usable positive count.
+          positiveIntOr(limit, 5),
+        );
+
+        if (results.length === 0) {
+          return {
+            content: [
+              { type: "text", text: "No relevant results found in ChromaDB." },
+            ],
+          };
+        }
+
+        const filtered = results.filter((r) => r.score >= cfg.minScore);
+
+        if (filtered.length === 0) {
+          return {
+            content: [
+              {
+                type: "text",
+                text: `Found ${results.length} results but none above similarity threshold (${cfg.minScore}). Best match: ${results[0].score.toFixed(3)} from ${results[0].source}`,
+              },
+            ],
+          };
+        }
+
+        const text = filtered
+          .map(
+            (r, i) =>
+              `### Result ${i + 1} — ${r.source} (${(r.score * 100).toFixed(0)}% match)\n${r.text.slice(0, 500)}${r.text.length > 500 ? "..." : ""}`,
+          )
+          .join("\n\n");
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Found ${filtered.length} results from ChromaDB:\n\n${text}`,
+            },
+          ],
+        };
+      } catch (err) {
+        return {
+          content: [
+            {
+              type: "text",
+              text: `ChromaDB search error: ${String(err)}`,
+            },
+          ],
+          isError: true,
+        };
+      }
+    },
+  });
+
+  // ========================================================================
+  // Auto-recall: inject relevant memories before each agent turn
+  // ========================================================================
+
+  if (cfg.autoRecall) {
+    api.on("before_agent_start", async (event: { prompt?: string }) => {
+      // Skip empty or very short prompts.
+      if (!event.prompt || event.prompt.length < 10) return;
+
+      try {
+        const embedding = await getEmbedding(
+          cfg.ollamaUrl,
+          cfg.embeddingModel,
+          event.prompt,
+        );
+        const results = await queryChromaDB(
+          cfg.chromaUrl,
+          cfg.collectionId,
+          embedding,
+          cfg.autoRecallResults,
+        );
+
+        // Filter by minimum similarity
+        const relevant = results.filter((r) => r.score >= cfg.minScore);
+        if (relevant.length === 0) return;
+
+        const memoryContext = relevant
+          .map(
+            (r) =>
+              `- [${r.source}] ${r.text.slice(0, 300)}${r.text.length > 300 ? "..." : ""}`,
+          )
+          .join("\n");
+
+        api.logger.info(
+          `chromadb-memory: auto-recall injecting ${relevant.length} memories (best: ${relevant[0].score.toFixed(3)} from ${relevant[0].source})`,
+        );
+
+        return {
+          prependContext: `\nRelevant context from long-term memory (ChromaDB):\n${memoryContext}\n`,
+        };
+      } catch (err) {
+        // Best effort: a failed recall is only logged; nothing is returned.
+        api.logger.warn(`chromadb-memory: auto-recall failed: ${String(err)}`);
+      }
+    });
+  }
+
+  // ========================================================================
+  // Service
+  // ========================================================================
+
+  api.registerService({
+    id: "chromadb-memory",
+    start: () => {
+      api.logger.info(
+        `chromadb-memory: service started (auto-recall: ${cfg.autoRecall}, collection: ${cfg.collectionId})`,
+      );
+    },
+    stop: () => {
+      api.logger.info("chromadb-memory: stopped");
+    },
+  });
+}
diff --git a/skills/chromadb-memory/scripts/openclaw.plugin.json b/skills/chromadb-memory/scripts/openclaw.plugin.json new file mode 100644 index 0000000..b418786 --- /dev/null +++ b/skills/chromadb-memory/scripts/openclaw.plugin.json @@ -0,0 +1,61 @@ +{ + "id": "chromadb-memory", + "name": "ChromaDB Memory", + "description": "ChromaDB long-term memory with Ollama embeddings. Auto-recall injects relevant context before each turn. 
Manual search tool also available.", + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "chromaUrl": { + "type": "string", + "default": "http://localhost:8100" + }, + "collectionId": { + "type": "string" + }, + "ollamaUrl": { + "type": "string", + "default": "http://localhost:11434" + }, + "embeddingModel": { + "type": "string", + "default": "nomic-embed-text" + }, + "autoRecall": { + "type": "boolean", + "default": true + }, + "autoRecallResults": { + "type": "number", + "default": 3 + }, + "minScore": { + "type": "number", + "default": 0.5 + } + }, + "required": ["collectionId"] + }, + "uiHints": { + "chromaUrl": { + "label": "ChromaDB URL", + "placeholder": "http://localhost:8100" + }, + "collectionId": { + "label": "Collection ID", + "placeholder": "5a87acc5-..." + }, + "ollamaUrl": { + "label": "Ollama URL", + "placeholder": "http://localhost:11434" + }, + "embeddingModel": { + "label": "Embedding Model", + "placeholder": "nomic-embed-text" + }, + "autoRecall": { + "label": "Auto-Recall", + "help": "Automatically inject relevant ChromaDB memories into context" + } + } +}
diff --git a/tools/x-feed-scraper.sh b/tools/x-feed-scraper.sh
new file mode 100755
index 0000000..9230f6f
--- /dev/null
+++ b/tools/x-feed-scraper.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# X Feed Scraper — Automated screenshot pipeline
+#
+# Reuses (or launches) desktop Chrome on x.com, then alternates full-screen
+# captures (ImageMagick import) with Page_Down key presses (xdotool) and
+# writes a manifest.json describing the run.
+#
+# Usage: ./x-feed-scraper.sh [pages] [output_dir]
+#   pages       screenshots to take; positive integer (default: 5)
+#   output_dir  parent directory for the timestamped run directory
+#
+# Exit status: 0 on success, 1 if a tool is missing or a step fails,
+# 2 on invalid arguments.
+
+set -euo pipefail
+
+die() {
+  printf 'x-feed-scraper: %s\n' "$*" >&2
+  exit 1
+}
+
+PAGES=${1:-5}
+OUTPUT_DIR=${2:-/home/wdjones/.openclaw/workspace/data/x-feed}
+readonly DELAY=3
+readonly CHROME_BIN=/usr/bin/google-chrome-stable
+
+# PAGES feeds arithmetic and the JSON manifest, so insist on a plain positive
+# integer (no sign, no leading zero that would be read as octal).
+if ! [[ "$PAGES" =~ ^[1-9][0-9]*$ ]]; then
+  printf 'Usage: %s [pages] [output_dir]\n' "${0##*/}" >&2
+  printf 'pages must be a positive integer, got: %s\n' "$PAGES" >&2
+  exit 2
+fi
+
+for tool in pgrep xdotool import; do
+  command -v "$tool" > /dev/null || die "required tool not found: $tool"
+done
+
+# Every GUI command below targets the desktop X session on display :0.
+export DISPLAY=:0
+
+mkdir -p -- "$OUTPUT_DIR"
+TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+RUN_DIR="$OUTPUT_DIR/$TIMESTAMP"
+mkdir -p -- "$RUN_DIR"
+
+echo "=== X Feed Scraper ==="
+echo "Pages: $PAGES"
+echo "Output: $RUN_DIR"
+
+# Check if Chrome is running on X. pgrep exits 1 when nothing matches, which
+# is an expected outcome here, so it must not trip set -e / pipefail.
+CHROME_PID=$(pgrep -f 'google-chrome.*x\.com' | head -n 1) || true
+
+if [[ -z "$CHROME_PID" ]]; then
+  [[ -x "$CHROME_BIN" ]] || die "Chrome not found at $CHROME_BIN"
+  echo "Launching Chrome with X..."
+  nohup "$CHROME_BIN" --no-sandbox \
+    --user-data-dir=/home/wdjones/.config/google-chrome \
+    https://x.com/home > /dev/null 2>&1 &
+  sleep 8
+  echo "Chrome launched"
+else
+  echo "Chrome already running (PID: $CHROME_PID)"
+fi
+
+# Click on the feed area to ensure focus
+xdotool mousemove 960 540 click 1
+sleep 1
+
+# Scroll to top first
+xdotool key Home
+sleep 2
+
+echo "Starting capture..."
+
+for ((i = 1; i <= PAGES; i++)); do
+  # Abort on a failed capture so the manifest never lists a missing file.
+  import -window root "$RUN_DIR/page-$i.png" \
+    || die "screenshot failed for page $i"
+  echo "Captured page $i/$PAGES"
+
+  if ((i < PAGES)); then
+    # Scroll down
+    xdotool key Page_Down
+    sleep "$DELAY"
+  fi
+done
+
+# Generate manifest
+{
+  printf '{\n'
+  printf '  "timestamp": "%s",\n' "$TIMESTAMP"
+  printf '  "pages": %d,\n' "$PAGES"
+  printf '  "files": [\n'
+  for ((i = 1; i <= PAGES; i++)); do
+    comma=","
+    ((i < PAGES)) || comma=""
+    printf '    "page-%d.png"%s\n' "$i" "$comma"
+  done
+  printf '  ]\n'
+  printf '}\n'
+} > "$RUN_DIR/manifest.json"
+
+echo ""
+echo "=== Done ==="
+echo "Captured $PAGES pages to $RUN_DIR"
+echo "Run analysis with: ws x-analyze $RUN_DIR"