Files
workspace/tools/extract-and-index-memory.py

125 lines
4.8 KiB
Python

#!/usr/bin/env python3
import json
import subprocess
import sys
import os
import glob
from datetime import datetime
def extract_conversations_from_sessions(
    sessions_dir="/home/wdjones/.openclaw/agents/main/sessions/",
    max_turns=10,
):
    """Extract the most recent substantive conversation turns from session
    logs and pipe each one into the auto-memory hook script.

    Reads every non-deleted ``*.jsonl`` file in ``sessions_dir``, pairs
    consecutive user/assistant messages into turns, filters out cron and
    system noise, then sends the last ``max_turns`` turns (JSON on stdin)
    to ``auto-memory-hook.py``.

    Args:
        sessions_dir: Directory containing ``*.jsonl`` session files.
        max_turns: How many of the most recent turns to index.

    Returns:
        int: Number of turns the hook processed successfully (exit code 0).
    """
    session_files = [
        path
        for path in glob.glob(os.path.join(sessions_dir, "*.jsonl"))
        if ".deleted." not in path
    ]
    # Sort OLDEST first so all_turns accumulates in chronological order and
    # the [-max_turns:] slice below selects the genuinely most recent turns.
    # (The previous newest-first sort made the trailing slice pick turns
    # from the oldest sessions instead.)
    session_files.sort(key=os.path.getmtime)

    all_turns = []
    print(f"Processing {len(session_files)} session files...")
    for session_file in session_files:
        try:
            messages = _load_messages(session_file)
            all_turns.extend(_pair_turns(messages))
        except Exception as e:
            # Best-effort: a corrupt or unreadable file must not abort the run.
            print(f"Error processing {session_file}: {e}")
            continue

    print(f"Found {len(all_turns)} total conversation turns")
    last_turns = all_turns[-max_turns:]
    print(f"Processing last {len(last_turns)} turns")

    processed_count = 0
    for i, turn in enumerate(last_turns):
        if _send_turn_to_hook(i, turn):
            processed_count += 1

    print(f"\nAuto-memory indexing complete: {processed_count}/{len(last_turns)} turns processed successfully")
    return processed_count


def _load_messages(session_file):
    """Parse one .jsonl file into the list of user/assistant message records."""
    messages = []
    with open(session_file, 'r') as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip corrupt/partial lines
            if (data.get('type') == 'message'
                    and data.get('message', {}).get('role') in ('user', 'assistant')):
                messages.append(data)
    return messages


def _content_text(content):
    """Flatten a message content field (string or list of text parts) to text."""
    if isinstance(content, list):
        return ''.join(
            item.get('text', '') for item in content if item.get('type') == 'text'
        )
    return str(content)


def _pair_turns(messages):
    """Pair consecutive user->assistant messages into turn dicts.

    Cron jobs, system messages, and trivially short exchanges are dropped.
    """
    turns = []
    i = 0
    while i < len(messages):
        if (messages[i]['message']['role'] == 'user'
                and i + 1 < len(messages)
                and messages[i + 1]['message']['role'] == 'assistant'):
            user_text = _content_text(messages[i]['message']['content']).strip()
            assistant_text = _content_text(messages[i + 1]['message']['content']).strip()
            # Require substantial content on both sides; skip automated noise.
            if (len(user_text) > 10 and len(assistant_text) > 10
                    and not user_text.startswith('[cron:')
                    and not user_text.startswith('[System Message]')):
                turns.append({
                    'user': user_text,
                    'assistant': assistant_text,
                    'agent_id': 'case',
                    'session': 'main',
                })
            i += 2  # consumed the pair whether or not it passed the filter
        else:
            i += 1
    return turns


def _send_turn_to_hook(index, turn):
    """Send one turn as JSON on stdin to the auto-memory hook.

    Returns True if the hook exited with status 0, False otherwise.
    """
    try:
        result = subprocess.run(
            ['python3', '/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py'],
            input=json.dumps(turn),
            capture_output=True,
            text=True,
        )
    except Exception as e:
        print(f"✗ Turn {index + 1}: Exception: {e}")
        return False
    if result.returncode == 0:
        print(f"✓ Turn {index + 1}: Processed successfully")
        if result.stdout.strip():
            print(f" Output: {result.stdout.strip()}")
        return True
    print(f"✗ Turn {index + 1}: Failed (exit code {result.returncode})")
    if result.stderr.strip():
        print(f" Error: {result.stderr.strip()}")
    return False
if __name__ == "__main__":
extract_conversations_from_sessions()