# workspace/auto_memory_processor.py

#!/usr/bin/env python3

import json
import sys
import subprocess
import argparse
from pathlib import Path

# Number of messages following a user message that are searched for the
# assistant reply it gets paired with.
_PAIR_LOOKAHEAD = 10


def _flatten_content(content):
    """Return a message's ``content`` field as plain text.

    A string is returned unchanged.  A list is treated as a sequence of
    content parts: the ``text`` of every dict part whose ``type`` is
    ``'text'`` is kept and the pieces are joined with newlines; every other
    part is ignored.  Any other value is converted with ``str()``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        texts = []
        for part in content:
            if isinstance(part, dict) and part.get('type') == 'text':
                text = part.get('text', '')
                # A non-string ``text`` (e.g. JSON null) would make join() raise.
                if isinstance(text, str):
                    texts.append(text)
        return '\n'.join(texts)
    return str(content)


def extract_message_pairs(jsonl_file):
    """Extract user/assistant message pairs from a JSONL session file.

    Each non-blank line is parsed as JSON.  Only objects with
    ``type == 'message'`` whose ``message`` is an object with a ``role`` of
    ``'user'`` or ``'assistant'`` are kept; blank lines, malformed JSON,
    non-object JSON values and every other record are skipped silently.

    Each user message is then paired with the first assistant message found
    among the ``_PAIR_LOOKAHEAD`` messages that follow it.  Scanning resumes
    after that assistant message, so user messages lying between the paired
    user message and its reply are not paired themselves.  A user message
    with no reply inside the window is dropped.

    Args:
        jsonl_file: Path of the session transcript (one JSON value per line).

    Returns:
        A list of dicts with the keys ``user``, ``assistant``,
        ``user_timestamp`` and ``assistant_timestamp``, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    messages = []

    with open(jsonl_file, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Valid JSON is not necessarily an object; anything else has no
            # ``.get`` and cannot be a message record.
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            msg = data.get('message')
            if not isinstance(msg, dict):
                continue
            role = msg.get('role')
            if role not in ('user', 'assistant'):
                continue

            messages.append({
                'role': role,
                'content': _flatten_content(msg.get('content', '')),
                'timestamp': data.get('timestamp', ''),
            })

    # Pair each user message with the assistant message that follows it.
    pairs = []
    total = len(messages)
    i = 0
    while i < total:
        if messages[i]['role'] != 'user':
            i += 1
            continue

        user_msg = messages[i]
        window_end = min(i + 1 + _PAIR_LOOKAHEAD, total)
        reply_index = next(
            (j for j in range(i + 1, window_end)
             if messages[j]['role'] == 'assistant'),
            None,
        )
        if reply_index is None:
            i += 1  # No assistant response in the window; try the next message.
            continue

        assistant_msg = messages[reply_index]
        pairs.append({
            'user': user_msg['content'],
            'assistant': assistant_msg['content'],
            'user_timestamp': user_msg['timestamp'],
            'assistant_timestamp': assistant_msg['timestamp'],
        })
        i = reply_index + 1  # Resume after the assistant message just used.

    return pairs

def send_to_memory_hook(pair_data, hook_script, agent_id="case", session="main", timeout=30):
    """Send one user/assistant pair to the auto-memory hook script.

    The pair is serialised as a JSON object with the keys ``user``,
    ``assistant``, ``agent_id`` and ``session`` and written to the standard
    input of ``python3 <hook_script>``.

    Args:
        pair_data: Mapping with at least the keys ``'user'`` and
            ``'assistant'``.
        hook_script: Path of the hook script to run.
        agent_id: Value sent as ``agent_id`` in the payload.
        session: Value sent as ``session`` in the payload.
        timeout: Seconds to wait for the hook before giving up.

    Returns:
        ``True`` if the hook exited with status 0; ``False`` if it exited
        with another status, timed out, or could not be started.

    Raises:
        KeyError: If ``pair_data`` lacks ``'user'`` or ``'assistant'``.
    """
    payload = json.dumps({
        "user": pair_data["user"],
        "assistant": pair_data["assistant"],
        "agent_id": agent_id,
        "session": session,
    })

    try:
        result = subprocess.run(
            ['python3', hook_script],
            input=payload,
            text=True,
            capture_output=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print("Warning: Memory hook script timed out")
        return False
    except Exception as e:
        # Best effort: a hook that cannot be launched must not abort the run.
        print(f"Error running memory hook script: {e}")
        return False

    if result.returncode != 0:
        # stderr is captured, so it has to be surfaced here or the reason
        # for the failure is lost.
        detail = (result.stderr or "").strip()
        message = f"Memory hook script exited with status {result.returncode}"
        print(f"{message}: {detail}" if detail else message)
        return False
    return True

def main(argv=None):
    """Index the most recent turns of a session transcript via the memory hook.

    Parses the command line, extracts user/assistant pairs from the session
    file, and sends the last ``--num-turns`` pairs to the hook script one at
    a time, printing progress as it goes.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            (the default) uses the process arguments.

    Returns:
        ``1`` if the session file or the hook script does not exist,
        otherwise ``0`` (also when individual turns fail; failures are only
        reported in the output).

    Raises:
        SystemExit: On invalid arguments, including a negative
            ``--num-turns`` (raised by argparse).
    """
    parser = argparse.ArgumentParser(description='Process session transcript for auto-memory indexing')
    parser.add_argument('--session-file', required=True, help='Path to session JSONL file')
    parser.add_argument('--memory-hook', default='/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py',
                       help='Path to auto-memory-hook.py script')
    parser.add_argument('--num-turns', type=int, default=10, help='Number of assistant turns to process')

    args = parser.parse_args(argv)

    # A negative count would turn the "last N" slice below into "skip the
    # first N", silently processing the wrong turns.
    if args.num_turns < 0:
        parser.error('--num-turns must be zero or greater')

    if not Path(args.session_file).exists():
        print(f"Error: Session file {args.session_file} does not exist")
        return 1

    if not Path(args.memory_hook).exists():
        print(f"Error: Memory hook script {args.memory_hook} does not exist")
        return 1

    print(f"Processing session file: {args.session_file}")
    print(f"Memory hook script: {args.memory_hook}")

    # Extract message pairs
    pairs = extract_message_pairs(args.session_file)

    if not pairs:
        print("No user/assistant message pairs found")
        return 0

    # Take the last N turns.  pairs[-0:] is the whole list, so a count of
    # zero needs its own branch to mean "process nothing".
    recent_pairs = pairs[-args.num_turns:] if args.num_turns > 0 else []
    print(f"Found {len(pairs)} total pairs, processing last {len(recent_pairs)} turns")

    success_count = 0
    for turn, pair in enumerate(recent_pairs, start=1):
        print(f"Processing turn {turn}/{len(recent_pairs)}")
        if send_to_memory_hook(pair, args.memory_hook):
            success_count += 1
        else:
            print(f"Failed to process turn {turn}")

    print(f"Successfully processed {success_count}/{len(recent_pairs)} turns")
    return 0

# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    raise SystemExit(main())