# workspace/tools/extract-memory-turns.py

#!/usr/bin/env python3

import json
import sys
import subprocess
import os
import glob
from datetime import datetime

def _message_text(message):
    """Return the concatenated text carried by a message's ``content``.

    ``content`` is normally a list of parts, of which only those with
    ``type == 'text'`` contribute. A plain-string ``content`` is returned
    as-is (NOTE(review): some transcript formats store content this way --
    confirm against the session writer). Anything else yields ``''``.
    """
    content = message.get('content')
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ''
    return ''.join(
        part['text']
        for part in content
        if isinstance(part, dict)
        and part.get('type') == 'text'
        and isinstance(part.get('text'), str)
    )


def extract_conversations_from_file(filepath):
    """Extract user/assistant conversation pairs from a JSONL session file.

    Each line of the file is parsed as JSON; only objects with
    ``type == 'message'`` and a dict-valued ``message`` field are considered.
    Every assistant message is paired with the most recent preceding user
    message, and a user message is consumed by the first assistant message
    that follows it.

    A pair is dropped when the user text starts with ``'[cron:'`` or when
    either side has no non-whitespace text.

    Args:
        filepath: Path to the JSONL session file.

    Returns:
        A list of dicts with the keys ``user``, ``assistant``, ``agent_id``,
        ``session`` and ``timestamp`` (the assistant record's timestamp, or
        ``''`` when absent), in file order. An unreadable file yields ``[]``;
        malformed lines are skipped.
    """
    messages = []
    try:
        # errors='replace' keeps one bad byte from discarding the whole file.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except ValueError:  # includes json.JSONDecodeError
                    continue
                if (isinstance(data, dict)
                        and data.get('type') == 'message'
                        and isinstance(data.get('message'), dict)):
                    messages.append(data)
    except OSError:
        # Best-effort: a missing or unreadable session contributes nothing.
        return []

    pairs = []
    user_msg = None
    for msg in messages:
        role = msg['message'].get('role')
        if role == 'user':
            user_msg = msg
        elif role == 'assistant' and user_msg is not None:
            user_content = _message_text(user_msg['message'])
            assistant_content = _message_text(msg['message'])

            # Skip cron job messages and turns without text on both sides.
            if (not user_content.startswith('[cron:')
                    and user_content.strip()
                    and assistant_content.strip()):
                pairs.append({
                    'user': user_content.strip(),
                    'assistant': assistant_content.strip(),
                    'agent_id': 'case',
                    'session': 'main',
                    'timestamp': msg.get('timestamp', ''),
                })
            # The user message is consumed even when the pair is skipped.
            user_msg = None

    return pairs

def main(sessions_dir='/home/wdjones/.openclaw/agents/main/sessions/',
         hook_script='/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py',
         max_pairs=10, max_files=10):
    """Feed the most recent conversation pairs to the auto-memory hook.

    Scans ``*.jsonl`` files in ``sessions_dir`` from most to least recently
    modified, collects conversation pairs with
    ``extract_conversations_from_file`` until at least ``max_pairs`` are
    found or ``max_files`` files have been read, then pipes each of the last
    ``max_pairs`` pairs as JSON to ``hook_script`` on stdin, one subprocess
    per pair. Progress and any hook stderr output are printed to stdout.

    Args:
        sessions_dir: Directory containing the JSONL session files.
        hook_script: Path of the Python script that receives each pair.
        max_pairs: Maximum number of pairs to forward.
        max_files: Maximum number of session files to inspect.
    """
    # Collect session files sorted by modification time (newest first).
    session_files = []
    for filepath in glob.glob(os.path.join(sessions_dir, '*.jsonl')):
        try:
            mtime = os.path.getmtime(filepath)
        except OSError:
            # The file disappeared between glob() and stat(); ignore it.
            continue
        session_files.append((mtime, filepath))

    session_files.sort(reverse=True)

    # Extract conversations from recent files until we have enough.
    all_pairs = []
    for _mtime, filepath in session_files[:max_files]:
        pairs = extract_conversations_from_file(filepath)
        # Files are visited newest-first, so pairs from each older file go in
        # front; this keeps all_pairs oldest-to-newest and makes the tail
        # slice below select the most recent turns.
        all_pairs = pairs + all_pairs
        print(f"Found {len(pairs)} pairs in {os.path.basename(filepath)}")

        if len(all_pairs) >= max_pairs:
            break

    # Slicing already copes with short lists; guard 0 because [-0:] is [:].
    last_pairs = all_pairs[-max_pairs:] if max_pairs > 0 else []

    print(f'Total pairs found: {len(all_pairs)}, processing last {len(last_pairs)}')

    # Pipe each pair to the hook script; one failure must not stop the rest.
    for i, pair in enumerate(last_pairs, start=1):
        try:
            result = subprocess.run(
                ['python3', hook_script],
                input=json.dumps(pair),
                capture_output=True,
                text=True,
            )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            print(f'Pair {i}: Error - {e}')
            continue
        print(f'Pair {i}: Processed (exit code: {result.returncode})')
        if result.stderr:
            print(f'  stderr: {result.stderr.strip()}')

# Run the extraction only when this file is executed as a script.
if __name__ == '__main__':
    main()