# workspace/extract-assistant-turns.py

#!/usr/bin/env python3
"""
Extract the last N assistant turns from a session JSONL file and format them for auto-memory-hook.py
"""
import json
import sys
import subprocess
from datetime import datetime

def _message_text(message):
    """Return the concatenated text carried by a message's ``content`` field.

    ``content`` may be a plain string or a list of parts. From a list, only
    dict parts with ``type == 'text'`` and a string ``text`` contribute; any
    other part or shape is ignored.
    """
    content = message.get('content')
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    return "".join(
        part['text']
        for part in content
        if isinstance(part, dict)
        and part.get('type') == 'text'
        and isinstance(part.get('text'), str)
    )


def extract_assistant_turns(jsonl_file, limit=10):
    """Extract assistant turns paired with the most recent user message.

    Each line of ``jsonl_file`` is parsed as JSON. Records that are objects
    with ``type == 'message'`` and a dict ``message`` are kept; blank lines,
    invalid JSON and records of any other shape are skipped individually so
    that one bad line does not discard the rest of the session.

    Args:
        jsonl_file: Path to the session JSONL file (read as UTF-8).
        limit: Maximum number of turns to return, counted from the end.
            A value of zero or less yields an empty list.

    Returns:
        A list of dicts with the keys ``user``, ``assistant``, ``agent_id``
        and ``session``, oldest first. An empty list is returned (and the
        error is printed to stderr) if the file cannot be read.
    """
    messages = []
    try:
        with open(jsonl_file, 'r', encoding='utf-8') as f:
            # Stream the file instead of loading every line up front.
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(data, dict) or data.get('type') != 'message':
                    continue
                message = data.get('message')
                if isinstance(message, dict):
                    messages.append(message)
    except (OSError, UnicodeError) as e:
        print(f"Error processing file: {e}", file=sys.stderr)
        return []

    turns = []
    user_text = ""
    for msg in messages:
        role = msg.get('role')
        if role == 'user':
            # Extracted once per user message rather than once per reply.
            user_text = _message_text(msg).strip()
        elif role == 'assistant' and user_text:
            assistant_text = _message_text(msg).strip()
            if assistant_text:
                turns.append({
                    'user': user_text,
                    'assistant': assistant_text,
                    'agent_id': 'case',
                    'session': 'main'
                })
            # user_text is deliberately not reset: several assistant
            # responses may follow the same user message.

    # turns[-0:] would be the whole list, so guard non-positive limits.
    return turns[-limit:] if limit > 0 else []

def main():
    """Command-line entry point.

    Reads ``<session_file.jsonl>`` and an optional ``[limit]`` (default 10)
    from ``sys.argv``, extracts the most recent turns and pipes each one as
    JSON on stdin to ``auto-memory-hook.py``.

    Exits with status 1 when the arguments are missing or the limit is not a
    positive integer, and when no turns are found. Failures of individual
    hook invocations are reported on stderr but do not stop the run.
    """
    usage = "Usage: python3 extract-assistant-turns.py <session_file.jsonl> [limit]"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    jsonl_file = sys.argv[1]
    limit = 10
    if len(sys.argv) > 2:
        try:
            limit = int(sys.argv[2])
        except ValueError:
            limit = 0  # falls through to the shared rejection below
        if limit < 1:
            print(
                f"Invalid limit {sys.argv[2]!r}: expected a positive integer",
                file=sys.stderr,
            )
            print(usage, file=sys.stderr)
            sys.exit(1)

    turns = extract_assistant_turns(jsonl_file, limit)

    if not turns:
        print(f"No assistant turns found in {jsonl_file}")
        sys.exit(1)

    print(f"Found {len(turns)} assistant turns, processing through auto-memory-hook.py...")

    hook_command = [
        'python3', '/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py'
    ]
    failed = 0

    # Pipe each turn to auto-memory-hook.py as a JSON document on stdin.
    for turn in turns:
        try:
            result = subprocess.run(
                hook_command, input=json.dumps(turn), text=True, capture_output=True
            )
        except (OSError, subprocess.SubprocessError, UnicodeError) as e:
            failed += 1
            print(f"Error running auto-memory-hook.py: {e}", file=sys.stderr)
            continue

        if result.returncode != 0:
            failed += 1
            print(f"Error processing turn: {result.stderr}", file=sys.stderr)
        else:
            print(f"✓ Processed turn: {turn['user'][:50]}...")

    if failed:
        # Make partial failure visible next to the completion message.
        print(f"{failed} of {len(turns)} turns failed", file=sys.stderr)

    print(f"Completed processing {len(turns)} turns")

# Run the command-line interface only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()