#!/usr/bin/env python3
"""Send recent user/assistant turns from a JSONL session file to a memory hook.

The script reads a session transcript (one JSON object per line), pairs each
user message with the assistant message that follows it, and pipes the last
N pairs, one at a time, to an external hook script as JSON on stdin.
"""
import argparse
import json
import subprocess
import sys
from pathlib import Path


def _content_to_text(content):
    """Flatten a message ``content`` value into a plain string.

    ``content`` is either a plain string or a list of part objects; for a
    list, only parts whose ``type`` is ``'text'`` contribute, joined by
    newlines. ``None`` yields an empty string so a null content is not
    recorded as the literal text ``"None"``. Any other value is stringified.
    """
    if content is None:
        return ''
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for item in content:
            if isinstance(item, dict) and item.get('type') == 'text':
                text = item.get('text', '')
                # A non-string ``text`` would make str.join raise; treat it
                # as empty instead of aborting the whole transcript.
                parts.append(text if isinstance(text, str) else '')
        return '\n'.join(parts)
    return str(content)


def _read_messages(jsonl_file):
    """Return the user/assistant messages found in *jsonl_file*, in order.

    Blank lines, lines that are not valid JSON, JSON values that are not
    objects, and records that are not ``type == 'message'`` with a dict
    ``message`` are skipped.
    """
    messages = []
    with open(jsonl_file, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            # json.loads may return a list/str/number for a valid line;
            # those have no .get(), so filter them out explicitly.
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            msg = data.get('message')
            if not isinstance(msg, dict):
                continue
            role = msg.get('role')
            if role not in ('user', 'assistant'):
                continue
            messages.append({
                'role': role,
                'content': _content_to_text(msg.get('content', '')),
                'timestamp': data.get('timestamp', ''),
            })
    return messages


def extract_message_pairs(jsonl_file, lookahead=10):
    """Extract user/assistant message pairs from a JSONL session file.

    Each user message is paired with the first assistant message found among
    the *lookahead* messages that follow it. Scanning then resumes after that
    assistant message, so user messages skipped inside the window are not
    paired separately. A user message with no assistant reply inside the
    window produces no pair.

    Args:
        jsonl_file: Path to the session transcript, one JSON object per line.
        lookahead: How many following messages to inspect for a reply.

    Returns:
        A list of dicts with keys ``user``, ``assistant``, ``user_timestamp``
        and ``assistant_timestamp``, in transcript order.
    """
    messages = _read_messages(jsonl_file)
    total = len(messages)

    pairs = []
    i = 0
    while i < total:
        if messages[i]['role'] != 'user':
            i += 1
            continue

        user_msg = messages[i]
        # Window is the `lookahead` messages after i: indexes i+1 .. i+lookahead.
        window_end = min(i + 1 + lookahead, total)
        reply_index = next(
            (j for j in range(i + 1, window_end)
             if messages[j]['role'] == 'assistant'),
            None,
        )
        if reply_index is None:
            i += 1  # No assistant response found, try the next message
            continue

        assistant_msg = messages[reply_index]
        pairs.append({
            'user': user_msg['content'],
            'assistant': assistant_msg['content'],
            'user_timestamp': user_msg['timestamp'],
            'assistant_timestamp': assistant_msg['timestamp'],
        })
        i = reply_index + 1  # Continue after the assistant message

    return pairs


def send_to_memory_hook(pair_data, hook_script):
    """Send a user/assistant pair to the auto-memory hook script.

    The pair is serialized to JSON together with the fixed ``agent_id`` and
    ``session`` values below and written to the hook's stdin. The hook is run
    with ``python3`` and a 30 second timeout.

    Args:
        pair_data: Mapping with at least ``"user"`` and ``"assistant"`` keys.
        hook_script: Path to the hook script to execute.

    Returns:
        True if the hook exited with status 0, False on a non-zero exit,
        a timeout, or any error while launching the process.
    """
    json_input = json.dumps({
        "user": pair_data["user"],
        "assistant": pair_data["assistant"],
        "agent_id": "case",
        "session": "main"
    })

    try:
        result = subprocess.run(
            ['python3', hook_script],
            input=json_input,
            text=True,
            capture_output=True,
            timeout=30
        )
    except subprocess.TimeoutExpired:
        print("Warning: Memory hook script timed out")
        return False
    except Exception as e:
        # Best-effort boundary: one failing turn must not abort the run.
        print(f"Error running memory hook script: {e}")
        return False

    if result.returncode != 0:
        # Surface the hook's own diagnostics instead of discarding them.
        detail = (result.stderr or '').strip()
        if detail:
            print(f"Warning: Memory hook exited with code {result.returncode}: {detail}")
        return False
    return True


def main(argv=None):
    """Parse arguments, extract pairs and feed the most recent ones to the hook.

    Args:
        argv: Argument list to parse; ``None`` means the process arguments.

    Returns:
        1 if the session file or the hook script does not exist, otherwise 0
        (including when individual turns fail to process).
    """
    parser = argparse.ArgumentParser(description='Process session transcript for auto-memory indexing')
    parser.add_argument('--session-file', required=True, help='Path to session JSONL file')
    parser.add_argument('--memory-hook', default='/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py',
                        help='Path to auto-memory-hook.py script')
    parser.add_argument('--num-turns', type=int, default=10, help='Number of assistant turns to process')

    args = parser.parse_args(argv)

    if not Path(args.session_file).exists():
        print(f"Error: Session file {args.session_file} does not exist")
        return 1

    if not Path(args.memory_hook).exists():
        print(f"Error: Memory hook script {args.memory_hook} does not exist")
        return 1

    print(f"Processing session file: {args.session_file}")
    print(f"Memory hook script: {args.memory_hook}")

    # Extract message pairs
    pairs = extract_message_pairs(args.session_file)

    if not pairs:
        print("No user/assistant message pairs found")
        return 0

    # Take the last N turns. A plain pairs[-n:] would return *every* pair for
    # n == 0 (because -0 == 0) and slice from the front for negative n, so
    # non-positive counts are mapped to "no turns" explicitly.
    turn_count = max(args.num_turns, 0)
    recent_pairs = pairs[-turn_count:] if turn_count else []

    print(f"Found {len(pairs)} total pairs, processing last {len(recent_pairs)} turns")

    success_count = 0
    for turn_number, pair in enumerate(recent_pairs, start=1):
        print(f"Processing turn {turn_number}/{len(recent_pairs)}")

        if send_to_memory_hook(pair, args.memory_hook):
            success_count += 1
        else:
            print(f"Failed to process turn {turn_number}")

    print(f"Successfully processed {success_count}/{len(recent_pairs)} turns")
    return 0


if __name__ == '__main__':
    sys.exit(main())