#!/usr/bin/env python3
"""Extract the most recent user/assistant message pairs from JSONL session logs.

Scans session files (newest first), pairs each user message with the
immediately following assistant message, filters out auto-memory-indexer
cron turns, and prints the last N pairs as JSON lines on stdout.
"""

import glob
import json
import os
import sys


def parse_jsonl_file(file_path):
    """Parse a JSONL session file and extract user/assistant message pairs.

    Each line is expected to be a JSON object; lines that fail to parse or
    whose "type" is not "message" are ignored. A pair is formed only when an
    assistant message directly follows a user message. Returns a list of
    dicts with keys: user, assistant, agent_id, session. On any I/O or
    parsing failure the error is reported on stderr and the pairs collected
    so far (possibly none) are returned — callers treat this as best-effort.
    """
    pairs = []
    try:
        messages = []
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Tolerate partially-written or corrupt lines.
                    continue
                if data.get("type") == "message":
                    messages.append(data)

        i = 0
        while i < len(messages):
            msg = messages[i]
            if msg.get("message", {}).get("role") != "user":
                i += 1
                continue
            user_content = extract_text_content(msg["message"]["content"])
            # Pair with the next message only if it is an assistant reply.
            if (i + 1 < len(messages)
                    and messages[i + 1].get("message", {}).get("role") == "assistant"):
                assistant_content = extract_text_content(
                    messages[i + 1]["message"]["content"])
                # Skip auto-memory indexer cron jobs.
                if not (user_content.startswith("[cron:")
                        and "auto-memory-indexer" in user_content):
                    pairs.append({
                        "user": user_content,
                        "assistant": assistant_content,
                        "agent_id": "case",
                        "session": "main"
                    })
                i += 2  # Consume both messages of the pair.
            else:
                i += 1
    except Exception as e:
        print(f"Error processing {file_path}: {e}", file=sys.stderr)
    return pairs


def extract_text_content(content_array):
    """Extract text from a content array, skipping thinking blocks.

    "text" items contribute their text; "toolCall" items contribute a
    simplified "[Used tool: <name>]" marker; everything else (e.g. thinking
    blocks) is dropped. Parts are joined with single spaces and stripped.
    """
    text_parts = []
    for item in content_array:
        if item.get("type") == "text":
            text_parts.append(item.get("text", ""))
        elif item.get("type") == "toolCall":
            # Include tool calls in a simplified format.
            tool_name = item.get("name", "unknown")
            text_parts.append(f"[Used tool: {tool_name}]")
    return " ".join(text_parts).strip()


def find_session_files(sessions_dir):
    """Return all *.jsonl session files, newest first by modification time."""
    pattern = os.path.join(sessions_dir, "*.jsonl")
    files = glob.glob(pattern)
    return sorted(files, key=os.path.getmtime, reverse=True)


def main(sessions_dir="/home/wdjones/.openclaw/agents/main/sessions/", limit=10):
    """Print the `limit` most recent user/assistant pairs as JSON lines.

    Session files are visited newest-first; each older file's pairs are
    prepended so `all_pairs` stays in chronological order, which makes the
    trailing slice the most recent pairs. (The previous `extend` + `[-10:]`
    combination selected pairs from the *oldest* file processed instead.)
    """
    all_pairs = []
    for session_file in find_session_files(sessions_dir):
        # Prepend: this file is older than everything already collected.
        all_pairs = parse_jsonl_file(session_file) + all_pairs
        if len(all_pairs) >= limit:
            break

    for pair in all_pairs[-limit:]:
        print(json.dumps(pair))


if __name__ == "__main__":
    main()