#!/usr/bin/env python3
import glob
import json
import os
import subprocess
import sys


def clean_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, carriage returns and tabs count as whitespace, and leading
    and trailing whitespace is removed.

    Args:
        text: The string to normalize. ``None`` and ``""`` are accepted.

    Returns:
        The normalized string, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    # str.split() with no separator already splits on \n, \r and \t.
    return ' '.join(text.split())


def _text_from_content(content):
    """Return the text carried by a message ``content`` value.

    A string is returned unchanged. For a list, the ``text`` of every
    ``{'type': 'text'}`` item is concatenated; items that are not dicts or
    whose ``text`` is not a string are ignored. Anything else gives ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return ''.join(
            item['text']
            for item in content
            if isinstance(item, dict)
            and item.get('type') == 'text'
            and isinstance(item.get('text'), str)
        )
    return ''


def _message_role(record):
    """Return ``record['message']['role']``, or None if that path is absent."""
    message = record.get('message')
    if isinstance(message, dict):
        return message.get('role')
    return None


def _load_message_records(lines):
    """Parse JSONL *lines* and keep the objects whose ``type`` is 'message'."""
    records = []
    for line in lines:
        try:
            data = json.loads(line.strip())
        except json.JSONDecodeError:
            continue  # blank or corrupt line
        # A line may hold valid JSON that is not an object (e.g. a number).
        if isinstance(data, dict) and data.get('type') == 'message':
            records.append(data)
    return records


def extract_pairs_from_session(file_path):
    """Extract user-assistant pairs from a session file.

    The file is read as JSON Lines. Among the records whose ``type`` is
    ``'message'``, every user message immediately followed by an assistant
    message yields one pair, provided both texts are non-empty after
    cleaning and the assistant record has a ``timestamp``. A malformed
    record only prevents the pair it belongs to; it does not stop
    processing of the rest of the file.

    Args:
        file_path: Path of the ``.jsonl`` session file.

    Returns:
        A list of dicts with the keys ``user``, ``assistant``, ``agent_id``,
        ``session`` and ``timestamp`` (the assistant record's timestamp).
        The list is empty when the file cannot be read or decoded; in that
        case the error is reported on stderr.
    """
    pairs = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            messages = _load_message_records(f)
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError raised while reading.
        print(f"Error processing {file_path}: {e}", file=sys.stderr)
        return pairs

    for user_record, assistant_record in zip(messages, messages[1:]):
        if _message_role(user_record) != 'user':
            continue
        if _message_role(assistant_record) != 'assistant':
            continue
        if 'timestamp' not in assistant_record:
            continue

        user_text = clean_text(
            _text_from_content(user_record['message'].get('content')))
        assistant_text = clean_text(
            _text_from_content(assistant_record['message'].get('content')))

        if user_text and assistant_text:
            pairs.append({
                'user': user_text,
                'assistant': assistant_text,
                'agent_id': 'case',
                'session': 'main',
                'timestamp': assistant_record['timestamp'],
            })

    return pairs
def main(sessions_dir="/home/wdjones/.openclaw/agents/main/sessions/",
         max_files=10, max_pairs=10):
    """Print the latest user-assistant pairs from recent sessions as JSON lines.

    The ``*.jsonl`` files under *sessions_dir* (searched recursively) are
    ranked by modification time, the newest *max_files* are parsed with
    ``extract_pairs_from_session``, and the *max_pairs* pairs with the
    greatest ``timestamp`` are written to stdout, one JSON object per line,
    oldest first and without the ``timestamp`` key. A summary line and any
    errors go to stderr.

    The directory is walked in Python rather than through a shell
    pipeline, so paths containing spaces or shell metacharacters are safe
    and a missing directory is actually reported.

    Args:
        sessions_dir: Directory that holds the session files.
        max_files: How many of the most recently modified files to read.
        max_pairs: How many of the latest pairs to print.

    Returns:
        None.
    """
    # Imported locally so this entry point is self-contained.
    import json
    import os
    import sys

    if not os.path.isdir(sessions_dir):
        print("Error finding session files", file=sys.stderr)
        return

    # Collect (mtime, path) for regular, non-symlink *.jsonl files.
    dated_files = []
    for dirpath, _dirnames, filenames in os.walk(sessions_dir):
        for name in filenames:
            if not name.endswith('.jsonl'):
                continue
            path = os.path.join(dirpath, name)
            if os.path.islink(path) or not os.path.isfile(path):
                continue
            try:
                dated_files.append((os.path.getmtime(path), path))
            except OSError:
                continue  # file disappeared between listing and stat

    dated_files.sort(reverse=True)  # newest first
    recent_files = [path for _mtime, path in dated_files[:max_files]]

    all_pairs = []
    for path in recent_files:
        all_pairs.extend(extract_pairs_from_session(path))

    # Sort by timestamp and keep the tail; a plain [-0:] slice would keep all.
    all_pairs.sort(key=lambda pair: pair['timestamp'])
    latest_pairs = all_pairs[-max_pairs:] if max_pairs > 0 else []

    print(f"Extracted {len(latest_pairs)} assistant turns from recent sessions",
          file=sys.stderr)

    for pair in latest_pairs:
        # The timestamp is only needed for ordering, not in the output.
        output_pair = {k: v for k, v in pair.items() if k != 'timestamp'}
        print(json.dumps(output_pair, ensure_ascii=True))


if __name__ == "__main__":
    main()