Files
workspace/extract-turns-fixed.py

85 lines
2.9 KiB
Python

#!/usr/bin/env python3
import json
import sys
import re
from pathlib import Path
def clean_content(text):
    """Normalize message text into a single clean line.

    Control characters (C0, DEL, C1) and surrogate code points are replaced
    with spaces, literal backslash-u escape sequences (a backslash, ``u`` and
    four hex digits) that appear in the text are blanked out, and every run
    of whitespace is collapsed to one space.  No JSON escaping is performed
    here; that is left to the JSON encoder.

    Args:
        text: The raw text.  Any falsy value (``None``, ``""``) is accepted.

    Returns:
        The cleaned, stripped string, or ``""`` for falsy input.
    """
    if not text:
        return ""
    # A surrogate inside a Python str is always unpaired (valid pairs are
    # decoded to a single code point), and would be re-emitted by json.dumps
    # as an invalid lone escape, so drop it together with control characters.
    text = re.sub(r'[\x00-\x1f\x7f-\x9f\ud800-\udfff]', ' ', text)
    # Blank out escape sequences that survive as literal text.
    text = re.sub(r'\\u[0-9a-fA-F]{4}', ' ', text)
    # Collapse the whitespace left behind by the replacements above.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def _message_text(msg):
    """Return the concatenated text of a message dict's ``content`` field."""
    content = msg.get('content')
    if isinstance(content, str):
        # Plain-string content is already the text; iterating it item by
        # item would silently discard it.
        return content
    if not isinstance(content, list):
        return ""
    return "".join(
        item['text']
        for item in content
        if isinstance(item, dict)
        and item.get('type') == 'text'
        and isinstance(item.get('text'), str)
    )


def extract_last_assistant_turns(session_file, count=10):
    """Extract the last N assistant turns with their preceding user messages.

    The session file is read as JSONL.  Only records whose ``type`` is
    ``'message'`` are considered; their text parts are concatenated and
    passed through ``clean_content``.  Messages shorter than 10 characters
    or starting with ``System:`` / ``[System`` are ignored.  Each user
    message is paired with the first assistant message that follows it.

    Args:
        session_file: Path of the JSONL session log.
        count: Maximum number of turns to return.  Zero or a negative value
            yields an empty list.

    Returns:
        A list (oldest first) of dicts with the keys ``user`` (truncated to
        500 characters), ``assistant`` (truncated to 1000 characters),
        ``agent_id`` and ``session``.

    Raises:
        OSError: If the session file cannot be opened.
    """
    if count <= 0:
        # turns[-0:] would be the whole list, not an empty one.
        return []

    messages = []
    # Decode explicitly as UTF-8 instead of the locale default; undecodable
    # bytes are replaced so one damaged line cannot abort the extraction.
    with open(session_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue  # best effort: skip malformed lines
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            msg = data.get('message')
            if not isinstance(msg, dict) or 'role' not in msg:
                continue
            content = clean_content(_message_text(msg))
            # Skip very short content and system messages.
            if len(content) < 10:
                continue
            if content.startswith(('System:', '[System')):
                continue
            # The record's timestamp is not needed for pairing, so records
            # lacking one are still used.
            messages.append({'role': msg['role'], 'content': content})

    turns = []
    pending_user = ""
    for msg in messages:
        role = msg['role']
        if role == 'user':
            pending_user = msg['content']
        elif role == 'assistant' and pending_user:
            # Replies of exactly 10 characters are still dropped here.
            if len(msg['content']) > 10:
                turns.append({
                    'user': pending_user[:500],  # truncate long messages
                    'assistant': msg['content'][:1000],
                    'agent_id': 'case',
                    'session': 'main',
                })
            # The user message is consumed even when the reply was dropped,
            # so a later assistant message is never paired with it.
            pending_user = ""
    return turns[-count:]
if __name__ == "__main__":
    # Session log to read: the first command-line argument when given,
    # otherwise the built-in default path.
    cli_args = sys.argv[1:]
    if cli_args:
        session_file = cli_args[0]
    else:
        session_file = "/home/wdjones/.openclaw/agents/main/sessions/2c022034-fccc-4c2c-b8f0-dce45ad22e68.jsonl"
    # Print every extracted turn as one ASCII-only JSON object per line on
    # stdout (presumably consumed by the auto-memory hook; confirm against
    # the caller).
    for turn in extract_last_assistant_turns(session_file):
        print(json.dumps(turn, ensure_ascii=True))