85 lines
2.9 KiB
Python
85 lines
2.9 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import json
|
|
import sys
|
|
import re
|
|
from pathlib import Path
|
|
|
|
def clean_content(text):
    """Flatten ``text`` into a single trimmed line of printable characters.

    Falsy input (``None``, ``""``) yields ``""``. Otherwise three
    substitutions are applied in order, each replacing its matches with a
    single space, and the result is stripped:

    1. C0/C1 control characters (``\\x00-\\x1f`` and ``\\x7f-\\x9f``);
    2. literal six-character ``\\uXXXX`` escape sequences left in the text;
    3. runs of whitespace (which also collapses the spaces added above).
    """
    if not text:
        return ""

    # Order matters: whitespace collapsing must run last so the spaces
    # introduced by the first two passes are merged as well.
    patterns = (
        r'[\x00-\x1f\x7f-\x9f]',
        r'\\u[0-9a-fA-F]{4}',
        r'\s+',
    )

    cleaned = text
    for pattern in patterns:
        cleaned = re.sub(pattern, ' ', cleaned)

    return cleaned.strip()
|
|
|
|
def _collect_text(raw_content):
    """Return the concatenated text carried by a message ``content`` value.

    A plain string is returned as-is; a list contributes the ``text`` of each
    ``{"type": "text", ...}`` item; any other shape yields ``""``.
    """
    if isinstance(raw_content, str):
        return raw_content
    if not isinstance(raw_content, (list, tuple)):
        return ""

    parts = []
    for item in raw_content:
        if isinstance(item, dict) and item.get('type') == 'text':
            text = item.get('text', '')
            # A null / non-string "text" would otherwise break concatenation.
            if isinstance(text, str):
                parts.append(text)
    return "".join(parts)


def extract_last_assistant_turns(session_file, count=10):
    """Extract the last ``count`` assistant turns with their preceding user message.

    The file is read as JSONL. Only entries with ``type == "message"`` that
    carry a ``message`` object, a ``role`` and a ``timestamp`` are considered.
    Entries whose cleaned text is shorter than 10 characters, or starts with
    ``System:`` / ``[System``, are ignored. Lines that are not valid JSON or
    do not have the expected shape are skipped rather than raising.

    Each user message is paired with the first assistant message that follows
    it; the pair is kept when the user text is longer than 5 characters and
    the assistant text longer than 10.

    Args:
        session_file: Path of the JSONL session log.
        count: Maximum number of turns to return. Zero or a negative value
            returns an empty list.

    Returns:
        A list (oldest first) of dicts with keys ``user`` (truncated to 500
        characters), ``assistant`` (truncated to 1000), ``agent_id`` and
        ``session``.

    Raises:
        OSError: If ``session_file`` cannot be opened.
    """
    # ``turns[-0:]`` is the whole list, so a non-positive count needs an
    # explicit guard instead of relying on the final slice.
    if count <= 0:
        return []

    messages = []
    # JSON text is UTF-8; do not depend on the locale's default encoding, and
    # do not let one damaged byte abort an otherwise best-effort extraction.
    with open(session_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Valid JSON of the wrong shape is skipped like a malformed line.
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            msg = data.get('message')
            if not isinstance(msg, dict):
                continue
            if 'role' not in msg or 'timestamp' not in data:
                continue

            content = clean_content(_collect_text(msg.get('content')))

            # Skip system messages and very short content.
            if len(content) < 10:
                continue
            if content.startswith(('System:', '[System')):
                continue

            messages.append({
                'role': msg['role'],
                'content': content,
                'timestamp': data['timestamp'],
            })

    # Pair each assistant message with the user message that preceded it.
    assistant_turns = []
    user_msg = ""
    for msg in messages:
        if msg['role'] == 'user':
            user_msg = msg['content']
        elif msg['role'] == 'assistant' and user_msg:
            if len(user_msg) > 5 and len(msg['content']) > 10:
                assistant_turns.append({
                    'user': user_msg[:500],  # Truncate long messages
                    'assistant': msg['content'][:1000],
                    'agent_id': 'case',
                    'session': 'main',
                })
            # An assistant reply consumes the pending user message, so later
            # assistant messages are not paired with it again.
            user_msg = ""

    return assistant_turns[-count:]
|
|
|
|
if __name__ == "__main__":
    # Session log to read: the first command-line argument when given,
    # otherwise the built-in default path.
    cli_args = sys.argv[1:]
    if cli_args:
        session_file = cli_args[0]
    else:
        session_file = "/home/wdjones/.openclaw/agents/main/sessions/2c022034-fccc-4c2c-b8f0-dce45ad22e68.jsonl"

    # Write each extracted turn to stdout as one ASCII-only JSON object per
    # line (presumably consumed by the auto-memory hook -- confirm with the
    # caller of this script).
    for turn in extract_last_assistant_turns(session_file):
        print(json.dumps(turn, ensure_ascii=True))