Full sync - all projects, memory, configs
This commit is contained in:
85
extract-turns-fixed.py
Normal file
85
extract-turns-fixed.py
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import sys
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
def clean_content(text):
    r"""Normalize message text into a single, trimmed line.

    Args:
        text: The raw text to clean. Any falsy value (``None``, ``""``)
            is treated as "no content".

    Returns:
        ``""`` for falsy input; otherwise the text with
          * C0/C1 control characters and DEL (``\x00-\x1f``, ``\x7f-\x9f``)
            replaced by a space,
          * literal six-character ``\uXXXX`` escape sequences (a backslash,
            ``u`` and four hex digits present in the text itself) replaced
            by a space,
          * every run of whitespace collapsed to one space, and
          * leading/trailing whitespace removed.

    Note:
        This function only strips and normalizes; it performs no JSON
        escaping itself. Serialization is left to the caller.
    """
    if not text:
        return ""

    # Control characters (including newlines and tabs) become spaces so
    # that words on either side of them do not get glued together.
    text = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', text)
    # Drop textual "\uXXXX" escapes that survived in the content as
    # literal characters rather than decoded code points.
    text = re.sub(r'\\u[0-9a-fA-F]{4}', ' ', text)
    # The substitutions above can leave runs of spaces; squeeze them.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
|
||||
|
||||
def extract_last_assistant_turns(session_file, count=10):
    """Extract the last N assistant turns with their preceding user messages.

    The session file is read as JSONL (one JSON document per line). Only
    entries shaped like ``{"type": "message", "message": {"role": ...,
    "content": ...}}`` are considered. ``content`` may be a list of parts,
    of which the ``{"type": "text", "text": ...}`` parts are concatenated,
    or a plain string.

    Messages are skipped when their cleaned text is shorter than 10
    characters or starts with ``System:`` / ``[System``. Lines that are
    not valid JSON, are not JSON objects, or lack a ``role`` are ignored.

    Args:
        session_file: Path of the JSONL session log to read.
        count: Maximum number of turns to return. Values ``<= 0`` yield an
            empty list.

    Returns:
        A list of at most ``count`` dicts, oldest first, each with keys
        ``user`` (first 500 chars), ``assistant`` (first 1000 chars),
        ``agent_id`` and ``session``.

    Raises:
        OSError: If ``session_file`` cannot be opened or read.
    """
    messages = []

    # Explicit UTF-8 so decoding does not depend on the platform locale.
    # Iterating the handle streams the file instead of loading every line.
    with open(session_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue

            # A line may hold valid JSON that is not an object (list,
            # number, ...); such lines cannot be message entries.
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue

            msg = data.get('message')
            if not isinstance(msg, dict) or 'role' not in msg:
                continue

            raw = msg.get('content')
            if isinstance(raw, str):
                # Plain-string content is used as-is rather than dropped.
                text = raw
            elif isinstance(raw, list):
                # Keep only the textual parts of a multi-part content list.
                text = ''.join(
                    part['text']
                    for part in raw
                    if isinstance(part, dict)
                    and part.get('type') == 'text'
                    and isinstance(part.get('text'), str)
                )
            else:
                text = ''

            content = clean_content(text)

            # Skip system messages and very short content.
            if len(content) < 10:
                continue
            if content.startswith(('System:', '[System')):
                continue

            messages.append({
                'role': msg['role'],
                'content': content,
                # Not used for pairing, so a missing timestamp must not
                # cause the whole message to be discarded.
                'timestamp': data.get('timestamp'),
            })

    # Pair each assistant message with the most recent unconsumed user
    # message that preceded it.
    assistant_turns = []
    user_msg = ""

    for msg in messages:
        if msg['role'] == 'user':
            user_msg = msg['content']
        elif msg['role'] == 'assistant' and user_msg:
            # Skip if either message is too short.
            if len(user_msg) > 5 and len(msg['content']) > 10:
                assistant_turns.append({
                    'user': user_msg[:500],  # Truncate long messages
                    'assistant': msg['content'][:1000],
                    'agent_id': 'case',
                    'session': 'main',
                })
            # NOTE(review): the original indentation was lost, so it is
            # unclear whether this reset belonged inside the length check
            # above; here a user message is consumed by the first
            # assistant reply that follows it - confirm.
            user_msg = ""  # Reset for next turn

    # A bare [-count:] with count == 0 is [0:], i.e. *everything*.
    if count <= 0:
        return []
    return assistant_turns[-count:]
|
||||
|
||||
if __name__ == "__main__":
    # Session log to read: the first command-line argument when given,
    # otherwise the built-in default path below.
    if len(sys.argv) > 1:
        session_file = sys.argv[1]
    else:
        session_file = "/home/wdjones/.openclaw/agents/main/sessions/2c022034-fccc-4c2c-b8f0-dce45ad22e68.jsonl"

    try:
        turns = extract_last_assistant_turns(session_file)
    except OSError as exc:
        # Exit with a readable message (written to stderr, status 1)
        # instead of a traceback when the file is missing or unreadable.
        sys.exit("Cannot read session file {}: {}".format(session_file, exc))

    # Print one JSON object per line on stdout. Nothing is sent anywhere
    # from here; presumably the auto-memory hook consumes this output
    # downstream - confirm against the caller.
    for turn in turns:
        print(json.dumps(turn, ensure_ascii=True))
|
||||
Reference in New Issue
Block a user