Files
workspace/tools/extract_turns.py

69 lines
2.5 KiB
Python

#!/usr/bin/env python3
import json
import sys
from collections import deque
def _content_to_text(content):
    """Flatten a message ``content`` value into plain text.

    Strings are returned unchanged. Lists are treated as sequences of
    content items: the ``text`` of every dict item whose ``type`` is
    ``'text'`` is concatenated, and every other item is ignored. ``None``
    yields an empty string; any other value is passed through ``str()``.
    """
    if content is None:
        return ''
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Skip malformed items (non-dicts, non-string text) instead of
        # raising, so one odd item cannot abort the whole extraction.
        return ''.join(
            item['text']
            for item in content
            if isinstance(item, dict)
            and item.get('type') == 'text'
            and isinstance(item.get('text'), str)
        )
    return str(content)


def extract_last_assistant_turns(file_path, num_turns=10):
    """Extract the last N assistant turns with their preceding user messages.

    The file is read as JSON Lines. Only records that are JSON objects with
    ``type == 'message'`` and a ``message`` object whose ``role`` is
    ``'user'`` or ``'assistant'`` are considered; everything else
    (unparseable lines, other record types) is skipped.

    Each assistant message is paired with the most recent user message that
    appears before it. If there is no such user message, or its extracted
    text is empty, the assistant message is not emitted.

    Args:
        file_path: Path of the JSONL file to read (decoded as UTF-8).
        num_turns: Maximum number of turns to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of at most ``num_turns`` dicts, oldest first, each with the
        keys ``'user'``, ``'assistant'``, ``'agent_id'`` and ``'session'``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Guard explicitly: a "-0" slice would select everything, and deque
    # rejects a negative maxlen.
    if num_turns <= 0:
        return []

    # A bounded deque keeps only the newest turns, so memory stays
    # proportional to num_turns rather than to the file size.
    recent_turns = deque(maxlen=num_turns)
    last_user_text = None

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line.strip())
            except json.JSONDecodeError:
                # Blank or corrupt line: best-effort extraction, skip it.
                continue

            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            message_data = data.get('message')
            if not isinstance(message_data, dict):
                continue

            role = message_data.get('role')
            if role == 'user':
                # Remember the latest user text; this replaces a backward
                # scan for every assistant message.
                last_user_text = _content_to_text(message_data.get('content'))
            elif role == 'assistant' and last_user_text:
                recent_turns.append({
                    'user': last_user_text,
                    'assistant': _content_to_text(message_data.get('content')),
                    'agent_id': 'case',
                    'session': 'main',
                })

    return list(recent_turns)
if __name__ == '__main__':
    # Command-line entry point: expects exactly one positional argument,
    # the path of the JSONL file to read.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python3 extract_turns.py <jsonl_file>")
        sys.exit(1)

    # Write each extracted turn to stdout as one JSON object per line.
    for record in extract_last_assistant_turns(cli_args[0]):
        print(json.dumps(record))