Full sync - all projects, memory, configs
This commit is contained in:
69
tools/extract_turns.py
Normal file
69
tools/extract_turns.py
Normal file
@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import sys
|
||||
from collections import deque
|
||||
|
||||
def _content_to_text(content):
    """Flatten a message ``content`` field into a plain string.

    A non-empty list is treated as a sequence of content parts: the ``text``
    of every dict part whose ``type`` is ``'text'`` is concatenated, and any
    other part (non-dict entries, other part types, non-string text) is
    ignored.  A string is returned unchanged.  Anything else -- including an
    empty list -- is passed through ``str()``.
    """
    if isinstance(content, list) and content:
        pieces = []
        for item in content:
            # Skip malformed parts instead of raising AttributeError on them.
            if isinstance(item, dict) and item.get('type') == 'text':
                piece = item.get('text', '')
                if isinstance(piece, str):
                    pieces.append(piece)
        return ''.join(pieces)
    if isinstance(content, str):
        return content
    return str(content)


def extract_last_assistant_turns(file_path, num_turns=10):
    """Extract the last N assistant turns with their preceding user messages.

    The file is read line by line as JSONL.  Only records that are JSON
    objects with ``type == 'message'`` and a ``message`` object whose ``role``
    is ``'user'`` or ``'assistant'`` are considered; every other line
    (blank, invalid JSON, non-object JSON, other record types) is skipped.

    Each assistant message is paired with the text of the most recent user
    message that precedes it.  If that user text is empty, or no user message
    has been seen yet, the assistant message is dropped.

    Args:
        file_path: Path of the JSONL file to read (decoded as UTF-8).
        num_turns: Maximum number of turns to return, counted from the end of
            the file.  Zero or a negative value yields an empty list.

    Returns:
        A list of at most ``num_turns`` dicts, oldest first, each with the
        keys ``'user'``, ``'assistant'``, ``'agent_id'`` (always ``'case'``)
        and ``'session'`` (always ``'main'``).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # ``turns[-0:]`` would be the whole list, so handle "no turns" explicitly.
    if num_turns <= 0:
        return []

    # A bounded deque keeps only the newest ``num_turns`` entries, so the
    # whole transcript never has to be held in memory.
    recent_turns = deque(maxlen=num_turns)
    last_user_text = None

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Blank or corrupt lines are skipped on a best-effort basis.
                continue

            # Valid JSON that is not an object (list, number, ...) carries
            # no message.
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue

            message_data = data.get('message', {})
            if not isinstance(message_data, dict):
                continue

            role = message_data.get('role')
            if role not in ('user', 'assistant'):
                continue

            text = _content_to_text(message_data.get('content', []))

            if role == 'user':
                # Remember the latest user message; it pairs with every
                # assistant message until the next user message arrives.
                last_user_text = text
            elif last_user_text:
                recent_turns.append({
                    'user': last_user_text,
                    'assistant': text,
                    'agent_id': 'case',
                    'session': 'main',
                })

    return list(recent_turns)
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: exactly one positional argument, the path of
    # the JSONL file, is required.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python3 extract_turns.py <jsonl_file>")
        sys.exit(1)

    (jsonl_path,) = cli_args

    # Emit each extracted turn as one JSON object per line on stdout.
    for record in extract_last_assistant_turns(jsonl_path):
        print(json.dumps(record))
|
||||
Reference in New Issue
Block a user