Files
workspace/extract_assistant_turns.py

82 lines
3.3 KiB
Python

#!/usr/bin/env python3
import json
import os
import glob
import sys
from datetime import datetime
# Assistant replies with this many characters or fewer are treated as noise.
_MIN_ASSISTANT_CHARS = 10


def _safe_mtime(path):
    """Return the modification time of *path*, or 0.0 if it cannot be read."""
    try:
        return os.path.getmtime(path)
    except OSError:
        # The file may have vanished between globbing and sorting; sort it last.
        return 0.0


def _message_text(message):
    """Return the concatenated text carried by a message's ``content``.

    ``content`` may be a plain string or a list of parts. Only parts that are
    dicts with ``type == "text"`` and a string ``text`` contribute; anything
    else is ignored instead of raising.
    """
    content = message.get("content")
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    return "".join(
        part["text"]
        for part in content
        if isinstance(part, dict)
        and part.get("type") == "text"
        and isinstance(part.get("text"), str)
    )


def _load_messages(session_file):
    """Return the ``message`` dicts of every well-formed message record.

    Lines that are not valid JSON, are not JSON objects, are not of
    ``type == "message"``, or whose ``message`` is not a dict are skipped.
    """
    messages = []
    with open(session_file, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line.strip())
            except json.JSONDecodeError:
                continue
            if not isinstance(data, dict) or data.get("type") != "message":
                continue
            message = data.get("message")
            if isinstance(message, dict):
                messages.append(message)
    return messages


def extract_assistant_turns(sessions_dir, max_turns=10, max_sessions=5):
    """Extract up to ``max_turns`` user/assistant exchanges from session files.

    The ``*.jsonl`` files in ``sessions_dir`` are visited newest first (by
    modification time). Within each file, every user message that is
    immediately followed by an assistant message forms one turn. Turns are
    skipped when the user text is empty or starts with ``"[cron:"``, or when
    the assistant text has 10 characters or fewer (after stripping).

    NOTE(review): within a file, pairs are collected from the beginning, so
    with a small ``max_turns`` the result holds the earliest turns of the
    newest session rather than its most recent ones -- confirm this is the
    intended selection.

    Args:
        sessions_dir: Directory containing the ``*.jsonl`` session files.
        max_turns: Maximum number of turns to return; values <= 0 yield ``[]``.
        max_sessions: How many of the most recent session files to scan;
            ``None`` scans all of them.

    Returns:
        A list of dicts with the keys ``"user"``, ``"assistant"``,
        ``"agent_id"`` (always ``"case"``) and ``"session"`` (always
        ``"main"``).
    """
    if max_turns <= 0:
        return []

    files = glob.glob(os.path.join(sessions_dir, "*.jsonl"))
    files.sort(key=_safe_mtime, reverse=True)
    if max_sessions is not None:
        files = files[:max(max_sessions, 0)]

    turns = []
    for session_file in files:
        try:
            messages = _load_messages(session_file)
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: an unreadable session must not abort the whole run.
            print(f"Error processing {session_file}: {e}", file=sys.stderr)
            continue

        # Walk adjacent message pairs looking for user -> assistant exchanges.
        for current_msg, next_msg in zip(messages, messages[1:]):
            if current_msg.get("role") != "user":
                continue
            if next_msg.get("role") != "assistant":
                continue

            # Strip before filtering so whitespace padding cannot bypass the
            # cron-prefix or minimum-length checks.
            user_text = _message_text(current_msg).strip()
            assistant_text = _message_text(next_msg).strip()

            if not user_text or user_text.startswith("[cron:"):
                continue
            if len(assistant_text) <= _MIN_ASSISTANT_CHARS:
                continue

            turns.append({
                "user": user_text,
                "assistant": assistant_text,
                "agent_id": "case",
                "session": "main",
            })
            if len(turns) >= max_turns:
                return turns

    return turns
if __name__ == "__main__":
    # Default location of the session logs; an optional first command-line
    # argument overrides it so the script is not tied to one user's home.
    default_sessions_dir = "/home/wdjones/.openclaw/agents/main/sessions/"
    sessions_dir = sys.argv[1] if len(sys.argv) > 1 else default_sessions_dir

    turns = extract_assistant_turns(sessions_dir, max_turns=10)
    # Summary line first, then one JSON object per line for the auto-memory hook.
    print(f"Extracted {len(turns)} assistant turns")
    for turn in turns:
        print(json.dumps(turn))