125 lines
4.8 KiB
Python
125 lines
4.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
import glob
|
|
from datetime import datetime
|
|
|
|
def _session_mtime(path):
    """Return the modification time of *path*, or 0.0 if it cannot be read."""
    try:
        return os.path.getmtime(path)
    except OSError:
        # The file vanished between globbing and sorting; the later open()
        # reports the problem, so just give it a harmless sort position.
        return 0.0


def _content_text(content):
    """Flatten a message ``content`` value into plain text.

    A list is treated as a sequence of typed parts and the ``text`` of every
    ``{"type": "text"}`` part is concatenated; parts of any other shape are
    ignored. Any non-list value is converted with ``str()``.
    """
    if not isinstance(content, list):
        return str(content)
    parts = []
    for item in content:
        if isinstance(item, dict) and item.get('type') == 'text':
            text = item.get('text', '')
            if isinstance(text, str):
                parts.append(text)
    return ''.join(parts)


def _read_session_messages(session_file):
    """Return the user/assistant ``message`` dicts of one JSONL session file.

    Lines that are not valid JSON, or that do not decode to a
    ``{"type": "message", "message": {...}}`` record with a ``user`` or
    ``assistant`` role, are skipped.

    Raises:
        OSError: the file cannot be opened or read.
        UnicodeError: the file is not valid UTF-8.
    """
    messages = []
    with open(session_file, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            message = data.get('message')
            if isinstance(message, dict) and message.get('role') in ('user', 'assistant'):
                messages.append(message)
    return messages


def _pair_turns(messages):
    """Build turn dicts from each user message directly followed by an assistant message."""
    turns = []
    i = 0
    while i + 1 < len(messages):
        if (messages[i].get('role') != 'user' or
                messages[i + 1].get('role') != 'assistant'):
            i += 1
            continue

        user_text = _content_text(messages[i].get('content', '')).strip()
        assistant_text = _content_text(messages[i + 1].get('content', '')).strip()

        # Keep only substantial exchanges; drop cron jobs and system messages.
        if (len(user_text) > 10 and len(assistant_text) > 10 and
                not user_text.startswith(('[cron:', '[System Message]'))):
            turns.append({
                'user': user_text,
                'assistant': assistant_text,
                'agent_id': 'case',
                'session': 'main'
            })

        # The pair is consumed whether or not it passed the filter.
        i += 2
    return turns


def extract_conversations_from_sessions(
        sessions_dir="/home/wdjones/.openclaw/agents/main/sessions/",
        hook_path="/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py",
        max_turns=10):
    """Send the most recent conversation turns to the auto-memory hook.

    Every ``*.jsonl`` file in *sessions_dir* whose name does not contain
    ``.deleted.`` is read, user/assistant pairs are extracted, and the last
    *max_turns* pairs (by session file modification time, then by position
    within the file) are each passed as a JSON object on stdin to
    ``python3 <hook_path>``.

    Args:
        sessions_dir: Directory holding the JSONL session files.
        hook_path: Path of the hook script to run once per turn.
        max_turns: Maximum number of turns to send; values <= 0 send none.

    Returns:
        The number of turns for which the hook exited with status 0.
    """
    session_files = [
        path
        for path in glob.glob(os.path.join(sessions_dir, "*.jsonl"))
        if ".deleted." not in os.path.basename(path)
    ]
    # Oldest first, so that the tail of all_turns really is the newest turns.
    session_files.sort(key=_session_mtime)

    all_turns = []

    print(f"Processing {len(session_files)} session files...")

    for session_file in session_files:
        try:
            messages = _read_session_messages(session_file)
        except (OSError, UnicodeError) as e:
            # Best effort: one unreadable session must not stop the others.
            print(f"Error processing {session_file}: {e}")
            continue
        all_turns.extend(_pair_turns(messages))

    print(f"Found {len(all_turns)} total conversation turns")

    # A plain [-0:] slice would return everything, hence the explicit guard.
    last_turns = all_turns[-max_turns:] if max_turns > 0 else []

    print(f"Processing last {len(last_turns)} turns")

    # Send each turn to the auto-memory hook
    processed_count = 0
    for number, turn in enumerate(last_turns, start=1):
        try:
            result = subprocess.run(
                ['python3', hook_path],
                input=json.dumps(turn),
                capture_output=True,
                text=True,
                # Hook output is only echoed for diagnostics, so undecodable
                # bytes are replaced instead of aborting the run.
                errors='replace',
                check=False,
            )
        except (OSError, subprocess.SubprocessError) as e:
            print(f"✗ Turn {number}: Exception: {e}")
            continue

        if result.returncode == 0:
            processed_count += 1
            print(f"✓ Turn {number}: Processed successfully")
            if result.stdout.strip():
                print(f"  Output: {result.stdout.strip()}")
        else:
            print(f"✗ Turn {number}: Failed (exit code {result.returncode})")
            if result.stderr.strip():
                print(f"  Error: {result.stderr.strip()}")

    print(f"\nAuto-memory indexing complete: {processed_count}/{len(last_turns)} turns processed successfully")

    return processed_count
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the extraction with its default settings.
    extract_conversations_from_sessions()