Full sync - all projects, memory, configs
This commit is contained in:
98
tools/extract-memory-turns.py
Normal file
98
tools/extract-memory-turns.py
Normal file
@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import sys
|
||||
import subprocess
|
||||
import os
|
||||
import glob
|
||||
from datetime import datetime
|
||||
|
||||
def extract_conversations_from_file(filepath):
    """Extract user/assistant conversation pairs from one JSONL session file.

    Each line is parsed as JSON.  Records that are objects with
    ``type == 'message'`` and a ``message`` object are kept, in file order.
    Every user message is then paired with the next assistant message.

    Args:
        filepath: Path of the JSONL session file to read.

    Returns:
        A list of dicts with the keys ``user``, ``assistant``, ``agent_id``,
        ``session`` and ``timestamp`` (the assistant record's timestamp, or
        ``''`` when absent).  A pair is dropped when either side has no text
        or when the user text starts with ``'[cron:'``.  An empty list is
        returned when the file cannot be read or decoded.
    """
    messages = []
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    data = json.loads(line)
                except ValueError:
                    # Blank or malformed line: skip it, keep the rest.
                    continue
                if (
                    isinstance(data, dict)
                    and data.get('type') == 'message'
                    and isinstance(data.get('message'), dict)
                ):
                    messages.append(data)
    except (OSError, UnicodeDecodeError):
        # Best-effort: an unreadable file contributes no pairs.
        return []

    pairs = []
    pending_user = None
    for record in messages:
        role = record['message'].get('role')
        if role == 'user':
            pending_user = record
        elif role == 'assistant' and pending_user is not None:
            user_content = _message_text(pending_user['message'])
            assistant_content = _message_text(record['message'])

            # Skip cron job messages and pairs where either side has no text.
            if (
                not user_content.startswith('[cron:')
                and user_content.strip()
                and assistant_content.strip()
            ):
                pairs.append({
                    'user': user_content.strip(),
                    'assistant': assistant_content.strip(),
                    'agent_id': 'case',
                    'session': 'main',
                    'timestamp': record.get('timestamp', ''),
                })
            # Consume the pending user message so a later assistant message
            # is not paired with the same user turn again.
            pending_user = None

    return pairs


def _message_text(message):
    """Return the concatenated text of a message's ``content`` field.

    ``content`` may be a plain string or a list of parts; only parts that are
    dicts with ``type == 'text'`` and a string ``text`` contribute.
    """
    content = message.get('content')
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ''
    return ''.join(
        part['text']
        for part in content
        if isinstance(part, dict)
        and part.get('type') == 'text'
        and isinstance(part.get('text'), str)
    )
|
||||
|
||||
def main(sessions_dir='/home/wdjones/.openclaw/agents/main/sessions/',
         hook_path='/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py'):
    """Feed the most recent conversation pairs to the auto-memory hook.

    Scans up to the 10 most recently modified ``*.jsonl`` files in
    *sessions_dir*, newest first, collecting conversation pairs until at
    least 10 are found.  The 10 most recent pairs are then each serialized
    as JSON and piped to the stdin of ``python3 <hook_path>``.  Progress,
    exit codes and any hook stderr output are printed.

    Args:
        sessions_dir: Directory containing the JSONL session files.
        hook_path: Path of the hook script that receives each pair on stdin.
    """
    max_pairs = 10
    max_files = 10

    # Newest session files first, by modification time.
    session_files = sorted(
        glob.glob(os.path.join(sessions_dir, '*.jsonl')),
        key=os.path.getmtime,
        reverse=True,
    )

    all_pairs = []
    for filepath in session_files[:max_files]:
        pairs = extract_conversations_from_file(filepath)
        # Files are visited newest-first, so pairs from each older file go
        # in front.  This keeps all_pairs chronological, so the tail slice
        # below really is the most recent pairs.
        all_pairs = pairs + all_pairs
        print(f"Found {len(pairs)} pairs in {os.path.basename(filepath)}")

        if len(all_pairs) >= max_pairs:
            break

    last_pairs = all_pairs[-max_pairs:]

    print(f'Total pairs found: {len(all_pairs)}, processing last {len(last_pairs)}')

    for i, pair in enumerate(last_pairs, start=1):
        try:
            # Pipe the pair as JSON to auto-memory-hook.py.
            result = subprocess.run(
                ['python3', hook_path],
                input=json.dumps(pair),
                capture_output=True,
                text=True,
            )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            # Best-effort: report the failure and move on to the next pair.
            print(f'Pair {i}: Error - {e}')
            continue

        print(f'Pair {i}: Processed (exit code: {result.returncode})')
        if result.stderr:
            print(f' stderr: {result.stderr.strip()}')
|
||||
|
||||
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user