Files
workspace/tools/extract-and-index-memory.py

125 lines
4.8 KiB
Python

#!/usr/bin/env python3
import json
import subprocess
import sys
import os
import glob
from datetime import datetime
def extract_conversations_from_sessions(
    sessions_dir="/home/wdjones/.openclaw/agents/main/sessions/",
    max_turns=10,
):
    """Extract the most recent substantive conversation turns from session
    logs and pipe each one into the auto-memory hook script.

    Reads every non-deleted ``*.jsonl`` file in ``sessions_dir``, pairs
    consecutive user/assistant messages into turns, filters out cron and
    system noise, then sends the last ``max_turns`` turns (JSON on stdin)
    to ``auto-memory-hook.py``.

    Args:
        sessions_dir: Directory containing ``*.jsonl`` session files.
        max_turns: How many of the most recent turns to index.

    Returns:
        int: Number of turns the hook processed successfully (exit code 0).
    """
    session_files = [
        path
        for path in glob.glob(os.path.join(sessions_dir, "*.jsonl"))
        if ".deleted." not in path
    ]
    # Sort OLDEST first so all_turns accumulates in chronological order and
    # the [-max_turns:] slice below selects the genuinely most recent turns.
    # (The previous newest-first sort made the trailing slice pick turns
    # from the oldest sessions instead.)
    session_files.sort(key=os.path.getmtime)

    all_turns = []
    print(f"Processing {len(session_files)} session files...")
    for session_file in session_files:
        try:
            messages = _load_messages(session_file)
            all_turns.extend(_pair_turns(messages))
        except Exception as e:
            # Best-effort: a corrupt or unreadable file must not abort the run.
            print(f"Error processing {session_file}: {e}")
            continue

    print(f"Found {len(all_turns)} total conversation turns")
    last_turns = all_turns[-max_turns:]
    print(f"Processing last {len(last_turns)} turns")

    processed_count = 0
    for i, turn in enumerate(last_turns):
        if _send_turn_to_hook(i, turn):
            processed_count += 1

    print(f"\nAuto-memory indexing complete: {processed_count}/{len(last_turns)} turns processed successfully")
    return processed_count


def _load_messages(session_file):
    """Parse one .jsonl file into the list of user/assistant message records."""
    messages = []
    with open(session_file, 'r') as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip corrupt/partial lines
            if (data.get('type') == 'message'
                    and data.get('message', {}).get('role') in ('user', 'assistant')):
                messages.append(data)
    return messages


def _content_text(content):
    """Flatten a message content field (string or list of text parts) to text."""
    if isinstance(content, list):
        return ''.join(
            item.get('text', '') for item in content if item.get('type') == 'text'
        )
    return str(content)


def _pair_turns(messages):
    """Pair consecutive user->assistant messages into turn dicts.

    Cron jobs, system messages, and trivially short exchanges are dropped.
    """
    turns = []
    i = 0
    while i < len(messages):
        if (messages[i]['message']['role'] == 'user'
                and i + 1 < len(messages)
                and messages[i + 1]['message']['role'] == 'assistant'):
            user_text = _content_text(messages[i]['message']['content']).strip()
            assistant_text = _content_text(messages[i + 1]['message']['content']).strip()
            # Require substantial content on both sides; skip automated noise.
            if (len(user_text) > 10 and len(assistant_text) > 10
                    and not user_text.startswith('[cron:')
                    and not user_text.startswith('[System Message]')):
                turns.append({
                    'user': user_text,
                    'assistant': assistant_text,
                    'agent_id': 'case',
                    'session': 'main',
                })
            i += 2  # consumed the pair whether or not it passed the filter
        else:
            i += 1
    return turns


def _send_turn_to_hook(index, turn):
    """Send one turn as JSON on stdin to the auto-memory hook.

    Returns True if the hook exited with status 0, False otherwise.
    """
    try:
        result = subprocess.run(
            ['python3', '/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py'],
            input=json.dumps(turn),
            capture_output=True,
            text=True,
        )
    except Exception as e:
        print(f"✗ Turn {index + 1}: Exception: {e}")
        return False
    if result.returncode == 0:
        print(f"✓ Turn {index + 1}: Processed successfully")
        if result.stdout.strip():
            print(f" Output: {result.stdout.strip()}")
        return True
    print(f"✗ Turn {index + 1}: Failed (exit code {result.returncode})")
    if result.stderr.strip():
        print(f" Error: {result.stderr.strip()}")
    return False
if __name__ == "__main__":
extract_conversations_from_sessions()