Files
workspace/auto_memory_processor.py

138 lines
5.0 KiB
Python

#!/usr/bin/env python3
import json
import sys
import subprocess
import argparse
from pathlib import Path
def _flatten_content(content):
    """Return a message's ``content`` field as plain text.

    Strings are returned unchanged. A list is treated as a sequence of
    content parts: the ``text`` of every dict part whose ``type`` is
    ``'text'`` is kept, the pieces are joined with newlines, and every
    other part is dropped. Any other value is converted with ``str()``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, dict) and part.get('type') == 'text':
                text = part.get('text', '')
                if text is None:
                    text = ''
                # str.join() raises on non-string items, so coerce defensively.
                pieces.append(text if isinstance(text, str) else str(text))
        return '\n'.join(pieces)
    return str(content)


def extract_message_pairs(jsonl_file, lookahead=10):
    """Extract user/assistant message pairs from a JSONL session file.

    Each non-blank line is parsed as JSON. Only objects with
    ``type == 'message'`` whose ``message`` is an object with role ``'user'``
    or ``'assistant'`` are kept; blank lines, invalid JSON, non-object values
    and all other records are skipped.

    Pairing rules:
      * every user message is matched with the first assistant message found
        among the ``lookahead`` messages that follow it;
      * a user message with no assistant message in that window is dropped;
      * after a match, scanning resumes after the matched assistant message,
        so user messages between the two are not paired separately.

    Args:
        jsonl_file: Path of the session transcript to read.
        lookahead: How many following user/assistant messages to inspect
            when searching for the reply (default 10).

    Returns:
        A list of dicts with the keys ``user``, ``assistant``,
        ``user_timestamp`` and ``assistant_timestamp``. Timestamps default to
        ``''`` when the record has none.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    messages = []
    # JSON text is UTF-8 by specification; do not rely on the locale's default
    # encoding, and do not let one corrupt byte abort the whole run.
    with open(jsonl_file, 'r', encoding='utf-8', errors='replace') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A line may hold valid JSON that is not an object (list, number...).
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            msg = data.get('message')
            if not isinstance(msg, dict):
                continue
            role = msg.get('role')
            if role not in ('user', 'assistant'):
                continue
            messages.append({
                'role': role,
                'content': _flatten_content(msg.get('content', '')),
                'timestamp': data.get('timestamp', ''),
            })

    # Pair each user message with the assistant message that follows it.
    pairs = []
    total = len(messages)
    i = 0
    while i < total:
        user_msg = messages[i]
        if user_msg['role'] != 'user':
            i += 1
            continue
        window_end = min(i + 1 + lookahead, total)
        reply_index = next(
            (j for j in range(i + 1, window_end)
             if messages[j]['role'] == 'assistant'),
            None,
        )
        if reply_index is None:
            i += 1  # No assistant response in the window; move on.
            continue
        assistant_msg = messages[reply_index]
        pairs.append({
            'user': user_msg['content'],
            'assistant': assistant_msg['content'],
            'user_timestamp': user_msg['timestamp'],
            'assistant_timestamp': assistant_msg['timestamp'],
        })
        i = reply_index + 1  # Resume after the matched assistant message.
    return pairs
def send_to_memory_hook(pair_data, hook_script, *, agent_id="case",
                        session="main", timeout=30):
    """Send a user/assistant pair to the auto-memory hook script.

    The pair is serialized as a JSON object with the keys ``user``,
    ``assistant``, ``agent_id`` and ``session`` and written to the standard
    input of ``python3 <hook_script>``.

    Args:
        pair_data: Mapping with at least the keys ``"user"`` and
            ``"assistant"``.
        hook_script: Path of the hook script to execute.
        agent_id: Value sent as ``agent_id`` (default ``"case"``).
        session: Value sent as ``session`` (default ``"main"``).
        timeout: Seconds to wait for the hook before giving up (default 30).

    Returns:
        True if the hook exited with status 0. False if it exited non-zero,
        timed out, or could not be started; the reason is printed.

    Raises:
        KeyError: If ``pair_data`` lacks ``"user"`` or ``"assistant"``.
    """
    json_input = json.dumps({
        "user": pair_data["user"],
        "assistant": pair_data["assistant"],
        "agent_id": agent_id,
        "session": session,
    })
    try:
        result = subprocess.run(
            ['python3', hook_script],
            input=json_input,
            text=True,
            capture_output=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print("Warning: Memory hook script timed out")
        return False
    except Exception as e:
        # Best-effort boundary: a hook that cannot be launched must not abort
        # processing of the remaining turns.
        print(f"Error running memory hook script: {e}")
        return False

    if result.returncode != 0:
        # The hook's output is captured, so surface it here; otherwise a
        # failing hook leaves no trace of why it failed.
        detail = (result.stderr or "").strip()
        message = f"Warning: Memory hook script exited with code {result.returncode}"
        if detail:
            message = f"{message}: {detail}"
        print(message)
        return False
    return True
def _non_negative_int(value):
    """argparse ``type=`` callable: parse ``value`` as an int >= 0."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {number}")
    return number


def main(argv=None):
    """Process the most recent turns of a session transcript.

    Parses the command line, extracts user/assistant pairs from the session
    file and sends the last ``--num-turns`` of them to the memory hook
    script, printing progress as it goes.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        1 if the session file or the hook script does not exist, otherwise 0.
        Individual turn failures are reported on stdout but do not change the
        return value.
    """
    parser = argparse.ArgumentParser(description='Process session transcript for auto-memory indexing')
    parser.add_argument('--session-file', required=True, help='Path to session JSONL file')
    parser.add_argument('--memory-hook', default='/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py',
                        help='Path to auto-memory-hook.py script')
    parser.add_argument('--num-turns', type=_non_negative_int, default=10,
                        help='Number of assistant turns to process')
    args = parser.parse_args(argv)

    if not Path(args.session_file).exists():
        print(f"Error: Session file {args.session_file} does not exist")
        return 1
    if not Path(args.memory_hook).exists():
        print(f"Error: Memory hook script {args.memory_hook} does not exist")
        return 1

    print(f"Processing session file: {args.session_file}")
    print(f"Memory hook script: {args.memory_hook}")

    # Extract message pairs
    pairs = extract_message_pairs(args.session_file)
    if not pairs:
        print("No user/assistant message pairs found")
        return 0

    # Take the last N turns. pairs[-0:] is the whole list, so zero has to be
    # handled explicitly to mean "process nothing".
    recent_pairs = pairs[-args.num_turns:] if args.num_turns > 0 else []
    print(f"Found {len(pairs)} total pairs, processing last {len(recent_pairs)} turns")

    success_count = 0
    for turn, pair in enumerate(recent_pairs, start=1):
        print(f"Processing turn {turn}/{len(recent_pairs)}")
        if send_to_memory_hook(pair, args.memory_hook):
            success_count += 1
        else:
            print(f"Failed to process turn {turn}")

    print(f"Successfully processed {success_count}/{len(recent_pairs)} turns")
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())