138 lines
5.0 KiB
Python
138 lines
5.0 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import json
|
|
import sys
|
|
import subprocess
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
def _content_to_text(content):
    """Flatten a message ``content`` field into a single string."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Newer transcript format: content is a list of typed parts. Only
        # dict parts tagged ``type == 'text'`` contribute to the result.
        parts = []
        for item in content:
            if isinstance(item, dict) and item.get('type') == 'text':
                text = item.get('text', '')
                # A null or non-string ``text`` must not break the join below.
                parts.append('' if text is None else str(text))
        return '\n'.join(parts)
    return str(content)


def _read_messages(jsonl_file):
    """Return the user/assistant messages of *jsonl_file* in file order."""
    messages = []
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(jsonl_file, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Malformed lines are skipped, not fatal.
                continue
            # A line can be valid JSON without being an object (list, number,
            # string); such lines are skipped like malformed ones.
            if not isinstance(data, dict) or data.get('type') != 'message':
                continue
            msg = data.get('message')
            if not isinstance(msg, dict):
                continue
            role = msg.get('role')
            if role not in ('user', 'assistant'):
                continue
            messages.append({
                'role': role,
                'content': _content_to_text(msg.get('content', '')),
                'timestamp': data.get('timestamp', ''),
            })
    return messages


def extract_message_pairs(jsonl_file, max_lookahead=10):
    """Extract user/assistant message pairs from a JSONL session file.

    Each non-blank line is parsed as JSON. Only objects with
    ``type == 'message'`` whose ``message`` object has a ``role`` of
    ``'user'`` or ``'assistant'`` are considered; everything else
    (blank lines, invalid JSON, non-object values, other types or roles)
    is skipped.

    A user message is paired with the first assistant message found among
    the ``max_lookahead`` messages that follow it. Scanning then resumes
    after that assistant message, so any user messages that sat between the
    two are not paired. A user message with no assistant reply inside the
    window is dropped.

    Args:
        jsonl_file: Path of the JSONL transcript to read (decoded as UTF-8).
        max_lookahead: How many following messages to search for the
            assistant reply.

    Returns:
        A list of dicts with the keys ``user``, ``assistant``,
        ``user_timestamp`` and ``assistant_timestamp``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    messages = _read_messages(jsonl_file)
    total = len(messages)

    pairs = []
    i = 0
    while i < total:
        user_msg = messages[i]
        if user_msg['role'] != 'user':
            i += 1
            continue

        # Search exactly ``max_lookahead`` messages after position i.
        window_end = min(i + 1 + max_lookahead, total)
        reply_index = next(
            (j for j in range(i + 1, window_end)
             if messages[j]['role'] == 'assistant'),
            None,
        )
        if reply_index is None:
            # No assistant response in the window; move on.
            i += 1
            continue

        assistant_msg = messages[reply_index]
        pairs.append({
            'user': user_msg['content'],
            'assistant': assistant_msg['content'],
            'user_timestamp': user_msg['timestamp'],
            'assistant_timestamp': assistant_msg['timestamp'],
        })
        # Resume after the assistant message that was just consumed.
        i = reply_index + 1

    return pairs
|
|
|
|
def send_to_memory_hook(pair_data, hook_script, *, agent_id="case",
                        session="main", timeout=30):
    """Send a user/assistant pair to the auto-memory hook script.

    The pair is serialised as a JSON object with the keys ``user``,
    ``assistant``, ``agent_id`` and ``session`` and written to the hook's
    standard input. The hook's stdout and stderr are captured rather than
    passed through.

    Args:
        pair_data: Mapping that provides the ``"user"`` and ``"assistant"``
            values to send.
        hook_script: Path of the Python hook script to execute.
        agent_id: Value sent as ``agent_id``.
        session: Value sent as ``session``.
        timeout: Seconds to wait for the hook before giving up.

    Returns:
        True if the hook exited with status 0; False if it exited non-zero,
        timed out, or could not be started.

    Raises:
        KeyError: If ``pair_data`` lacks ``"user"`` or ``"assistant"``.
    """
    json_input = json.dumps({
        "user": pair_data["user"],
        "assistant": pair_data["assistant"],
        "agent_id": agent_id,
        "session": session,
    })

    # Run the hook with the interpreter executing this script, so it does not
    # depend on a ``python3`` entry on PATH; fall back to the PATH lookup when
    # the interpreter path is unavailable.
    interpreter = sys.executable or 'python3'

    try:
        result = subprocess.run(
            [interpreter, hook_script],
            input=json_input,
            text=True,
            capture_output=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print("Warning: Memory hook script timed out")
        return False
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Launch failures (missing interpreter, bad path, undecodable output)
        # are reported and treated as a failed turn, not a crash.
        print(f"Error running memory hook script: {e}")
        return False

    if result.returncode != 0:
        # Output is captured, so surface the hook's own diagnostics here;
        # otherwise the reason for the failure would be lost.
        detail = (result.stderr or '').strip()
        if detail:
            print(f"Memory hook exited with status {result.returncode}: {detail}")
        return False
    return True
|
|
|
|
def main(argv=None):
    """Feed the most recent turns of a session transcript to the memory hook.

    Parses the command line, checks that the session file and the hook
    script exist, extracts user/assistant pairs from the session file and
    sends the last ``--num-turns`` of them to the hook one at a time,
    printing progress along the way.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            (the default) uses the process arguments.

    Returns:
        1 if the session file or hook script does not exist, otherwise 0.
        Turns that the hook fails to process are reported but do not change
        the return value.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            ``--num-turns`` value below 1.
    """
    parser = argparse.ArgumentParser(description='Process session transcript for auto-memory indexing')
    parser.add_argument('--session-file', required=True, help='Path to session JSONL file')
    parser.add_argument('--memory-hook', default='/home/wdjones/.openclaw/workspace/tools/auto-memory-hook.py',
                        help='Path to auto-memory-hook.py script')
    parser.add_argument('--num-turns', type=int, default=10, help='Number of assistant turns to process')

    args = parser.parse_args(argv)

    # ``pairs[-0:]`` is the whole list and a negative count slices from the
    # front, so anything below 1 would silently select the wrong turns.
    if args.num_turns < 1:
        parser.error('--num-turns must be a positive integer')

    if not Path(args.session_file).exists():
        print(f"Error: Session file {args.session_file} does not exist")
        return 1

    if not Path(args.memory_hook).exists():
        print(f"Error: Memory hook script {args.memory_hook} does not exist")
        return 1

    print(f"Processing session file: {args.session_file}")
    print(f"Memory hook script: {args.memory_hook}")

    # Extract message pairs
    pairs = extract_message_pairs(args.session_file)

    if not pairs:
        print("No user/assistant message pairs found")
        return 0

    # Take the last N turns
    recent_pairs = pairs[-args.num_turns:]
    print(f"Found {len(pairs)} total pairs, processing last {len(recent_pairs)} turns")

    success_count = 0
    for turn_number, pair in enumerate(recent_pairs, start=1):
        print(f"Processing turn {turn_number}/{len(recent_pairs)}")
        if send_to_memory_hook(pair, args.memory_hook):
            success_count += 1
        else:
            print(f"Failed to process turn {turn_number}")

    print(f"Successfully processed {success_count}/{len(recent_pairs)} turns")
    return 0
|
|
|
|
if __name__ == '__main__':
    # Script entry point: main()'s return value becomes the exit status.
    raise SystemExit(main())