#!/usr/bin/env python3
"""
Triage scraped X posts — identify posts with verifiable claims and links.
Extracts structured "investigation tasks" for agent follow-up.

Usage:
    python3 triage-posts.py posts.json [--output triage.json] [--min-priority N]
"""

import argparse
import json
import re
import os
from datetime import datetime, timezone
from urllib.parse import urlsplit

# Patterns that suggest a verifiable claim.
# Each entry is (regex, claim_type); regexes are applied case-insensitively.
CLAIM_PATTERNS = [
    # Performance claims
    (r'(\d+[\d.]*)\s*%\s*(win|success|profit|return|accuracy|hit rate)', 'performance_claim'),
    (r'wins?\s+(\d+[\d.]*)\s*%', 'performance_claim'),
    (r'(\d+[\d.]*)\s*%\s+of the time', 'performance_claim'),
    (r'(\d+[\d.]*)x\s+(return|profit|gain)', 'multiplier_claim'),
    # Copy/follow trading
    (r'copy(ing|cat)?\s+(trader|user|bet|position|strat)', 'copy_trading'),
    (r'follow\s+(this|my|their)\s+(trade|bet|position|strat)', 'copy_trading'),
    (r'mirror(ing)?\s+(trade|bet|position)', 'copy_trading'),
    # Arbitrage/spread
    # Word boundaries keep "arb" from firing on "garbage" and "spread" on
    # "widespread"; plural/inflected forms are listed explicitly instead.
    (r'\b(arb(?:s|itrage)?|spreads?|mismatch(?:es|ed)?|mispriced)\b', 'arbitrage_opp'),
    (r'risk[\s-]?free', 'arbitrage_opp'),
    (r'guaranteed\s+(profit|return|money)', 'arbitrage_opp'),
    # Prediction/betting
    (r'(polymarket|kalshi|manifold|prediction\s+market)', 'prediction_market'),
    (r'(odds|probability)\s+.{0,20}\s*(\d+[\d.]*)\s*%', 'odds_claim'),
    # \b: "casino shares at 5" must not read as "no shares at 5".
    (r'\b(yes|no)\s+shares?\s+at\s+(\d+)', 'shares_price'),
    # Price/target claims
    # \b: "attempt 5" must not read as "pt 5".
    (r'\b(target|pt|price target)\s*[:\s]+\$?\s*(\d[\d,.]*)', 'price_target'),
    (r'(entry|buy)\s*(at|zone|point)\s*[:\s]+\$?\s*(\d[\d,.]*)', 'entry_point'),
    # Airdrop/free money
    (r'(airdrop|free\s+money|free\s+tokens?|claiming)', 'airdrop'),
    (r'(step\s+\d|how\s+to\s+(get|claim|earn))', 'howto'),
    # User/account references
    (r'@\w+.*?(portfolio|track\s*record|history|performance)', 'user_reference'),
    (r'(this\s+(guy|trader|user|person|account))\s+.{0,30}(profit|win|return|made)', 'user_reference'),
]

# Link domains that are investigatable (domain -> link type).
# A link matches when its hostname equals the domain or is a subdomain of it.
INVESTIGATABLE_DOMAINS = {
    'polymarket.com': 'prediction_market',
    'kalshi.com': 'prediction_market',
    'manifold.markets': 'prediction_market',
    'dexscreener.com': 'token_chart',
    'dextools.io': 'token_chart',
    'birdeye.so': 'token_chart',
    'coingecko.com': 'token_info',
    'coinmarketcap.com': 'token_info',
    'tradingview.com': 'chart',
    'etherscan.io': 'blockchain',
    'solscan.io': 'blockchain',
    'basescan.org': 'blockchain',
    'github.com': 'code_repo',
    'docs.google.com': 'document',
    'notion.so': 'document',
    'medium.com': 'article',
    'substack.com': 'article',
    'youtube.com': 'video',
    'youtu.be': 'video',
}

# claim type -> (task action, description prefix, verification method).
# Claim types absent from this table (entry_point, howto, user_reference)
# produce no task.
_CLAIM_TASKS = {
    'performance_claim': (
        'verify_performance',
        'Verify claim: ',
        'Check linked profile/data for actual track record',
    ),
    # Multiplier claims are scored like performance claims, so they get the
    # same verification task rather than being scored and then dropped.
    'multiplier_claim': (
        'verify_performance',
        'Verify claim: ',
        'Check linked profile/data for actual track record',
    ),
    'copy_trading': (
        'verify_trader',
        'Verify trader referenced: ',
        'Check trader profile, recent bets/trades, actual P&L',
    ),
    'arbitrage_opp': (
        'verify_arb',
        'Verify opportunity: ',
        'Check if spread/mismatch still exists, calculate actual risk',
    ),
    'odds_claim': (
        'check_market',
        'Check prediction market: ',
        'Verify current odds, volume, resolution criteria',
    ),
    'shares_price': (
        'check_market',
        'Check prediction market: ',
        'Verify current odds, volume, resolution criteria',
    ),
    'prediction_market': (
        'check_market',
        'Check prediction market: ',
        'Verify current odds, volume, resolution criteria',
    ),
    'price_target': (
        'check_price',
        'Verify price claim: ',
        'Check current price vs target, chart pattern',
    ),
    'airdrop': (
        'verify_airdrop',
        'Check airdrop legitimacy: ',
        'Verify project, check for scam signals, confirm eligibility',
    ),
}


def _hostname(url):
    """Return the lowercase hostname of *url*, or '' if none can be parsed."""
    if not isinstance(url, str) or not url:
        return ''
    candidate = url.strip()
    if '://' not in candidate:
        # Scheme-less links ("polymarket.com/event/x") only expose a netloc
        # to urlsplit when prefixed with "//".
        candidate = '//' + candidate.lstrip('/')
    try:
        host = urlsplit(candidate).hostname
    except ValueError:
        # urlsplit rejects malformed netlocs (e.g. unbalanced IPv6 brackets).
        return ''
    return (host or '').rstrip('.')


def _host_matches(host, domain):
    """Return True if *host* is *domain* itself or one of its subdomains."""
    return host == domain or host.endswith('.' + domain)


def extract_claims(text: str) -> list:
    """Extract verifiable claims from post text.

    Returns one dict per regex match, in CLAIM_PATTERNS order, with keys
    'type' (claim type), 'match' (matched text) and 'span' ([start, end]
    offsets into *text*).
    """
    claims = []
    for pattern, claim_type in CLAIM_PATTERNS:
        for m in re.finditer(pattern, text, re.IGNORECASE):
            claims.append({
                'type': claim_type,
                'match': m.group(0),
                'span': [m.start(), m.end()],
            })
    return claims


def classify_links(links: list) -> list:
    """Classify links by investigatable domain.

    Each input item is a dict with optional 'url' and 'text' keys. Returns a
    new list of dicts with 'url', 'text' and 'type', where 'type' is the
    INVESTIGATABLE_DOMAINS value for the link's hostname, or 'unknown'.
    Matching is done on the parsed hostname, so a domain that merely appears
    in the path or query string does not count.
    """
    classified = []
    for link in links:
        url = link.get('url') or ''  # tolerate an explicit null url
        host = _hostname(url)
        link_type = 'unknown'
        for domain, dtype in INVESTIGATABLE_DOMAINS.items():
            if _host_matches(host, domain):
                link_type = dtype
                break
        classified.append({
            'url': url,
            'text': link.get('text', ''),
            'type': link_type,
        })
    return classified


def _score_priority(claims, classified_links, investigatable_links):
    """Compute the integer priority score from claims and classified links."""
    claim_types = {c['type'] for c in claims}
    link_types = {l['type'] for l in investigatable_links}

    priority = 0
    if claims:
        priority += 2
        # Performance claims are highest priority
        if claim_types & {'performance_claim', 'multiplier_claim'}:
            priority += 2
        if claim_types & {'copy_trading', 'arbitrage_opp'}:
            priority += 1
    if investigatable_links:
        priority += 2
        if 'prediction_market' in link_types:
            priority += 2
        if link_types & {'token_chart', 'blockchain'}:
            priority += 1
    elif classified_links:
        # Only unknown links: still a weak signal.
        priority += 1
    return priority


def _build_tasks(claims, classified_links, investigatable_links):
    """Build the ordered task list: claim tasks, known links, unknown links."""
    tasks = []
    for claim in claims:
        spec = _CLAIM_TASKS.get(claim['type'])
        if spec is None:
            continue
        action, prefix, method = spec
        tasks.append({
            'action': action,
            'description': f"{prefix}{claim['match']}",
            'method': method,
        })

    for link in investigatable_links:
        tasks.append({
            'action': f'browse_{link["type"]}',
            'description': f'Follow and analyze: {link["url"]}',
            'url': link['url'],
            'method': f'Browse to {link["type"]} link, extract current data',
        })

    # For unknown links that might be interesting: external http(s) links
    # only. Links back to x.com are skipped; the check is on the hostname so
    # that e.g. dropbox.com is not mistaken for x.com.
    for link in classified_links:
        if link['type'] != 'unknown' or not link['url'].startswith('http'):
            continue
        if _host_matches(_hostname(link['url']), 'x.com'):
            continue
        tasks.append({
            'action': 'browse_unknown',
            'description': f'Check external link: {link["url"]}',
            'url': link['url'],
            'method': 'Browse to link, determine if relevant',
        })
    return tasks


def triage_post(post: dict) -> dict:
    """Analyze a single post for investigation potential.

    Reads 'text', 'links', 'card', 'author', 'url', 'timestamp' and 'metrics'
    from *post* (all optional) and returns a dict with the extracted claims,
    classified links, a priority score, the list of investigation tasks, and
    'worth_investigating' (True when priority >= 2). *post* is not modified.
    """
    text = post.get('text', '') or ''

    # Extract claims
    claims = extract_claims(text)

    # Classify links. Copy the list so that appending the card link below
    # does not mutate the caller's post.
    all_links = list(post.get('links') or [])
    card = post.get('card') or {}
    if card.get('url'):
        all_links.append({'url': card['url'], 'text': card.get('title', '')})

    classified_links = classify_links(all_links)
    investigatable_links = [l for l in classified_links if l['type'] != 'unknown']

    priority = _score_priority(claims, classified_links, investigatable_links)
    tasks = _build_tasks(claims, classified_links, investigatable_links)

    return {
        'author': post.get('author', {}),
        'text': text,
        'url': post.get('url', ''),
        'timestamp': post.get('timestamp'),
        'metrics': post.get('metrics', {}),
        'claims': claims,
        'links': classified_links,
        'investigatable_links': investigatable_links,
        'priority': priority,
        'tasks': tasks,
        'worth_investigating': priority >= 2,
    }


def main():
    """CLI entry point: triage every post in the input JSON and write a report.

    Reads the 'posts' list from the input file, triages each post, writes the
    full result plus the filtered investigation queue as JSON (next to the
    input as 'triage.json' unless --output is given), and prints a summary.
    """
    parser = argparse.ArgumentParser(description="Triage X posts for investigation")
    parser.add_argument("input", help="Path to posts.json")
    parser.add_argument("--output", help="Output file")
    parser.add_argument("--min-priority", type=int, default=2, help="Min priority to include")
    args = parser.parse_args()

    # Post text routinely contains emoji / non-ASCII; don't depend on the
    # platform's default encoding.
    with open(args.input, encoding='utf-8') as f:
        data = json.load(f)

    posts = data.get('posts') or []
    print(f"Triaging {len(posts)} posts...")

    triaged = [triage_post(post) for post in posts]

    # Sort by priority
    triaged.sort(key=lambda x: x['priority'], reverse=True)

    # Filter
    worth = [t for t in triaged if t['priority'] >= args.min_priority]

    output = {
        'triaged_at': datetime.now(timezone.utc).isoformat(),
        'total_posts': len(posts),
        'worth_investigating': len(worth),
        'posts': triaged,
        'investigation_queue': worth,
    }

    output_path = args.output or os.path.join(os.path.dirname(args.input), 'triage.json')
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=2)

    # Print summary
    print("\n=== Triage Summary ===")
    print(f"Total: {len(posts)} | Worth investigating: {len(worth)}")

    if worth:
        print("\n=== Investigation Queue ===")
        for t in worth:
            author = (t['author'] or {}).get('handle', '?')
            claims = [c['type'] for c in t['claims']]
            links = [l['type'] for l in t['investigatable_links']]
            # Only show an ellipsis when the text was actually truncated.
            preview = t['text'][:150] + ('...' if len(t['text']) > 150 else '')
            print(f"\n [{t['priority']}] {author}")
            print(f" {preview}")
            if claims:
                print(f" Claims: {', '.join(claims)}")
            if links:
                print(f" Links: {', '.join(links)}")
            print(f" Tasks: {len(t['tasks'])}")
    else:
        print("\nNo posts met the investigation threshold.")

    print(f"\nSaved to {output_path}")


if __name__ == "__main__":
    main()