#!/usr/bin/env python3
"""
Triage scraped X posts — identify posts with verifiable claims and links.

Extracts structured "investigation tasks" for agent follow-up.

Usage:
    python3 triage-posts.py [--output triage.json]
"""
import argparse
import json
import re
import os
from datetime import datetime, timezone

# Patterns that suggest a verifiable claim.
# Each entry is (regex source, claim_type); matched case-insensitively.
CLAIM_PATTERNS = [
    # Specific dollar amounts and profits
    (r'\$(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)\s*(profit|gained|made|earned|up|return)', 'dollar_profit'),
    (r'made\s+\$(\d{1,3}(?:,\d{3})*(?:\.\d{2})?)', 'dollar_profit'),
    (r'(\d{1,3}(?:,\d{3})*)\s*(k|K)\s*(profit|gained|made|earned|up|return)', 'k_profit'),
    (r'(\d+(?:\.\d+)?)\s*(million|M)\s+(profit|made|earned|up)', 'million_profit'),
    # Performance claims
    (r'(\d+[\d.]*)\s*%\s*(win|success|profit|return|accuracy|hit rate)', 'performance_claim'),
    (r'wins?\s+(\d+[\d.]*)\s*%', 'performance_claim'),
    (r'(\d+[\d.]*)\s*%\s+of the time', 'performance_claim'),
    (r'(\d+[\d.]*)x\s+(return|profit|gain|returns)', 'multiplier_claim'),
    (r'(\d+[\d.]*)\s*x\s+in\s+(\d+)\s+(day|week|month|year)s?', 'timeframe_multiplier'),
    # Trading track record claims
    (r'(\d+)\s+(win|profitable)\s+(trade|bet|position)s?\s+in\s+a\s+row', 'streak_claim'),
    (r'(\d+)\s+out\s+of\s+(\d+)\s+(trade|bet|call)s?\s+(right|correct|profitable)', 'ratio_claim'),
    (r'(\d+[\d.]*)\s*%\s+accuracy\s+(this|last)\s+(week|month|year)', 'accuracy_claim'),
    (r'portfolio\s+up\s+(\d+[\d.]*)\s*%', 'portfolio_performance'),
    # Copy/follow trading
    (r'copy(ing|cat)?\s+(trader|user|bet|position|strat)', 'copy_trading'),
    (r'follow\s+(this|my|their)\s+(trade|bet|position|strat)', 'copy_trading'),
    (r'mirror(ing)?\s+(trade|bet|position)', 'copy_trading'),
    (r'best\s+(trader|performer)\s+(on|in)\s+\w+', 'top_trader'),
    # Crypto/DeFi specific claims
    (r'(\d+[\d.]*)\s+(eth|btc|sol|bnb|ada|dot)\s+(profit|gained|made|up)', 'crypto_profit'),
    (r'yield\s+farming\s+.{0,30}\s*(\d+[\d.]*)\s*%', 'yield_claim'),
    (r'staking\s+.{0,30}\s*(\d+[\d.]*)\s*%\s*(apy|apr)', 'staking_yield'),
    (r'liquidity\s+provider?\s+.{0,30}\s*(\d+[\d.]*)\s*%', 'lp_yield'),
    (r'(\d+[\d.]*)x\s+(leverage|margin)', 'leverage_claim'),
    # NFT claims
    (r'(flip|sold|bought)\s+.{0,30}\s+for\s+(\d+[\d.]*)\s+(eth|sol)', 'nft_flip'),
    (r'(\d+[\d.]*)\s+(eth|sol)\s+floor', 'nft_floor'),
    (r'mint\s+(price|cost)\s+(\d+[\d.]*)', 'mint_price'),
    # Arbitrage/spread
    (r'(arb|arbitrage|spread|mismatch|mispriced)', 'arbitrage_opp'),
    (r'risk[\s-]?free', 'arbitrage_opp'),
    (r'guaranteed\s+(profit|return|money)', 'arbitrage_opp'),
    (r'(\d+[\d.]*)\s*%\s+guaranteed', 'guaranteed_return'),
    # Sports betting / Prediction markets
    (r'(polymarket|kalshi|manifold|prediction\s+market)', 'prediction_market'),
    (r'(odds|probability)\s+.{0,20}\s*(\d+[\d.]*)\s*%', 'odds_claim'),
    (r'(yes|no)\s+shares?\s+at\s+(\d+)', 'shares_price'),
    (r'betting\s+(\d+[\d.]*)\s*%\s+on', 'betting_confidence'),
    (r'(\d+[\d.]*)\s*%\s+chance\s+(of|that)', 'probability_estimate'),
    # Price/target claims
    (r'(target|pt|price target)\s*[:\s]+\$?\s*(\d[\d,.]*)', 'price_target'),
    (r'(entry|buy)\s*(at|zone|point)\s*[:\s]+\$?\s*(\d[\d,.]*)', 'entry_point'),
    (r'next\s+(resistance|support)\s+at\s+\$?(\d[\d,.]*)', 'technical_level'),
    (r'going\s+to\s+\$?(\d[\d,.]*)', 'price_prediction'),
    # Airdrop/free money
    (r'(airdrop|free\s+money|free\s+tokens?|claiming)', 'airdrop'),
    (r'(step\s+\d|how\s+to\s+(get|claim|earn))', 'howto'),
    (r'eligible\s+for\s+(\d+[\d.]*)\s+tokens?', 'airdrop_amount'),
    # Signal/alert claims
    (r'(signal|alert|call)\s+.{0,30}\s*(\d+[\d.]*)\s*%\s+(gain|profit)', 'signal_performance'),
    (r'last\s+(\d+)\s+(signal|call|alert)s?\s+all\s+(profit|green|won)', 'signal_streak'),
    (r'(\d+[\d.]*)\s*%\s+win\s+rate\s+on\s+(signal|call)s?', 'signal_winrate'),
    # Time-sensitive claims
    (r'(today|yesterday|this\s+week)\s+.{0,30}\s*(\d+[\d.]*)\s*%\s+(up|gain|profit)', 'recent_performance'),
    (r'(\d+[\d.]*)\s*%\s+in\s+(\d+)\s+(hour|day|week)s?', 'timeframe_performance'),
    (r'last\s+(\d+)\s+(day|week|month)s?\s+.{0,30}\s*(\d+[\d.]*)\s*%', 'period_performance'),
    # User/account references with performance
    (r'@\w+.*?(portfolio|track\s*record|history|performance)', 'user_reference'),
    (r'(this\s+(guy|trader|user|person|account))\s+.{0,30}(profit|win|return|made)', 'user_reference'),
    (r'trader\s+with\s+(\d+[\d.]*)\s*%\s+(accuracy|win\s+rate)', 'trader_stats'),
    (r'(\d+[\d.]*)\s*(million|M|k|K)\s+follower\s+trader', 'influencer_trader'),
    # Strategy claims
    (r'(strategy|method|system)\s+that\s+.{0,30}(\d+[\d.]*)\s*%', 'strategy_claim'),
    (r'using\s+.{0,30}\s+(\d+[\d.]*)\s*%\s+(success|win)', 'method_performance'),
    (r'secret\s+(strategy|method|formula)', 'secret_method'),
    # Options/derivatives
    (r'(call|put)\s+option\s+.{0,30}\s*(\d+[\d.]*)\s*%', 'options_claim'),
    (r'(\d+[\d.]*)\s*%\s+(otm|itm|out\s+of\s+the\s+money|in\s+the\s+money)', 'options_probability'),
    (r'delta\s+(\d+[\d.]*)', 'options_delta'),
    # Course/education monetization
    (r'course\s+that\s+.{0,30}(\d+[\d.]*)\s*(k|K|dollar)', 'course_earning'),
    (r'teaching\s+.{0,30}\s+(\d+[\d.]*)\s*(k|K)\s+student', 'teaching_claim'),
    (r'(\d+[\d.]*)\s*%\s+of\s+students\s+(profit|succeed)', 'student_success'),
]

# Compiled once at import time so extract_claims() does not pay a
# pattern-cache lookup for all ~60 regexes on every single post.
# CLAIM_PATTERNS itself is kept as raw strings for compatibility.
_COMPILED_CLAIM_PATTERNS = [
    (re.compile(pattern, re.IGNORECASE), claim_type)
    for pattern, claim_type in CLAIM_PATTERNS
]

# Link domains that are investigatable.
# Matching is a plain substring test against the full URL (see classify_links).
INVESTIGATABLE_DOMAINS = {
    'polymarket.com': 'prediction_market',
    'kalshi.com': 'prediction_market',
    'manifold.markets': 'prediction_market',
    'dexscreener.com': 'token_chart',
    'dextools.io': 'token_chart',
    'birdeye.so': 'token_chart',
    'coingecko.com': 'token_info',
    'coinmarketcap.com': 'token_info',
    'tradingview.com': 'chart',
    'etherscan.io': 'blockchain',
    'solscan.io': 'blockchain',
    'basescan.org': 'blockchain',
    'github.com': 'code_repo',
    'docs.google.com': 'document',
    'notion.so': 'document',
    'medium.com': 'article',
    'substack.com': 'article',
    'youtube.com': 'video',
    'youtu.be': 'video',
}


def extract_claims(text):
    """Extract verifiable claims from post text.

    Returns a list of dicts, one per regex hit:
    {'type': claim_type, 'match': matched text, 'span': [start, end]}.
    Overlapping claim types are all reported — no deduplication.
    """
    claims = []
    for regex, claim_type in _COMPILED_CLAIM_PATTERNS:
        for m in regex.finditer(text):
            claims.append({
                'type': claim_type,
                'match': m.group(0),
                'span': [m.start(), m.end()],
            })
    return claims


def classify_links(links):
    """Classify links by investigatable domain.

    Each input link dict is expected to carry 'url' and (optionally) 'text'.
    First matching domain wins; unmatched URLs are typed 'unknown'.
    """
    classified = []
    for link in links:
        url = link.get('url', '')
        link_type = 'unknown'
        for domain, dtype in INVESTIGATABLE_DOMAINS.items():
            # Substring match, so subdomains and path fragments also count.
            if domain in url:
                link_type = dtype
                break
        classified.append({
            'url': url,
            'text': link.get('text', ''),
            'type': link_type,
        })
    return classified


def _score_priority(claims, classified_links, investigatable_links):
    """Compute the investigation-priority score for a post's claims/links."""
    priority = 0
    if claims:
        priority += 2
    # Performance claims are highest priority
    if any(c['type'] in ('performance_claim', 'multiplier_claim') for c in claims):
        priority += 2
    if any(c['type'] in ('copy_trading', 'arbitrage_opp') for c in claims):
        priority += 1
    if investigatable_links:
        priority += 2
    if any(lk['type'] == 'prediction_market' for lk in investigatable_links):
        priority += 2
    if any(lk['type'] in ('token_chart', 'blockchain') for lk in investigatable_links):
        priority += 1
    # Posts with only unclassified links still get a small bump.
    if classified_links and not investigatable_links:
        priority += 1
    return priority


# Claim type -> (action, description prefix, method) for task generation.
_CLAIM_TASK_SPECS = {
    'performance_claim': ('verify_performance', 'Verify claim',
                          'Check linked profile/data for actual track record'),
    'copy_trading': ('verify_trader', 'Verify trader referenced',
                     'Check trader profile, recent bets/trades, actual P&L'),
    'arbitrage_opp': ('verify_arb', 'Verify opportunity',
                      'Check if spread/mismatch still exists, calculate actual risk'),
    'odds_claim': ('check_market', 'Check prediction market',
                   'Verify current odds, volume, resolution criteria'),
    'shares_price': ('check_market', 'Check prediction market',
                     'Verify current odds, volume, resolution criteria'),
    'prediction_market': ('check_market', 'Check prediction market',
                          'Verify current odds, volume, resolution criteria'),
    'price_target': ('check_price', 'Verify price claim',
                     'Check current price vs target, chart pattern'),
    'airdrop': ('verify_airdrop', 'Check airdrop legitimacy',
                'Verify project, check for scam signals, confirm eligibility'),
}


def _build_tasks(claims, classified_links, investigatable_links):
    """Build the list of follow-up investigation tasks for a post."""
    tasks = []
    for claim in claims:
        spec = _CLAIM_TASK_SPECS.get(claim['type'])
        if spec:
            action, prefix, method = spec
            tasks.append({
                'action': action,
                'description': f"{prefix}: {claim['match']}",
                'method': method,
            })
    for link in investigatable_links:
        tasks.append({
            'action': f'browse_{link["type"]}',
            'description': f'Follow and analyze: {link["url"]}',
            'url': link['url'],
            'method': f'Browse to {link["type"]} link, extract current data',
        })
    # For unknown links that might be interesting.
    # NOTE(review): the substring test also excludes any domain containing
    # "x.com" (e.g. netflix.com) — kept as-is to preserve behavior.
    for link in classified_links:
        if (link['type'] == 'unknown'
                and link['url'].startswith('http')
                and 'x.com' not in link['url']):
            tasks.append({
                'action': 'browse_unknown',
                'description': f'Check external link: {link["url"]}',
                'url': link['url'],
                'method': 'Browse to link, determine if relevant',
            })
    return tasks


def triage_post(post):
    """Analyze a single post for investigation potential.

    Returns a dict with the post's metadata plus extracted claims,
    classified links, a priority score, investigation tasks, and a
    'worth_investigating' flag (priority >= 2). Does not mutate `post`.
    """
    text = post.get('text', '') or ''

    claims = extract_claims(text)

    # Copy the post's link list before appending the card link so the
    # caller's input data is never mutated (the previous version appended
    # straight into post['links']).
    all_links = list(post.get('links') or [])
    card = post.get('card') or {}
    if card.get('url'):
        all_links.append({'url': card['url'], 'text': card.get('title', '')})
    classified_links = classify_links(all_links)
    investigatable_links = [lk for lk in classified_links if lk['type'] != 'unknown']

    priority = _score_priority(claims, classified_links, investigatable_links)
    tasks = _build_tasks(claims, classified_links, investigatable_links)

    return {
        'author': post.get('author', {}),
        'text': text,
        'url': post.get('url', ''),
        'timestamp': post.get('timestamp'),
        'metrics': post.get('metrics', {}),
        'claims': claims,
        'links': classified_links,
        'investigatable_links': investigatable_links,
        'priority': priority,
        'tasks': tasks,
        'worth_investigating': priority >= 2,
    }


def main():
    """CLI entry point: triage posts.json, write triage.json, print summary."""
    parser = argparse.ArgumentParser(description="Triage X posts for investigation")
    parser.add_argument("input", help="Path to posts.json")
    parser.add_argument("--output", help="Output file")
    parser.add_argument("--min-priority", type=int, default=2,
                        help="Min priority to include")
    args = parser.parse_args()

    with open(args.input) as f:
        data = json.load(f)
    posts = data.get('posts', [])
    print(f"Triaging {len(posts)} posts...")

    triaged = [triage_post(post) for post in posts]
    # Highest priority first.
    triaged.sort(key=lambda x: x['priority'], reverse=True)
    worth = [t for t in triaged if t['priority'] >= args.min_priority]

    output = {
        'triaged_at': datetime.now(timezone.utc).isoformat(),
        'total_posts': len(posts),
        'worth_investigating': len(worth),
        'posts': triaged,
        'investigation_queue': worth,
    }
    # Default output lands next to the input file.
    output_path = args.output or os.path.join(os.path.dirname(args.input), 'triage.json')
    with open(output_path, 'w') as f:
        json.dump(output, f, indent=2)

    print("\n=== Triage Summary ===")
    print(f"Total: {len(posts)} | Worth investigating: {len(worth)}")
    if worth:
        print("\n=== Investigation Queue ===")
        for t in worth:
            author = t['author'].get('handle', '?')
            claim_types = [c['type'] for c in t['claims']]
            link_types = [lk['type'] for lk in t['investigatable_links']]
            print(f"\n  [{t['priority']}] {author}")
            print(f"    {t['text'][:150]}...")
            if claim_types:
                print(f"    Claims: {', '.join(claim_types)}")
            if link_types:
                print(f"    Links: {', '.join(link_types)}")
            print(f"    Tasks: {len(t['tasks'])}")
    else:
        print("\nNo posts met the investigation threshold.")
    print(f"\nSaved to {output_path}")


if __name__ == "__main__":
    main()