#!/usr/bin/env python3
"""Tweet Analysis Tool - Scrapes and analyzes tweets via Chrome CDP."""

import argparse
import asyncio
import json
import re
import sys
from datetime import datetime

# Optional dependencies are imported lazily-tolerant: a missing package no
# longer hard-exits at import time (which made the module unusable as a
# library).  The CLI entry point still prints the same error and exits 1.
try:
    from playwright.async_api import async_playwright
except ImportError:
    async_playwright = None

try:
    import yfinance as yf
except ImportError:
    yf = None

# Cashtag pattern: $TSLA, $BRK.B etc. (1-5 letters, optional .X/.XX suffix).
# Compiled once at module level; extract_tickers may run on long threads.
_TICKER_RE = re.compile(r'\$([A-Z]{1,5}(?:\.[A-Z]{1,2})?)')


def extract_tickers(text: str) -> list[str]:
    """Extract unique $TICKER symbols from *text* (case-insensitive)."""
    return list(set(_TICKER_RE.findall(text.upper())))


def lookup_tickers(tickers: list[str], limit: int = 5) -> dict:
    """Look up market data for up to *limit* tickers via yfinance.

    Returns a mapping of ticker -> data dict (or ``{"error": ...}`` per
    symbol on lookup failure).  Empty dict when yfinance is unavailable
    or *tickers* is empty.
    """
    if not yf or not tickers:
        return {}
    results = {}
    for t in tickers[:limit]:  # cap lookups to keep runtime bounded
        try:
            info = yf.Ticker(t).info
            results[t] = {
                "price": info.get("currentPrice") or info.get("regularMarketPrice"),
                "market_cap": info.get("marketCap"),
                "name": info.get("shortName"),
                "volume": info.get("volume"),
                "day_change_pct": info.get("regularMarketChangePercent"),
                "52w_high": info.get("fiftyTwoWeekHigh"),
                "52w_low": info.get("fiftyTwoWeekLow"),
            }
        except Exception:
            # yfinance raises a variety of exception types; degrade per-symbol
            # rather than aborting the whole batch.
            results[t] = {"error": "lookup failed"}
    return results


async def scrape_tweet(url: str) -> dict:
    """Connect to a local Chrome CDP endpoint (localhost:9222) and scrape tweet data.

    Returns a dict of scraped fields.  On any failure the remaining fields
    stay ``None``/empty and ``scrape_error`` describes what went wrong.
    """
    # Normalize URL: force x.com host and an explicit scheme.
    url = url.replace("twitter.com", "x.com")
    if not url.startswith("http"):
        url = "https://" + url

    data = {
        "url": url,
        "author": None,
        "handle": None,
        "text": None,
        "timestamp": None,
        "metrics": {},
        "images": [],
        "bio": None,
        "followers": None,
        "following": None,
        "reply_to": None,
        "replies_sample": [],
        "scrape_error": None,
    }

    if async_playwright is None:
        data["scrape_error"] = "playwright not installed. Run: pip install playwright"
        return data

    async with async_playwright() as p:
        try:
            browser = await p.chromium.connect_over_cdp("http://localhost:9222")
        except Exception as e:
            data["scrape_error"] = f"CDP connection failed: {e}"
            return data

        page = None  # defined before try so the cleanup path can test it
        try:
            ctx = browser.contexts[0] if browser.contexts else await browser.new_context()
            page = await ctx.new_page()
            await page.goto(url, wait_until="domcontentloaded", timeout=30000)
            await page.wait_for_timeout(4000)  # let client-side rendering settle

            # All tweet articles on the permalink page (focal tweet + replies).
            articles = await page.query_selector_all('article[data-testid="tweet"]')
            if not articles:
                data["scrape_error"] = "No tweet articles found on page"
                await page.close()
                return data

            # The focal tweet is the first article exposing an absolute
            # <time datetime="..."> attribute; fall back to the first article.
            focal = None
            for art in articles:
                time_el = await art.query_selector('time')
                if time_el:
                    dt = await time_el.get_attribute('datetime')
                    if dt:
                        focal = art
                        data["timestamp"] = dt
                        break
            if not focal:
                focal = articles[0]

            # Author display name and @handle from profile links in the article.
            user_links = await focal.query_selector_all('a[role="link"]')
            for link in user_links:
                href = await link.get_attribute("href") or ""
                # Profile links look like "/username": single slash, non-empty.
                if href.startswith("/") and href.count("/") == 1 and len(href) > 1:
                    spans = await link.query_selector_all("span")
                    for span in spans:
                        txt = (await span.inner_text()).strip()
                        if txt.startswith("@"):
                            data["handle"] = txt
                        elif txt and not data["author"]:
                            data["author"] = txt
                    break

            # Tweet body text.
            text_el = await focal.query_selector('div[data-testid="tweetText"]')
            if text_el:
                data["text"] = await text_el.inner_text()

            # Engagement metrics: action-bar buttons are ordered
            # replies / retweets / likes / bookmarks.
            # NOTE(review): index-based mapping is fragile if X reorders buttons.
            group = await focal.query_selector('div[role="group"]')
            if group:
                buttons = await group.query_selector_all('button')
                metric_names = ["replies", "retweets", "likes", "bookmarks"]
                for i, btn in enumerate(buttons):
                    aria = await btn.get_attribute("aria-label") or ""
                    # Parse numbers from aria labels like "123 replies".
                    nums = re.findall(r'[\d,]+', aria)
                    if nums and i < len(metric_names):
                        data["metrics"][metric_names[i]] = nums[0].replace(",", "")

            # View count usually lives in a link span like "1.2M Views".
            view_spans = await focal.query_selector_all('a[role="link"] span')
            for vs in view_spans:
                txt = (await vs.inner_text()).strip()
                if "views" in txt.lower():  # case-insensitive; covers "Views"
                    nums = re.findall(r'[\d,.KkMm]+', txt)
                    if nums:
                        data["metrics"]["views"] = nums[0]

            # Attached images.
            for img in await focal.query_selector_all('img[alt="Image"]'):
                src = await img.get_attribute("src")
                if src:
                    data["images"].append(src)

            # Author follower/following counts, if a hover card or profile
            # summary happens to be visible anywhere in the page body.
            if data["handle"]:
                all_text = await page.inner_text("body")
                follower_match = re.search(r'([\d,.]+[KkMm]?)\s+Followers', all_text)
                following_match = re.search(r'([\d,.]+[KkMm]?)\s+Following', all_text)
                if follower_match:
                    data["followers"] = follower_match.group(1)
                if following_match:
                    data["following"] = following_match.group(1)

            # Sample up to three replies (articles after the focal tweet),
            # truncated to 200 chars each.
            for art in articles[1:4]:
                reply_text_el = await art.query_selector('div[data-testid="tweetText"]')
                if reply_text_el:
                    rt = await reply_text_el.inner_text()
                    if rt:
                        data["replies_sample"].append(rt[:200])

            await page.close()
        except Exception as e:
            data["scrape_error"] = str(e)
            if page is not None:
                try:
                    await page.close()
                except Exception:
                    pass

    return data


def analyze(data: dict) -> dict:
    """Produce a structured risk analysis from scraped tweet data.

    Returns a dict with the original ``tweet_data``, extracted tickers and
    their market data, a list of red-flag strings, and a verdict sentence.
    """
    text = data.get("text") or ""
    tickers = extract_tickers(text)
    ticker_data = lookup_tickers(tickers)

    red_flags = []
    text_lower = text.lower()

    # Hype vocabulary typical of pump-and-dump / funnel promotions.
    promo_words = ["100x", "1000x", "moon", "gem", "rocket", "guaranteed",
                   "easy money", "don't miss", "last chance", "about to explode",
                   "next big", "sleeping giant", "never stops printing",
                   "true freedom", "beat the institutions", "revolution",
                   "empire", "vault", "get rich", "financial freedom",
                   "life changing", "without a degree", "from a bedroom",
                   "join this"]
    for w in promo_words:
        if w in text_lower:
            red_flags.append(f"Promotional language: '{w}'")

    if len(tickers) > 3:
        red_flags.append(f"Multiple tickers mentioned ({len(tickers)})")
    if len(text) > 2000:
        red_flags.append("Extremely long promotional thread")
    if "github" in text_lower and ("star" in text_lower or "repo" in text_lower):
        red_flags.append("Pushing GitHub repo (potential funnel to paid product)")
    if any(w in text_lower for w in ["course", "discord", "premium", "paid group", "subscribe"]):
        red_flags.append("Funneling to paid product/community")

    # Coordinated hype in replies (rocket/diamond/fire emoji, "LFG").
    replies = data.get("replies_sample", [])
    if replies:
        rocket_replies = sum(1 for r in replies
                             if any(e in r for e in ["šŸš€", "šŸ’Ž", "šŸ”„", "LFG"]))
        if rocket_replies >= 2:
            red_flags.append("Replies show coordinated hype patterns")

    # Penny-stock / micro-cap characteristics of mentioned tickers.
    for t, info in ticker_data.items():
        if isinstance(info, dict) and not info.get("error"):
            price = info.get("price")
            mcap = info.get("market_cap")
            if price and price < 1:
                red_flags.append(f"${t} is a penny stock (${price})")
            if mcap and mcap < 50_000_000:
                red_flags.append(f"${t} micro-cap (<$50M market cap)")

    # Verdict scales with number of red flags.
    if len(red_flags) >= 3:
        verdict = "High risk - multiple red flags detected, exercise extreme caution"
    elif red_flags:
        verdict = "Some concerns - verify claims independently before acting"
    elif tickers:
        verdict = "Worth investigating - do your own due diligence"
    else:
        verdict = "Informational tweet - no immediate financial claims detected"

    return {
        "tweet_data": data,
        "tickers_found": tickers,
        "ticker_data": ticker_data,
        "red_flags": red_flags,
        "verdict": verdict,
    }


def format_markdown(analysis: dict) -> str:
    """Format an ``analyze`` result as a markdown report."""
    d = analysis["tweet_data"]
    lines = ["# Tweet Analysis", ""]
    lines.append(f"**URL:** {d['url']}")
    lines.append(f"**Analyzed:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    lines.append("")

    # WHO
    lines.append("## šŸ‘¤ WHO")
    lines.append(f"- **Author:** {d.get('author') or 'Unknown'}")
    lines.append(f"- **Handle:** {d.get('handle') or 'Unknown'}")
    if d.get("followers"):
        lines.append(f"- **Followers:** {d['followers']}")
    if d.get("following"):
        lines.append(f"- **Following:** {d['following']}")
    if d.get("bio"):
        lines.append(f"- **Bio:** {d['bio']}")
    lines.append("")

    # WHAT
    lines.append("## šŸ“ WHAT")
    lines.append(f"> {d.get('text') or 'Could not extract tweet text'}")
    lines.append("")
    if d.get("timestamp"):
        lines.append(f"**Posted:** {d['timestamp']}")
    metrics = d.get("metrics", {})
    if metrics:
        m_parts = [f"{v} {k}" for k, v in metrics.items()]
        lines.append(f"**Metrics:** {' | '.join(m_parts)}")
    if d.get("images"):
        lines.append(f"**Images:** {len(d['images'])} attached")
    lines.append("")

    # VERIFY
    lines.append("## āœ… VERIFY")
    tickers = analysis.get("tickers_found", [])
    td = analysis.get("ticker_data", {})
    if tickers:
        lines.append(f"**Tickers mentioned:** {', '.join('$' + t for t in tickers)}")
        lines.append("")
        for t, info in td.items():
            if isinstance(info, dict) and not info.get("error"):
                lines.append(f"### ${t}"
                             + (f" - {info.get('name', '')}" if info.get('name') else ""))
                if info.get("price"):
                    lines.append(f"- **Price:** ${info['price']}")
                if info.get("market_cap"):
                    mc = info["market_cap"]
                    if mc > 1e9:
                        lines.append(f"- **Market Cap:** ${mc/1e9:.2f}B")
                    else:
                        lines.append(f"- **Market Cap:** ${mc/1e6:.1f}M")
                if info.get("volume"):
                    lines.append(f"- **Volume:** {info['volume']:,}")
                # "is not None" so a legitimate 0.00% day change still prints.
                if info.get("day_change_pct") is not None:
                    lines.append(f"- **Day Change:** {info['day_change_pct']:.2f}%")
                if info.get("52w_high") and info.get("52w_low"):
                    lines.append(f"- **52W Range:** ${info['52w_low']} - ${info['52w_high']}")
                lines.append("")
            elif isinstance(info, dict) and info.get("error"):
                lines.append(f"- ${t}: lookup failed")
    else:
        lines.append("No tickers mentioned in tweet.")
    lines.append("")

    # RED FLAGS
    lines.append("## 🚩 RED FLAGS")
    flags = analysis.get("red_flags", [])
    if flags:
        for f in flags:
            lines.append(f"- āš ļø {f}")
    else:
        lines.append("- None detected")
    lines.append("")

    # MONEY
    lines.append("## šŸ’° MONEY")
    if tickers and not flags:
        lines.append("Potential opportunity identified. Research further before any position.")
    elif tickers and flags:
        lines.append("Tickers mentioned but red flags present. High risk of promoted/manipulated asset.")
    else:
        lines.append("No direct financial opportunity identified in this tweet.")
    lines.append("")

    # VERDICT
    lines.append("## šŸŽÆ VERDICT")
    lines.append(f"**{analysis['verdict']}**")
    lines.append("")

    # Scrape issues, if any
    if d.get("scrape_error"):
        lines.append(f"---\nāš ļø *Scrape warning: {d['scrape_error']}*")

    return "\n".join(lines)


async def main():
    """CLI entry point: validate the URL, scrape, analyze, and emit the report."""
    parser = argparse.ArgumentParser(description="Analyze a tweet")
    parser.add_argument("url", help="Tweet URL (x.com or twitter.com)")
    parser.add_argument("--json", action="store_true", dest="json_output",
                        help="Output JSON")
    parser.add_argument("-o", "--output", help="Write output to file")
    args = parser.parse_args()

    # Validate URL shape before doing any network work.
    if not re.search(r'(x\.com|twitter\.com)/.+/status/\d+', args.url):
        print("ERROR: Invalid tweet URL", file=sys.stderr)
        sys.exit(1)

    # The hard-exit for a missing playwright now lives here (CLI boundary),
    # not at module import time.
    if async_playwright is None:
        print("ERROR: playwright not installed. Run: pip install playwright",
              file=sys.stderr)
        sys.exit(1)

    print("Scraping tweet...", file=sys.stderr)
    data = await scrape_tweet(args.url)
    print("Analyzing...", file=sys.stderr)
    analysis = analyze(data)

    if args.json_output:
        output = json.dumps(analysis, indent=2, default=str)
    else:
        output = format_markdown(analysis)

    if args.output:
        with open(args.output, "w") as f:
            f.write(output)
        print(f"Written to {args.output}", file=sys.stderr)
    else:
        print(output)


if __name__ == "__main__":
    asyncio.run(main())