#!/usr/bin/env python3
"""
X/Twitter Feed Scraper using Playwright

Scrapes specific accounts for trading-related posts.
Uses saved Chrome session cookies for authentication.
"""
import json
import os
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

# Accounts to monitor (handles without the leading "@").
ACCOUNTS = [
    "browomo",
    "ArchiveExplorer",
    "noisyb0y1",
    "krajekis",
    "Shelpid_WI3M",
    "polyaboretum",
    "0xashensoul",
]

# Case-insensitive substrings used to classify a post as trading-related.
TRADING_KEYWORDS = [
    "polymarket", "trade", "profit", "wallet", "arbitrage", "signal",
    "crypto", "bitcoin", "ethereum", "solana", "strategy", "edge",
    "bet", "position", "stock", "market", "pnl", "alpha", "$",
    "usdc", "defi", "token", "copy", "whale", "degen", "short",
    "long", "bullish", "bearish", "portfolio",
]

# Scan results and the seen-post history live under <repo>/data/x-feed.
DATA_DIR = Path(__file__).parent.parent / "data" / "x-feed"
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Playwright-format cookies exported from the local Chrome debug session.
COOKIE_FILE = Path(__file__).parent / "x_cookies.json"

TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "6443752046")


def send_telegram(message):
    """Send *message* via the Telegram bot API.

    Falls back to printing when no bot token is configured; never raises —
    alerting is best-effort and must not kill a scan.
    """
    if not TELEGRAM_BOT_TOKEN:
        print(f"[ALERT] {message}")
        return
    import urllib.request
    url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage"
    data = json.dumps(
        {"chat_id": TELEGRAM_CHAT_ID, "text": message, "parse_mode": "HTML"}
    ).encode()
    req = urllib.request.Request(
        url, data=data, headers={"Content-Type": "application/json"}
    )
    try:
        urllib.request.urlopen(req, timeout=10)
    except Exception as e:
        print(f"Telegram error: {e}")


def save_cookies(context):
    """Persist the browser context's cookies to COOKIE_FILE as JSON."""
    cookies = context.cookies()
    COOKIE_FILE.write_text(json.dumps(cookies, indent=2))
    print(f"Saved {len(cookies)} cookies")


def load_cookies(context):
    """Load saved cookies into *context*; return True if any were loaded."""
    if COOKIE_FILE.exists():
        cookies = json.loads(COOKIE_FILE.read_text())
        context.add_cookies(cookies)
        print(f"Loaded {len(cookies)} cookies")
        return True
    return False


def export_cookies_from_chrome():
    """One-time: grab cookies from the running Chrome debug instance.

    Talks to Chrome's remote-debugging endpoint on localhost:9222, finds an
    open x.com tab, and pulls all cookies over the CDP websocket. X/Twitter
    cookies are converted to Playwright's cookie format and written to
    COOKIE_FILE. Returns the exported list (empty if no X tab was found).
    """
    import http.client
    import websocket as ws_mod  # third-party "websocket-client" package

    conn = http.client.HTTPConnection("localhost", 9222)
    conn.request("GET", "/json")
    tabs = json.loads(conn.getresponse().read())
    x_tab = None
    for t in tabs:
        if "x.com" in t.get("url", ""):
            x_tab = t
            break
    if not x_tab:
        print("No X tab found in Chrome debug")
        return []

    ws = ws_mod.create_connection(x_tab["webSocketDebuggerUrl"], timeout=10)
    ws.send(json.dumps({"id": 1, "method": "Network.getAllCookies"}))
    result = json.loads(ws.recv())
    all_cookies = result.get("result", {}).get("cookies", [])
    ws.close()

    # Filter for x.com cookies and convert to Playwright format.
    # NOTE(review): CDP may omit sameSite entirely; "Lax" is assumed as the
    # default — confirm against Playwright's accepted values if imports fail.
    x_cookies = []
    for c in all_cookies:
        if "x.com" in c.get("domain", "") or "twitter.com" in c.get("domain", ""):
            x_cookies.append({
                "name": c["name"],
                "value": c["value"],
                "domain": c["domain"],
                "path": c.get("path", "/"),
                "secure": c.get("secure", False),
                "httpOnly": c.get("httpOnly", False),
                "sameSite": c.get("sameSite", "Lax"),
            })
    COOKIE_FILE.write_text(json.dumps(x_cookies, indent=2))
    print(f"Exported {len(x_cookies)} X cookies from Chrome")
    return x_cookies


def scrape_account(page, account, max_scroll=5):
    """Scrape recent posts from a single account.

    Loads https://x.com/<account>, scrolls *max_scroll* times, and returns a
    list of dicts: {account, text (<=800 chars), urls (<=5), scraped_at}.
    Returns an empty list if the profile page cannot be loaded.
    """
    posts = []
    try:
        page.goto(f"https://x.com/{account}", wait_until="networkidle", timeout=15000)
    except Exception:
        # X keeps streaming requests, so network-idle often times out;
        # retry with the weaker DOM-loaded condition plus a fixed settle wait.
        try:
            page.goto(f"https://x.com/{account}", wait_until="domcontentloaded", timeout=10000)
            page.wait_for_timeout(3000)
        except Exception as e:
            print(f" Failed to load @{account}: {e}")
            return posts

    seen_texts = set()
    for _ in range(max_scroll):
        articles = page.query_selector_all("article")
        for article in articles:
            try:
                text = article.inner_text()[:800]
                # Deduplicate across scroll passes by the first 100 chars.
                sig = text[:100]
                if sig in seen_texts:
                    continue
                seen_texts.add(sig)
                # Extract links (hrefs may be relative, e.g. "/user/status/…").
                links = article.query_selector_all("a")
                urls = [l.get_attribute("href") for l in links if l.get_attribute("href")]
                posts.append({
                    "account": account,
                    "text": text,
                    "urls": urls[:5],
                    "scraped_at": datetime.now(timezone.utc).isoformat(),
                })
            except Exception:
                # Articles can detach from the DOM mid-scroll; skip and go on.
                continue
        # Scroll down to trigger lazy-loading of more posts.
        page.evaluate("window.scrollBy(0, 1500)")
        page.wait_for_timeout(1500)
    return posts


def is_trading_related(text):
    """Return True if *text* contains any trading keyword (case-insensitive)."""
    text_lower = text.lower()
    return any(kw in text_lower for kw in TRADING_KEYWORDS)


def main():
    from playwright.sync_api import sync_playwright

    print("=== X Feed Scraper (Playwright) ===")
    print(f"Time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}")

    # Export cookies from Chrome if we don't have them yet.
    if not COOKIE_FILE.exists():
        print("No cookies found — exporting from Chrome debug session...")
        export_cookies_from_chrome()

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        context = browser.new_context(
            user_agent=(
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            ),
            viewport={"width": 1280, "height": 900},
        )
        load_cookies(context)
        page = context.new_page()

        all_posts = []
        trading_posts = []
        for account in ACCOUNTS:
            print(f"\nScraping @{account}...", end=" ", flush=True)
            posts = scrape_account(page, account)
            print(f"{len(posts)} posts")
            for post in posts:
                all_posts.append(post)
                if is_trading_related(post["text"]):
                    trading_posts.append(post)
        browser.close()

    print(f"\n{'='*50}")
    print(f"Total posts: {len(all_posts)}")
    print(f"Trading-related: {len(trading_posts)}")

    # Save results (trading posts only; totals kept for reference).
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M")
    out_file = DATA_DIR / f"scan-{timestamp}.json"
    out_file.write_text(json.dumps({
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_posts": len(all_posts),
        "trading_posts": len(trading_posts),
        "posts": trading_posts,
    }, indent=2))
    print(f"Saved to {out_file}")

    # Check for new posts we haven't seen before. The history is kept as an
    # ORDERED list so that truncating to the last 5000 keeps the most recent
    # signatures (dumping an unordered set would evict arbitrary entries).
    seen_file = DATA_DIR / "seen_posts.json"
    seen_sigs = []
    if seen_file.exists():
        try:
            seen_sigs = json.loads(seen_file.read_text())
        except (OSError, ValueError):
            pass  # corrupt/unreadable history: treat every post as new
    seen = set(seen_sigs)

    new_posts = []
    for post in trading_posts:
        sig = post["text"][:150]
        if sig not in seen:
            new_posts.append(post)
            seen.add(sig)
            seen_sigs.append(sig)
    seen_file.write_text(json.dumps(seen_sigs[-5000:]))  # Keep last 5000

    if new_posts:
        print(f"\nšŸ”” {len(new_posts)} NEW trading posts!")
        for post in new_posts[:5]:
            author = f"@{post['account']}"
            snippet = post["text"][:200].replace("\n", " ")
            print(f"\n {author}: {snippet}")
            # Alert on Telegram.
            msg = f"šŸ” New from {author}\n\n{snippet[:300]}"
            if post.get("urls"):
                # NOTE(review): scraped hrefs are often relative paths, so this
                # substring filter may miss on-site links — verify in practice.
                x_urls = [u for u in post["urls"] if "x.com" in u or "twitter.com" in u]
                if x_urls:
                    msg += f"\n\n{x_urls[0]}"
            send_telegram(msg)
    else:
        print("\nNo new trading posts since last scan.")


if __name__ == "__main__":
    main()