234 lines
8.0 KiB
Python
234 lines
8.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Reddit Market Intel — Scan Reddit for crypto/market sentiment and alpha.
|
|
|
|
Scans configurable subreddits for hot topics, sentiment shifts, and emerging narratives.
|
|
No API key required (uses public JSON endpoints).
|
|
|
|
Usage:
|
|
python3 reddit-market-intel.py # Full scan
|
|
python3 reddit-market-intel.py --quick # Quick scan (top posts only)
|
|
python3 reddit-market-intel.py --topic "solana" # Scan for specific topic
|
|
"""
|
|
|
|
import json
import re
import sys
import time
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
from urllib.error import URLError, HTTPError
from urllib.parse import quote_plus
from urllib.request import urlopen, Request
|
|
|
|
# Reports land in <repo>/data/reddit-intel; created eagerly at import time.
DATA_DIR = Path(__file__).parent.parent / "data" / "reddit-intel"
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Custom User-Agent sent with every request to the public JSON endpoints.
USER_AGENT = "reddit-market-intel/1.0"
RATE_LIMIT = 2   # minimum seconds between consecutive requests
last_request = 0  # wall-clock time of the last completed request (module-global)

# Subreddits covered by a full scan (see run_full_scan).
DEFAULT_SUBREDDITS = [
    "cryptocurrency", "Bitcoin", "ethtrader", "CryptoMarkets",
    "wallstreetbets", "stocks", "investing", "economy",
    "solana", "defi", "polymarket"
]

# Keyword lexicons for the bag-of-words sentiment score in analyze_sentiment.
# NOTE(review): "liquidat" looks like a stem intended to catch
# "liquidated"/"liquidation" — exact-token matching will never hit it; confirm.
SENTIMENT_POSITIVE = {"bullish", "moon", "pump", "gains", "breakout", "ath", "buy", "long",
                      "🚀", "💎", "accumulate", "undervalued", "rally"}
SENTIMENT_NEGATIVE = {"bearish", "dump", "crash", "sell", "short", "scam", "rug",
                      "overvalued", "bubble", "liquidat", "rekt", "fear"}
|
|
|
|
|
|
def fetch_json(url, max_retries=3):
    """Fetch *url* and parse the response body as JSON.

    Enforces the module-wide rate limit (at least RATE_LIMIT seconds between
    requests). On HTTP 429 backs off 10 seconds and retries up to
    *max_retries* times; the original implementation recursed unboundedly
    here, which could grow the stack forever while rate-limited.

    Returns the parsed object, or None on any other URL/HTTP error.
    """
    global last_request
    for attempt in range(max_retries + 1):
        # Space requests out so we stay under Reddit's unauthenticated limits.
        elapsed = time.time() - last_request
        if elapsed < RATE_LIMIT:
            time.sleep(RATE_LIMIT - elapsed)
        req = Request(url, headers={'User-Agent': USER_AGENT})
        try:
            with urlopen(req, timeout=15) as resp:
                last_request = time.time()
                return json.loads(resp.read().decode('utf-8'))
        except (HTTPError, URLError) as e:
            # Only HTTPError carries a status code; retry solely on 429.
            if isinstance(e, HTTPError) and e.code == 429 and attempt < max_retries:
                time.sleep(10)
                continue
            return None
    return None
|
|
|
|
|
|
def get_hot_posts(subreddit, limit=10):
    """Return up to *limit* hot posts from r/<subreddit> as plain dicts.

    Returns an empty list when the fetch fails. Self-text is truncated to
    500 characters; timestamps are ISO-8601 UTC strings.
    """
    payload = fetch_json(f"https://www.reddit.com/r/{subreddit}/hot.json?limit={limit}")
    if not payload:
        return []

    collected = []
    children = payload.get("data", {}).get("children", [])
    for child in children:
        item = child.get("data", {})
        created_at = datetime.fromtimestamp(item.get("created_utc", 0), tz=timezone.utc)
        body = item.get("selftext", "") or ""
        collected.append({
            "title": item.get("title", ""),
            "score": item.get("score", 0),
            "comments": item.get("num_comments", 0),
            "url": f"https://reddit.com{item.get('permalink', '')}",
            "created": created_at.isoformat(),
            "selftext": body[:500],
            "flair": item.get("link_flair_text", ""),
            "subreddit": subreddit,
        })
    return collected
|
|
|
|
|
|
def analyze_sentiment(text):
    """Score *text* against the sentiment lexicons.

    Returns ("bullish" | "bearish" | "neutral", strength) where strength is
    the margin between positive and negative keyword hits (0 for neutral).

    Fixes over the original: whitespace split() left punctuation attached
    ("bullish!" never matched) and emoji keywords ("🚀", "💎") only matched
    when space-delimited. Tokens are now \\w+ runs, with a substring
    fallback for non-alphanumeric keywords such as emoji.
    """
    lowered = text.lower()
    tokens = set(re.findall(r'\w+', lowered))

    def count_hits(lexicon):
        # One hit per distinct keyword present (matches the original's
        # set-intersection counting; repeats of a word don't add weight).
        hits = 0
        for keyword in lexicon:
            if keyword in tokens:
                hits += 1
            elif not keyword.isalnum() and keyword in lowered:
                # Emoji never tokenize as \w words; look for them directly.
                hits += 1
        return hits

    pos = count_hits(SENTIMENT_POSITIVE)
    neg = count_hits(SENTIMENT_NEGATIVE)
    if pos > neg:
        return "bullish", pos - neg
    if neg > pos:
        return "bearish", neg - pos
    return "neutral", 0
|
|
|
|
|
|
def extract_tickers(text):
    """Extract potential crypto/stock tickers ($BTC, $ETH, etc.).

    Returns a de-duplicated list (order not guaranteed). The negative
    lookahead rejects uppercase runs longer than 6 letters outright; the
    original pattern truncate-matched them ("$TOOLONGTICKER" -> "TOOLON").
    """
    return list(set(re.findall(r'\$([A-Z]{2,6})(?![A-Z])', text)))
|
|
|
|
|
|
def scan_subreddit(subreddit, limit=10):
    """Scan one subreddit's hot posts; annotate each with sentiment/tickers.

    Returns a summary dict (posts, overall sentiment, sentiment breakdown,
    top-10 tickers, high-engagement posts) or None if the fetch failed.
    Prints a one-line progress indicator as a side effect.
    """
    print(f"  📡 r/{subreddit}...", end=" ", flush=True)
    posts = get_hot_posts(subreddit, limit)
    if not posts:
        print("❌ failed")
        return None

    sentiment_tally = Counter()
    ticker_tally = Counter()
    hot_threads = []

    for post in posts:
        combined = f"{post['title']} {post['selftext']}"
        label, margin = analyze_sentiment(combined)
        post["sentiment"] = label
        post["sentiment_strength"] = margin
        sentiment_tally[label] += 1

        symbols = extract_tickers(combined)
        post["tickers"] = symbols
        ticker_tally.update(symbols)

        # Flag threads with heavy discussion (>50 comments) or big upvote
        # counts (>500 score).
        if post["comments"] > 50 or post["score"] > 500:
            hot_threads.append(post)

    bullish = sentiment_tally.get("bullish", 0)
    bearish = sentiment_tally.get("bearish", 0)
    if bullish > bearish:
        overall = "bullish"
    elif bearish > bullish:
        overall = "bearish"
    else:
        overall = "neutral"

    print(f"✅ {len(posts)} posts | sentiment: {overall}")

    return {
        "subreddit": subreddit,
        "posts": posts,
        "overall_sentiment": overall,
        "sentiment_breakdown": dict(sentiment_tally),
        "trending_tickers": dict(ticker_tally.most_common(10)),
        "high_engagement": hot_threads,
    }
|
|
|
|
|
|
def search_topic(topic, limit=25):
    """Search Reddit sitewide for *topic*; return sentiment-tagged posts.

    Returns a list of post dicts (empty on fetch failure). Prints a header
    line as a side effect.
    """
    print(f"\n🔍 Searching Reddit for: '{topic}'")
    # Bug fix: the topic was interpolated raw into the URL, so spaces or
    # '&' in the query produced a malformed/ambiguous request.
    query = quote_plus(topic)
    data = fetch_json(f"https://www.reddit.com/search.json?q={query}&sort=hot&limit={limit}")
    if not data:
        return []

    posts = []
    for child in data.get("data", {}).get("children", []):
        d = child.get("data", {})
        full_text = f"{d.get('title', '')} {d.get('selftext', '')[:300]}"
        sent, _strength = analyze_sentiment(full_text)
        posts.append({
            "title": d.get("title", ""),
            "subreddit": d.get("subreddit", ""),
            "score": d.get("score", 0),
            "comments": d.get("num_comments", 0),
            "sentiment": sent,
            "tickers": extract_tickers(full_text),
            "url": f"https://reddit.com{d.get('permalink', '')}",
        })
    return posts
|
|
|
|
|
|
def run_full_scan(quick=False):
    """Scan all DEFAULT_SUBREDDITS, aggregate sentiment/tickers, save report.

    quick=True fetches 5 posts per subreddit instead of 15. Writes a
    timestamped JSON report plus latest.json under DATA_DIR, prints a
    human-readable summary, and returns the report dict.
    """
    print(f"\n🔎 Reddit Market Intel — {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

    limit = 5 if quick else 15
    results = []
    all_tickers = Counter()
    sentiment_summary = Counter()

    print("\n📊 Scanning subreddits:")
    for sub in DEFAULT_SUBREDDITS:
        result = scan_subreddit(sub, limit)
        if result:
            results.append(result)
            all_tickers.update(result["trending_tickers"])
            sentiment_summary[result["overall_sentiment"]] += 1

    # Build report.
    # NOTE(review): report timestamps are naive local time while post
    # timestamps are UTC — confirm before comparing them downstream.
    report = {
        "timestamp": datetime.now().isoformat(),
        "subreddits_scanned": len(results),
        # Majority vote across subreddits; ties resolve to whichever label
        # max() sees first.
        "overall_market_sentiment": max(sentiment_summary, key=sentiment_summary.get) if sentiment_summary else "neutral",
        "sentiment_by_sub": {r["subreddit"]: r["overall_sentiment"] for r in results},
        "top_tickers": dict(all_tickers.most_common(20)),
        "high_engagement_posts": [],
    }

    # Take up to 3 high-engagement posts per subreddit, then keep the 15
    # most-commented overall.
    for r in results:
        report["high_engagement_posts"].extend(r["high_engagement"][:3])
    report["high_engagement_posts"].sort(key=lambda x: x["comments"], reverse=True)
    report["high_engagement_posts"] = report["high_engagement_posts"][:15]

    # Save a timestamped snapshot plus a stable "latest" pointer.
    report_file = DATA_DIR / f"scan-{datetime.now().strftime('%Y%m%d-%H%M')}.json"
    report_file.write_text(json.dumps(report, indent=2))
    (DATA_DIR / "latest.json").write_text(json.dumps(report, indent=2))

    # Print summary
    print(f"\n{'=' * 60}")
    print(f"📈 Overall Market Sentiment: {report['overall_market_sentiment'].upper()}")
    # Bug fix: the ternary previously lived inside print()'s argument, so an
    # empty ticker set still emitted a stray blank line.
    if all_tickers:
        print(f"\n🏷️ Trending Tickers: {', '.join(f'${t}({c})' for t, c in all_tickers.most_common(10))}")

    if report["high_engagement_posts"]:
        print("\n🔥 Top Engagement Posts:")
        for p in report["high_engagement_posts"][:5]:
            print(f"  [{p['sentiment']}] r/{p['subreddit']} | ⬆{p['score']} 💬{p['comments']}")
            print(f"     {p['title'][:80]}")

    print(f"\n💾 Report saved: {report_file}")
    return report
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI dispatch: --topic <query> searches sitewide, --quick does a
    # shallow scan, default is a full scan.
    args = sys.argv
    if "--topic" in args:
        flag_pos = args.index("--topic")
        query = args[flag_pos + 1] if flag_pos + 1 < len(args) else "crypto"
        for post in search_topic(query)[:10]:
            print(f"  [{post['sentiment']}] r/{post['subreddit']} ⬆{post['score']} 💬{post['comments']}")
            print(f"     {post['title'][:80]}")
    elif "--quick" in args:
        run_full_scan(quick=True)
    else:
        run_full_scan()
|