#!/usr/bin/env python3
"""
news-feed - RSS reader and news aggregator for staying informed

A simple RSS/Atom feed reader that:
- Fetches and parses feeds
- Stores articles locally
- Generates digests
- Tracks read/unread status
"""

import os
import json
import hashlib
import xml.etree.ElementTree as ET
from datetime import datetime
from http.client import HTTPException
from pathlib import Path
from typing import Optional
from urllib.request import urlopen, Request
from urllib.error import URLError
from html.parser import HTMLParser
import sys

PROJECT_DIR = Path(__file__).parent
DATA_DIR = PROJECT_DIR / "data"
FEEDS_FILE = DATA_DIR / "feeds.json"
ARTICLES_FILE = DATA_DIR / "articles.json"

# Namespace map used for every namespaced Atom lookup.
ATOM_NS = {'atom': 'http://www.w3.org/2005/Atom'}

# Default feeds to get started
DEFAULT_FEEDS = [
    {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "category": "tech"},
    {"name": "Lobsters", "url": "https://lobste.rs/rss", "category": "tech"},
    {"name": "r/programming", "url": "https://www.reddit.com/r/programming/.rss", "category": "tech"},
]

# NOTE(review): several user-facing strings below contain glyph sequences that
# look like mis-decoded UTF-8 symbols. They are kept byte-for-byte because the
# intended characters cannot be recovered with certainty - confirm against the
# original file before changing them.


class MLStripper(HTMLParser):
    """Strip HTML tags from text."""

    def __init__(self):
        super().__init__()
        # Text fragments in document order; joined by get_text().
        self.text = []

    def handle_data(self, d):
        """Collect one run of character data found between tags."""
        self.text.append(d)

    def get_text(self):
        """Return all collected character data as a single string."""
        return ''.join(self.text)


def strip_html(html: str) -> str:
    """Remove HTML tags from string.

    Returns the character data of *html* with markup removed. If the parser
    raises for any reason, the input is returned unchanged (best effort).
    """
    s = MLStripper()
    try:
        s.feed(html)
        # feed() holds back trailing text that might be an unfinished
        # character reference (e.g. "AT&T"); close() flushes that buffer so
        # the tail is not silently dropped.
        s.close()
    except Exception:
        return html
    return s.get_text()


def load_feeds() -> list:
    """Load feed list.

    Returns the contents of FEEDS_FILE when it exists, otherwise a fresh copy
    of DEFAULT_FEEDS so callers can mutate the result without altering the
    module-level defaults.
    """
    if FEEDS_FILE.exists():
        with open(FEEDS_FILE, encoding='utf-8') as f:
            return json.load(f)
    return [dict(feed) for feed in DEFAULT_FEEDS]


def save_feeds(feeds: list):
    """Save feed list to FEEDS_FILE, creating DATA_DIR if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with open(FEEDS_FILE, 'w', encoding='utf-8') as f:
        json.dump(feeds, f, indent=2)


def load_articles() -> dict:
    """Load cached articles.

    Returns the mapping stored in ARTICLES_FILE (article id -> article dict),
    or an empty dict when the file does not exist.
    """
    if ARTICLES_FILE.exists():
        with open(ARTICLES_FILE, encoding='utf-8') as f:
            return json.load(f)
    return {}


def save_articles(articles: dict):
    """Save articles cache to ARTICLES_FILE, creating DATA_DIR if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with open(ARTICLES_FILE, 'w', encoding='utf-8') as f:
        json.dump(articles, f, indent=2)


def fetch_feed(url: str) -> Optional[str]:
    """Fetch feed content.

    Returns the response body decoded as UTF-8 (undecodable bytes are
    ignored), or None after printing an error when the request fails.
    """
    try:
        # Request() itself raises ValueError for a malformed URL, so it has to
        # live inside the try for one bad feed not to abort a whole refresh.
        req = Request(url, headers={'User-Agent': 'news-feed/1.0'})
        with urlopen(req, timeout=10) as resp:
            return resp.read().decode('utf-8', errors='ignore')
    except (URLError, OSError, HTTPException, ValueError) as e:
        print(f" Error fetching: {e}")
        return None


def _make_article(title: str, link: str, body: str, published: str, feed_name: str) -> dict:
    """Build the article record stored in the cache for one feed entry."""
    return {
        # The id hashes the link exactly as found in the feed (before
        # stripping) so ids stay identical to those already cached.
        'id': hashlib.md5(link.encode()).hexdigest()[:12],
        'title': title.strip(),
        'link': link.strip(),
        'description': strip_html(body)[:300] if body else '',
        'published': published,
        'feed': feed_name,
        'fetched': datetime.now().isoformat(),
        'read': False,
    }


def _atom_text(entry, tag: str) -> str:
    """Return the text of child *tag*, trying the Atom namespace first."""
    return entry.findtext(f'atom:{tag}', '', ATOM_NS) or entry.findtext(tag, '')


def _atom_link(entry) -> str:
    """Return the best href among an Atom entry's <link> children.

    A link whose rel is "alternate" (or absent) wins; otherwise the first link
    carrying an href is used. Returns '' when no link has an href.
    """
    # findall() returns lists, so `or` is safe here. It is NOT safe on single
    # Elements: a childless element such as <link href="..."/> is falsy.
    links = entry.findall('atom:link', ATOM_NS) or entry.findall('link')
    fallback = ''
    for link_elem in links:
        href = link_elem.get('href', '')
        if not href:
            continue
        if link_elem.get('rel', 'alternate') == 'alternate':
            return href
        if not fallback:
            fallback = href
    return fallback


def parse_feed(content: str, feed_name: str) -> list:
    """Parse RSS/Atom feed content.

    Returns a list of article dicts (see _make_article). RSS <item> elements
    are tried first; Atom <entry> elements are only considered when no RSS
    item produced an article. Entries lacking a title or a link are skipped.
    On malformed XML a message is printed and the articles collected so far
    (normally none) are returned.
    """
    articles = []

    try:
        root = ET.fromstring(content)

        # Try RSS format
        for item in root.findall('.//item'):
            title = item.findtext('title', '')
            link = item.findtext('link', '')
            if title and link:
                articles.append(_make_article(
                    title,
                    link,
                    item.findtext('description', ''),
                    item.findtext('pubDate', ''),
                    feed_name,
                ))

        # Try Atom format if no items found
        if not articles:
            entries = root.findall('.//atom:entry', ATOM_NS) or root.findall('.//entry')
            for entry in entries:
                title = _atom_text(entry, 'title')
                link = _atom_link(entry)
                if title and link:
                    articles.append(_make_article(
                        title,
                        link,
                        _atom_text(entry, 'summary'),
                        _atom_text(entry, 'published') or _atom_text(entry, 'updated'),
                        feed_name,
                    ))
    except ET.ParseError as e:
        print(f" Parse error: {e}")

    return articles


def refresh():
    """Refresh all feeds.

    Fetches every configured feed, adds articles whose id is not cached yet,
    saves the cache and prints a per-feed and overall summary.
    """
    feeds = load_feeds()
    articles = load_articles()
    new_count = 0

    print(f"Refreshing {len(feeds)} feeds...\n")

    for feed in feeds:
        print(f" {feed['name']}...", end=' ', flush=True)
        content = fetch_feed(feed['url'])
        if not content:
            print("failed")
            continue

        items = parse_feed(content, feed['name'])
        for item in items:
            # Never overwrite a cached article: that would reset its read flag.
            if item['id'] not in articles:
                articles[item['id']] = item
                new_count += 1
        print(f"{len(items)} items")

    save_articles(articles)
    print(f"\nāœ“ {new_count} new articles")


def _truncate(text: str, width: int) -> str:
    """Cut *text* to *width* characters, adding "..." only when it was cut."""
    return text[:width] + "..." if len(text) > width else text


def list_articles(limit: int = 20, unread_only: bool = False, feed: Optional[str] = None):
    """List articles.

    Prints at most *limit* cached articles, newest fetch first. *unread_only*
    hides read articles; *feed* keeps only articles whose feed name contains
    that text (case-insensitive).
    """
    articles = load_articles()
    items = list(articles.values())

    if unread_only:
        items = [a for a in items if not a.get('read')]
    if feed:
        wanted = feed.lower()
        items = [a for a in items if wanted in a['feed'].lower()]

    # Sort by fetched date, newest first
    items.sort(key=lambda x: x.get('fetched', ''), reverse=True)
    items = items[:limit]

    if not items:
        print("No articles found")
        return

    print(f"\nšŸ“° Articles ({len(items)} shown)\n")
    for item in items:
        status = " " if item.get('read') else "šŸ”µ"
        title = _truncate(item['title'], 60)
        print(f"{status} [{item['id']}] {title}")
        print(f" {item['feed']} | {_truncate(item['link'], 50)}")
        print()


def read_article(article_id: str):
    """Mark article as read and show details.

    *article_id* may be a full id or a prefix of one. An exact id wins over
    prefix matches; among several prefix matches the first one found is used.
    """
    articles = load_articles()

    article = articles.get(article_id)
    if article is None:
        # Find by prefix. An empty prefix would match every article, so it is
        # treated as "not found" instead.
        matches = [a for aid, a in articles.items() if aid.startswith(article_id)] if article_id else []
        if not matches:
            print(f"Article not found: {article_id}")
            return
        article = matches[0]

    article['read'] = True
    articles[article['id']] = article
    save_articles(articles)

    print(f"\nšŸ“„ {article['title']}")
    print(f" Feed: {article['feed']}")
    print(f" Link: {article['link']}")
    print()
    if article.get('description'):
        print(f" {article['description']}")
        print()


def add_feed(url: str, name: Optional[str] = None, category: str = "general"):
    """Add a new feed.

    Does nothing (beyond printing a notice) when a feed with the same URL is
    already configured. The URL doubles as the name when *name* is omitted.
    """
    feeds = load_feeds()

    # Check if already exists
    if any(f['url'] == url for f in feeds):
        print("Feed already exists")
        return

    feeds.append({
        'name': name or url,
        'url': url,
        'category': category,
    })
    save_feeds(feeds)
    print(f"āœ“ Added: {name or url}")


def list_feeds():
    """List configured feeds with their category and URL."""
    feeds = load_feeds()

    print(f"\nšŸ“” Feeds ({len(feeds)})\n")
    for feed in feeds:
        print(f" [{feed.get('category', 'general')}] {feed['name']}")
        print(f" {feed['url']}")
        print()


def digest():
    """Generate a quick digest of unread articles.

    Prints unread articles grouped by feed (feeds in alphabetical order),
    showing up to three titles per feed plus a count of the remainder.
    """
    articles = load_articles()
    unread = [a for a in articles.values() if not a.get('read')]

    # Group by feed
    by_feed = {}
    for a in unread:
        by_feed.setdefault(a['feed'], []).append(a)

    print(f"\nšŸ“° News Digest - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print(f" {len(unread)} unread articles\n")

    for feed, items in sorted(by_feed.items()):
        print(f"šŸ“Œ {feed} ({len(items)})")
        for item in items[:3]:
            title = _truncate(item['title'], 50)
            print(f" • {title}")
        if len(items) > 3:
            print(f" ... and {len(items) - 3} more")
        print()


def _print_usage():
    """Print the command overview shown when no command is given."""
    print("Usage:")
    print(" news-feed refresh - Fetch new articles")
    print(" news-feed list [--unread] - List articles")
    print(" news-feed read <id> - Read an article")
    print(" news-feed digest - Quick digest")
    print(" news-feed feeds - List feeds")
    print(" news-feed add <url> [name] - Add a feed")


def main():
    """Dispatch the command given on the command line."""
    args = sys.argv[1:]
    if not args:
        _print_usage()
        return

    cmd = args[0]

    if cmd == 'refresh':
        refresh()
    elif cmd == 'list':
        list_articles(unread_only='--unread' in sys.argv)
    elif cmd == 'read':
        # A known command with a missing argument gets a targeted hint rather
        # than the misleading "Unknown command".
        if len(args) < 2:
            print("Usage: news-feed read <id>")
            return
        read_article(args[1])
    elif cmd == 'digest':
        digest()
    elif cmd == 'feeds':
        list_feeds()
    elif cmd == 'add':
        if len(args) < 2:
            print("Usage: news-feed add <url> [name]")
            return
        add_feed(args[1], args[2] if len(args) > 2 else None)
    else:
        print("Unknown command")


if __name__ == "__main__":
    main()