Add news-feed project: an RSS/Atom feed reader that supports multiple feeds (HN, Lobsters, etc.), caches articles, tracks read/unread status, and generates digests. Works out of the box.

This commit is contained in:
2026-01-30 23:20:21 -06:00
parent b579251e90
commit 25c64a2b8e
5 changed files with 768 additions and 0 deletions

301
projects/news-feed/main.py Executable file
View File

@ -0,0 +1,301 @@
#!/usr/bin/env python3
"""
news-feed - RSS reader and news aggregator for staying informed
A simple RSS/Atom feed reader that:
- Fetches and parses feeds
- Stores articles locally
- Generates digests
- Tracks read/unread status
"""
import hashlib
import json
import os
import sys
import xml.etree.ElementTree as ET
from datetime import datetime
from html.parser import HTMLParser
from pathlib import Path
from typing import Optional
from urllib.error import URLError
from urllib.request import Request, urlopen
# On-disk state lives in a data/ directory next to this script.
PROJECT_DIR = Path(__file__).parent
DATA_DIR = PROJECT_DIR / "data"
# Configured feed list: JSON array of {name, url, category} objects.
FEEDS_FILE = DATA_DIR / "feeds.json"
# Article cache: JSON object keyed by 12-char article id.
ARTICLES_FILE = DATA_DIR / "articles.json"
# Default feeds to get started
DEFAULT_FEEDS = [
    {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "category": "tech"},
    {"name": "Lobsters", "url": "https://lobste.rs/rss", "category": "tech"},
    {"name": "r/programming", "url": "https://www.reddit.com/r/programming/.rss", "category": "tech"},
]
class MLStripper(HTMLParser):
    """HTMLParser subclass that keeps only text content, dropping all markup."""

    def __init__(self):
        super().__init__()
        self._parts = []  # text fragments, in document order

    def handle_data(self, data):
        # HTMLParser calls this for each run of character data between tags.
        self._parts.append(data)

    def get_text(self):
        """Return the concatenated text collected so far."""
        return ''.join(self._parts)
def strip_html(html: str) -> str:
    """Remove HTML tags from *html* and return only the text content.

    Falls back to returning the input unchanged if parsing fails.
    """
    stripper = MLStripper()
    try:
        stripper.feed(html)
        return stripper.get_text()
    except Exception:
        # Malformed markup: return the raw string rather than crash.
        # (Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt — narrowed to Exception.)
        return html
def load_feeds() -> list:
    """Return the configured feed list, falling back to the built-in defaults."""
    if not FEEDS_FILE.exists():
        return DEFAULT_FEEDS
    with open(FEEDS_FILE) as fh:
        return json.load(fh)
def save_feeds(feeds: list):
    """Persist the feed list as pretty-printed JSON, creating data/ if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    FEEDS_FILE.write_text(json.dumps(feeds, indent=2))
def load_articles() -> dict:
    """Return the cached article index keyed by article id ({} if no cache)."""
    if ARTICLES_FILE.exists():
        return json.loads(ARTICLES_FILE.read_text())
    return {}
def save_articles(articles: dict):
    """Write the article cache to disk as JSON, creating data/ if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    ARTICLES_FILE.write_text(json.dumps(articles, indent=2))
def fetch_feed(url: str) -> Optional[str]:
    """Fetch the raw feed document at *url*.

    Returns the decoded body, or None if the request fails.  Catches
    OSError so that connection failures (URLError) *and* socket timeouts
    (TimeoutError, which is NOT a URLError and previously escaped the
    handler) are both treated as fetch failures.
    """
    # Some feeds reject the default urllib User-Agent, so send our own.
    req = Request(url, headers={'User-Agent': 'news-feed/1.0'})
    try:
        with urlopen(req, timeout=10) as resp:
            # Feeds occasionally lie about their encoding; drop bad bytes.
            return resp.read().decode('utf-8', errors='ignore')
    except OSError as e:  # URLError and TimeoutError are both OSError
        print(f" Error fetching: {e}")
        return None
def parse_feed(content: str, feed_name: str) -> list:
    """Parse an RSS or Atom document into a list of article dicts.

    RSS <item> elements are tried first; if none yield an article, the
    document is re-scanned for Atom <entry> elements.  Malformed XML
    produces an empty list.
    """

    def _make(title, link, body, published):
        # Normalize one article; the id is a short md5 of the raw link so
        # the same URL always maps to the same cache key.
        return {
            'id': hashlib.md5(link.encode()).hexdigest()[:12],
            'title': title.strip(),
            'link': link.strip(),
            'description': strip_html(body)[:300] if body else '',
            'published': published,
            'feed': feed_name,
            'fetched': datetime.now().isoformat(),
            'read': False,
        }

    try:
        root = ET.fromstring(content)
    except ET.ParseError as e:
        print(f" Parse error: {e}")
        return []

    found = []
    # RSS 2.0: <item> elements anywhere under the root.
    for item in root.findall('.//item'):
        title = item.findtext('title', '')
        link = item.findtext('link', '')
        if title and link:
            found.append(_make(title, link,
                               item.findtext('description', ''),
                               item.findtext('pubDate', '')))
    if found:
        return found

    # Atom: namespaced <entry> elements (with an un-namespaced fallback).
    ns = {'atom': 'http://www.w3.org/2005/Atom'}
    for entry in root.findall('.//atom:entry', ns) or root.findall('.//entry'):
        title = entry.findtext('atom:title', '', ns) or entry.findtext('title', '')
        link_elem = entry.find('atom:link', ns) or entry.find('link')
        link = link_elem.get('href', '') if link_elem is not None else ''
        summary = entry.findtext('atom:summary', '', ns) or entry.findtext('summary', '')
        if title and link:
            found.append(_make(title, link, summary, ''))
    return found
def refresh():
    """Fetch every configured feed and merge unseen articles into the cache."""
    feeds = load_feeds()
    cache = load_articles()
    added = 0
    print(f"Refreshing {len(feeds)} feeds...\n")
    for feed in feeds:
        print(f" {feed['name']}...", end=' ', flush=True)
        body = fetch_feed(feed['url'])
        if not body:
            print("failed")
            continue
        parsed = parse_feed(body, feed['name'])
        # Only articles whose id is not already cached count as new.
        for art in parsed:
            if art['id'] not in cache:
                cache[art['id']] = art
                added += 1
        print(f"{len(parsed)} items")
    save_articles(cache)
    print(f"\n{added} new articles")
def list_articles(limit: int = 20, unread_only: bool = False, feed: str = None):
    """Print up to *limit* cached articles, most recently fetched first.

    unread_only restricts output to articles not yet marked read; *feed*
    is a case-insensitive substring filter on the feed name.
    """
    pool = list(load_articles().values())
    if unread_only:
        pool = [a for a in pool if not a.get('read')]
    if feed:
        needle = feed.lower()
        pool = [a for a in pool if needle in a['feed'].lower()]
    # Newest first, by the timestamp recorded when the article was fetched.
    pool.sort(key=lambda a: a.get('fetched', ''), reverse=True)
    pool = pool[:limit]
    if not pool:
        print("No articles found")
        return
    print(f"\n📰 Articles ({len(pool)} shown)\n")
    for art in pool:
        marker = " " if art.get('read') else "🔵"
        headline = art['title']
        if len(headline) > 60:
            headline = headline[:60] + "..."
        print(f"{marker} [{art['id']}] {headline}")
        print(f" {art['feed']} | {art['link'][:50]}...")
        print()
def read_article(article_id: str):
    """Show an article matched by id prefix and mark it as read."""
    cache = load_articles()
    # Prefix matching lets users type just the first few id characters;
    # with multiple matches the first one wins.
    hits = [a for aid, a in cache.items() if aid.startswith(article_id)]
    if not hits:
        print(f"Article not found: {article_id}")
        return
    art = hits[0]
    art['read'] = True
    cache[art['id']] = art
    save_articles(cache)
    print(f"\n📄 {art['title']}")
    print(f" Feed: {art['feed']}")
    print(f" Link: {art['link']}")
    print()
    if art.get('description'):
        print(f" {art['description']}")
        print()
def add_feed(url: str, name: str = None, category: str = "general"):
    """Register a new feed unless its URL is already configured."""
    feeds = load_feeds()
    known_urls = {f['url'] for f in feeds}
    if url in known_urls:
        print("Feed already exists")
        return
    label = name or url  # fall back to the URL when no name is given
    feeds.append({
        'name': label,
        'url': url,
        'category': category,
    })
    save_feeds(feeds)
    print(f"✓ Added: {label}")
def list_feeds():
    """Print every configured feed with its category and URL."""
    feeds = load_feeds()
    print(f"\n📡 Feeds ({len(feeds)})\n")
    for entry in feeds:
        cat = entry.get('category', 'general')
        print(f" [{cat}] {entry['name']}")
        print(f" {entry['url']}")
        print()
def digest():
    """Print a per-feed summary of unread articles (up to 3 titles each)."""
    unread = [a for a in load_articles().values() if not a.get('read')]
    # Group unread articles by their source feed.
    grouped = {}
    for art in unread:
        grouped.setdefault(art['feed'], []).append(art)
    print(f"\n📰 News Digest - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print(f" {len(unread)} unread articles\n")
    for feed_name, arts in sorted(grouped.items()):
        print(f"📌 {feed_name} ({len(arts)})")
        for art in arts[:3]:
            headline = art['title']
            if len(headline) > 50:
                headline = headline[:50] + "..."
            print(f"{headline}")
        if len(arts) > 3:
            print(f" ... and {len(arts) - 3} more")
        print()
def main():
    """CLI entry point: dispatch on the first command-line argument."""
    args = sys.argv[1:]
    if not args:
        # No command given: show usage and exit.
        print("Usage:")
        print(" news-feed refresh - Fetch new articles")
        print(" news-feed list [--unread] - List articles")
        print(" news-feed read <id> - Read an article")
        print(" news-feed digest - Quick digest")
        print(" news-feed feeds - List feeds")
        print(" news-feed add <url> [name] - Add a feed")
        return
    cmd = args[0]
    if cmd == 'refresh':
        refresh()
    elif cmd == 'list':
        list_articles(unread_only='--unread' in args)
    elif cmd == 'read' and len(args) > 1:
        read_article(args[1])
    elif cmd == 'digest':
        digest()
    elif cmd == 'feeds':
        list_feeds()
    elif cmd == 'add' and len(args) > 1:
        # Optional third argument is a human-friendly feed name.
        add_feed(args[1], args[2] if len(args) > 2 else None)
    else:
        print("Unknown command")


if __name__ == "__main__":
    main()