Add news-feed project - RSS/Atom feed reader - Supports multiple feeds (HN, Lobsters, etc.) - Article caching and read tracking - Digest generation - Works out of the box
This commit is contained in:
301
projects/news-feed/main.py
Executable file
301
projects/news-feed/main.py
Executable file
@ -0,0 +1,301 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
news-feed - RSS reader and news aggregator for staying informed
|
||||
|
||||
A simple RSS/Atom feed reader that:
|
||||
- Fetches and parses feeds
|
||||
- Stores articles locally
|
||||
- Generates digests
|
||||
- Tracks read/unread status
|
||||
"""
|
||||
|
||||
import hashlib
import json
import os
import sys
import xml.etree.ElementTree as ET
from datetime import datetime
from html.parser import HTMLParser
from pathlib import Path
from typing import Optional
from urllib.error import URLError
from urllib.request import urlopen, Request
|
||||
|
||||
# All persistent state lives under <project>/data next to this script.
PROJECT_DIR = Path(__file__).parent
DATA_DIR = PROJECT_DIR / "data"
# Configured feed list: JSON array of {name, url, category} objects.
FEEDS_FILE = DATA_DIR / "feeds.json"
# Article cache: JSON object keyed by 12-char article id.
ARTICLES_FILE = DATA_DIR / "articles.json"

# Default feeds to get started
DEFAULT_FEEDS = [
    {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "category": "tech"},
    {"name": "Lobsters", "url": "https://lobste.rs/rss", "category": "tech"},
    {"name": "r/programming", "url": "https://www.reddit.com/r/programming/.rss", "category": "tech"},
]
|
||||
|
||||
class MLStripper(HTMLParser):
    """HTMLParser subclass that keeps only text nodes, discarding all markup."""

    def __init__(self):
        super().__init__()
        # Fragments of character data, collected in document order.
        self.text = []

    def handle_data(self, data):
        # Invoked by HTMLParser for each run of text between tags.
        self.text.append(data)

    def get_text(self):
        """Join the collected fragments into a single plain-text string."""
        return ''.join(self.text)
|
||||
|
||||
def strip_html(html: str) -> str:
    """Remove HTML tags from *html*, returning only its text content.

    Falls back to returning the input unchanged when parsing fails, so one
    malformed description cannot abort a whole feed refresh.
    """
    stripper = MLStripper()
    try:
        # Keep the try body minimal: feed() is the only call expected to raise.
        stripper.feed(html)
    except Exception:
        # Narrowed from a bare `except:` which also swallowed SystemExit /
        # KeyboardInterrupt; best-effort fallback is preserved.
        return html
    return stripper.get_text()
|
||||
|
||||
def load_feeds() -> list:
    """Load the configured feed list.

    Returns the JSON contents of FEEDS_FILE when present; otherwise a copy
    of DEFAULT_FEEDS. Copying matters: the original returned the module
    constant itself, so callers such as add_feed() appended into
    DEFAULT_FEEDS in place.
    """
    if FEEDS_FILE.exists():
        with open(FEEDS_FILE) as f:
            return json.load(f)
    # Shallow-copy each feed dict so callers can mutate freely.
    return [dict(feed) for feed in DEFAULT_FEEDS]
|
||||
|
||||
def save_feeds(feeds: list):
    """Persist the feed list to FEEDS_FILE, creating the data dir if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with FEEDS_FILE.open('w') as fh:
        json.dump(feeds, fh, indent=2)
|
||||
|
||||
def load_articles() -> dict:
    """Return the cached articles keyed by id, or {} when no cache exists."""
    if not ARTICLES_FILE.exists():
        return {}
    with ARTICLES_FILE.open() as fh:
        return json.load(fh)
|
||||
|
||||
def save_articles(articles: dict):
    """Write the article cache to ARTICLES_FILE, creating the data dir if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    with ARTICLES_FILE.open('w') as fh:
        json.dump(articles, fh, indent=2)
|
||||
|
||||
def fetch_feed(url: str) -> Optional[str]:
    """Fetch raw feed content from *url*.

    Returns the decoded response body, or None on failure. The annotation
    was corrected from `-> str`: the error path has always returned None
    (the error is printed rather than raised so one dead feed does not
    abort a refresh run).
    """
    req = Request(url, headers={'User-Agent': 'news-feed/1.0'})
    try:
        with urlopen(req, timeout=10) as resp:
            # errors='ignore' drops undecodable bytes from badly-encoded feeds.
            return resp.read().decode('utf-8', errors='ignore')
    except URLError as e:
        print(f" Error fetching: {e}")
        return None
|
||||
|
||||
def _build_article(title: str, link: str, description: str, published: str, feed_name: str) -> dict:
    """Assemble one normalized article record (shared by RSS and Atom paths)."""
    return {
        # md5 is used as a short stable fingerprint of the URL, not for security.
        'id': hashlib.md5(link.encode()).hexdigest()[:12],
        'title': title.strip(),
        'link': link.strip(),
        'description': strip_html(description)[:300] if description else '',
        'published': published,
        'feed': feed_name,
        'fetched': datetime.now().isoformat(),
        'read': False,
    }


def parse_feed(content: str, feed_name: str) -> list:
    """Parse RSS or Atom *content* into a list of article dicts.

    Tries RSS (<item>) first, then falls back to Atom (<entry>) when no RSS
    items were found. Returns [] on XML parse errors (the error is printed).
    """
    articles = []
    try:
        root = ET.fromstring(content)

        # RSS 2.0: articles live in <item> elements.
        for item in root.findall('.//item'):
            title = item.findtext('title', '')
            link = item.findtext('link', '')
            desc = item.findtext('description', '')
            pub_date = item.findtext('pubDate', '')

            if title and link:
                articles.append(_build_article(title, link, desc, pub_date, feed_name))

        # Atom fallback if no RSS items were found.
        if not articles:
            ns = {'atom': 'http://www.w3.org/2005/Atom'}
            for entry in root.findall('.//atom:entry', ns) or root.findall('.//entry'):
                title = entry.findtext('atom:title', '', ns) or entry.findtext('title', '')
                # BUGFIX: an Element with no children is falsy, so the old
                # `find('atom:link', ns) or find('link')` discarded matched
                # <link href="..."/> elements and silently dropped every
                # Atom entry. Use explicit `is None` checks instead.
                link_elem = entry.find('atom:link', ns)
                if link_elem is None:
                    link_elem = entry.find('link')
                link = link_elem.get('href', '') if link_elem is not None else ''
                summary = entry.findtext('atom:summary', '', ns) or entry.findtext('summary', '')

                if title and link:
                    # Atom published dates are not parsed; keep '' as before.
                    articles.append(_build_article(title, link, summary, '', feed_name))
    except ET.ParseError as e:
        print(f" Parse error: {e}")

    return articles
|
||||
|
||||
def refresh():
    """Fetch every configured feed and merge any new articles into the cache."""
    feeds = load_feeds()
    articles = load_articles()
    new_count = 0

    print(f"Refreshing {len(feeds)} feeds...\n")

    for feed in feeds:
        print(f" {feed['name']}...", end=' ', flush=True)
        content = fetch_feed(feed['url'])

        # Guard clause: skip feeds that failed to download.
        if not content:
            print("failed")
            continue

        items = parse_feed(content, feed['name'])
        for item in items:
            # Only count articles we have not cached before.
            if item['id'] not in articles:
                articles[item['id']] = item
                new_count += 1
        print(f"{len(items)} items")

    save_articles(articles)
    print(f"\n✓ {new_count} new articles")
|
||||
|
||||
def list_articles(limit: int = 20, unread_only: bool = False, feed: Optional[str] = None):
    """Print up to *limit* cached articles, newest first.

    Args:
        limit: Maximum number of articles to show.
        unread_only: If True, hide articles already marked read.
        feed: Optional case-insensitive substring filter on the feed name.
              (Annotation corrected from `str = None`.)
    """
    articles = load_articles()

    items = list(articles.values())

    if unread_only:
        items = [a for a in items if not a.get('read')]

    if feed:
        items = [a for a in items if feed.lower() in a['feed'].lower()]

    # Sort by fetched date, newest first
    items.sort(key=lambda x: x.get('fetched', ''), reverse=True)
    items = items[:limit]

    if not items:
        print("No articles found")
        return

    print(f"\n📰 Articles ({len(items)} shown)\n")

    for item in items:
        status = " " if item.get('read') else "🔵"
        # Truncate long headlines so each entry fits on one line.
        title = item['title'][:60] + "..." if len(item['title']) > 60 else item['title']
        print(f"{status} [{item['id']}] {title}")
        print(f" {item['feed']} | {item['link'][:50]}...")
        print()
|
||||
|
||||
def read_article(article_id: str):
    """Show an article's details and mark it read; *article_id* may be a prefix."""
    articles = load_articles()

    # Accept any prefix of the 12-char id; first match in cache order wins.
    article = next(
        (a for aid, a in articles.items() if aid.startswith(article_id)),
        None,
    )
    if article is None:
        print(f"Article not found: {article_id}")
        return

    article['read'] = True
    articles[article['id']] = article
    save_articles(articles)

    print(f"\n📄 {article['title']}")
    print(f" Feed: {article['feed']}")
    print(f" Link: {article['link']}")
    print()
    if article.get('description'):
        print(f" {article['description']}")
        print()
|
||||
|
||||
def add_feed(url: str, name: Optional[str] = None, category: str = "general"):
    """Register a new feed, skipping duplicates by URL.

    Args:
        url: Feed URL (RSS or Atom).
        name: Display name; defaults to the URL when omitted.
              (Annotation corrected from `str = None`.)
        category: Free-form grouping label shown by list_feeds().
    """
    feeds = load_feeds()

    # Deduplicate on URL so repeated adds are harmless.
    if any(f['url'] == url for f in feeds):
        print("Feed already exists")
        return

    feeds.append({
        'name': name or url,
        'url': url,
        'category': category,
    })
    save_feeds(feeds)
    print(f"✓ Added: {name or url}")
|
||||
|
||||
def list_feeds():
    """Print every configured feed with its category, name and URL."""
    feeds = load_feeds()
    print(f"\n📡 Feeds ({len(feeds)})\n")
    for entry in feeds:
        print(f" [{entry.get('category', 'general')}] {entry['name']}")
        print(f" {entry['url']}")
        print()
|
||||
|
||||
def digest():
    """Print a compact per-feed summary of all unread articles."""
    articles = load_articles()
    unread = [a for a in articles.values() if not a.get('read')]

    # Group unread articles by their source feed.
    by_feed = {}
    for article in unread:
        by_feed.setdefault(article['feed'], []).append(article)

    print(f"\n📰 News Digest - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print(f" {len(unread)} unread articles\n")

    for feed, items in sorted(by_feed.items()):
        print(f"📌 {feed} ({len(items)})")
        # Show at most three headlines per feed to keep the digest short.
        for item in items[:3]:
            title = item['title'][:50] + "..." if len(item['title']) > 50 else item['title']
            print(f" • {title}")
        if len(items) > 3:
            print(f" ... and {len(items) - 3} more")
        print()
|
||||
|
||||
def main():
    """CLI entry point: dispatch on the first command-line argument."""
    if len(sys.argv) < 2:
        print("Usage:")
        print(" news-feed refresh - Fetch new articles")
        print(" news-feed list [--unread] - List articles")
        print(" news-feed read <id> - Read an article")
        print(" news-feed digest - Quick digest")
        print(" news-feed feeds - List feeds")
        print(" news-feed add <url> [name] - Add a feed")
        return

    cmd = sys.argv[1]
    rest = sys.argv[2:]

    if cmd == 'refresh':
        refresh()
    elif cmd == 'list':
        list_articles(unread_only='--unread' in sys.argv)
    elif cmd == 'read' and rest:
        read_article(rest[0])
    elif cmd == 'digest':
        digest()
    elif cmd == 'feeds':
        list_feeds()
    elif cmd == 'add' and rest:
        # Optional second positional argument is the display name.
        add_feed(rest[0], rest[1] if len(rest) > 1 else None)
    else:
        print("Unknown command")
|
||||
|
||||
# Run the CLI only when executed directly; importing the module stays side-effect free.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user