#!/usr/bin/env python3
"""
news-feed - RSS reader and news aggregator for staying informed

A simple RSS/Atom feed reader that:
- Fetches and parses feeds
- Stores articles locally
- Generates digests
- Tracks read/unread status
"""

import os
import json
import hashlib
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path
from urllib.request import urlopen, Request
from urllib.error import URLError
from html.parser import HTMLParser
import sys

# Directory containing this script; the data directory is created beneath it.
PROJECT_DIR = Path(__file__).parent
# Holds the JSON state files; created on demand by save_feeds()/save_articles().
DATA_DIR = PROJECT_DIR / "data"
# JSON list of feed records, each with "name", "url" and "category" keys.
FEEDS_FILE = DATA_DIR / "feeds.json"
# JSON object mapping article id -> article record (see parse_feed()).
ARTICLES_FILE = DATA_DIR / "articles.json"

# Feed list used by load_feeds() while FEEDS_FILE does not exist yet.
DEFAULT_FEEDS = [
    {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "category": "tech"},
    {"name": "Lobsters", "url": "https://lobste.rs/rss", "category": "tech"},
    {"name": "r/programming", "url": "https://www.reddit.com/r/programming/.rss", "category": "tech"},
]

class MLStripper(HTMLParser):
    """Collect the text content of an HTML fragment, discarding the markup.

    Only ``handle_data`` is overridden, so every chunk of character data the
    parser reports is kept and tags are dropped.
    """
    def __init__(self):
        super().__init__()
        # Text chunks in document order; joined on demand by get_text().
        self.text = []
    def handle_data(self, d):
        """Record one chunk of character data reported by the parser."""
        self.text.append(d)
    def get_text(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.text)

def strip_html(html: str) -> str:
    """Remove HTML tags from *html* and return the remaining text.

    Args:
        html: HTML fragment, e.g. a feed item's description.

    Returns:
        The text with tags removed. If the parser raises, the input is
        returned unchanged (best-effort behaviour).
    """
    s = MLStripper()
    try:
        s.feed(html)
        # feed() holds back trailing text that might be an incomplete
        # character reference (e.g. the "&T" in "AT&T"); close() tells the
        # parser the input is complete so that text is flushed too.
        s.close()
    except Exception:
        # Best effort: unparseable input is passed through as-is.
        return html
    return s.get_text()

def load_feeds() -> list:
    """Load the configured feed list.

    Returns:
        The list stored in FEEDS_FILE if that file exists; otherwise a fresh
        copy of DEFAULT_FEEDS.
    """
    if FEEDS_FILE.exists():
        with open(FEEDS_FILE, encoding='utf-8') as f:
            return json.load(f)
    # Return copies so callers that append to or edit the result (add_feed
    # does) never mutate the module-level DEFAULT_FEEDS constant.
    return [dict(feed) for feed in DEFAULT_FEEDS]

def save_feeds(feeds: list):
    """Write the feed list to FEEDS_FILE as indented JSON.

    Args:
        feeds: List of feed records to persist.

    The data is serialized before any file is touched and written to a
    temporary sibling file that then replaces FEEDS_FILE, so a failure
    part-way through cannot leave a truncated feeds file behind.
    """
    payload = json.dumps(feeds, indent=2)
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    tmp_path = FEEDS_FILE.with_name(FEEDS_FILE.name + '.tmp')
    with open(tmp_path, 'w', encoding='utf-8') as f:
        f.write(payload)
    # os.replace swaps the file in a single step on the same filesystem.
    os.replace(tmp_path, FEEDS_FILE)

def load_articles() -> dict:
    """Return the cached articles from ARTICLES_FILE, or {} if there is no cache yet."""
    if not ARTICLES_FILE.exists():
        return {}
    with open(ARTICLES_FILE) as cache_file:
        cached = json.load(cache_file)
    return cached

def save_articles(articles: dict):
    """Write the articles cache to ARTICLES_FILE as indented JSON.

    Args:
        articles: Mapping of article id to article record.

    The data is serialized before any file is touched and written to a
    temporary sibling file that then replaces ARTICLES_FILE, so an
    interrupted or failed save cannot leave a truncated cache that
    load_articles() would be unable to parse.
    """
    payload = json.dumps(articles, indent=2)
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    tmp_path = ARTICLES_FILE.with_name(ARTICLES_FILE.name + '.tmp')
    with open(tmp_path, 'w', encoding='utf-8') as f:
        f.write(payload)
    # os.replace swaps the file in a single step on the same filesystem.
    os.replace(tmp_path, ARTICLES_FILE)

def fetch_feed(url: str) -> "str | None":
    """Download a feed and return its body as text.

    Args:
        url: Address of the feed.

    Returns:
        The decoded response body, or None if the URL is malformed or the
        download fails (an error line is printed in that case).
    """
    # Local import: only needed for the except clause below.
    from http.client import HTTPException

    try:
        # Request() itself raises ValueError for a malformed URL, so it must
        # sit inside the try for one bad feed not to abort a whole refresh.
        req = Request(url, headers={'User-Agent': 'news-feed/1.0'})
        with urlopen(req, timeout=10) as resp:
            raw = resp.read()
            # Honour the charset declared by the server; fall back to UTF-8.
            charset = resp.headers.get_content_charset() or 'utf-8'
    except (URLError, OSError, HTTPException, ValueError) as e:
        # OSError covers timeouts and connection resets raised while reading;
        # HTTPException covers protocol errors such as an incomplete read.
        print(f"  Error fetching: {e}")
        return None

    try:
        return raw.decode(charset, errors='ignore')
    except LookupError:
        # The server named an encoding Python does not know.
        return raw.decode('utf-8', errors='ignore')

def _make_article(link: str, title: str, description: str, published: str, feed_name: str) -> dict:
    """Build the article record stored in the cache for one feed entry."""
    return {
        # Short, stable id derived from the link exactly as it appears in the feed.
        'id': hashlib.md5(link.encode()).hexdigest()[:12],
        'title': title.strip(),
        'link': link.strip(),
        'description': strip_html(description)[:300] if description else '',
        'published': published,
        'feed': feed_name,
        'fetched': datetime.now().isoformat(),
        'read': False,
    }

def _atom_link(entry, ns: dict) -> str:
    """Return the href of an Atom entry's preferred link, or '' if it has none."""
    links = entry.findall('atom:link', ns) or entry.findall('link')
    # A link without a rel attribute counts as rel="alternate" (RFC 4287).
    for link in links:
        if link.get('rel', 'alternate') == 'alternate' and link.get('href'):
            return link.get('href')
    # No alternate link: fall back to the first link that has an href.
    for link in links:
        if link.get('href'):
            return link.get('href')
    return ''

def parse_feed(content: str, feed_name: str) -> list:
    """Parse RSS or Atom feed content into article records.

    RSS ``<item>`` elements are tried first; if none yield an article the
    content is treated as Atom and its ``<entry>`` elements are read, with or
    without the Atom namespace.

    Args:
        content: XML text of the feed.
        feed_name: Name stored in each record's 'feed' field.

    Returns:
        A list of article dicts with the keys 'id', 'title', 'link',
        'description', 'published', 'feed', 'fetched' and 'read'. Entries
        lacking a title or a link are skipped. Malformed XML yields an empty
        list after printing the parse error.
    """
    try:
        root = ET.fromstring(content)
    except ET.ParseError as e:
        print(f"  Parse error: {e}")
        return []

    articles = []

    # Try RSS format
    for item in root.findall('.//item'):
        title = item.findtext('title', '')
        link = item.findtext('link', '')
        if title and link:
            articles.append(_make_article(
                link,
                title,
                item.findtext('description', ''),
                item.findtext('pubDate', ''),
                feed_name,
            ))
    if articles:
        return articles

    # Try Atom format if no items found
    ns = {'atom': 'http://www.w3.org/2005/Atom'}
    for entry in root.findall('.//atom:entry', ns) or root.findall('.//entry'):
        title = entry.findtext('atom:title', '', ns) or entry.findtext('title', '')
        # Looked up with explicit None checks: an Element without children
        # (such as <link href="..."/>) is falsy, so chaining lookups with
        # `or` would throw away a perfectly good link element.
        link = _atom_link(entry, ns)
        summary = (
            entry.findtext('atom:summary', '', ns)
            or entry.findtext('summary', '')
            or entry.findtext('atom:content', '', ns)
            or entry.findtext('content', '')
        )
        published = (
            entry.findtext('atom:published', '', ns)
            or entry.findtext('atom:updated', '', ns)
            or entry.findtext('published', '')
            or entry.findtext('updated', '')
        )
        if title and link:
            articles.append(_make_article(link, title, summary, published, feed_name))

    return articles

def refresh():
    """Fetch every configured feed and add unseen articles to the cache.

    Progress is printed per feed; the cache is saved once after all feeds
    have been processed, followed by the count of newly added articles.
    """
    sources = load_feeds()
    cache = load_articles()
    added = 0

    print(f"Refreshing {len(sources)} feeds...\n")

    for source in sources:
        print(f"  {source['name']}...", end=' ', flush=True)
        body = fetch_feed(source['url'])

        # Nothing usable came back (download failed or the body was empty).
        if not body:
            print("failed")
            continue

        parsed = parse_feed(body, source['name'])
        for entry in parsed:
            # Already cached entries keep their stored state (e.g. 'read').
            if entry['id'] in cache:
                continue
            cache[entry['id']] = entry
            added += 1
        print(f"{len(parsed)} items")

    save_articles(cache)
    print(f"\n✓ {added} new articles")

def _ellipsize(text: str, width: int) -> str:
    """Return *text* cut to *width* characters, adding "..." only if it was cut."""
    return text[:width] + "..." if len(text) > width else text

def list_articles(limit: int = 20, unread_only: bool = False, feed: str = None):
    """Print cached articles, most recently fetched first.

    Args:
        limit: Maximum number of articles to show.
        unread_only: If true, skip articles already marked as read.
        feed: Optional case-insensitive substring; only articles whose feed
            name contains it are shown.
    """
    articles = load_articles()

    items = list(articles.values())

    if unread_only:
        items = [a for a in items if not a.get('read')]

    if feed:
        needle = feed.lower()
        items = [a for a in items if needle in a['feed'].lower()]

    # Sort by fetched date, newest first
    items.sort(key=lambda x: x.get('fetched', ''), reverse=True)
    items = items[:limit]

    if not items:
        print("No articles found")
        return

    print(f"\n📰 Articles ({len(items)} shown)\n")

    for item in items:
        status = "  " if item.get('read') else "🔵"
        print(f"{status} [{item['id']}] {_ellipsize(item['title'], 60)}")
        # The ellipsis is shown only when the link really was shortened, so a
        # short link is displayed exactly as stored.
        print(f"   {item['feed']} | {_ellipsize(item['link'], 50)}")
        print()

def read_article(article_id: str):
    """Mark an article as read, persist that, and print its details.

    Args:
        article_id: Full article id or a prefix of one. The first cached
            article whose id starts with it is used.
    """
    articles = load_articles()

    # Take the first id that starts with the requested prefix.
    for key, candidate in articles.items():
        if key.startswith(article_id):
            article = candidate
            break
    else:
        print(f"Article not found: {article_id}")
        return

    article['read'] = True
    articles[article['id']] = article
    save_articles(articles)

    print(f"\n📄 {article['title']}")
    print(f"   Feed: {article['feed']}")
    print(f"   Link: {article['link']}")
    print()
    # The description line is omitted when the record has none.
    if article.get('description'):
        print(f"   {article['description']}")
    print()

def add_feed(url: str, name: str = None, category: str = "general"):
    """Append a feed to the configured list and save it.

    Args:
        url: Feed address; a feed with the same URL is not added twice.
        name: Display name; the URL is used when omitted.
        category: Category label stored with the feed.
    """
    feeds = load_feeds()

    # Refuse duplicates, identified by exact URL match.
    for existing in feeds:
        if existing['url'] == url:
            print("Feed already exists")
            return

    label = name or url
    feeds.append({'name': label, 'url': url, 'category': category})
    save_feeds(feeds)
    print(f"✓ Added: {label}")

def list_feeds():
    """Print every configured feed with its category, name and URL."""
    configured = load_feeds()
    print(f"\n📡 Feeds ({len(configured)})\n")
    for entry in configured:
        # Feeds saved without a category are shown as "general".
        category = entry.get('category', 'general')
        print(f"  [{category}] {entry['name']}")
        print(f"      {entry['url']}")
        print()

def digest():
    """Print a summary of unread articles grouped by feed.

    Feeds are listed in sorted name order; at most three titles are shown per
    feed, followed by a count of the remaining ones.
    """
    unread = [entry for entry in load_articles().values() if not entry.get('read')]

    # Group by feed name, keeping the cache order within each group.
    grouped = {}
    for entry in unread:
        grouped.setdefault(entry['feed'], []).append(entry)

    stamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    print(f"\n📰 News Digest - {stamp}")
    print(f"   {len(unread)} unread articles\n")

    for feed_name in sorted(grouped):
        entries = grouped[feed_name]
        print(f"📌 {feed_name} ({len(entries)})")
        for entry in entries[:3]:
            headline = entry['title']
            if len(headline) > 50:
                headline = headline[:50] + "..."
            print(f"   • {headline}")
        overflow = len(entries) - 3
        if overflow > 0:
            print(f"   ... and {overflow} more")
        print()

def main():
    """Dispatch the command given on the command line.

    With no command the usage text is printed. An unrecognised command, or
    'read'/'add' without their required argument, prints "Unknown command".
    """
    args = sys.argv[1:]
    if not args:
        usage_lines = (
            "Usage:",
            "  news-feed refresh           - Fetch new articles",
            "  news-feed list [--unread]   - List articles",
            "  news-feed read <id>         - Read an article",
            "  news-feed digest            - Quick digest",
            "  news-feed feeds             - List feeds",
            "  news-feed add <url> [name]  - Add a feed",
        )
        for line in usage_lines:
            print(line)
        return

    cmd, params = args[0], args[1:]

    if cmd == 'refresh':
        refresh()
    elif cmd == 'list':
        # The flag may appear anywhere on the command line.
        list_articles(unread_only='--unread' in sys.argv)
    elif cmd == 'read' and params:
        read_article(params[0])
    elif cmd == 'digest':
        digest()
    elif cmd == 'feeds':
        list_feeds()
    elif cmd == 'add' and params:
        feed_name = params[1] if len(params) > 1 else None
        add_feed(params[0], feed_name)
    else:
        print("Unknown command")

# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()