From 25c64a2b8e9432b36b1aa92118c8256d7fed57b8 Mon Sep 17 00:00:00 2001 From: Case Date: Fri, 30 Jan 2026 23:20:21 -0600 Subject: [PATCH] Add news-feed project - RSS/Atom feed reader - Supports multiple feeds (HN, Lobsters, etc.) - Article caching and read tracking - Digest generation - Works out of the box --- projects/news-feed/.gitignore | 7 + projects/news-feed/README.md | 17 + projects/news-feed/data/articles.json | 442 ++++++++++++++++++++++++++ projects/news-feed/main.py | 301 ++++++++++++++++++ projects/news-feed/requirements.txt | 1 + 5 files changed, 768 insertions(+) create mode 100644 projects/news-feed/.gitignore create mode 100644 projects/news-feed/README.md create mode 100644 projects/news-feed/data/articles.json create mode 100755 projects/news-feed/main.py create mode 100644 projects/news-feed/requirements.txt diff --git a/projects/news-feed/.gitignore b/projects/news-feed/.gitignore new file mode 100644 index 0000000..151af41 --- /dev/null +++ b/projects/news-feed/.gitignore @@ -0,0 +1,7 @@ +venv/ +__pycache__/ +*.pyc +.env +*.egg-info/ +dist/ +build/ diff --git a/projects/news-feed/README.md b/projects/news-feed/README.md new file mode 100644 index 0000000..f963e68 --- /dev/null +++ b/projects/news-feed/README.md @@ -0,0 +1,17 @@ +# news-feed + +RSS reader and news aggregator for staying informed + +## Setup + +```bash +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +## Usage + +```bash +python main.py +``` diff --git a/projects/news-feed/data/articles.json b/projects/news-feed/data/articles.json new file mode 100644 index 0000000..7b74795 --- /dev/null +++ b/projects/news-feed/data/articles.json @@ -0,0 +1,442 @@ +{ + "99a8fa2246d9": { + "id": "99a8fa2246d9", + "title": "Show HN: I trained a 9M speech model to fix my Mandarin tones", + "link": "https://simedw.com/2026/01/31/ear-pronunication-via-ctc/", + "description": "\nBuilt this because tones are killing my spoken Mandarin and I can't reliably hear my 
own mistakes.It's a 9M Conformer-CTC model trained on ~300h (AISHELL + Primewords), quantized to INT8 (11 MB), runs 100% in-browser via ONNX Runtime Web.Grades per-syllable pronunciation + tones with Viterbi forced", + "published": "Sat, 31 Jan 2026 00:51:27 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.364801", + "read": false + }, + "1c37e9a95d81": { + "id": "1c37e9a95d81", + "title": "Show HN: Foundry \u2013 Turns your repeated workflows into one-click commands", + "link": "https://github.com/lekt9/openclaw-foundry", + "description": "\nArticle URL: https://github.com/lekt9/openclaw-foundry\nComments URL: https://news.ycombinator.com/item?id=46831978\nPoints: 9\n# Comments: 1\n", + "published": "Sat, 31 Jan 2026 00:40:51 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.364879", + "read": false + }, + "74dd8335bb84": { + "id": "74dd8335bb84", + "title": "175K+ publicly-exposed Ollama AI instances discovered", + "link": "https://www.techradar.com/pro/security/over-175-000-publicly-exposed-ollama-ai-servers-discovered-worldwide-so-fix-now", + "description": "\nArticle URL: https://www.techradar.com/pro/security/over-175-000-publicly-exposed-ollama-ai-servers-discovered-worldwide-so-fix-now\nComments URL: https://news.ycombinator.com/item?id=46831784\nPoints: 38\n# Comments: 25\n", + "published": "Sat, 31 Jan 2026 00:12:22 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.364978", + "read": false + }, + "9329b434bb69": { + "id": "9329b434bb69", + "title": "The $100B megadeal between OpenAI and Nvidia is on ice", + "link": "https://www.wsj.com/tech/ai/the-100-billion-megadeal-between-openai-and-nvidia-is-on-ice-aa3025e3", + "description": "\nArticle URL: https://www.wsj.com/tech/ai/the-100-billion-megadeal-between-openai-and-nvidia-is-on-ice-aa3025e3\nComments URL: https://news.ycombinator.com/item?id=46831702\nPoints: 219\n# Comments: 135\n", + "published": "Sat, 31 Jan 2026 00:02:30 +0000", + "feed": "Hacker 
News", + "fetched": "2026-01-30T23:20:09.365051", + "read": false + }, + "2e9305e7a8a7": { + "id": "2e9305e7a8a7", + "title": "Stonebraker on CAP theorem and Databases (2010)", + "link": "https://perspectives.mvdirona.com/2010/04/stonebraker-on-cap-theorem-and-databases/", + "description": "\nArticle URL: https://perspectives.mvdirona.com/2010/04/stonebraker-on-cap-theorem-and-databases/\nComments URL: https://news.ycombinator.com/item?id=46831592\nPoints: 51\n# Comments: 16\n", + "published": "Fri, 30 Jan 2026 23:47:28 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365115", + "read": false + }, + "0c827ff07b2c": { + "id": "0c827ff07b2c", + "title": "Show HN: I built an AI conversation partner to practice speaking languages", + "link": "https://apps.apple.com/us/app/talkbits-speak-naturally/id6756824177", + "description": "\nHi,I built TalkBits because most language apps focus on vocabulary or exercises, but not actual conversation. The hard part of learning a language is speaking naturally under pressure.TalkBits lets you have real-time spoken conversations with an AI that acts like a native speaker. You can choose di", + "published": "Fri, 30 Jan 2026 22:16:19 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365217", + "read": false + }, + "a5b023c65ec8": { + "id": "a5b023c65ec8", + "title": "I trapped an AI model inside an art installation (2025) [video]", + "link": "https://www.youtube.com/watch?v=7fNYj0EXxMs", + "description": "\nArticle URL: https://www.youtube.com/watch?v=7fNYj0EXxMs\nComments URL: https://news.ycombinator.com/item?id=46830523\nPoints: 71\n# Comments: 17\n", + "published": "Fri, 30 Jan 2026 21:58:46 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365280", + "read": false + }, + "eed4ee2be99a": { + "id": "eed4ee2be99a", + "title": "P vs. 
NP and the Difficulty of Computation: A ruliological approach", + "link": "https://writings.stephenwolfram.com/2026/01/p-vs-np-and-the-difficulty-of-computation-a-ruliological-approach/", + "description": "\nArticle URL: https://writings.stephenwolfram.com/2026/01/p-vs-np-and-the-difficulty-of-computation-a-ruliological-approach/\nComments URL: https://news.ycombinator.com/item?id=46830027\nPoints: 54\n# Comments: 26\n", + "published": "Fri, 30 Jan 2026 21:17:21 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365342", + "read": false + }, + "a664edebaccd": { + "id": "a664edebaccd", + "title": "Peerweb: Decentralized website hosting via WebTorrent", + "link": "https://peerweb.lol/", + "description": "\nhttps://github.com/omodaka9375/peerweb\n\nComments URL: https://news.ycombinator.com/item?id=46829582\nPoints: 207\n# Comments: 77\n", + "published": "Fri, 30 Jan 2026 20:40:00 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365413", + "read": false + }, + "db5139aae809": { + "id": "db5139aae809", + "title": "Antirender: remove the glossy shine on architectural renderings", + "link": "https://antirender.com/", + "description": "\nArticle URL: https://antirender.com/\nComments URL: https://news.ycombinator.com/item?id=46829147\nPoints: 919\n# Comments: 219\n", + "published": "Fri, 30 Jan 2026 20:05:24 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365477", + "read": false + }, + "cfda7faf1941": { + "id": "cfda7faf1941", + "title": "Kimi K2.5 Technical Report [pdf]", + "link": "https://github.com/MoonshotAI/Kimi-K2.5/blob/master/tech_report.pdf", + "description": "\nArticle URL: https://github.com/MoonshotAI/Kimi-K2.5/blob/master/tech_report.pdf\nComments URL: https://news.ycombinator.com/item?id=46826597\nPoints: 259\n# Comments: 99\n", + "published": "Fri, 30 Jan 2026 16:43:50 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365541", + "read": false + }, + "ad8e579b6337": { + "id": 
"ad8e579b6337", + "title": "Ask HN: Do you also \"hoard\" notes/links but struggle to turn them into actions?", + "link": "https://news.ycombinator.com/item?id=46826277", + "description": "\nHi HN \u2014 I\u2019m exploring an idea and would love your feedback.I\u2019m a builder and user of Obsidian, validating a concept called Concerns. Today it\u2019s only a landing page + short survey (no product yet) to test whether this pain is real.The core idea (2\u20133 bullets):- Many of us capture tons of useful info ", + "published": "Fri, 30 Jan 2026 16:22:05 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365723", + "read": false + }, + "69d96d34940d": { + "id": "69d96d34940d", + "title": "Self Driving Car Insurance", + "link": "https://www.lemonade.com/car/explained/self-driving-car-insurance/", + "description": "\nArticle URL: https://www.lemonade.com/car/explained/self-driving-car-insurance/\nComments URL: https://news.ycombinator.com/item?id=46825828\nPoints: 114\n# Comments: 266\n", + "published": "Fri, 30 Jan 2026 15:50:15 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365791", + "read": false + }, + "931ef498ca41": { + "id": "931ef498ca41", + "title": "Show HN: Amla Sandbox \u2013 WASM bash shell sandbox for AI agents", + "link": "https://github.com/amlalabs/amla-sandbox", + "description": "\nWASM sandbox for running LLM-generated code safely.Agents get a bash-like shell and can only call tools you provide, with constraints you define.\nNo Docker, no subprocess, no SaaS \u2014 just pip install amla-sandbox\n\nComments URL: https://news.ycombinator.com/item?id=46824877\nPoints: 129\n# Comments: 71", + "published": "Fri, 30 Jan 2026 14:34:32 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365860", + "read": false + }, + "e5abad1512c8": { + "id": "e5abad1512c8", + "title": "HTTP Cats", + "link": "https://http.cat/", + "description": "\nArticle URL: https://http.cat/\nComments URL: 
https://news.ycombinator.com/item?id=46824422\nPoints: 284\n# Comments: 46\n", + "published": "Fri, 30 Jan 2026 13:56:51 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.365942", + "read": false + }, + "33ed4bdcdda6": { + "id": "33ed4bdcdda6", + "title": "Pangolin (YC S25) is hiring software engineers (open-source, Go, networking)", + "link": "https://docs.pangolin.net/careers/join-us", + "description": "\nArticle URL: https://docs.pangolin.net/careers/join-us\nComments URL: https://news.ycombinator.com/item?id=46823544\nPoints: 0\n# Comments: 0\n", + "published": "Fri, 30 Jan 2026 12:11:49 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.366005", + "read": false + }, + "816e2f961ac3": { + "id": "816e2f961ac3", + "title": "Code is cheap. Show me the talk", + "link": "https://nadh.in/blog/code-is-cheap/", + "description": "\nArticle URL: https://nadh.in/blog/code-is-cheap/\nComments URL: https://news.ycombinator.com/item?id=46823485\nPoints: 190\n# Comments: 170\n", + "published": "Fri, 30 Jan 2026 12:05:50 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.366065", + "read": false + }, + "5d890bd9105c": { + "id": "5d890bd9105c", + "title": "Email experiments: filtering out external images", + "link": "https://www.terracrypt.net/posts/email-experiments-image-filtering.html", + "description": "\nArticle URL: https://www.terracrypt.net/posts/email-experiments-image-filtering.html\nComments URL: https://news.ycombinator.com/item?id=46823445\nPoints: 53\n# Comments: 27\n", + "published": "Fri, 30 Jan 2026 12:01:36 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.366127", + "read": false + }, + "6f5856283be5": { + "id": "6f5856283be5", + "title": "BoldVoice (YC S21) Is Hiring Fullstack and Machine Learning Engineers", + "link": "https://boldvoice.notion.site/careers-page?p=2e871a9bf729806c81f6e47f32e32622&pm=s", + "description": "\nArticle URL: 
https://boldvoice.notion.site/careers-page?p=2e871a9bf729806c81f6e47f32e32622&pm=s\nComments URL: https://news.ycombinator.com/item?id=46823430\nPoints: 0\n# Comments: 0\n", + "published": "Fri, 30 Jan 2026 12:00:12 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.366205", + "read": false + }, + "39f885ab17b6": { + "id": "39f885ab17b6", + "title": "Surely the crash of the US economy has to be soon", + "link": "https://wilsoniumite.com/2026/01/27/surely-it-has-to-be-soon/", + "description": "\nArticle URL: https://wilsoniumite.com/2026/01/27/surely-it-has-to-be-soon/\nComments URL: https://news.ycombinator.com/item?id=46822630\nPoints: 220\n# Comments: 336\n", + "published": "Fri, 30 Jan 2026 10:14:23 +0000", + "feed": "Hacker News", + "fetched": "2026-01-30T23:20:09.366271", + "read": false + }, + "d8a2b2317585": { + "id": "d8a2b2317585", + "title": "Coding Is When We\u2019re Least Productive", + "link": "https://codemanship.wordpress.com/2026/01/30/coding-is-when-were-least-productive/", + "description": "Comments", + "published": "Fri, 30 Jan 2026 06:02:49 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523294", + "read": false + }, + "ac670ac6f177": { + "id": "ac670ac6f177", + "title": "Ingress NGINX: Statement from the Kubernetes Steering and Security Response Committees", + "link": "https://kubernetes.io/blog/2026/01/29/ingress-nginx-statement/", + "description": "Comments", + "published": "Fri, 30 Jan 2026 11:37:23 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523318", + "read": false + }, + "aa814d4f9898": { + "id": "aa814d4f9898", + "title": "No, Cloudflare's Matrix server isn't an earnest project", + "link": "https://nexy.blog/2026/01/28/cf-matrix-workers/", + "description": "Comments", + "published": "Thu, 29 Jan 2026 19:43:25 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523340", + "read": false + }, + "5d1990b77195": { + "id": "5d1990b77195", + "title": "Subtypes and status-dependent 
data: pure relational approach", + "link": "https://minimalmodeling.substack.com/p/subtypes-and-status-dependent-data", + "description": "Comments", + "published": "Fri, 30 Jan 2026 12:05:46 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523363", + "read": false + }, + "2a97b6df35bc": { + "id": "2a97b6df35bc", + "title": "Backseat Software", + "link": "https://blog.mikeswanson.com/backseat-software/", + "description": "Comments", + "published": "Fri, 30 Jan 2026 00:10:18 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523385", + "read": false + }, + "72c852794b6f": { + "id": "72c852794b6f", + "title": "The State Of Garnet, 2026", + "link": "https://wiki.alopex.li/TheStateOfGarnet2026", + "description": "Comments", + "published": "Fri, 30 Jan 2026 19:41:19 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523407", + "read": false + }, + "6639842b6234": { + "id": "6639842b6234", + "title": "How AI Impacts Skill Formation", + "link": "https://arxiv.org/abs/2601.20245", + "description": "Comments", + "published": "Fri, 30 Jan 2026 04:00:54 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523432", + "read": false + }, + "3ac078bfc685": { + "id": "3ac078bfc685", + "title": "Book of Verse", + "link": "https://verselang.github.io/book/", + "description": "Comments", + "published": "Fri, 30 Jan 2026 13:55:53 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523456", + "read": false + }, + "b91909ff686f": { + "id": "b91909ff686f", + "title": "Some Data Should Be Code", + "link": "https://borretti.me/article/some-data-should-be-code", + "description": "Comments", + "published": "Fri, 30 Jan 2026 19:44:52 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523478", + "read": false + }, + "91dd4b47cc98": { + "id": "91dd4b47cc98", + "title": "What are you doing this weekend?", + "link": "https://lobste.rs/s/p3nxq9/what_are_you_doing_this_weekend", + "description": "Feel free to tell what you 
plan on doing this weekend and even ask for help or feedback.\nPlease keep in mind it\u2019s more than OK to do nothing at all too!\n", + "published": "Fri, 30 Jan 2026 03:38:39 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523507", + "read": false + }, + "ef3658398159": { + "id": "ef3658398159", + "title": "Google Disrupts Large Residential Proxy Network", + "link": "https://cloud.google.com/blog/topics/threat-intelligence/disrupting-largest-residential-proxy-network", + "description": "See also https://en.wikipedia.org/wiki/Ipidea\nComments", + "published": "Thu, 29 Jan 2026 22:24:57 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523552", + "read": false + }, + "0b611e4a3158": { + "id": "0b611e4a3158", + "title": "GNU Units", + "link": "https://www.gnu.org/software/units/", + "description": "Comments", + "published": "Fri, 30 Jan 2026 00:19:20 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523574", + "read": false + }, + "32a044155bdd": { + "id": "32a044155bdd", + "title": "The Dank Case For Scrolling Window Managers", + "link": "https://tedium.co/2026/01/29/niri-danklinux-scrolling-window-managers/", + "description": "Comments", + "published": "Thu, 29 Jan 2026 21:57:31 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523597", + "read": false + }, + "c8863b0a88d8": { + "id": "c8863b0a88d8", + "title": "The imminent risk of vibe coding", + "link": "https://basta.substack.com/p/the-imminent-risk-of-vibe-coding", + "description": "Comments", + "published": "Fri, 30 Jan 2026 17:04:28 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523619", + "read": false + }, + "f1592fae97c8": { + "id": "f1592fae97c8", + "title": "Notification underload", + "link": "https://zuma9pt5.com/notification-underload/", + "description": "Comments", + "published": "Fri, 30 Jan 2026 13:30:31 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523640", + "read": false + }, + "b7c5ce7d8924": { + 
"id": "b7c5ce7d8924", + "title": "How to Choose Colors for Your CLI Applications (2023)", + "link": "https://blog.xoria.org/terminal-colors/", + "description": "Comments", + "published": "Thu, 29 Jan 2026 09:20:12 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523662", + "read": false + }, + "2bfe460952eb": { + "id": "2bfe460952eb", + "title": "cli-stash: TUI tool to save and recall shell commands with fuzzy search", + "link": "https://github.com/itcaat/cli-stash", + "description": "Built this to solve my own problem of forgetting complex commands. Instead of digging through shell history or notes, cli-stash lets you:\n\nBrowse shell history and save commands (Ctrl+A)\nFuzzy search through saved commands\nSelect and insert directly into terminal prompt\nAuto-sort by usage frequency\n", + "published": "Fri, 30 Jan 2026 07:57:02 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523746", + "read": false + }, + "6c613ae56183": { + "id": "6c613ae56183", + "title": "Community Considerations Around AI Contributions", + "link": "https://blog.scientific-python.org/scientific-python/community-considerations-around-ai/", + "description": "Comments", + "published": "Fri, 30 Jan 2026 16:33:58 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523775", + "read": false + }, + "60e60e39fdaf": { + "id": "60e60e39fdaf", + "title": "10 Years of Wasm: A Retrospective", + "link": "https://bytecodealliance.org/articles/ten-years-of-webassembly-a-retrospective", + "description": "Comments", + "published": "Thu, 29 Jan 2026 18:37:02 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523797", + "read": false + }, + "be1d58fa7011": { + "id": "be1d58fa7011", + "title": "When does technology pass from being a tool to being a crutch? 
(2009)", + "link": "https://boston.conman.org/2009/11/03.1", + "description": "Comments", + "published": "Fri, 30 Jan 2026 17:59:21 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523819", + "read": false + }, + "04cfc607c0c3": { + "id": "04cfc607c0c3", + "title": "Amdahl\u2019s law and agentic coding", + "link": "https://evnm.substack.com/p/amdahls-law-and-agentic-coding", + "description": "Comments", + "published": "Fri, 30 Jan 2026 12:06:26 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523841", + "read": false + }, + "8e1710b16e24": { + "id": "8e1710b16e24", + "title": "Solving Fossil's ASCII art CAPTCHA in 171 characters", + "link": "https://blog.nns.ee/2026/01/29/fossil-captcha-solver/", + "description": "Comments", + "published": "Thu, 29 Jan 2026 14:57:07 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523866", + "read": false + }, + "6956876cff81": { + "id": "6956876cff81", + "title": "Paty: The most human-like AI agent you'll ever use", + "link": "https://github.com/gjtorikian/paty", + "description": "Comments", + "published": "Fri, 30 Jan 2026 11:09:17 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523903", + "read": false + }, + "cf40b6b643de": { + "id": "cf40b6b643de", + "title": "A Story of Computer-Use: Where We Started, Where We're Headed", + "link": "https://cua.ai/blog/clawdbot-computer-use-history", + "description": "Comments", + "published": "Fri, 30 Jan 2026 13:18:00 -0600", + "feed": "Lobsters", + "fetched": "2026-01-30T23:20:09.523927", + "read": false + } +} \ No newline at end of file diff --git a/projects/news-feed/main.py b/projects/news-feed/main.py new file mode 100755 index 0000000..4732b7d --- /dev/null +++ b/projects/news-feed/main.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 +""" +news-feed - RSS reader and news aggregator for staying informed + +A simple RSS/Atom feed reader that: +- Fetches and parses feeds +- Stores articles locally +- Generates digests +- Tracks 
"""
news-feed - RSS reader and news aggregator for staying informed.

A simple RSS/Atom feed reader that:
- Fetches and parses feeds
- Stores articles locally
- Generates digests
- Tracks read/unread status
"""

import hashlib
import json
import os
import sys
import xml.etree.ElementTree as ET
from datetime import datetime
from html.parser import HTMLParser
from http.client import HTTPException
from pathlib import Path
from typing import Optional
from urllib.error import URLError
from urllib.request import Request, urlopen

PROJECT_DIR = Path(__file__).parent
DATA_DIR = PROJECT_DIR / "data"
FEEDS_FILE = DATA_DIR / "feeds.json"
ARTICLES_FILE = DATA_DIR / "articles.json"

# Default feeds to get started
DEFAULT_FEEDS = [
    {"name": "Hacker News", "url": "https://hnrss.org/frontpage", "category": "tech"},
    {"name": "Lobsters", "url": "https://lobste.rs/rss", "category": "tech"},
    {"name": "r/programming", "url": "https://www.reddit.com/r/programming/.rss",
     "category": "tech"},
]

ATOM_NS = {'atom': 'http://www.w3.org/2005/Atom'}
FETCH_TIMEOUT = 10                  # seconds
MAX_FEED_BYTES = 10 * 1024 * 1024   # refuse to buffer more than this per feed
DESCRIPTION_LIMIT = 300             # characters of description kept per article

# Status glyphs, written as escapes so the source stays pure ASCII and cannot
# be mangled by a mis-decoded transfer again.
# NOTE(review): reconstructed from mojibake in the patch; GLYPH_FEEDS could be
# U+1F4E1 (satellite antenna) or U+1F4D4 (notebook) - confirm against the
# original commit.
GLYPH_OK = "\u2713"
GLYPH_NEWS = "\U0001F4F0"
GLYPH_UNREAD = "\U0001F535"
GLYPH_ARTICLE = "\U0001F4C4"
GLYPH_FEEDS = "\U0001F4E1"
GLYPH_PIN = "\U0001F4CC"
GLYPH_BULLET = "\u2022"


class MLStripper(HTMLParser):
    """Collect only the text nodes of an HTML fragment."""

    def __init__(self):
        super().__init__()
        self.text = []

    def handle_data(self, d):
        self.text.append(d)

    def get_text(self):
        return ''.join(self.text)


def strip_html(html: str) -> str:
    """Remove HTML tags from a string.

    Returns the text content of *html*. Falls back to returning the input
    unchanged if the parser rejects it (best effort: a description with odd
    markup should never abort a refresh).
    """
    if not html:
        return ''
    s = MLStripper()
    try:
        s.feed(html)
        # close() flushes text the parser is still buffering; without it a
        # trailing fragment such as "AT&T" is silently dropped.
        s.close()
    except Exception:
        return html
    return s.get_text()


def _write_json(path: Path, payload) -> None:
    """Write *payload* as JSON to *path* via a temp file and atomic rename."""
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = path.with_name(path.name + ".tmp")
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2)
    # An interrupted run leaves the previous file intact instead of a
    # half-written one.
    os.replace(tmp_path, path)


def load_feeds() -> list:
    """Load the feed list, or a copy of the defaults if none is saved yet."""
    if FEEDS_FILE.exists():
        with open(FEEDS_FILE, encoding='utf-8') as f:
            return json.load(f)
    # Copy so callers (e.g. add_feed) never mutate the module-level constant.
    return [dict(feed) for feed in DEFAULT_FEEDS]


def save_feeds(feeds: list):
    """Save feed list."""
    _write_json(FEEDS_FILE, feeds)


def load_articles() -> dict:
    """Load cached articles as a dict keyed by article id."""
    if ARTICLES_FILE.exists():
        with open(ARTICLES_FILE, encoding='utf-8') as f:
            return json.load(f)
    return {}


def save_articles(articles: dict):
    """Save articles cache."""
    _write_json(ARTICLES_FILE, articles)


def fetch_feed(url: str) -> Optional[str]:
    """Fetch feed content.

    Returns the decoded response body, or None if the request failed (the
    error is printed). At most MAX_FEED_BYTES are read.
    """
    try:
        req = Request(url, headers={'User-Agent': 'news-feed/1.0'})
        with urlopen(req, timeout=FETCH_TIMEOUT) as resp:
            raw = resp.read(MAX_FEED_BYTES)
            charset = resp.headers.get_content_charset() or 'utf-8'
    except (URLError, OSError, HTTPException, ValueError) as e:
        # OSError covers socket timeouts; ValueError covers malformed URLs.
        print(f"  Error fetching: {e}")
        return None
    try:
        return raw.decode(charset, errors='ignore')
    except LookupError:
        # Server advertised a charset Python does not know.
        return raw.decode('utf-8', errors='ignore')


def _build_article(title, link, description, published, feed_name) -> Optional[dict]:
    """Return an article record, or None when title or link is missing."""
    clean_title = (title or '').strip()
    clean_link = (link or '').strip()
    if not clean_title or not clean_link:
        return None
    return {
        # Hash the link exactly as it appears in the feed so ids stay
        # compatible with articles already in the cache.
        'id': hashlib.md5(link.encode()).hexdigest()[:12],
        'title': clean_title,
        'link': clean_link,
        'description': strip_html(description)[:DESCRIPTION_LIMIT] if description else '',
        'published': (published or '').strip(),
        'feed': feed_name,
        'fetched': datetime.now().isoformat(),
        'read': False,
    }


def _atom_text(entry, *tags) -> str:
    """Return the first non-empty text among *tags* (namespaced, then bare)."""
    for tag in tags:
        text = entry.findtext(f'atom:{tag}', '', ATOM_NS) or entry.findtext(tag, '')
        if text:
            return text
    return ''


def _atom_link(entry) -> str:
    """Return an entry's href, preferring rel="alternate" links."""
    links = entry.findall('atom:link', ATOM_NS) or entry.findall('link')
    fallback = ''
    for link_elem in links:
        href = link_elem.get('href', '')
        if not href:
            continue
        # A missing rel attribute means "alternate" in Atom.
        if link_elem.get('rel', 'alternate') == 'alternate':
            return href
        fallback = fallback or href
    return fallback


def parse_feed(content: str, feed_name: str) -> list:
    """Parse RSS/Atom feed content.

    Args:
        content: The feed document as text.
        feed_name: Name stored in each article's 'feed' field.

    Returns:
        A list of article dicts (id, title, link, description, published,
        feed, fetched, read). Empty if the content is empty or not
        well-formed XML (the parse error is printed).
    """
    if not content:
        return []
    # NOTE: xml.etree is not hardened against malicious XML (entity-expansion
    # attacks); feeds are remote input, so only add sources you trust.
    try:
        root = ET.fromstring(content)
    except ET.ParseError as e:
        print(f"  Parse error: {e}")
        return []

    articles = []

    # Try RSS format
    for item in root.findall('.//item'):
        article = _build_article(
            item.findtext('title', ''),
            item.findtext('link', ''),
            item.findtext('description', ''),
            item.findtext('pubDate', ''),
            feed_name,
        )
        if article is not None:
            articles.append(article)

    # Try Atom format if no items found
    if not articles:
        entries = root.findall('.//atom:entry', ATOM_NS) or root.findall('.//entry')
        for entry in entries:
            # Elements are looked up without relying on Element truthiness:
            # a childless <link/> is falsy, which used to discard every
            # namespaced Atom link.
            article = _build_article(
                _atom_text(entry, 'title'),
                _atom_link(entry),
                _atom_text(entry, 'summary', 'content'),
                _atom_text(entry, 'published', 'updated'),
                feed_name,
            )
            if article is not None:
                articles.append(article)

    return articles


def refresh():
    """Refresh all feeds and add unseen articles to the cache."""
    feeds = load_feeds()
    articles = load_articles()
    new_count = 0

    print(f"Refreshing {len(feeds)} feeds...\n")

    for feed in feeds:
        print(f"  {feed['name']}...", end=' ', flush=True)
        content = fetch_feed(feed['url'])
        if not content:
            print("failed")
            continue

        items = parse_feed(content, feed['name'])
        for item in items:
            # Keep the cached copy so read/unread state survives a refresh.
            if item['id'] not in articles:
                articles[item['id']] = item
                new_count += 1
        print(f"{len(items)} items")

    save_articles(articles)
    print(f"\n{GLYPH_OK} {new_count} new articles")


def _truncate(text: str, width: int) -> str:
    """Shorten *text* to *width* characters, adding '...' only if cut."""
    return text[:width] + "..." if len(text) > width else text


def list_articles(limit: int = 20, unread_only: bool = False, feed: str = None):
    """List articles, newest fetched first.

    Args:
        limit: Maximum number of articles to print.
        unread_only: Show only articles not yet marked read.
        feed: Case-insensitive substring filter on the feed name.
    """
    items = list(load_articles().values())

    if unread_only:
        items = [a for a in items if not a.get('read')]

    if feed:
        needle = feed.lower()
        items = [a for a in items if needle in a['feed'].lower()]

    # Sort by fetched date, newest first
    items.sort(key=lambda a: a.get('fetched', ''), reverse=True)
    items = items[:limit]

    if not items:
        print("No articles found")
        return

    print(f"\n{GLYPH_NEWS} Articles ({len(items)} shown)\n")

    for item in items:
        status = "  " if item.get('read') else GLYPH_UNREAD
        print(f"{status} [{item['id']}] {_truncate(item['title'], 60)}")
        print(f"   {item['feed']} | {_truncate(item['link'], 50)}")
        print()


def read_article(article_id: str):
    """Mark article as read and show details.

    *article_id* may be a full id or a unique prefix of one. An ambiguous
    prefix is reported rather than silently resolved to the first match.
    """
    articles = load_articles()

    article = articles.get(article_id) if article_id else None
    if article is None:
        # Find by prefix
        matches = [a for aid, a in articles.items()
                   if article_id and aid.startswith(article_id)]
        if not matches:
            print(f"Article not found: {article_id}")
            return
        if len(matches) > 1:
            print(f"Ambiguous id '{article_id}' matches {len(matches)} articles:")
            for candidate in matches[:10]:
                print(f"   [{candidate['id']}] {_truncate(candidate['title'], 60)}")
            return
        article = matches[0]

    article['read'] = True  # same object as stored in `articles`
    save_articles(articles)

    print(f"\n{GLYPH_ARTICLE} {article['title']}")
    print(f"   Feed: {article['feed']}")
    print(f"   Link: {article['link']}")
    print()
    if article.get('description'):
        print(f"   {article['description']}")
        print()


def add_feed(url: str, name: str = None, category: str = "general"):
    """Add a new feed unless one with the same URL already exists."""
    feeds = load_feeds()

    # Check if already exists
    if any(f['url'] == url for f in feeds):
        print("Feed already exists")
        return

    feeds.append({
        'name': name or url,
        'url': url,
        'category': category,
    })
    save_feeds(feeds)
    print(f"{GLYPH_OK} Added: {name or url}")


def list_feeds():
    """List configured feeds."""
    feeds = load_feeds()
    print(f"\n{GLYPH_FEEDS} Feeds ({len(feeds)})\n")
    for feed in feeds:
        print(f"  [{feed.get('category', 'general')}] {feed['name']}")
        print(f"   {feed['url']}")
        print()


def digest():
    """Generate a quick digest of unread articles, grouped by feed."""
    articles = load_articles()
    unread = [a for a in articles.values() if not a.get('read')]

    # Group by feed
    by_feed = {}
    for a in unread:
        by_feed.setdefault(a['feed'], []).append(a)

    print(f"\n{GLYPH_NEWS} News Digest - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print(f"   {len(unread)} unread articles\n")

    for feed, items in sorted(by_feed.items()):
        print(f"{GLYPH_PIN} {feed} ({len(items)})")
        for item in items[:3]:
            print(f"   {GLYPH_BULLET} {_truncate(item['title'], 50)}")
        if len(items) > 3:
            print(f"   ... and {len(items) - 3} more")
        print()


def _print_usage():
    """Print the command-line help."""
    print("Usage:")
    print("  news-feed refresh            - Fetch new articles")
    print("  news-feed list [--unread]    - List articles")
    print("  news-feed read <id>          - Read an article")
    print("  news-feed digest             - Quick digest")
    print("  news-feed feeds              - List feeds")
    print("  news-feed add <url> [name]   - Add a feed")


def main():
    """Dispatch the command line; returns a process exit code."""
    args = sys.argv[1:]
    if not args:
        _print_usage()
        return 0

    cmd, rest = args[0], args[1:]

    if cmd == 'refresh':
        refresh()
    elif cmd == 'list':
        list_articles(unread_only='--unread' in rest)
    elif cmd == 'read' and rest:
        read_article(rest[0])
    elif cmd == 'digest':
        digest()
    elif cmd == 'feeds':
        list_feeds()
    elif cmd == 'add' and rest:
        add_feed(rest[0], rest[1] if len(rest) > 1 else None)
    elif cmd in ('read', 'add'):
        # Known command, missing operand: say so instead of "Unknown command".
        print(f"Missing argument for '{cmd}'")
        _print_usage()
        return 1
    else:
        print("Unknown command")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
b/projects/news-feed/requirements.txt @@ -0,0 +1 @@ +# Add dependencies here