#!/bin/bash
# Feed Hunter — Full pipeline with HTML reporting
# Usage: ./run-and-report.sh [scroll_pages] [output_dir]
#
# Runs: scrape → triage → investigate → simulate → generate HTML report

# -e: abort on error; -u: error on unset variables; -o pipefail: a pipeline
# fails if ANY stage fails — without it, `python3 … | tee log` reports tee's
# status and scraper/triage failures are silently swallowed.
set -euo pipefail

PAGES=${1:-8}
OUTPUT_DIR=${2:-"reports/$(date +%Y%m%d-%H%M%S)"}

readonly BASE="/home/wdjones/.openclaw/workspace"
readonly SKILL="$BASE/skills/deep-scraper/scripts"
readonly PROJECT="$BASE/projects/feed-hunter"
readonly DATA="$BASE/data/x-feed"

echo "=== Feed Hunter Pipeline + Report ==="
date '+%Y-%m-%d %H:%M:%S %Z'
echo "Output: $OUTPUT_DIR"

# Create output directory
mkdir -p "$OUTPUT_DIR"

# Ensure Chrome is running with debug port
if ! curl -s http://127.0.0.1:9222/json >/dev/null 2>&1; then
  echo "Starting Chrome..."
  bash "$SKILL/launch-chrome-debug.sh"
fi

# Stage 1: Scrape
echo ""
echo "--- Stage 1: Scrape ($PAGES pages) ---"
python3 -u "$SKILL/scrape-x-feed.py" --port 9222 --scroll-pages "$PAGES" 2>&1 | tee "$OUTPUT_DIR/scrape.log"

# Find the latest scrape directory without parsing `ls`: the glob sorts
# lexicographically, which is chronological for YYYYMMDD…-named dirs, so the
# last match is the newest. Fail loudly if no scrape exists instead of
# proceeding with a garbage path.
LATEST=""
for dir in "$DATA"/20*/; do
  [ -d "$dir" ] && LATEST=${dir%/}
done
if [ -z "$LATEST" ]; then
  echo ">>> No scrape directories found under $DATA" >&2
  exit 1
fi
echo "Latest scrape: $LATEST"

# Stage 2: Triage
echo ""
echo "--- Stage 2: Triage ---"
python3 "$SKILL/triage-posts.py" "$LATEST/posts.json" 2>&1 | tee "$OUTPUT_DIR/triage.log"

# json_len FILE KEY — print the length of the list stored under KEY in JSON
# FILE; prints 0 when the file is missing, unreadable, or malformed. The
# path and key are passed as argv (not interpolated into the Python source),
# so quotes in either cannot break or inject into the inline code.
json_len() {
  python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); print(len(d.get(sys.argv[2],[])))' "$1" "$2" 2>/dev/null || echo "0"
}

# Stage 3: Generate investigation tasks
TRIAGE="$LATEST/triage.json"
INVESTIGATION_COUNT=0
if [ -f "$TRIAGE" ]; then
  INVESTIGATION_COUNT=$(json_len "$TRIAGE" investigation_queue)
  if [ "$INVESTIGATION_COUNT" -gt 0 ]; then
    echo ""
    echo "--- Stage 3: Investigation Tasks ---"
    python3 "$PROJECT/investigate.py" "$TRIAGE" --output "$LATEST/investigations" 2>&1 | tee "$OUTPUT_DIR/investigate.log"
  else
    echo ""
    echo ">>> No posts worth investigating this run."
  fi
else
  echo ">>> No triage output found."
fi

# Stage 4: Update simulations if new investigations found
# NOTE(review): Stage 3 writes investigations to "$LATEST/investigations",
# but this gate tests "$PROJECT/data/investigations" with -f (regular file).
# Confirm the intended path, and whether -d was meant if it is a directory.
SIMULATION_UPDATES=0
if [ -f "$PROJECT/data/investigations" ] && [ "$INVESTIGATION_COUNT" -gt 0 ]; then
  echo ""
  echo "--- Stage 4: Simulation Updates ---"
  # Best-effort (`|| true`): a simulator failure must not kill the report stage.
  python3 "$PROJECT/simulator.py" --check-investigations 2>&1 | tee "$OUTPUT_DIR/simulation.log" || true
  SIMULATION_UPDATES=1
fi

# Stage 5: Generate HTML Report
echo ""
echo "--- Stage 5: Generate Report ---"

# Get summary stats (json_len falls back to 0 on a bad file, so a corrupt
# posts.json degrades the report instead of aborting the whole run here).
POSTS_COUNT=$(json_len "$LATEST/posts.json" posts)
HIGH_VALUE=0
WORTH_INVESTIGATING=0
DISMISSED=0
if [ -f "$TRIAGE" ]; then
  HIGH_VALUE=$(json_len "$TRIAGE" high_value)
  WORTH_INVESTIGATING=$(json_len "$TRIAGE" worth_investigating)
  DISMISSED=$(json_len "$TRIAGE" dismissed)
fi

# Get investigation results
VERIFIED_CLAIMS=0
ACTIONABLE_STRATEGIES=0
if [ -d "$LATEST/investigations" ]; then
  # `-exec … {} +` batches files into few grep invocations instead of one
  # per file; `|| true` keeps pipefail from aborting when nothing matches.
  VERIFIED_CLAIMS=$(find "$LATEST/investigations" -name "*.json" -exec grep -l '"verdict".*"VERIFIED"' {} + 2>/dev/null | wc -l || true)
  ACTIONABLE_STRATEGIES=$(find "$LATEST/investigations" -name "*.json" -exec grep -l '"actionable".*true' {} + 2>/dev/null | wc -l || true)
fi

# Get simulation status
ACTIVE_POSITIONS=0
TOTAL_PNL=0
SIM_STATE="$PROJECT/data/simulations/active.json"
if [ -f "$SIM_STATE" ]; then
  ACTIVE_POSITIONS=$(json_len "$SIM_STATE" positions)
  # Sum of per-position unrealized P&L; path passed via argv for safe quoting.
  TOTAL_PNL=$(python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); print(sum(p.get("unrealized_pnl",0) for p in d.get("positions",[])))' "$SIM_STATE" 2>/dev/null || echo "0")
fi

cat > "$OUTPUT_DIR/report.html" << EOF
Scraping: Collected $POSTS_COUNT posts from $PAGES pages
Triaging: $HIGH_VALUE high-value, $WORTH_INVESTIGATING worth investigating, $DISMISSED dismissed
Investigation: $INVESTIGATION_COUNT posts analyzed, $VERIFIED_CLAIMS claims verified
Simulations: $ACTIVE_POSITIONS active positions, \$$TOTAL_PNL unrealized P&L