Add autodiscovery

jmduke · jmduke · commit f133da8a5fbc · 2026-01-08T21:29:01.000-05:00
diff --git a/next-env.d.ts b/next-env.d.ts
@@ -1,6 +1,6 @@
 /// <reference types="next" />
 /// <reference types="next/image-types/global" />
-import "./.next/dev/types/routes.d.ts";
+import "./.next/types/routes.d.ts";
 
 // NOTE: This file should not be edited
 // see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
diff --git a/src/app/page.tsx b/src/app/page.tsx
@@ -13,6 +13,22 @@ type FeedItem = {
   image?: string;
 };
 
+const CHANGELOG: { date: string; description: string }[] = [ // newest first
+  {
+    date: "2026-01-08",
+    description:
+      "Added automatic RSS feed discovery from HTML pages. You can now paste any website URL and we'll try to find its RSS feed automatically.",
+  },
+  {
+    date: "2025-12-16",
+    description: "Added support for JSON Feed format (both input and output).",
+  },
+  {
+    date: "2025-11-01",
+    description: "Initial release with RSS feed merging support.",
+  },
+];
+
 const SAMPLE_FEEDS: { name: string; feeds: string[] }[] = [
   {
     name: "Tech News Bundle",
@@ -238,6 +254,20 @@ export default function Home() {
             </div>
           ))}
 
+          <div className="">
+            <h2 className="font-semibold text-gray-800">Changelog</h2>
+            <ul className="text-gray-600 space-y-2 mt-2">
+              {CHANGELOG.map((entry, index) => (
+                <li key={index} className="text-sm">
+                  <span className="font-mono text-xs text-gray-500">
+                    {entry.date}
+                  </span>
+                  <p>{entry.description}</p>
+                </li>
+              ))}
+            </ul>
+          </div>
+
           {errorMessage && (
             <div className="mt-4 p-3 border border-red-300 rounded-md bg-red-50 text-red-700">
               <p>{errorMessage}</p>
diff --git a/src/lib/rss.test.ts b/src/lib/rss.test.ts
@@ -1,9 +1,10 @@
-import { expect, it, describe } from "bun:test";
+import { expect, it, describe, mock, afterEach } from "bun:test";
 import {
   parseFeedFromXml,
   mergeFeeds,
   generateRSS,
   generateJSONFeed,
+  discoverFeedFromHtml,
 } from "./rss";
 
 describe("rss - XML to final output", () => {
@@ -224,3 +225,159 @@ describe("rss - XML to final output", () => {
     expect(jsonOutput).toMatchSnapshot();
   });
 });
+
+describe("discoverFeedFromHtml", () => {
+  const originalFetch = global.fetch;
+
+  // Stub fetch with a real Response so headers and body behave like the API.
+  const mockFetch = (html: string, contentType = "text/html") => {
+    global.fetch = mock(() =>
+      Promise.resolve(
+        new Response(html, { headers: { "content-type": contentType } }),
+      ),
+    ) as unknown as typeof fetch;
+  };
+
+  afterEach(() => {
+    global.fetch = originalFetch;
+  });
+
+  it("should discover RSS feed from HTML with absolute URL", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <title>Test Page</title>
+  <link rel="alternate" type="application/rss+xml" title="RSS Feed" href="https://example.com/feed.xml">
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/");
+    expect(feedUrl).toBe("https://example.com/feed.xml");
+  });
+
+  it("should discover Atom feed from HTML", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <link rel="alternate" type="application/atom+xml" href="https://example.com/atom.xml">
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/");
+    expect(feedUrl).toBe("https://example.com/atom.xml");
+  });
+
+  it("should discover JSON Feed from HTML", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <link rel="alternate" type="application/feed+json" href="https://example.com/feed.json">
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/");
+    expect(feedUrl).toBe("https://example.com/feed.json");
+  });
+
+  it("should handle relative URLs starting with /", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <link rel="alternate" type="application/rss+xml" href="/blog/feed.xml">
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/blog/");
+    expect(feedUrl).toBe("https://example.com/blog/feed.xml");
+  });
+
+  it("should handle protocol-relative URLs", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <link rel="alternate" type="application/rss+xml" href="//cdn.example.com/feed.xml">
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/");
+    expect(feedUrl).toBe("https://cdn.example.com/feed.xml");
+  });
+
+  it("should handle relative paths", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <link rel="alternate" type="application/rss+xml" href="feed.xml">
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml(
+      "https://example.com/blog/page.html",
+    );
+    expect(feedUrl).toBe("https://example.com/blog/feed.xml");
+  });
+
+  it("should return null when no feed links found", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <title>No Feed</title>
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/");
+    expect(feedUrl).toBeNull();
+  });
+
+  it("should return null for non-HTML content types", async () => {
+    mockFetch("{}", "application/json");
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/api/data");
+    expect(feedUrl).toBeNull();
+  });
+
+  it("should return null on fetch error", async () => {
+    global.fetch = mock(() =>
+      Promise.reject(new Error("Network error")),
+    ) as unknown as typeof fetch;
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/");
+    expect(feedUrl).toBeNull();
+  });
+
+  it("should handle single quotes in link attributes", async () => {
+    const html = `<!DOCTYPE html>
+<html>
+<head>
+  <link rel='alternate' type='application/rss+xml' href='https://example.com/feed.xml'>
+</head>
+<body></body>
+</html>`;
+
+    mockFetch(html);
+
+    const feedUrl = await discoverFeedFromHtml("https://example.com/");
+    expect(feedUrl).toBe("https://example.com/feed.xml");
+  });
+});
diff --git a/src/lib/rss.ts b/src/lib/rss.ts
@@ -17,6 +17,89 @@ const parser = new Parser({
 
 const GENERATOR = "rssrssrssrss";
 const FEED_TITLE = "Merged Feed";
+const USER_AGENT = "rssrssrssrss (https://rssrssrssrss.com)";
+
+/**
+ * Try to discover a feed URL (RSS, Atom or JSON Feed) from an HTML page by
+ * scanning its <link rel="alternate"> tags for a known feed content type.
+ * Discovery is best effort: any failure is reported as null, never thrown.
+ *
+ * @param url - Address of the page to inspect.
+ * @returns The first advertised feed URL as an absolute URL, or null when the
+ *   page is not HTML, advertises no feed, or cannot be fetched.
+ */
+export async function discoverFeedFromHtml(
+  url: string,
+): Promise<string | null> {
+  try {
+    const response = await fetch(url, {
+      headers: {
+        "User-Agent": USER_AGENT,
+        Accept: "text/html, application/xhtml+xml, */*",
+      },
+    });
+
+    const contentType = response.headers.get("content-type") || "";
+
+    // Only try to discover feeds from HTML pages
+    if (
+      !contentType.includes("text/html") &&
+      !contentType.includes("application/xhtml")
+    ) {
+      return null;
+    }
+
+    const html = await response.text();
+
+    // Relative hrefs are relative to the page that was actually served, which
+    // differs from `url` after a redirect. Fall back to `url` when the response
+    // does not report its final location.
+    const pageUrl = response.url || url;
+
+    const feedTypes = [
+      "application/rss+xml",
+      "application/atom+xml",
+      "application/feed+json",
+      "application/json",
+    ];
+    const feedLinkRegex = /<link[^>]*rel=["']alternate["'][^>]*>/gi;
+
+    for (const linkTag of html.match(feedLinkRegex) ?? []) {
+      const typeMatch = linkTag.match(/type=["']([^"']+)["']/i);
+      const hrefMatch = linkTag.match(/href=["']([^"']+)["']/i);
+
+      if (!typeMatch || !hrefMatch) {
+        continue;
+      }
+
+      // Compare the bare media type exactly: a substring test would also accept
+      // non-feed types such as "application/json+oembed".
+      const type = typeMatch[1].replace(/;[\s\S]*$/, "").trim().toLowerCase();
+      if (!feedTypes.includes(type)) {
+        continue;
+      }
+
+      // Attribute values are HTML-escaped, so "&amp;" in a query string has to
+      // be turned back into "&" before the value is used as a URL.
+      const href = hrefMatch[1].trim().replace(/&amp;/gi, "&");
+
+      try {
+        // The URL constructor resolves absolute, protocol-relative,
+        // root-relative and document-relative references ("../", "?query").
+        const feedUrl = new URL(href, pageUrl);
+        if (feedUrl.protocol === "http:" || feedUrl.protocol === "https:") {
+          return feedUrl.href;
+        }
+      } catch {
+        // Malformed href: ignore this tag and keep looking.
+      }
+    }
+
+    return null;
+  } catch {
+    return null;
+  }
+}
 
 // Helper function to try parsing as JSON Feed, returns null if not a JSON Feed
 async function tryParseAsJSONFeed(url: string): Promise<CustomFeed | null> {
@@ -68,6 +151,7 @@ async function tryParseAsJSONFeed(url: string): Promise<CustomFeed | null> {
 
 /**
  * Parse a feed from a URL (RSS or JSON Feed)
+ * If the URL is not a feed, try to discover a feed from the HTML page.
  */
 export async function parseFeedFromUrl(url: string): Promise<{
   feed: CustomFeed | null;
@@ -94,6 +178,14 @@ export async function parseFeedFromUrl(url: string): Promise<{
       };
     }
   } catch (error) {
+    // If direct parsing failed, try to discover a feed from the HTML page
+    const discoveredFeedUrl = await discoverFeedFromHtml(url);
+    if (discoveredFeedUrl) {
+      // Parse the discovered URL with a helper that never attempts discovery
+      // itself, so a bad discovered link cannot start another discovery round
+      return parseFeedFromDiscoveredUrl(discoveredFeedUrl, url);
+    }
+
     console.error(`Error fetching feed from ${url}:`, error);
     return {
       feed: null,
@@ -102,6 +194,48 @@ export async function parseFeedFromUrl(url: string): Promise<{
   }
 }
 
+/**
+ * Parse a feed whose URL was found through HTML discovery. JSON Feed parsing
+ * is tried first, then RSS; no discovery is attempted from here.
+ *
+ * @param feedUrl - Feed address advertised by the page.
+ * @param originalUrl - Page address the caller started from (used in logs).
+ * @returns The parsed feed, or a null feed plus the failure message.
+ */
+async function parseFeedFromDiscoveredUrl(
+  feedUrl: string,
+  originalUrl: string,
+): Promise<{
+  feed: CustomFeed | null;
+  error: string | null;
+}> {
+  try {
+    const asJsonFeed = await tryParseAsJSONFeed(feedUrl);
+    if (asJsonFeed) {
+      return { feed: asJsonFeed, error: null };
+    }
+
+    // Not a JSON Feed: hand the URL to the RSS parser instead.
+    const parsed = await parser.parseURL(feedUrl);
+
+    // Record on every item which feed it came from.
+    const taggedItems = parsed.items.map((entry: CustomItem) => ({
+      ...entry,
+      sourceFeedTitle: parsed.title,
+      sourceFeedUrl: feedUrl,
+    }));
+    return { feed: { ...parsed, items: taggedItems }, error: null };
+  } catch (failure) {
+    console.error(
+      `Error fetching discovered feed from ${feedUrl} (original: ${originalUrl}):`,
+      failure,
+    );
+    const message =
+      failure instanceof Error ? failure.message : String(failure);
+    return { feed: null, error: message };
+  }
+}
+
 /**
  * Parse a feed from an XML string (RSS)
  */