Skip to content

Commit f133da8

Browse files
committed
Add autodiscovery
1 parent bd50138 commit f133da8

File tree

4 files changed

+323
-2
lines changed

4 files changed

+323
-2
lines changed

next-env.d.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/// <reference types="next" />
22
/// <reference types="next/image-types/global" />
3-
import "./.next/dev/types/routes.d.ts";
3+
import "./.next/types/routes.d.ts";
44

55
// NOTE: This file should not be edited
66
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.

src/app/page.tsx

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,22 @@ type FeedItem = {
1313
image?: string;
1414
};
1515

16+
/** A single user-facing release note rendered in the "Changelog" section. */
type ChangelogEntry = {
  /** Release date written as `YYYY-MM-DD`. */
  date: string;
  /** Human-readable summary of what changed. */
  description: string;
};

/**
 * Release notes shown on the home page, listed newest first.
 *
 * Declared read-only so this shared module-level data cannot be mutated
 * by accident at runtime; add new entries at the top of the literal.
 */
const CHANGELOG: readonly ChangelogEntry[] = [
  {
    date: "2026-01-08",
    description:
      "Added automatic RSS feed discovery from HTML pages. You can now paste any website URL and we'll try to find its RSS feed automatically.",
  },
  {
    date: "2025-12-16",
    description: "Added support for JSON Feed format (both input and output).",
  },
  {
    date: "2025-11-01",
    description: "Initial release with RSS feed merging support.",
  },
];
31+
1632
const SAMPLE_FEEDS: { name: string; feeds: string[] }[] = [
1733
{
1834
name: "Tech News Bundle",
@@ -238,6 +254,20 @@ export default function Home() {
238254
</div>
239255
))}
240256

257+
<div className="">
258+
<h2 className="font-semibold text-gray-800">Changelog</h2>
259+
<ul className="text-gray-600 space-y-2 mt-2">
260+
{CHANGELOG.map((entry, index) => (
261+
<li key={index} className="text-sm">
262+
<span className="font-mono text-xs text-gray-500">
263+
{entry.date}
264+
</span>
265+
<p>{entry.description}</p>
266+
</li>
267+
))}
268+
</ul>
269+
</div>
270+
241271
{errorMessage && (
242272
<div className="mt-4 p-3 border border-red-300 rounded-md bg-red-50 text-red-700">
243273
<p>{errorMessage}</p>

src/lib/rss.test.ts

Lines changed: 158 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
import { expect, it, describe } from "bun:test";
1+
import { expect, it, describe, mock, afterEach } from "bun:test";
22
import {
33
parseFeedFromXml,
44
mergeFeeds,
55
generateRSS,
66
generateJSONFeed,
7+
discoverFeedFromHtml,
78
} from "./rss";
89

910
describe("rss - XML to final output", () => {
@@ -224,3 +225,159 @@ describe("rss - XML to final output", () => {
224225
expect(jsonOutput).toMatchSnapshot();
225226
});
226227
});
228+
229+
describe("discoverFeedFromHtml", () => {
  const originalFetch = global.fetch;

  /** Wrap the given `<head>` markup in a minimal HTML document. */
  const pageWithHead = (head: string): string => `<!DOCTYPE html>
<html>
<head>
${head}
</head>
<body></body>
</html>`;

  /**
   * Replace the global fetch with a stub that answers every request with
   * `body`. A real `Response` is returned (a fresh one per call, since a body
   * can only be read once) so the code under test sees genuine `Headers`
   * semantics instead of a hand-built object cast to `Response`.
   */
  const mockFetch = (body: string, contentType = "text/html") => {
    global.fetch = mock(() =>
      Promise.resolve(
        new Response(body, { headers: { "content-type": contentType } }),
      ),
    ) as unknown as typeof fetch;
  };

  // Always restore the real fetch so a stub cannot leak into other suites.
  afterEach(() => {
    global.fetch = originalFetch;
  });

  it("should discover RSS feed from HTML with absolute URL", async () => {
    mockFetch(
      pageWithHead(
        `<title>Test Page</title>
<link rel="alternate" type="application/rss+xml" title="RSS Feed" href="https://example.com/feed.xml">`,
      ),
    );

    const feedUrl = await discoverFeedFromHtml("https://example.com/");
    expect(feedUrl).toBe("https://example.com/feed.xml");
  });

  it("should discover Atom feed from HTML", async () => {
    mockFetch(
      pageWithHead(
        `<link rel="alternate" type="application/atom+xml" href="https://example.com/atom.xml">`,
      ),
    );

    const feedUrl = await discoverFeedFromHtml("https://example.com/");
    expect(feedUrl).toBe("https://example.com/atom.xml");
  });

  it("should discover JSON Feed from HTML", async () => {
    mockFetch(
      pageWithHead(
        `<link rel="alternate" type="application/feed+json" href="https://example.com/feed.json">`,
      ),
    );

    const feedUrl = await discoverFeedFromHtml("https://example.com/");
    expect(feedUrl).toBe("https://example.com/feed.json");
  });

  it("should handle relative URLs starting with /", async () => {
    mockFetch(
      pageWithHead(
        `<link rel="alternate" type="application/rss+xml" href="/blog/feed.xml">`,
      ),
    );

    const feedUrl = await discoverFeedFromHtml("https://example.com/blog/");
    expect(feedUrl).toBe("https://example.com/blog/feed.xml");
  });

  it("should handle protocol-relative URLs", async () => {
    mockFetch(
      pageWithHead(
        `<link rel="alternate" type="application/rss+xml" href="//cdn.example.com/feed.xml">`,
      ),
    );

    const feedUrl = await discoverFeedFromHtml("https://example.com/");
    expect(feedUrl).toBe("https://cdn.example.com/feed.xml");
  });

  it("should handle relative paths", async () => {
    mockFetch(
      pageWithHead(
        `<link rel="alternate" type="application/rss+xml" href="feed.xml">`,
      ),
    );

    const feedUrl = await discoverFeedFromHtml(
      "https://example.com/blog/page.html",
    );
    expect(feedUrl).toBe("https://example.com/blog/feed.xml");
  });

  it("should return null when no feed links found", async () => {
    mockFetch(pageWithHead(`<title>No Feed</title>`));

    const feedUrl = await discoverFeedFromHtml("https://example.com/");
    expect(feedUrl).toBeNull();
  });

  it("should return null for non-HTML content types", async () => {
    mockFetch("{}", "application/json");

    const feedUrl = await discoverFeedFromHtml("https://example.com/api/data");
    expect(feedUrl).toBeNull();
  });

  it("should return null on fetch error", async () => {
    global.fetch = mock(() =>
      Promise.reject(new Error("Network error")),
    ) as unknown as typeof fetch;

    const feedUrl = await discoverFeedFromHtml("https://example.com/");
    expect(feedUrl).toBeNull();
  });

  it("should handle single quotes in link attributes", async () => {
    mockFetch(
      pageWithHead(
        `<link rel='alternate' type='application/rss+xml' href='https://example.com/feed.xml'>`,
      ),
    );

    const feedUrl = await discoverFeedFromHtml("https://example.com/");
    expect(feedUrl).toBe("https://example.com/feed.xml");
  });
});

src/lib/rss.ts

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,89 @@ const parser = new Parser({
1717

1818
const GENERATOR = "rssrssrssrss";
1919
const FEED_TITLE = "Merged Feed";
20+
const USER_AGENT = "rssrssrssrss (https://rssrssrssrss.com)";

/** Media types that unambiguously identify a feed document. */
const FEED_MEDIA_TYPES: ReadonlySet<string> = new Set([
  "application/rss+xml",
  "application/atom+xml",
  "application/feed+json",
]);

/**
 * Generic JSON type. It is still accepted as a possible JSON Feed, but only
 * as a fallback, because `rel="alternate"` links of this type may also point
 * at non-feed JSON resources.
 */
const GENERIC_JSON_MEDIA_TYPE = "application/json";

/**
 * Parse the attributes of a single HTML start tag.
 *
 * Handles double-quoted, single-quoted and unquoted values. Names are
 * lower-cased and the first occurrence of a name wins. Matching whole
 * attributes (rather than searching for `href=` anywhere in the tag) keeps
 * `data-href` from being mistaken for `href`.
 */
function parseTagAttributes(tag: string): Map<string, string> {
  const attributes = new Map<string, string>();
  const attributePattern =
    /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>]+))/g;

  for (const match of tag.matchAll(attributePattern)) {
    const [, rawName = "", doubleQuoted, singleQuoted, unquoted] = match;
    const name = rawName.toLowerCase();
    if (!attributes.has(name)) {
      attributes.set(name, doubleQuoted ?? singleQuoted ?? unquoted ?? "");
    }
  }

  return attributes;
}

/**
 * Resolve an `href` attribute value against the page URL.
 *
 * Returns null when the value cannot be parsed as a URL or does not resolve
 * to an http(s) address.
 */
function resolveFeedHref(href: string, baseUrl: string): string | null {
  try {
    // Attribute values are HTML-escaped; `&amp;` is the entity that commonly
    // appears in query strings, so undo it before resolving.
    const resolved = new URL(href.trim().replace(/&amp;/gi, "&"), baseUrl);
    const isHttp =
      resolved.protocol === "http:" || resolved.protocol === "https:";
    return isHttp ? resolved.href : null;
  } catch {
    return null;
  }
}

/**
 * Try to discover a feed URL from an HTML page by looking for
 * `<link rel="alternate">` tags whose `type` is a feed media type
 * (RSS, Atom or JSON Feed).
 *
 * Dedicated feed types win over a generic `application/json` link, which is
 * only returned when no dedicated feed link is present.
 *
 * @param url - Address of the page to inspect.
 * @returns The absolute URL of the first usable feed link, or null when the
 *   response is not HTML, contains no feed link, or the request fails.
 */
export async function discoverFeedFromHtml(
  url: string,
): Promise<string | null> {
  try {
    const response = await fetch(url, {
      headers: {
        "User-Agent": USER_AGENT,
        Accept: "text/html, application/xhtml+xml, */*",
      },
    });

    const contentType = (
      response.headers.get("content-type") || ""
    ).toLowerCase();

    // Only try to discover feeds from HTML pages
    if (
      !contentType.includes("text/html") &&
      !contentType.includes("application/xhtml")
    ) {
      return null;
    }

    const html = await response.text();

    // Relative links must be resolved against the URL that was actually
    // served (after redirects); fall back to the requested URL when the
    // response does not expose one.
    const baseUrl = response.url || url;

    let genericJsonCandidate: string | null = null;

    for (const linkTag of html.match(/<link\b[^>]*>/gi) ?? []) {
      const attributes = parseTagAttributes(linkTag);

      // `rel` is a space-separated token list, e.g. rel="alternate feed".
      const relTokens = (attributes.get("rel") ?? "")
        .toLowerCase()
        .split(/\s+/);
      const href = attributes.get("href");
      if (!relTokens.includes("alternate") || !href) {
        continue;
      }

      // Compare the bare media type (parameters such as `; charset=utf-8`
      // stripped) exactly, so that e.g. `application/json+oembed` is not
      // mistaken for a feed.
      const rawType = attributes.get("type") ?? "";
      const mediaType = (rawType.split(";", 1)[0] ?? "").trim().toLowerCase();
      const isDedicatedFeed = FEED_MEDIA_TYPES.has(mediaType);
      if (!isDedicatedFeed && mediaType !== GENERIC_JSON_MEDIA_TYPE) {
        continue;
      }

      const resolved = resolveFeedHref(href, baseUrl);
      if (resolved === null) {
        continue;
      }
      if (isDedicatedFeed) {
        return resolved;
      }
      if (genericJsonCandidate === null) {
        genericJsonCandidate = resolved;
      }
    }

    return genericJsonCandidate;
  } catch {
    // Discovery is best-effort: network or parsing failures simply mean
    // "no feed found".
    return null;
  }
}
20103

21104
// Helper function to try parsing as JSON Feed, returns null if not a JSON Feed
22105
async function tryParseAsJSONFeed(url: string): Promise<CustomFeed | null> {
@@ -68,6 +151,7 @@ async function tryParseAsJSONFeed(url: string): Promise<CustomFeed | null> {
68151

69152
/**
70153
* Parse a feed from a URL (RSS or JSON Feed)
154+
* If the URL is not a feed, try to discover a feed from the HTML page.
71155
*/
72156
export async function parseFeedFromUrl(url: string): Promise<{
73157
feed: CustomFeed | null;
@@ -94,6 +178,14 @@ export async function parseFeedFromUrl(url: string): Promise<{
94178
};
95179
}
96180
} catch (error) {
181+
// If direct parsing failed, try to discover a feed from the HTML page
182+
const discoveredFeedUrl = await discoverFeedFromHtml(url);
183+
if (discoveredFeedUrl) {
184+
// Parse the discovered feed URL (this is a separate, non-recursive code path)
185+
// parseFeedFromDiscoveredUrl never runs discovery again, so a failing discovered URL cannot loop
186+
return parseFeedFromDiscoveredUrl(discoveredFeedUrl, url);
187+
}
188+
97189
console.error(`Error fetching feed from ${url}:`, error);
98190
return {
99191
feed: null,
@@ -102,6 +194,48 @@ export async function parseFeedFromUrl(url: string): Promise<{
102194
}
103195
}
104196

197+
/**
 * Load and parse a feed whose URL was found through autodiscovery.
 *
 * JSON Feed parsing is attempted first and RSS parsing is the fallback.
 * A failure here is final: no further discovery is attempted.
 *
 * @param feedUrl - The discovered feed address to fetch and parse.
 * @param originalUrl - The page URL the user supplied; used only in the
 *   error log for context.
 * @returns The parsed feed with a null error, or a null feed with the
 *   failure message.
 */
async function parseFeedFromDiscoveredUrl(
  feedUrl: string,
  originalUrl: string,
): Promise<{
  feed: CustomFeed | null;
  error: string | null;
}> {
  try {
    // A JSON Feed takes precedence when the URL serves one.
    const parsedJsonFeed = await tryParseAsJSONFeed(feedUrl);
    if (parsedJsonFeed) {
      return { feed: parsedJsonFeed, error: null };
    }

    // Otherwise treat the URL as RSS and tag every item with its source.
    const rssFeed = await parser.parseURL(feedUrl);
    const taggedItems = rssFeed.items.map((entry: CustomItem) => ({
      ...entry,
      sourceFeedTitle: rssFeed.title,
      sourceFeedUrl: feedUrl,
    }));

    return {
      feed: { ...rssFeed, items: taggedItems },
      error: null,
    };
  } catch (failure) {
    console.error(
      `Error fetching discovered feed from ${feedUrl} (original: ${originalUrl}):`,
      failure,
    );

    const message =
      failure instanceof Error ? failure.message : String(failure);
    return { feed: null, error: message };
  }
}
238+
105239
/**
106240
* Parse a feed from an XML string (RSS)
107241
*/

0 commit comments

Comments
 (0)