@@ -17,6 +17,89 @@ const parser = new Parser({
1717
// String constants for this module; their use sites are outside this excerpt.
// NOTE(review): GENERATOR presumably names the tool in the generated feed's generator field — confirm.
1818const GENERATOR = "rssrssrssrss" ;
// NOTE(review): FEED_TITLE presumably is the default title of the merged output feed — confirm.
1919const FEED_TITLE = "Merged Feed" ;
const USER_AGENT = "rssrssrssrss (https://rssrssrssrss.com)";

/** Media types that unambiguously identify an RSS, Atom or JSON Feed document. */
const DISCOVERABLE_FEED_TYPES: ReadonlySet<string> = new Set([
  "application/rss+xml",
  "application/atom+xml",
  "application/feed+json",
]);

/**
 * Generic JSON media type. It is accepted only as a fallback because many
 * pages advertise non-feed JSON resources with it.
 */
const DISCOVERABLE_JSON_FALLBACK_TYPE = "application/json";

/** Named character references that can plausibly appear inside a URL attribute. */
const DISCOVERY_NAMED_ENTITIES: Readonly<Record<string, string>> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
};

/** Decode numeric and common named HTML character references in an attribute value. */
function decodeDiscoveryAttributeEntities(value: string): string {
  return value.replace(
    /&(#x[0-9a-f]+|#[0-9]+|amp|lt|gt|quot|apos);/gi,
    (whole: string, body: string): string => {
      if (body.charAt(0) !== "#") {
        return DISCOVERY_NAMED_ENTITIES[body.toLowerCase()] ?? whole;
      }
      const isHex = body.charAt(1).toLowerCase() === "x";
      const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // Leave out-of-range references untouched instead of throwing.
      return codePoint > 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : whole;
    },
  );
}

/**
 * Parse the attributes of a single `<link ...>` tag into a map keyed by the
 * lower-cased attribute name. Whole attribute names are matched, so
 * `data-href` is never mistaken for `href`. The first occurrence of a name wins.
 */
function parseDiscoveryLinkAttributes(tag: string): Map<string, string> {
  const attributes = new Map<string, string>();
  const attributePattern =
    /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;
  const attributeText = tag.replace(/^<link\b/i, "");

  let match: RegExpExecArray | null = attributePattern.exec(attributeText);
  while (match !== null) {
    const name = (match[1] ?? "").toLowerCase();
    if (name !== "" && !attributes.has(name)) {
      const rawValue = match[2] ?? match[3] ?? match[4] ?? "";
      attributes.set(name, decodeDiscoveryAttributeEntities(rawValue));
    }
    match = attributePattern.exec(attributeText);
  }
  return attributes;
}

/** Resolve `href` against `baseUrl`; returns null unless the result is a valid http(s) URL. */
function resolveDiscoveredHttpUrl(href: string, baseUrl: string): string | null {
  try {
    const resolved = new URL(href, baseUrl);
    return resolved.protocol === "http:" || resolved.protocol === "https:"
      ? resolved.href
      : null;
  } catch {
    return null;
  }
}

/**
 * Scan `html` for `<link rel="alternate">` tags that advertise a feed and
 * return the first usable feed URL, resolved against `baseUrl`.
 * A link with a dedicated feed media type is returned as soon as it is found;
 * a generic `application/json` link is only used when no dedicated one exists.
 */
function pickDiscoveredFeedUrl(html: string, baseUrl: string): string | null {
  let jsonFallback: string | null = null;

  for (const tag of html.match(/<link\b[^>]*>/gi) ?? []) {
    const attributes = parseDiscoveryLinkAttributes(tag);

    // `rel` is a space-separated token list, e.g. rel="alternate feed".
    const relTokens = (attributes.get("rel") ?? "").toLowerCase().split(/\s+/);
    if (!relTokens.includes("alternate")) {
      continue;
    }

    const href = (attributes.get("href") ?? "").trim();
    if (href === "") {
      continue;
    }

    // Compare the bare media type so parameters such as "; charset=utf-8" and
    // suffixed types such as "application/json+oembed" cannot match by accident.
    const mediaType = ((attributes.get("type") ?? "").split(";")[0] ?? "")
      .trim()
      .toLowerCase();
    const isDedicatedFeedType = DISCOVERABLE_FEED_TYPES.has(mediaType);
    const isFirstJsonCandidate =
      mediaType === DISCOVERABLE_JSON_FALLBACK_TYPE && jsonFallback === null;
    if (!isDedicatedFeedType && !isFirstJsonCandidate) {
      continue;
    }

    const resolved = resolveDiscoveredHttpUrl(href, baseUrl);
    if (resolved === null) {
      continue;
    }
    if (isDedicatedFeedType) {
      return resolved;
    }
    jsonFallback = resolved;
  }

  return jsonFallback;
}

/**
 * Try to discover an RSS/Atom/JSON feed URL from an HTML page by looking for
 * `<link rel="alternate">` tags with feed content types.
 *
 * The page is fetched with the module's User-Agent. Only responses whose
 * Content-Type is HTML or XHTML are inspected. Relative, root-relative,
 * protocol-relative and query-only hrefs are resolved with the standard `URL`
 * algorithm against the final response URL (after redirects) when the runtime
 * exposes it, otherwise against `url`.
 *
 * Limitations: tags are located with regular expressions rather than an HTML
 * parser, and a `<base href>` element in the page is not honoured.
 *
 * @param url - Address of the HTML page to inspect.
 * @returns The absolute http(s) URL of the first advertised feed, or `null`
 *   when the page is not HTML, advertises no feed, or any error occurs
 *   (network failure, invalid URL, ...). This function never throws.
 */
export async function discoverFeedFromHtml(
  url: string,
): Promise<string | null> {
  try {
    const response = await fetch(url, {
      headers: {
        "User-Agent": USER_AGENT,
        Accept: "text/html, application/xhtml+xml, */*",
      },
    });

    // Only try to discover feeds from HTML pages.
    const contentType = (
      response.headers.get("content-type") ?? ""
    ).toLowerCase();
    if (
      !contentType.includes("text/html") &&
      !contentType.includes("application/xhtml")
    ) {
      return null;
    }

    const html = await response.text();

    // After a redirect, relative links are relative to where we ended up.
    const baseUrl = response.url || url;
    return pickDiscoveredFeedUrl(html, baseUrl);
  } catch {
    // Discovery is best-effort: any failure simply means "no feed found".
    return null;
  }
}
20103
21104// Helper function to try parsing as JSON Feed, returns null if not a JSON Feed
22105async function tryParseAsJSONFeed ( url : string ) : Promise < CustomFeed | null > {
@@ -68,6 +151,7 @@ async function tryParseAsJSONFeed(url: string): Promise<CustomFeed | null> {
68151
69152/**
70153 * Parse a feed from a URL (RSS or JSON Feed)
154+ * If the URL is not a feed, try to discover a feed from the HTML page.
71155 */
72156export async function parseFeedFromUrl ( url : string ) : Promise < {
73157 feed : CustomFeed | null ;
@@ -94,6 +178,14 @@ export async function parseFeedFromUrl(url: string): Promise<{
94178 } ;
95179 }
96180 } catch ( error ) {
181+ // If direct parsing failed, try to discover a feed from the HTML page
182+ const discoveredFeedUrl = await discoverFeedFromHtml ( url ) ;
183+ if ( discoveredFeedUrl ) {
184+ // Parse the discovered feed URL with parseFeedFromDiscoveredUrl, which never
185+ // runs discovery again, so a failing discovered URL cannot cause infinite recursion
186+ return parseFeedFromDiscoveredUrl ( discoveredFeedUrl , url ) ;
187+ }
188+
97189 console . error ( `Error fetching feed from ${ url } :` , error ) ;
98190 return {
99191 feed : null ,
@@ -102,6 +194,48 @@ export async function parseFeedFromUrl(url: string): Promise<{
102194 }
103195}
104196
/**
 * Fetch and parse a feed whose URL was found through HTML discovery.
 *
 * Unlike the primary entry point, this helper makes no discovery attempt of
 * its own: when the discovered URL cannot be parsed, the failure is logged
 * and reported to the caller.
 *
 * @param feedUrl - The discovered feed URL to fetch and parse.
 * @param originalUrl - The page URL the feed was discovered from; only used
 *   in the error log message.
 * @returns The parsed feed with `error: null`, or `feed: null` together with
 *   the error message when fetching or parsing fails.
 */
async function parseFeedFromDiscoveredUrl(
  feedUrl: string,
  originalUrl: string,
): Promise<{
  feed: CustomFeed | null;
  error: string | null;
}> {
  try {
    // JSON Feed gets the first attempt.
    const parsedJsonFeed = await tryParseAsJSONFeed(feedUrl);
    if (parsedJsonFeed) {
      return { feed: parsedJsonFeed, error: null };
    }

    // Not a JSON Feed: hand the URL to the RSS parser and tag every item
    // with the title and URL of the feed it came from.
    const rssFeed = await parser.parseURL(feedUrl);
    const taggedItems = rssFeed.items.map((entry: CustomItem) => ({
      ...entry,
      sourceFeedTitle: rssFeed.title,
      sourceFeedUrl: feedUrl,
    }));

    return {
      feed: { ...rssFeed, items: taggedItems },
      error: null,
    };
  } catch (error) {
    console.error(
      `Error fetching discovered feed from ${feedUrl} (original: ${originalUrl}):`,
      error,
    );
    const message = error instanceof Error ? error.message : String(error);
    return { feed: null, error: message };
  }
}
238+
105239/**
106240 * Parse a feed from an XML string (RSS)
107241 */
0 commit comments