Skip to content

Commit 44b282c

Browse files
committed
Refactor CSSTarget model and enhance web scraping functionality with drill chain support; remove unused cookie dependencies.
1 parent b853ceb commit 44b282c

File tree

7 files changed

+443
-298
lines changed

7 files changed

+443
-298
lines changed

bun.lockb

-708 Bytes
Binary file not shown.

index.ts

Lines changed: 82 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
import { file } from "bun";
2-
import { parse as parseCookie, serialize as serializeCookie } from "cookie";
3-
import * as cookieSignature from "cookie-signature";
42
import { existsSync, mkdirSync, unlink } from "fs";
53
import { readFile, readdir, writeFile } from "fs/promises";
64
import { Context, Hono } from "hono";
@@ -147,22 +145,6 @@ app.post("/", async (ctx) => {
147145

148146
const feedType = extract("feedType", "webScraping");
149147

150-
const buildCSSTarget = (prefix: string) => {
151-
const dateFormat = extract(`${prefix}Format`);
152-
const customDateFormat =
153-
dateFormat === "other" ? extract("customDateFormat") : undefined;
154-
155-
return new CSSTarget(
156-
extract(`${prefix}Selector`),
157-
extract(`${prefix}Attribute`),
158-
["on", true, "true"].includes(extract(`${prefix}StripHtml`)),
159-
extract(`${prefix}BaseUrl`),
160-
["on", true, "true"].includes(extract(`${prefix}RelativeLink`)),
161-
["on", true, "true"].includes(extract(`${prefix}TitleCase`)),
162-
extract(`${prefix}Iterator`),
163-
dateFormat === "other" ? customDateFormat : dateFormat
164-
);
165-
};
166148
const apiConfig: ApiConfig = {
167149
title: extract("feedName", "RSS Feed"),
168150
baseUrl: extract("feedUrl"),
@@ -191,11 +173,11 @@ app.post("/", async (ctx) => {
191173
feedType === "webScraping"
192174
? {
193175
iterator: new CSSTarget(extract("itemSelector")),
194-
title: buildCSSTarget("title"),
195-
description: buildCSSTarget("description"),
196-
link: buildCSSTarget("link"),
197-
enclosure: buildCSSTarget("enclosure"),
198-
date: buildCSSTarget("date"),
176+
title: buildCSSTarget("title", body),
177+
description: buildCSSTarget("description", body),
178+
link: buildCSSTarget("link", body),
179+
enclosure: buildCSSTarget("enclosure", body),
180+
date: buildCSSTarget("date", body),
199181
headers: extract("headers"),
200182
}
201183
: {},
@@ -240,24 +222,6 @@ app.post("/preview", async (ctx) => {
240222
const extract = (key: string, fallback: any = undefined) =>
241223
jsonData[key] ?? fallback;
242224

243-
const buildCSSTarget = (prefix: string) => {
244-
const dateFormat = extract(`${prefix}Format`);
245-
const customDateFormat =
246-
dateFormat === "other" ? extract("customDateFormat") : undefined;
247-
248-
return new CSSTarget(
249-
extract(`${prefix}Selector`),
250-
extract(`${prefix}Attribute`),
251-
["on", true, "true"].includes(extract(`${prefix}StripHtml`)),
252-
extract(`${prefix}BaseUrl`),
253-
["on", true, "true"].includes(extract(`${prefix}RelativeLink`)),
254-
["on", true, "true"].includes(extract(`${prefix}TitleCase`)),
255-
extract(`${prefix}Iterator`),
256-
// Pass either the standard date format or the custom format
257-
dateFormat === "other" ? customDateFormat : dateFormat
258-
);
259-
};
260-
261225
const feedType = extract("feedType", "webScraping");
262226

263227
const apiConfig: ApiConfig = {
@@ -288,12 +252,12 @@ app.post("/preview", async (ctx) => {
288252
feedType === "webScraping"
289253
? {
290254
iterator: new CSSTarget(extract("itemSelector")),
291-
title: buildCSSTarget("title"),
292-
description: buildCSSTarget("description"),
293-
link: buildCSSTarget("link"),
294-
author: buildCSSTarget("author"),
295-
date: buildCSSTarget("date"),
296-
enclosure: buildCSSTarget("enclosure"),
255+
title: buildCSSTarget("title", jsonData),
256+
description: buildCSSTarget("description", jsonData),
257+
link: buildCSSTarget("link", jsonData),
258+
author: buildCSSTarget("author", jsonData),
259+
date: buildCSSTarget("date", jsonData),
260+
enclosure: buildCSSTarget("enclosure", jsonData),
297261
}
298262
: {},
299263
apiMapping:
@@ -542,6 +506,77 @@ app.get("privacy-policy", (ctx) =>
542506
)
543507
);
544508

509+
/**
 * Assembles the CSSTarget for one feed field (e.g. "title", "link", "date")
 * from a flat request body whose keys are namespaced by `prefix`.
 *
 * Keys read from `body`: `${prefix}Selector`, `${prefix}Attribute`,
 * `${prefix}StripHtml`, `${prefix}BaseUrl`, `${prefix}RelativeLink`,
 * `${prefix}TitleCase`, `${prefix}Iterator`, `${prefix}Format`, and the
 * shared `customDateFormat` key (only when the format is "other").
 *
 * @param prefix Field name used as the key prefix.
 * @param body   Parsed request payload (form data or JSON).
 * @returns A CSSTarget with its `drillChain` populated via parseDrillChain.
 */
function buildCSSTarget(prefix: string, body: Record<string, any>): CSSTarget {
  // Every value is read as text; a missing key yields the empty string.
  const readText = (field: string) => body[field]?.toString() ?? "";

  // Because values are stringified first, a checkbox counts as ticked only
  // when its text is "on" or "true" (a boolean true stringifies to "true").
  const readFlag = (field: string): boolean => {
    const text = readText(field);
    return text === "on" || text === "true";
  };

  // "other" means the caller supplied a free-form pattern under the shared
  // customDateFormat key; any other value is passed through unchanged.
  const selectedFormat = readText(`${prefix}Format`);
  const effectiveFormat =
    selectedFormat === "other" ? readText("customDateFormat") : selectedFormat;

  const selector = readText(`${prefix}Selector`);
  const attribute = readText(`${prefix}Attribute`);
  const stripHtml = readFlag(`${prefix}StripHtml`);
  const baseUrl = readText(`${prefix}BaseUrl`);
  const relativeLink = readFlag(`${prefix}RelativeLink`);
  const titleCase = readFlag(`${prefix}TitleCase`);
  const iterator = readText(`${prefix}Iterator`);

  const cssTarget = new CSSTarget(
    selector,
    attribute,
    stripHtml,
    baseUrl,
    relativeLink,
    titleCase,
    iterator,
    effectiveFormat
  );

  // The drill chain is assigned after construction rather than passed to
  // the constructor.
  cssTarget.drillChain = parseDrillChain(prefix, body);
  return cssTarget;
}
530+
531+
/**
 * Extracts the ordered list of drill-chain steps for one feed field from a
 * request body.
 *
 * Two encodings of `${prefix}DrillChain` are accepted:
 *  - an array of step objects stored directly under that key (JSON payloads);
 *  - flat bracketed keys such as `${prefix}DrillChain[0][selector]`
 *    (form payloads), which are grouped by index and ordered numerically.
 *
 * Every step is normalised: text fields become strings ("" when absent) and
 * `isRelative` is true only for `true`, "true" or "on".
 *
 * @param prefix Field name used as the key prefix (e.g. "link").
 * @param body   Parsed request payload.
 * @returns The steps in ascending index order; an empty array when the body
 *          contains no drill-chain data for `prefix`.
 */
function parseDrillChain(
  prefix: string,
  body: Record<string, any>
): Array<{
  selector: string;
  attribute: string;
  isRelative: boolean;
  baseUrl: string;
}> {
  // Coerces a raw payload value to text so the declared `string` fields
  // really hold strings even when the client sent a number or boolean.
  const toText = (value: unknown): string =>
    value === undefined || value === null ? "" : String(value);

  // Checkbox-style truthiness: HTML forms send "on", JSON may send true/"true".
  const isChecked = (value: unknown): boolean =>
    value === true || value === "on" || value === "true";

  // Normalises one raw step. Non-object entries (null, primitives) become an
  // empty step instead of throwing, which keeps later steps at their index.
  const toStep = (raw: unknown) => {
    const fields: Record<string, unknown> =
      typeof raw === "object" && raw !== null
        ? (raw as Record<string, unknown>)
        : {};
    return {
      selector: toText(fields.selector),
      attribute: toText(fields.attribute),
      isRelative: isChecked(fields.isRelative),
      baseUrl: toText(fields.baseUrl),
    };
  };

  const chainKey = `${prefix}DrillChain`;
  const rawChain = body[chainKey];

  if (Array.isArray(rawChain)) {
    return rawChain.map(toStep);
  }

  // Escape the key so regex metacharacters in `prefix` match literally.
  const escapedKey = chainKey.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const flatKeyPattern = new RegExp(`^${escapedKey}\\[(\\d+)\\]\\[(.*?)\\]$`);

  // Rows are grouped by numeric index, so "[01]" and "[1]" address the same
  // step. Rows have no prototype so that a field named "__proto__" is stored
  // as ordinary data.
  const rowsByIndex = new Map<number, Record<string, unknown>>();
  for (const [bodyKey, value] of Object.entries(body)) {
    const match = flatKeyPattern.exec(bodyKey);
    if (!match) continue;

    const [, indexText = "", fieldName = ""] = match;
    const index = Number.parseInt(indexText, 10);

    let row = rowsByIndex.get(index);
    if (!row) {
      row = Object.create(null) as Record<string, unknown>;
      rowsByIndex.set(index, row);
    }
    row[fieldName] = value;
  }

  return [...rowsByIndex.entries()]
    .sort(([left], [right]) => left - right)
    .map(([, row]) => toStep(row));
}
579+
545580
function initializeWorker(feedConfig: any) {
546581
feedUpdaters.set(
547582
feedConfig.feedId,

models/csstarget.model.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,10 @@ export default class CSSTarget {
88
public titleCase?: boolean,
99
public iterator?: string,
1010
public dateFormat?: string,
11-
) {}
11+
public drillChain?: Array<{
12+
selector: string;
13+
attribute: string;
14+
isRelative: boolean;
15+
baseUrl: string;
16+
}>) {}
1217
}

package.json

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
"bun": "^1.1.33",
1313
"bun-types": "^0.1.0",
1414
"cheerio": "^1.0.0",
15-
"cookie": "^1.0.1",
16-
"cookie-signature": "^1.2.2",
1715
"dayjs": "^1.11.13",
1816
"hono": "^4.6.8",
1917
"hono-sessions": "^0.7.0",

0 commit comments

Comments
 (0)