From 66a6f919c6b939f7875d44e3e83b64aa9c6a6291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=B3ricz=20Gerg=C5=91?= Date: Wed, 6 Nov 2024 23:55:05 +0100 Subject: [PATCH] fixes --- apps/api/src/controllers/v1/types.ts | 4 +++- .../scrapeURL/engines/fire-engine/index.ts | 1 + .../scrapeURL/engines/fire-engine/scrape.ts | 1 + apps/api/src/scraper/scrapeURL/engines/index.ts | 17 +++++++++++------ apps/api/src/scraper/scrapeURL/index.ts | 15 +++++++++++++-- 5 files changed, 29 insertions(+), 9 deletions(-) diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 87e989bad..69ffcdfe1 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -505,8 +505,10 @@ export function fromLegacyScrapeOptions(pageOptions: PageOptions, extractorOptio }), internalOptions: { atsv: pageOptions.atsv, + v0DisableJsDom: pageOptions.disableJsDom, + v0UseFastMode: pageOptions.useFastMode, }, - // TODO: fallback, fetchPage Content, replaceAllPathsWithAbsolutePaths, includeLinks, useFastMode, disableJsDom + // TODO: fallback, fetchPage Content, replaceAllPathsWithAbsolutePaths, includeLinks } } diff --git a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts index bd994cbf8..c8ea6aefa 100644 --- a/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/fire-engine/index.ts @@ -176,6 +176,7 @@ export async function scrapeURLWithFireEngineTLSClient(meta: Meta): Promise= 1; - const isGoodStatusCode = engineResult.statusCode < 300; + const isLongEnough = engineResult.markdown.length >= 20; + const isGoodStatusCode = (engineResult.statusCode >= 200 && engineResult.statusCode < 300) || engineResult.statusCode === 304; const hasNoPageError = engineResult.error === undefined; results[engine] = {