Skip to content

Commit

Permalink
Merge branch 'test/e2e-tests-for-all-parameters' into mog/webscraper-…
Browse files Browse the repository at this point in the history
…refactor
  • Loading branch information
nickscamara committed Nov 7, 2024
2 parents 49801ac + ec78240 commit cc89094
Showing 1 changed file with 143 additions and 37 deletions.
180 changes: 143 additions & 37 deletions apps/api/src/__tests__/e2e_v1_withAuth_all_params/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,23 @@ const E2E_TEST_SERVER_URL = "http://firecrawl-e2e-test.vercel.app"; // @rafaelsi

describe("E2E Tests for v1 API Routes", () => {

it.concurrent('should return a successful response for a scrape with 403 page', async () => {
const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
.post('/v1/scrape')
.set('Authorization', `Bearer ${process.env.TEST_API_KEY}`)
.set('Content-Type', 'application/json')
.send({ url: 'https://httpstat.us/403' });

expect(response.statusCode).toBe(200);
expect(response.body).toHaveProperty('data');
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data).toHaveProperty('markdown');
expect(response.body.data).toHaveProperty('metadata');
expect(response.body.data.metadata.statusCode).toBe(403);
}, 30000);

it.concurrent("should handle 'formats:markdown (default)' parameter correctly",
async () => {
const scrapeRequest = {
Expand All @@ -35,6 +52,7 @@ describe("E2E Tests for v1 API Routes", () => {
expect(response.body.data.markdown).toContain("Content with id #content-1");
expect(response.body.data.markdown).toContain("Loading...");
expect(response.body.data.markdown).toContain("Click me!");
expect(response.body.data.markdown).toContain("Power your AI apps with clean data crawled from any website. It's also open-source."); // firecrawl.dev inside an iframe
expect(response.body.data.markdown).toContain("This content loads only when you see it. Don't blink! 👼"); // the browser always scroll to the bottom
expect(response.body.data.markdown).not.toContain("Header"); // Only main content is returned by default
expect(response.body.data.markdown).not.toContain("footer"); // Only main content is returned by default
Expand Down Expand Up @@ -102,7 +120,7 @@ describe("E2E Tests for v1 API Routes", () => {
// - TODO: tests for screenshot@fullPage

it.concurrent("should handle 'headers' parameter correctly", async () => {
//@ts-ignore
// @ts-ignore
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
headers: { "e2e-header-test": "firecrawl" }
Expand Down Expand Up @@ -335,32 +353,29 @@ describe("E2E Tests for v1 API Routes", () => {
},
60000);

// it.concurrent("should handle 'removeBase64Images' parameter correctly",
// async () => {
// const scrapeRequest: ScrapeRequest = {
// url: "",
// removeBase64Images: false
// };
it.concurrent("should handle 'removeBase64Images' parameter correctly",
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
removeBase64Images: true
} as ScrapeRequest;

// const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
// .post("/v1/scrape")
// .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
// .set("Content-Type", "application/json")
// .send(scrapeRequest);
const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);

// expect(response.statusCode).toBe(200);
// // Add assertions to verify removeBase64Images is handled correctly
// },
// 30000);
expect(response.statusCode).toBe(200);
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
console.log(response.body.data.markdown)
// - TODO: not working for every image
// expect(response.body.data.markdown).toContain("Image-Removed");
},
30000);

// actions:
// -[x] Wait
// -[ ] Screenshot
// -[x] Click
// -[ ] Write text
// -[ ] Press a key
// -[ ] Scroll
// -[ ] Scrape
it.concurrent("should handle 'action wait' parameter correctly",
async () => {
const scrapeRequest = {
Expand Down Expand Up @@ -392,8 +407,34 @@ describe("E2E Tests for v1 API Routes", () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
actions: [{
type: "wait",
milliseconds: 10000
type: "screenshot"
}]
} as ScrapeRequest;

const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);

expect(response.statusCode).toBe(200);
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data.actions?.screenshots[0].length).toBeGreaterThan(0);
expect(response.body.data.actions?.screenshots[0]).toContain("https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-");

// TODO compare screenshot with expected screenshot
},
30000);

it.concurrent("should handle 'action screenshot@fullPage' parameter correctly",
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
actions: [{
type: "screenshot",
fullPage: true
}]
} as ScrapeRequest;

Expand All @@ -407,8 +448,11 @@ describe("E2E Tests for v1 API Routes", () => {
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
// expect(response.body.data.markdown).not.toContain("Click me!");
// expect(response.body.data.markdown).toContain("Text changed after click!");
console.log(response.body.data.actions?.screenshots[0])
expect(response.body.data.actions?.screenshots[0].length).toBeGreaterThan(0);
expect(response.body.data.actions?.screenshots[0]).toContain("https://service.firecrawl.dev/storage/v1/object/public/media/screenshot-");

// TODO compare screenshot with expected full page screenshot
},
30000);

Expand Down Expand Up @@ -437,20 +481,19 @@ describe("E2E Tests for v1 API Routes", () => {
},
30000);

// -[ ] Write text
it.concurrent("should handle 'action write' parameter correctly",
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
formats: ["markdown", "html"],
formats: ["html"],
actions: [{
type: "write",
text: "Hello, world!",
type: "click",
selector: "#input-1"
},
{
type: "screenshot"
}
type: "write",
text: "Hello, world!"
}
]} as ScrapeRequest;

const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
Expand All @@ -463,9 +506,72 @@ describe("E2E Tests for v1 API Routes", () => {
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
console.log(response.body.data)
// expect(response.body.data.markdown).not.toContain("Click me!");
// expect(response.body.data.markdown).toContain("Text changed after click!");

expect(response.body.data.html).toContain("<input id=\"input-1\" type=\"text\" placeholder=\"Enter text here...\" style=\"padding:8px;margin:10px;border:1px solid #ccc;border-radius:4px;background-color:#000\" value=\"Hello, world!\">");
},
30000);

it.concurrent("should handle 'action pressKey' parameter correctly",
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
formats: ["markdown"],
actions: [
{
type: "press",
key: "ArrowDown"
}
]
} as ScrapeRequest;

const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);

expect(response.statusCode).toBe(200);
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
expect(response.body.data.markdown).toContain("Last Key Clicked: ArrowDown")
},
30000);

it.concurrent("should handle 'action scroll' parameter correctly",
async () => {
const scrapeRequest = {
url: E2E_TEST_SERVER_URL,
formats: ["markdown"],
actions: [
{
type: "click",
selector: "#scroll-bottom-loader"
},
{
type: "scroll",
direction: "down",
amount: 2000
}
]
} as ScrapeRequest;

const response: ScrapeResponseRequestTest = await request(FIRECRAWL_API_URL)
.post("/v1/scrape")
.set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
.set("Content-Type", "application/json")
.send(scrapeRequest);

console.log(response.body)
expect(response.statusCode).toBe(200);
if (!("data" in response.body)) {
throw new Error("Expected response body to have 'data' property");
}
console.log(response.body.data.markdown)
expect(response.body.data.markdown).toContain("Last Key Clicked: ArrowDown")
},
30000);

// TODO: test scrape action

});

0 comments on commit cc89094

Please sign in to comment.