From e42288205afcd6121fc53d8ae8138964975b0ca3 Mon Sep 17 00:00:00 2001 From: jhonnatan gonzalez rodriguez Date: Fri, 9 Jun 2023 16:54:40 +0200 Subject: [PATCH] improve error handling --- auction.js | 264 ++++++++++++++++++++++++---------------------------- mailer.js | 3 +- proquote.js | 20 ++-- scrapper.js | 3 +- snapshot.js | 8 ++ utils.js | 3 + 6 files changed, 147 insertions(+), 154 deletions(-) create mode 100644 snapshot.js create mode 100644 utils.js diff --git a/auction.js b/auction.js index eb79a85..277220e 100644 --- a/auction.js +++ b/auction.js @@ -1,17 +1,17 @@ import playwright, { devices } from 'playwright'; import chromium from 'chrome-aws-lambda'; +import { isDev } from './utils'; export async function collectAuctions() { - const browser = - process.env.NODE_ENV === 'development' - ? await playwright.chromium.launch({ - headless: false, - }) - : await playwright.chromium.launch({ - args: chromium.args, - executablePath: await chromium.executablePath, - headless: chromium.headless, - }); + const browser = isDev() + ? await playwright.chromium.launch({ + headless: false, + }) + : await playwright.chromium.launch({ + args: chromium.args, + executablePath: await chromium.executablePath, + headless: chromium.headless, + }); const context = await browser.newContext(devices['Desktop Chrome']); const page = await context.newPage(); @@ -30,169 +30,148 @@ export async function collectAuctions() { const endedAuctionsBtn = page.locator('#parent-radio-ended_auctions'); await endedAuctionsBtn.waitFor({ state: 'attached' }); - endedAuctionsBtn.evaluate((node) => node.click()); + await endedAuctionsBtn.evaluate((node) => node.click()); const makeOfferBtn = page.locator('label').filter({ hasText: 'Make Offer' }); await makeOfferBtn.waitFor({ state: 'visible' }); - makeOfferBtn.evaluate((node) => node.click()); + await makeOfferBtn.evaluate((node) => node.click()); const filterToggle = page.locator('#saved-search-261859'); await filterToggle.waitFor({ state: 'attached' }); await filterToggle.evaluate((node) => node.click()); - const totalResultsText = await page - .locator('.result-number', { - hasNotText: '0 Results Total Match Your 1 Saved Search', - }) - .innerText(); - - const total = parseInt(totalResultsText.split(' ')[0]); - console.log('::: Total results :::'); - console.log(total); - - const auctionIds = []; - const LIMIT = 120000; + const auctionIds = new Set(); + const LIMIT = 30; const now = Date.now(); let shouldCollect = true; while (shouldCollect) { - const responsePromise = page.waitForResponse( - 'https://easy-pass.acvauctions.com/bff/filters/auctions/buying/ended' - ); - - await page.evaluate(async () => { - window.scrollTo(0, document.body.scrollHeight); - }); - - const response = await responsePromise.catch(() => null); - - if (!response) { - continue; - } + try { + for (let car of await page.locator('.acv-infinite-scroller-item').all()) { + const link = await car.locator('a'); + const href = await link.getAttribute('href'); + auctionIds.add(href.match(/\d+/)[0]); + } - if (response.status() === 200) { - const { - data: { results }, - } = await response.json(); - console.log(results.length); + console.log('::: auctionIds.length :::'); + console.log(auctionIds.size); - auctionIds.push(...results.map((result) => result.id)); - } else { - shouldCollect = false; - } + await page.evaluate(async () => { + window.scrollTo(0, window.scrollY + 1000); - if (auctionIds.length === total) { - shouldCollect = false; + const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + await delay(500); + }); + } catch (e) { + await context.close(); + await browser.close(); + throw new Error(e); } + console.log('::: timer :::'); + console.log(Math.floor((Date.now() - now) / 1000)); if (Math.floor((Date.now() - now) / 1000) > LIMIT) { - throw new Error('Timeout to collect auctions'); + shouldCollect = false; } } await context.close(); await browser.close(); - return auctionIds; + return [...auctionIds]; } const getCarModel = async (auctionId, page) => { - try { - await page.goto(`https://app.acvauctions.com/auction/${auctionId}`); - - const propertiesToSave = [ - 'city', - 'vin', - 'odometer', - 'auction id', - 'auction date', - 'make', - 'model', - 'year', - 'color', - ]; - - let carModel = {}; - - const title = page.locator('.vehicle-header-summary__name'); - await title.waitFor({ - state: 'visible', - }); - - carModel.title = await title.innerText(); - - carModel.condition = []; - - const condition = await page.locator('.condition-report'); - const isInoperable = await condition.getByText( - /vehicle inop \(does not move\)/i - ); + await page.goto(`https://app.acvauctions.com/auction/${auctionId}`); + + const propertiesToSave = [ + 'city', + 'vin', + 'odometer', + 'auction id', + 'auction date', + 'make', + 'model', + 'year', + 'color', + ]; + + let carModel = {}; + + const title = page.locator('.vehicle-header-summary__name'); + await title.waitFor({ + state: 'visible', + }); - if ((await isInoperable.count()) === 1) { - carModel.condition.push('isInoperable'); - } + carModel.title = await title.innerText(); - const doesNotStart = await condition.getByText( - /engine cranks\, does not start/i - ); + carModel.condition = []; - if ((await doesNotStart.count()) === 1) { - carModel.condition.push('doesNotStart'); - } + const condition = await page.locator('.condition-report'); + const isInoperable = await condition.getByText( + /vehicle inop \(does not move\)/i + ); - const price = await page.locator('.price').first(); - carModel.price = parseInt( - (await price.innerText()).replace('$', '').replace(',', '') - ); + if ((await isInoperable.count()) === 1) { + carModel.condition.push('isInoperable'); + } + + const doesNotStart = await condition.getByText( + /engine cranks\, does not start/i + ); + + if ((await doesNotStart.count()) === 1) { + carModel.condition.push('doesNotStart'); + } - const details = await page.locator('.auction-vehicle-details'); - - const tableRows = await details.locator('tr').all(); - - for (const row of tableRows) { - const label = (await row.locator('.left').innerText()).toLowerCase(); - const value = await row.locator('.right').innerText(); - - if (propertiesToSave.includes(label)) { - if (label === 'odometer') { - carModel[label] = { - type: 'miles', - value: - value === 'True Mileage Unknown' - ? -1 - : parseInt(value.replace(',', '')), - }; - } else if (label === 'year') { - carModel[label] = parseInt(value); - } else if (label === 'auction date') { - carModel.auctionDate = value; - } else if (label === 'auction id') { - carModel.auctionId = parseInt(value); - } else { - carModel[label] = value; - } + const price = await page.locator('.price').first(); + carModel.price = parseInt( + (await price.innerText()).replace('$', '').replace(',', '') + ); + + const details = await page.locator('.auction-vehicle-details'); + + const tableRows = await details.locator('tr').all(); + + for (const row of tableRows) { + const label = (await row.locator('.left').innerText()).toLowerCase(); + const value = await row.locator('.right').innerText(); + + if (propertiesToSave.includes(label)) { + if (label === 'odometer') { + carModel[label] = { + type: 'miles', + value: + value === 'True Mileage Unknown' + ? -1 + : parseInt(value.replace(',', '')), + }; + } else if (label === 'year') { + carModel[label] = parseInt(value); + } else if (label === 'auction date') { + carModel.auctionDate = value; + } else if (label === 'auction id') { + carModel.auctionId = parseInt(value); + } else { + carModel[label] = value; } } - - return carModel; - } catch (e) { - console.log(e); - return null; } + + return carModel; }; export async function scrapAuctions(auctionIds) { - const browser = - process.env.NODE_ENV === 'development' - ? await playwright.chromium.launch({ - headless: false, - }) - : await playwright.chromium.launch({ - args: chromium.args, - executablePath: await chromium.executablePath, - headless: chromium.headless, - }); + const browser = isDev() + ? await playwright.chromium.launch({ + headless: false, + }) + : await playwright.chromium.launch({ + args: chromium.args, + executablePath: await chromium.executablePath, + headless: chromium.headless, + }); const context = await browser.newContext(devices['Desktop Chrome']); const page = await context.newPage(); @@ -206,19 +185,20 @@ export async function scrapAuctions(auctionIds) { .fill(process.env.ACV_AUCTIONS_PASS); await page.getByRole('button', { name: /log in/i }).click(); - const endedAuctionsBtn = page.locator('#parent-radio-ended_auctions'); - await endedAuctionsBtn.waitFor({ state: 'attached' }); + await page.waitForURL('https://app.acvauctions.com/search?l=live'); const cars = []; for (const auctionId of auctionIds) { - // TODO - remove to handle all records - // if (cars.length === 35) break; - - const car = await getCarModel(auctionId, page); + try { + const car = await getCarModel(auctionId, page); - if (car) { - cars.push(car); + if (car) { + cars.push(car); + } + } catch (e) { + console.log('::: scrapAuctions :::'); + console.log(e); } } diff --git a/mailer.js b/mailer.js index 8369222..6520192 100644 --- a/mailer.js +++ b/mailer.js @@ -1,7 +1,8 @@ import nodemailer from 'nodemailer'; +import { isDev } from './utils'; function getTransport() { - if (process.env.NODE_ENV === 'development') { + if (isDev()) { return nodemailer.createTransport({ host: process.env.NODEMAILER_HOST, port: 2525, diff --git a/proquote.js b/proquote.js index c2fe791..ed4718a 100644 --- a/proquote.js +++ b/proquote.js @@ -1,17 +1,17 @@ import playwright, { devices } from 'playwright'; import chromium from 'chrome-aws-lambda'; +import { isDev } from './utils'; export async function proQuoteCar(car) { - const browser = - process.env.NODE_ENV === 'development' - ? await playwright.chromium.launch({ - headless: false, - }) - : await playwright.chromium.launch({ - args: chromium.args, - executablePath: await chromium.executablePath, - headless: chromium.headless, - }); + const browser = isDev() + ? await playwright.chromium.launch({ + headless: false, + }) + : await playwright.chromium.launch({ + args: chromium.args, + executablePath: await chromium.executablePath, + headless: chromium.headless, + }); const context = await browser.newContext(devices['Desktop Chrome']); const page = await context.newPage(); let avgValue; diff --git a/scrapper.js b/scrapper.js index d67dc65..fb330fe 100644 --- a/scrapper.js +++ b/scrapper.js @@ -1,10 +1,11 @@ -import dotenv from 'dotenv'; import { sendReport } from './mailer.js'; import { collectAuctions, scrapAuctions } from './auction.js'; import { getViableCars } from './proquote.js'; export async function init() { const auctionIds = await collectAuctions(); + console.log('::: auctionIds :::'); + console.log(auctionIds); const carsToCompare = await scrapAuctions(auctionIds); const viableCars = await getViableCars(carsToCompare); await sendReport(viableCars); diff --git a/snapshot.js b/snapshot.js new file mode 100644 index 0000000..18f362f --- /dev/null +++ b/snapshot.js @@ -0,0 +1,8 @@ +import { Storage } from '@google-cloud/storage'; + +const storage = new Storage(); +const bucket = storage.bucket('auctions-screenshots'); + +export async function takeScreenshot(name) { + await bucket.file(name).save(await page.screenshot()); +} diff --git a/utils.js b/utils.js new file mode 100644 index 0000000..c867c0d --- /dev/null +++ b/utils.js @@ -0,0 +1,3 @@ +export function isDev() { + return process.env.NODE_ENV === 'development'; +}