Skip to content

Commit

Permalink
handle already processed cars, adding parallel scrappers with rate li…
Browse files Browse the repository at this point in the history
…miting
  • Loading branch information
xtatanx committed Jun 12, 2023
1 parent e422882 commit 7c64407
Show file tree
Hide file tree
Showing 10 changed files with 1,179 additions and 153 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,7 @@ dist
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
.pnp.*

# Google cloud
.gcloudignore
82 changes: 22 additions & 60 deletions auction.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,7 @@
import playwright, { devices } from 'playwright';
import chromium from 'chrome-aws-lambda';
import { isDev } from './utils';

export async function collectAuctions() {
const browser = isDev()
? await playwright.chromium.launch({
headless: false,
})
: await playwright.chromium.launch({
args: chromium.args,
executablePath: await chromium.executablePath,
headless: chromium.headless,
});

const context = await browser.newContext(devices['Desktop Chrome']);
const page = await context.newPage();
import { closeBrowser, launchBrowser } from './browserController.js';
import { getProcessedAuction } from './data.js';

async function login(page) {
await page.goto('https://app.acvauctions.com/login');

await page
Expand All @@ -25,8 +11,12 @@ export async function collectAuctions() {
.getByRole('textbox', { name: /password/i })
.fill(process.env.ACV_AUCTIONS_PASS);
await page.getByRole('button', { name: /log in/i }).click();
}

// await page.waitForURL('**/search');
export async function collectAuctions() {
const { page, browser, context } = await launchBrowser();

await login(page);

const endedAuctionsBtn = page.locator('#parent-radio-ended_auctions');
await endedAuctionsBtn.waitFor({ state: 'attached' });
Expand All @@ -49,7 +39,7 @@ export async function collectAuctions() {
while (shouldCollect) {
try {
for (let car of await page.locator('.acv-infinite-scroller-item').all()) {
const link = await car.locator('a');
const link = await car.locator('a:not(.mail-to)');
const href = await link.getAttribute('href');
auctionIds.add(href.match(/\d+/)[0]);
}
Expand All @@ -64,8 +54,7 @@ export async function collectAuctions() {
await delay(500);
});
} catch (e) {
await context.close();
await browser.close();
await closeBrowser(browser, context);
throw new Error(e);
}

Expand All @@ -76,8 +65,7 @@ export async function collectAuctions() {
}
}

await context.close();
await browser.close();
await closeBrowser(browser, context);

return [...auctionIds];
}
Expand Down Expand Up @@ -163,60 +151,34 @@ const getCarModel = async (auctionId, page) => {
};

export async function scrapAuctions(auctionIds) {
const browser = isDev()
? await playwright.chromium.launch({
headless: false,
})
: await playwright.chromium.launch({
args: chromium.args,
executablePath: await chromium.executablePath,
headless: chromium.headless,
});
const context = await browser.newContext(devices['Desktop Chrome']);
const page = await context.newPage();
const { page, browser, context } = await launchBrowser();

await page.goto('https://app.acvauctions.com/login');

await page
.getByRole('textbox', { name: /email address/i })
.fill(process.env.ACV_AUCTIONS_USER);
await page
.getByRole('textbox', { name: /password/i })
.fill(process.env.ACV_AUCTIONS_PASS);
await page.getByRole('button', { name: /log in/i }).click();
await login(page);

await page.waitForURL('https://app.acvauctions.com/search?l=live');

const cars = [];

for (const auctionId of auctionIds) {
try {
const car = await getCarModel(auctionId, page);
const isAlreadyProcessed = await getProcessedAuction(auctionId);
if (!isAlreadyProcessed) {
const car = await getCarModel(auctionId, page);

if (car) {
cars.push(car);
if (car) {
cars.push(car);
}
}
} catch (e) {
console.log('::: scrapAuctions :::');
console.log('::: scrapAuctions catch :::');
console.log(e);
}
}

const result = cars.filter((car) => {
return (
!car.condition.some((report) => {
return ['isInoperable', 'doesNotStart'].includes(report);
}) && car.odometer.value !== -1
);
});

await context.close();
await browser.close();
await closeBrowser(browser, context);

console.log(':::: srcapped cars :::::');
console.log(cars.length);
console.log(':::: cars to proQuote :::::');
console.log(result.length);

return result;
return cars;
}
25 changes: 25 additions & 0 deletions browserController.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import playwright from 'playwright';
import chromium from 'chrome-aws-lambda';
import { isDev } from './utils.js';

/**
 * Launches a Chromium browser and opens a new context with a single page.
 *
 * When `isDev()` is truthy the browser is started with `headless: false`;
 * otherwise it is started with the args, executable path and headless flag
 * supplied by `chrome-aws-lambda`.
 *
 * @returns {Promise<{browser: object, context: object, page: object}>} The
 *   launched browser together with its newly created context and page.
 * @throws Rethrows any error raised while launching the browser or while
 *   creating the context/page. In the latter case the browser is closed first.
 */
export async function launchBrowser() {
  const launchOptions = isDev()
    ? { headless: false }
    : {
        args: chromium.args,
        executablePath: await chromium.executablePath,
        headless: chromium.headless,
      };

  const browser = await playwright.chromium.launch(launchOptions);

  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    return { browser, context, page };
  } catch (err) {
    // The caller never receives a handle when setup fails, so the browser has
    // to be closed here or its process would be leaked. A failure to close is
    // deliberately ignored so the original setup error is the one reported.
    await browser.close().catch(() => {});
    throw err;
  }
}

/**
 * Closes a browser context and then the browser that owns it.
 *
 * The browser is closed even when closing the context fails, so a failing
 * context cannot leave the browser running.
 *
 * @param {object} browser - Object exposing an async `close()` method.
 * @param {object} context - Object exposing an async `close()` method.
 * @returns {Promise<void>}
 * @throws Rethrows the error from `context.close()`, or the error from
 *   `browser.close()` if that call fails as well.
 */
export async function closeBrowser(browser, context) {
  try {
    await context.close();
  } finally {
    await browser.close();
  }
}
59 changes: 59 additions & 0 deletions data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { Firestore } from '@google-cloud/firestore';

// Firestore client used by the data helpers in this module.
// NOTE(review): the project ID is hard-coded here — confirm whether it should
// come from configuration instead.
const db = new Firestore({
projectId: 'auctions-388714',
});

/**
 * Records each car's auction id in the `processedAuctions` collection using a
 * single batched write. Documents are keyed by the stringified auction id and
 * contain only `{ auctionId }`.
 *
 * @param {Iterable<{auctionId: *}>} cars - Cars whose auctions were processed.
 * @returns {Promise<*>} Whatever the batch's `commit()` resolves to.
 */
export async function addProcessedAuctions(cars) {
  console.log('::: adding processed auctions :::');
  const writes = db.batch();

  for (const { auctionId } of cars) {
    const target = db.collection('processedAuctions').doc(`${auctionId}`);
    writes.set(target, { auctionId });
  }

  return writes.commit();
}

/**
 * Stores every given car in the `potentialCars` collection using a single
 * batched write. Each document is keyed by the stringified auction id and
 * holds the full car object.
 *
 * @param {Iterable<{auctionId: *}>} cars - Cars to persist.
 * @returns {Promise<*>} Whatever the batch's `commit()` resolves to.
 */
export async function addPotentialCars(cars) {
  console.log('::: adding processed potential cars :::');
  const pending = db.batch();

  for (const car of cars) {
    pending.set(db.collection('potentialCars').doc(`${car.auctionId}`), car);
  }

  return pending.commit();
}

/**
 * Looks up a single document in the `processedAuctions` collection.
 *
 * @param {*} auctionId - Auction id; it is stringified to form the document id.
 * @returns {Promise<*>} The document's data when the document exists,
 *   otherwise `null`.
 */
export async function getProcessedAuction(auctionId) {
  console.log(`::: get processed auction ${auctionId} :::`);
  const snapshot = await db
    .collection('processedAuctions')
    .doc(`${auctionId}`)
    .get();

  return snapshot.exists ? snapshot.data() : null;
}

/**
 * Reads the whole `processedAuctions` collection.
 *
 * @returns {Promise<Array<*>>} The data of every document in the collection.
 */
export async function getAllProcessedAuctions() {
  console.log('::: get all processed auctions :::');
  const { docs } = await db.collection('processedAuctions').get();

  return docs.map((entry) => entry.data());
}

/**
 * Reads the whole `potentialCars` collection.
 *
 * @returns {Promise<Array<*>>} The data of every document in the collection.
 */
export async function getAllPotentialCars() {
  console.log('::: get all potential cars :::');
  const { docs } = await db.collection('potentialCars').get();

  return docs.map((entry) => entry.data());
}
4 changes: 1 addition & 3 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import dotenv from 'dotenv';
import 'dotenv/config';
import functions from '@google-cloud/functions-framework';
import { init } from './scrapper.js';

dotenv.config();

functions.http('initScrapping', async (_, res) => {
await init();
res.send('done');
Expand Down
46 changes: 24 additions & 22 deletions mailer.js
Original file line number Diff line number Diff line change
@@ -1,31 +1,33 @@
import nodemailer from 'nodemailer';
import { isDev } from './utils';
import { isDev } from './utils.js';

function getTransport() {
if (isDev()) {
return nodemailer.createTransport({
host: process.env.NODEMAILER_HOST,
port: 2525,
auth: {
user: process.env.MAIL_TRAP_USER,
pass: process.env.MAIL_TRAP_PASS,
},
});
}
const devOptions = {
host: process.env.NODEMAILER_HOST,
port: 2525,
auth: {
user: process.env.MAIL_TRAP_USER,
pass: process.env.MAIL_TRAP_PASS,
},
};

return nodemailer.createTransport({
service: 'gmail',
host: 'smtp.gmail.com',
port: 587,
secure: false,
auth: {
user: process.env.GMAIL_USER,
pass: process.env.GMAIL_PASS,
},
});
const options = {
service: 'gmail',
host: 'smtp.gmail.com',
port: 587,
secure: false,
auth: {
user: process.env.GMAIL_USER,
pass: process.env.GMAIL_PASS,
},
};

function getTransport() {
console.log(isDev() ? devOptions : options);
return nodemailer.createTransport(isDev() ? devOptions : options);
}

export async function sendReport(viableCars) {
console.log('::: sending mail report :::');
const transport = getTransport();

await transport.sendMail({
Expand Down
Loading

0 comments on commit 7c64407

Please sign in to comment.