-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.ts
195 lines (170 loc) · 5.38 KB
/
test.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
import puppeteer from "puppeteer";
import {
LUN_BASE_URL,
PAGE_PARAMS,
browserLaunchOptions,
viewport,
} from "./config/config.js";
import {
clickOnListings,
collectPhoneNumber,
collectPhotos,
} from "./modules/lun/collect.js";
import { LUN_SELECTORS_INNER } from "./modules/lun/enums.js";
import { pagesToParse } from "./config/config.js";
import {
assert,
createJSONfile,
grabAllTextContents,
grabTextContent,
randomName,
} from "./utils/common.js";
/** Listing attributes recognised in the free-text detail items of a listing page. */
type OrganizedDetails = {
  rooms: string;
  area: string;
  floor: string;
  type: string;
  heating: string;
  builtYear: string;
  material: string;
  foundDate: string;
  updatedDate: string;
  ceilingHeight: string;
};

/** Placeholder kept for every attribute that no detail item matched. */
const NO_DATA = "no data";

/**
 * Field -> pattern table used to classify a detail item (Ukrainian and Russian
 * wording). Built once at module load instead of once per item; none of the
 * patterns use the `g` flag, so `.test()` carries no state between calls.
 */
const DETAIL_PATTERNS: ReadonlyArray<readonly [keyof OrganizedDetails, RegExp]> =
  [
    ["rooms", new RegExp("кімнат|комнат")],
    ["area", new RegExp("м²")],
    ["floor", new RegExp("этаж|поверх")],
    [
      "type",
      new RegExp(
        "аппс|аппс-люкс|бпс|чеський проєкт|гостинка|хрущівка|дореволюційний|совмін|серія|спец. проєкт|сталінка|аппс|аппс-люкс|бпс|чешский проект|гостинка|хрущевка|дореволюционный|совмин|серия|спец. проект|сталинка",
      ),
    ],
    [
      "material",
      new RegExp(
        "блочні|монолітно-каркасний|панельні|утеплена панель|цегляний будинок|блочные|монолитно-каркасные|панельные|утепленная панель|кирпичные",
      ),
    ],
    ["ceilingHeight", /высота потолка|висота стелі/i],
    ["heating", /(отопление|опалення)/i],
  ];

/** Matches an item such as "1975 <construction-year label>". */
const BUILT_YEAR_PATTERN = /(\d{4})\s+(Рік будівництва|год постройки)/i;

/** Extracts the first standalone four-digit number from a built-year item. */
const YEAR_ONLY_PATTERN = /\b\d{4}\b/;

/**
 * Sorts the raw detail strings of a listing into named attributes.
 *
 * Each item is tested against every pattern; when several items match the same
 * attribute the last one wins. `builtYear` stores only the four-digit year,
 * every other attribute stores the whole item text. `foundDate` and
 * `updatedDate` have no pattern and therefore always stay "no data".
 *
 * @param details Value returned by `grabAllTextContents`; anything that is not
 *   an array is reported as "No data available" and yields all placeholders.
 * @returns The organized attributes (also logged to the console).
 */
function organizeDetails(details: unknown): OrganizedDetails {
  const organized: OrganizedDetails = {
    rooms: NO_DATA,
    area: NO_DATA,
    floor: NO_DATA,
    type: NO_DATA,
    heating: NO_DATA,
    builtYear: NO_DATA,
    material: NO_DATA,
    foundDate: NO_DATA,
    updatedDate: NO_DATA,
    ceilingHeight: NO_DATA,
  };

  if (!Array.isArray(details)) {
    console.log("No data available");
    return organized;
  }

  for (const item of details) {
    assert(item);
    const text = String(item);

    for (const [field, pattern] of DETAIL_PATTERNS) {
      if (pattern.test(text)) {
        organized[field] = text;
      }
    }

    if (BUILT_YEAR_PATTERN.test(text)) {
      const year = text.match(YEAR_ONLY_PATTERN);
      if (year) {
        organized.builtYear = year[0];
      }
    }
  }

  console.log(organized);
  return organized;
}

/**
 * Script entry point: walks `pagesToParse` result pages, lets `clickOnListings`
 * open the listings, scrapes every tab the browser reports through
 * `targetcreated`, and finally writes all collected listings to a JSON file.
 */
(async () => {
  console.time("time for execution");
  const finalListings: object[] = [];
  // Scrapes started by the event listener. They are tracked so the script can
  // wait for them instead of writing the file / closing the browser mid-scrape.
  const scrapeJobs: Promise<void>[] = [];
  const browser = await puppeteer.launch(browserLaunchOptions);

  try {
    const page = await browser.newPage();
    await page.setViewport(viewport);

    // Registered after the main page exists, so only tabs opened later
    // (the listings) are handled here.
    browser.on("targetcreated", (target) => {
      if (target.type() !== "page") {
        return;
      }

      const scrapeListing = async (): Promise<void> => {
        const newPage = await target.page();
        assert(newPage);
        if (!newPage) {
          return;
        }

        try {
          await newPage.bringToFront(); // Focus the new tab
          await newPage.setViewport(viewport);

          const details = await grabAllTextContents(
            newPage,
            LUN_SELECTORS_INNER.details,
          );
          const organizedDetails = organizeDetails(details);

          const price = await grabTextContent(
            newPage,
            LUN_SELECTORS_INNER.price,
          );
          const address = await grabTextContent(
            newPage,
            LUN_SELECTORS_INNER.address,
          );
          const description = await grabTextContent(
            newPage,
            LUN_SELECTORS_INNER.description,
          );
          const phone = await collectPhoneNumber(
            newPage,
            LUN_SELECTORS_INNER.showFullPhoneNumberButton,
          );
          const photos = await collectPhotos(newPage);

          finalListings.push({
            price,
            details: organizedDetails,
            description,
            address,
            photos,
            phone,
            url: newPage.url(),
          });
        } finally {
          // Close the tab even when scraping failed so tabs do not pile up.
          await newPage.close();
        }
      };

      // One broken listing must not abort the run or surface as an unhandled
      // rejection; log it and keep going.
      scrapeJobs.push(
        scrapeListing().catch((error: unknown) => {
          console.error("Failed to scrape listing:", error);
        }),
      );
    });

    for (let currentPage = 1; currentPage <= pagesToParse; currentPage++) {
      const urlParams = new URLSearchParams(PAGE_PARAMS);
      urlParams.set("page", currentPage.toString());
      const pageUrl = `${LUN_BASE_URL}?${urlParams.toString()}`;
      await page.goto(pageUrl);
      console.log("Current page: ", currentPage);
      await clickOnListings(page);
    }

    // Wait for every scrape that is still running; loop because new jobs may be
    // queued while earlier ones are being awaited.
    // NOTE(review): a tab whose "targetcreated" event fires only after this
    // drain is not waited for — assumes clickOnListings does not return before
    // its tabs have been announced; confirm.
    while (scrapeJobs.length > 0) {
      await Promise.all(scrapeJobs.splice(0));
    }

    // Search input field (disabled):
    // await page.waitForSelector(LUN_SELECTORS_OUTER.searchInput);
    // await page.type(LUN_SELECTORS_OUTER.searchInput, inputSearchQuery);
    // await page.keyboard.press("Enter", {
    // delay: 1000,
    // });

    // NOTE(review): awaiting is a no-op if createJSONfile is synchronous; if it
    // returns a promise this ensures the file is written before "Done!".
    await createJSONfile(finalListings, `new-json-${randomName(5)}`);
    console.timeEnd("time for execution");
    console.log("Done!");
  } finally {
    // Always release the browser process, even when the run failed.
    await browser.close();
  }
})();