-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
66 lines (52 loc) · 2.51 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import time
from bs4 import BeautifulSoup
from base_parser import BaseParser
from confix import *
from mixin import ProductDeteilMixin
class CategoryParser(BaseParser, ProductDeteilMixin):
    """Scrape a catalog page category by category and collect product data.

    Results accumulate in ``self.DATE``, a dict that maps each category title
    to a list of product dicts with the keys ``product_name``,
    ``product_price``, ``product_link`` and ``characteristics``.

    ``self.host`` and ``self.get_html`` are not defined in this class; they
    are presumably provided by ``BaseParser`` (verify). ``get_deteil_info``
    presumably comes from ``ProductDeteilMixin`` (verify).
    """

    def __init__(self):
        super().__init__()
        # category title -> list of product dicts (see category_page_parser)
        self.DATE = {}

    def category_block_parser(self, html):
        """Walk every category link found in ``html`` and parse its page.

        For each ``<a class="category__link">`` the title is read from the
        nested ``<h2 class="content__title">``, an empty result list is
        registered under that title in ``self.DATE``, and the linked page is
        fetched and handed to ``category_page_parser``.

        Args:
            html: Markup of the page that lists the categories.
        """
        if not html:
            # Nothing to parse (empty or missing page body).
            return

        soup = BeautifulSoup(html, 'html.parser')
        for category in soup.find_all('a', class_='category__link'):
            title_tag = category.find('h2', class_='content__title')
            href = category.get('href')
            if title_tag is None or not href:
                # find()/get() return None for missing markup; skip the
                # malformed entry instead of aborting the whole run.
                continue

            category_title = title_tag.get_text(strip=True)
            print(style.BLUE + category_title)
            self.DATE[category_title] = []

            category_link = self.host + href
            print(category_link)
            category_page = self.get_html(category_link)
            self.category_page_parser(category_page, category_title)

    def category_page_parser(self, category_page, category_title, limit=3):
        """Collect products from one category page into ``self.DATE``.

        Args:
            category_page: Markup of the category page.
            category_title: Key in ``self.DATE`` that receives the products.
            limit: Maximum number of products to process from the page.
                Defaults to 3 (the previously hard-coded cap); ``None``
                processes every product found.
        """
        # Allow direct calls for a title that was not registered beforehand.
        collected = self.DATE.setdefault(category_title, [])
        if not category_page:
            return

        soup = BeautifulSoup(category_page, 'html.parser')
        section = soup.find('div', class_='products-box')
        if section is None:
            # Page has no product listing container.
            return

        products = section.find_all('div', class_='product-item-wrapper')
        for product in products[:limit]:
            name_tag = product.find('a', class_='product-name')
            link_tag = product.find('a', class_='product-link')
            href = link_tag.get('href') if link_tag is not None else None
            if name_tag is None or not href:
                # Without a name or a link the card cannot be recorded.
                continue

            product_name = name_tag.get_text(strip=True)
            print(style.RED + product_name)

            price_tag = product.find(
                'div', class_='d-flex align-center justify-between w-full')
            # A card without a price block is still recorded, with an
            # empty price string.
            product_price = (
                price_tag.get_text(strip=True) if price_tag is not None else ''
            )
            print(style.YELLOW + product_price)

            product_link = self.host + href
            print(product_link)

            product_detail_page = self.get_html(product_link)
            characteristics = self.get_deteil_info(product_detail_page)
            collected.append({
                'product_name': product_name,
                'product_price': product_price,
                'product_link': product_link,
                'characteristics': characteristics,
            })
def start_category_parsing():
    """Interactively scrape one catalog category and hand off the results.

    Prompts for a category slug, fetches
    ``https://texnomart.uz/ru/katalog/<slug>``, runs
    ``CategoryParser.category_block_parser`` on the page, passes the
    collected ``parser.DATE`` to ``parser.save_date_to_json`` under the slug,
    and prints the elapsed time in seconds.
    """
    parser = CategoryParser()
    # Stray whitespace around the typed slug would end up in both the
    # request URL and the name passed to save_date_to_json.
    category = input('Введите категорию: ').strip()
    category_link = 'https://texnomart.uz/ru/katalog/' + category
    print('Парсер начал работу')
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    html = parser.get_html(category_link)
    parser.category_block_parser(html)
    parser.save_date_to_json(category, parser.DATE)
    finish = time.perf_counter()
    print(f'Парсер завершил работу за {round(finish - start, 2)} секунд')
# Run the interactive scraper only when this file is executed as a script,
# so that importing the module does not prompt for input or hit the network.
if __name__ == '__main__':
    start_category_parsing()