app.py
__author__ = 'nhat'
from scrapy.utils.project import get_project_settings
from scrapy_service.spiders.product_spider_object_type_html import product_spider_websosanh
from scrapy_service.spiders.product_spider_object_type_html import product_spider_compare
from scrapy_service.spiders.product_spider_object_type_html import product_spider_cdiscount
from scrapy_service.spiders.product_spider_object_type_html import product_spider_hotdeal
from scrapy_service.spiders.product_spider_object_type_html import product_spider_cungmua
from scrapy_service.spiders.product_spider_object_type_xml import product_spider_lazada
from twisted.internet import reactor, defer
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
import threading
import time
# Create an asynchronous crawl function with the inlineCallbacks decorator.
@defer.inlineCallbacks
def crawl():
    yield runner.crawl(product_spider_hotdeal)
    reactor.stop()
# To keep the crawl from running too long, stop the reactor after a timeout.
def stop_crawler(hours=4):
    time.sleep(hours * 60 * 60)
    # reactor.stop() is not thread-safe; schedule the stop on the reactor thread.
    reactor.callFromThread(reactor.stop)
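# An alternative sketch (an assumption, not in the original): the reactor can
# schedule its own shutdown with callLater, which avoids the extra thread and
# the cross-thread stop entirely.
# def schedule_stop(hours=4):
#     reactor.callLater(hours * 60 * 60, reactor.stop)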
if __name__ == '__main__':
    try:
        # Load the current settings from settings.py.
        settings = get_project_settings()
        runner = CrawlerRunner(settings)
        # Optionally stop the crawl after x hours (disabled by default).
        # Daemonize the timer thread so it cannot keep the process alive
        # after the crawl finishes on its own.
        # stop_thread = threading.Thread(target=stop_crawler, args=(4,))
        # stop_thread.daemon = True
        # stop_thread.start()
        crawl()
        reactor.run()  # the script blocks here until the last crawl call finishes
    except Exception as e:
        print(e)
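# A minimal sketch of running several of the imported spiders sequentially
# with the same CrawlerRunner/inlineCallbacks pattern (as documented for
# Scrapy's CrawlerRunner); each yield waits for the previous crawl to finish.
# The function name crawl_all is hypothetical, not part of the original script.
# @defer.inlineCallbacks
# def crawl_all():
#     yield runner.crawl(product_spider_websosanh)
#     yield runner.crawl(product_spider_compare)
#     yield runner.crawl(product_spider_lazada)
#     reactor.stop()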