-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathhtml_downloader.py
27 lines (23 loc) · 912 Bytes
/
html_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# -*- coding: utf-8 -*-
import urllib.request
class HtmlDownloader(object):
    """Best-effort page fetcher built on ``urllib.request``."""

    def download(self, new_url):
        """Fetch the resource addressed by ``new_url[2]`` and return its body.

        Args:
            new_url: ``None``, or an indexable record whose element at
                index 2 is the URL string to fetch.
                NOTE(review): the meaning of the other elements is not
                visible from this file -- confirm against the caller.

        Returns:
            The raw response body as ``bytes`` when the response status
            code is 200; ``None`` when ``new_url`` is ``None``, when the
            request cannot be built or fails (details are printed to
            stdout), or when the status code is anything other than 200.
        """
        if new_url is None:
            return None

        url = new_url[2]
        # Present a browser-like User-Agent instead of urllib's default one.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'}

        try:
            # Request() itself raises ValueError for a URL without a scheme
            # (e.g. a relative link), so it is built inside the try block to
            # be reported like any other failure instead of escaping.
            request = urllib.request.Request(url, headers=headers)
            response = urllib.request.urlopen(request)
        except urllib.error.HTTPError as e:
            # HTTPError doubles as a response object: dump what the server
            # sent back, then release the underlying connection.
            try:
                print(e.getcode())
                print(e.reason)
                print(e.geturl())
                print('-----------------')
                print(e.info())
                print(e.read())
            finally:
                e.close()
            return None
        except Exception as e:
            print('exception:\nurl: %s\ntype: %r\nexcept: %r' % (url, type(e), e))
            return None

        # Close the response on every exit path so the socket is not leaked.
        with response:
            if response.getcode() != 200:
                return None
            return response.read()