-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
54 lines (45 loc) · 2.14 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os.path
import requests
from bs4 import BeautifulSoup
from Crawler.Zhihu_Crawler import login
class Zhihu():
    """Download the images found in the answers of one Zhihu question page.

    Images are stored as ``<save_dir>/<question title>/<author>/<file name>``.
    """

    # Root folder for downloads; an instance may override it via ``save_dir``.
    save_dir = 'E:\\Python3\\Crawler\\Zhihu_Crawler\\pic'
    # ``class`` attribute value used to pick the <img> tags inside an answer.
    IMAGE_CLASS = 'origin_image zh-lightbox-thumb lazy'
    # Characters Windows rejects in file/directory names, mapped to '_'.
    _UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    def __init__(self, account='15225480602', password='1998925sk', save_dir=None):
        """Log in and prepare the HTTP session used for every request.

        Args:
            account: Login name handed to ``login.login``.
            password: Password handed to ``login.login``.
            save_dir: Optional root folder for downloads; the class-level
                default is used when omitted.
        """
        # NOTE(review): the default credentials are hard-coded in source
        # control. They are kept only so that ``Zhihu()`` keeps working;
        # pass them in explicitly (or load them from configuration) and
        # rotate the exposed password.
        self.headers = {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
                        'Host': 'www.zhihu.com'}
        # Presumably ``main()`` returns a logged-in requests session - confirm
        # against the login module. No throwaway session is created first.
        self.session = login.login(account, password).main()
        self.title = ''
        if save_dir is not None:
            self.save_dir = save_dir

    @staticmethod
    def _text_of(tag, default):
        """Return the stripped text of *tag*, or *default* if missing/empty."""
        if tag is None:
            return default
        return tag.get_text(strip=True) or default

    @classmethod
    def _safe_name(cls, name, default='_'):
        """Return *name* with characters that are illegal in paths replaced."""
        # Windows also rejects names ending in a space or a dot.
        cleaned = str(name).translate(cls._UNSAFE_CHARS).strip().rstrip('.')
        return cleaned or default

    def getImage(self, pageUrl):
        """Fetch a question page and download the images of every answer.

        Args:
            pageUrl: URL of the question page.
        """
        response = self.session.get(pageUrl, headers=self.headers)
        html = BeautifulSoup(response.text, 'lxml')
        # ``find`` returns None when the element is absent, and ``.string`` is
        # None for tags with nested markup, so fall back to a placeholder
        # instead of crashing later on string concatenation.
        self.title = self._text_of(
            html.find('span', class_='zm-editable-content'), 'untitled')
        for answer in html.find_all('div', class_='zm-item-answer'):
            if answer.find('img', class_=self.IMAGE_CLASS):
                self.parse(answer, self.title)

    def parse(self, item, title):
        """Collect the author and image URLs of one answer and download them.

        Args:
            item: Parsed element of a single answer.
            title: Question title, used for the folder name and messages.
        """
        # Answers without an author link (presumably anonymous ones) used to
        # raise AttributeError here.
        author = self._text_of(item.find('a', class_='author-link'), 'anonymous')
        images = item.find_all('img', class_=self.IMAGE_CLASS)
        # Skip tags that lack the attribute rather than passing None along.
        candidates = (image.get('data-actualsrc') for image in images)
        image_list = [src for src in candidates if src]
        self.Download(image_list, author, title)

    def Download(self, list, author, title):
        """Download every URL in *list* into the author's folder.

        Args:
            list: Iterable of image URLs; falsy entries are ignored. The name
                shadows the builtin but is kept for keyword-argument callers.
            author: Answer author, used for the folder name and messages.
            title: Question title, used for the folder name and messages.
        """
        urls = [url for url in list if url]
        target_dir = os.path.join(
            self.save_dir, self._safe_name(title), self._safe_name(author))
        saved = 0
        for index, url in enumerate(urls, start=1):
            print('正在下载《%s》问题下的%s的第%d张图片' % (title, author, index))
            response = self.session.get(url)
            if response.status_code != 200:
                # Do not store an error page under an image file name.
                print('第%d张图片下载失败(HTTP %d),已跳过' % (index, response.status_code))
                continue
            # Created lazily so that nothing is left behind when no image
            # could be fetched.
            os.makedirs(target_dir, exist_ok=True)
            # Use the last path segment (query string dropped) instead of a
            # fixed index into the split URL, which picked a directory
            # segment for URLs with more than one path component.
            file_name = self._safe_name(
                os.path.basename(url.split('?', 1)[0]), 'image_%d' % index)
            with open(os.path.join(target_dir, file_name), 'wb') as file:
                file.write(response.content)
            saved += 1
        print('《%s》问题下答主%s的图片已下载结束,共%d张图片' % (title, author, saved))
if __name__ == "__main__":
    # Script entry point: read a question number from the console, build the
    # question page URL from it and download the images of that page.
    question_id = input('输入问题编号:')
    crawler = Zhihu()
    crawler.getImage('https://www.zhihu.com/question/' + question_id)