-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathwenku8up.py
74 lines (66 loc) · 2.54 KB
/
wenku8up.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import datetime
import os
import re

import pinyin
import requests as req
def is_number(s):
    """Return True if ``s`` can be interpreted as a number, else False.

    Two checks are tried in order:

    1. ``float(s)`` -- accepts ints, floats and strings that spell a float,
       such as ``"12"``, ``"-3.5"`` or ``"1e5"`` (``"nan"`` and ``"inf"``
       are accepted by ``float`` as well).
    2. ``unicodedata.numeric(s)`` -- accepts a single Unicode character that
       carries a numeric value, for example a CJK numeral.

    Inputs that are not strings or numbers at all (``None``, a list, ...)
    and the empty string are reported as "not a number" rather than
    propagating a ``TypeError``.
    """
    # Kept as a local import, as in the original, so the function does not
    # rely on the module header providing ``unicodedata``.
    import unicodedata

    try:
        float(s)
    except (TypeError, ValueError):
        # TypeError: ``s`` is an object float() cannot convert (None, list).
        # ValueError: ``s`` is a string that does not spell a float.
        pass
    else:
        return True

    try:
        unicodedata.numeric(s)
    except (TypeError, ValueError):
        # TypeError: ``s`` is not a single-character string.
        # ValueError: the character has no numeric value.
        return False
    return True
def shou1(name):
    """Return the leading character of each pinyin reading of ``name``.

    ``pinyin.get(name, " ")`` is split on single spaces and the first
    character of every non-empty piece is concatenated into one string.

    Empty pieces are skipped. They appear when ``name`` is empty or itself
    contains a space, and indexing them with ``[0]`` would raise
    ``IndexError``. For an empty ``name`` the result is ``""``.

    NOTE(review): this assumes the second positional argument of
    ``pinyin.get`` is the delimiter placed between per-character readings --
    confirm against the ``pinyin`` package documentation.
    NOTE(review): ``pinyin.get`` is called without a ``format`` argument; if
    its default output carries tone marks, readings that start with a vowel
    would yield an accented first character -- confirm whether that is
    intended.
    """
    readings = pinyin.get(name, " ").split(" ")
    return "".join(reading[0] for reading in readings if reading)
def shou2(name):
    """Map ``name`` to a bucket key: a lowercase ASCII letter or ``0``.

    The initials are obtained from ``shou1(name)`` and classified:

    * the initials form a number (per ``is_number``)  -> ``0``
    * the first initial is ``A``-``Z``                  -> that letter, lowercased
    * the first initial is ``a``-``z``                  -> that letter
    * anything else (including empty initials)          -> ``0``

    Returns:
        A one-character ``str`` for letters, otherwise the ``int`` ``0``.
        The mixed return type is kept because callers pass the result
        through ``str()``.
    """
    # Compute the initials once instead of calling shou1() for every test.
    initials = shou1(name)

    if is_number(initials):
        return 0

    # Only the first initial decides the bucket. Slicing (instead of ord()
    # on the whole string) keeps multi-character or empty initials from
    # raising TypeError.
    first = initials[:1]

    # Plain chained comparisons replace the former `a <= ord(x) & ord(x) <= b`
    # test, which used bitwise `&` and only worked because x & x == x.
    if "A" <= first <= "Z":
        return first.lower()
    if "a" <= first <= "z":
        return first
    return 0
# ---------------------------------------------------------------------------
# Script body: log in to wenku8, fetch the top list, and for every entry that
# belongs to today's updates download the book as a .txt file into a folder
# named after the bucket returned by shou2() for the title's first character.
# ---------------------------------------------------------------------------

# Seconds to wait on any single HTTP request; without a timeout `requests`
# can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Today's date as YYYY-MM-DD. strftime zero-pads both month and day (the
# previous hand-built string padded only the day) and samples the clock once.
todaytime = datetime.datetime.now().strftime('%Y-%m-%d')

# SECURITY NOTE(review): the account name and password are committed in plain
# text. They remain as fallbacks so existing setups keep working, but the
# password should be rotated and supplied through the environment instead.
username = os.environ.get('WENKU8_USERNAME', 'paranoiam')
userpasswd = os.environ.get('WENKU8_PASSWORD', 'qwe905148')

logindata = {
    'username': username,
    'password': userpasswd,
    'usercookie': 0,
    'action': 'login',
    'submit': '%26%23160%3B%B5%C7%26%23160%3B%26%23160%3B%C2%BC%26%23160%3B',
}
login = req.post('https://www.wenku8.net/login.php', data=logindata,
                 timeout=REQUEST_TIMEOUT)
login.encoding = 'gbk'  # responses are decoded as GBK throughout
logintxt = login.text

if '登录成功' in logintxt:
    print('登陆成功!即将开始爬取今日更新!')
    toplist = req.get('https://www.wenku8.net/modules/article/toplist.php',
                      cookies=login.cookies, timeout=REQUEST_TIMEOUT)
    toplist.encoding = 'gbk'
    toplistxt = toplist.text

    # The number of times today's date occurs on the page is used as the
    # number of updated books.
    update_count = toplistxt.count(todaytime)
    if update_count:
        print('今日有' + str(update_count) + '个更新!')
        m = re.findall(r'style="font-size:13px(.*)>', toplistxt)
        # Only the first `update_count` entries are treated as today's.
        for i in m[:update_count]:
            link = re.findall(r'book/(.*).htm', i)
            if not link:
                # No book id in this entry; skip it instead of crashing.
                print('未能解析书籍链接,已跳过!')
                continue
            book_id = str(link[0])
            downlink = r'http://dl.wenku8.com/down.php?type=txt&id=' + book_id
            namelink = r'https://www.wenku8.net/book/' + book_id + r'.htm'

            # The book page is fetched only to read the title.
            downtext = req.get(namelink, cookies=login.cookies,
                               timeout=REQUEST_TIMEOUT)
            downtext.encoding = 'gbk'
            downtxt = downtext.text
            n = re.findall(r'content="(.*)小说,', downtxt)
            if not n:
                print('未能解析书名,已跳过:' + book_id)
                continue
            name = n[0]
            print(name)

            # Bucket by the first character of the title; an empty title
            # falls back to the catch-all bucket 0.
            shouzm = shou2(name[0]) if name else 0

            d = req.get(downlink, cookies=login.cookies,
                        timeout=REQUEST_TIMEOUT)
            if not d.ok:
                # Do not save an HTTP error page as if it were the book.
                print('下载失败:' + name)
                continue

            # Create the bucket folder on demand so open() cannot fail with
            # FileNotFoundError on a fresh checkout.
            target_dir = os.path.join('.', str(shouzm))
            os.makedirs(target_dir, exist_ok=True)
            # Path separators inside a title would otherwise be read as
            # sub-directories.
            safe_name = re.sub(r'[\\/]', '_', name)
            with open(os.path.join(target_dir, safe_name + r'.txt'), 'wb') as f:
                f.write(d.content)
        print('全部更新下载完成!')
    else:
        print('未检测到更新!')
else:
    print('登陆失败,请检查账号密码!')