-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
78 lines (65 loc) · 2.72 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python2
# -*- coding:utf-8 -*-
"""
Created on Fri Aug 15:21:00 2018
@author: jianghao
"""
import sys
import getopt
import os
from crawler import shanbay
# Python 2-only idiom: reload(sys) makes sys.setdefaultencoding available
# again (the interpreter removes it from the sys namespace during startup),
# so the process-wide default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")
# Version string printed by usage() and by the -v command-line option.
__version__ = "1.0.0"
def usage():
    """Print the version banner followed by a short summary of the options."""
    # Single-argument print(...) behaves identically under the Python 2
    # print statement, so the output format is unchanged.
    print(u"Words Crawler: version " + str(__version__))
    # Fixed the misspelled "Uasage" in the user-facing help text.
    print(u"Usage :-h help \n")
    print(u"-o operation for job\n")
if __name__ == '__main__':
    # Make the directory containing this script importable.
    # NOTE(review): `from crawler import shanbay` at the top of the file has
    # already run by the time this executes, so this only helps later imports.
    append_dir = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(append_dir)

    # "-v" and "-h" are plain flags; only "-o" takes a value. The previous
    # spec "v:o:h" made "-v" swallow the next argument (e.g. "-v -o shanbay"
    # consumed "-o" as the value of "-v").
    try:
        opts, args = getopt.getopt(sys.argv[1:], "vo:h")
    except getopt.GetoptError as err:
        # Unknown option or missing value: report it instead of a traceback.
        print(str(err))
        usage()
        sys.exit(2)

    operation = ''
    for op, value in opts:
        if op == '-v':
            print(u"Words Crawler: version " + str(__version__))
        elif op == '-o':
            operation = value
        elif op == '-h':
            usage()
            sys.exit()

    if operation == '':
        # Message reads: "-o is a required argument".
        print(u"-o 参数是必须参数")
        # sys.exit flushes stdio buffers on the way out; os._exit does not,
        # so the message above could be lost when stdout is piped.
        sys.exit(1)
    elif operation == 'shanbay':
        # Other word-list URLs left in place by the author for reference
        # (presumably used in earlier runs -- confirm before removing).
        #url = "https://www.shanbay.com/wordlist/34/63685/?page="
        #url = "https://www.shanbay.com/wordlist/34/63688/?page=" --download
        #url = "https://www.shanbay.com/wordlist/34/63691/?page="
        #url = "https://www.shanbay.com/wordlist/34/63694/?page="
        #url = "https://www.shanbay.com/wordlist/34/63697/?page="
        #url = "https://www.shanbay.com/wordlist/34/63700/?page="
        #url = "https://www.shanbay.com/wordlist/34/63703/?page="
        #url = "https://www.shanbay.com/wordlist/34/63706/?page="
        #url = "https://www.shanbay.com/wordlist/34/63709/?page="
        #url = "https://www.shanbay.com/wordlist/34/63712/?page="
        #url = "https://www.shanbay.com/wordlist/34/63715/?page="
        #url = "https://www.shanbay.com/wordlist/34/63718/?page="
        #url = "https://www.shanbay.com/wordlist/34/63721/?page="
        #url = "https://www.shanbay.com/wordlist/34/63724/?page="
        #url = "https://www.shanbay.com/wordlist/34/63727/?page="
        #url = "https://www.shanbay.com/wordlist/34/63730/?page="
        #url = "https://www.shanbay.com/wordlist/34/63733/?page="
        #url = "https://www.shanbay.com/wordlist/34/63736/?page="
        # url = "https://www.shanbay.com/wordlist/34/63739/?page="??
        #url = "https://www.shanbay.com/wordlist/34/63742/?page="
        #url = "https://www.shanbay.com/wordlist/34/63745/?page="
        #url = "https://www.shanbay.com/wordlist/34/63748/?page="
        #url = "https://www.shanbay.com/wordlist/34/63751/?page="
        #url = "https://www.shanbay.com/wordlist/34/63754/?page="
        #url = "https://www.shanbay.com/wordlist/34/63757/?page="
        #url = "https://www.shanbay.com/wordlist/34/63760/?page="
        #url = "https://www.shanbay.com/wordlist/34/63763/?page="
        #url = "https://www.shanbay.com/wordlist/34/63766/?page="
        url = "https://www.shanbay.com/wordlist/34/108313/?page="
        # NOTE(review): the meaning of the second argument (11) is defined by
        # crawler.shanbay -- presumably a page count; confirm against that module.
        _shanbay = shanbay.shanbay(url, 11)
        _shanbay.run()
    else:
        # Previously an unrecognised operation fell through silently with
        # exit status 0; report it so typos are noticed.
        print(u"unknown operation: " + operation)
        usage()
        sys.exit(1)