Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

添加socks4/socks5支持,增加了几个源(包括国内外的) #149

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
修改一个编码bug,以及添加socks5支持
OoWoodOne committed Jul 9, 2018
commit 5e3a4c951945f4c3a56851b0619ebda68d1a3495
12 changes: 6 additions & 6 deletions config.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,12 @@
'pattern': ".//*[@id='footer']/div/table/tr[position()>1]",
'position': {'ip': './td[1]', 'port': './td[2]', 'type': './td[4]', 'protocol': ''}
},
{
'urls': ['http://www.proxylists.net/proxylists.xml'],
'type': 'xpath',
'pattern': ".//proxy[position()>1]",
'position': {'ip': './ip', 'port': './port', 'type': '', 'protocol': ''}
},
{
'urls': ['http://cn-proxy.com/', 'http://cn-proxy.com/archives/218'],
'type': 'xpath',
@@ -95,12 +101,6 @@
'pattern': r'<tr><td>(\d+\.\d+\.\d+\.\d+)<SCRIPT type=text/javascript>document.write\(\"\:\"(.+)\)</SCRIPT></td><td>(HTTP|SOCKS4)\s*',
'position': {'ip': 0, 'port': 1, 'type': -1, 'protocol': 2}
},
{
'urls': ['http://www.proxylists.net/proxylists.xml'],
'type': 'xpath',
'pattern': ".//proxy[position()>1]",
'position': {'ip': './ip', 'port': './port', 'type': '', 'protocol': ''}
},
{
'urls': ['https://www.xroxy.com/proxyrss.xml'],
'type': 'xpath',
2 changes: 1 addition & 1 deletion db/SqlHelper.py
Original file line number Diff line number Diff line change
@@ -118,7 +118,7 @@ def select(self, count=None, conditions=None):
else:
conditions = []

query = self.session.query(Proxy.ip, Proxy.port, Proxy.score)
query = self.session.query(Proxy.ip, Proxy.port, Proxy.protocol, Proxy.score)
if len(conditions) > 0 and count:
for condition in conditions:
query = query.filter(condition)
5 changes: 4 additions & 1 deletion spider/HtmlPraser.py
Original file line number Diff line number Diff line change
@@ -49,7 +49,10 @@ def XpathPraser(self, response, parser):
:return:
'''
proxylist = []
root = etree.HTML(response)
try:
root = etree.HTML(response)
except:
root = etree.HTML(bytes(bytearray(response, encoding='utf-8')))
proxys = root.xpath(parser['pattern'])
for proxy in proxys:
try:
19 changes: 13 additions & 6 deletions validator/Validator.py
Original file line number Diff line number Diff line change
@@ -88,8 +88,7 @@ def detect_proxy(selfip, proxy, queue2=None):
'''
ip = proxy['ip']
port = proxy['port']
proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
protocol, types, speed = getattr(sys.modules[__name__],config.CHECK_PROXY['function'])(selfip, proxies)#checkProxy(selfip, proxies)
protocol, types, speed = getattr(sys.modules[__name__],config.CHECK_PROXY['function'])(selfip, ip, port)#checkProxy(selfip, proxies)
if protocol >= 0:
proxy['protocol'] = protocol
proxy['types'] = types
@@ -101,7 +100,7 @@ def detect_proxy(selfip, proxy, queue2=None):
return proxy


def checkProxy(selfip, proxies):
def checkProxy(selfip, ip, port):
'''
用来检测代理的类型,突然发现,免费网站写的信息不靠谱,还是要自己检测代理的类型
:param
@@ -110,6 +109,7 @@ def checkProxy(selfip, proxies):
protocol = -1
types = -1
speed = -1
proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
http, http_types, http_speed = _checkHttpProxy(selfip, proxies)
https, https_types, https_speed = _checkHttpProxy(selfip, proxies, False)
if http and https:
@@ -125,9 +125,16 @@ def checkProxy(selfip, proxies):
protocol = 1
speed = https_speed
else:
types = -1
protocol = -1
speed = -1
proxies = {"http": "socks5://%s:%s" % (ip, port), "https": "socks5://%s:%s" % (ip, port)}
socks5, socks5_types, socks5_speed = _checkHttpProxy(selfip, proxies)
if socks5:
types = socks5_types
protocol = 3
speed = socks5_speed
else:
types = -1
protocol = -1
speed = -1
return protocol, types, speed