testcases.py · 42 lines (38 loc) · 1.16 KB
'''Add URLs to the crawl queue. This test variant pushes a few
hard-coded test cases; the full script enqueued the Alexa top 1M
from top-1m.csv.bz2, four per second.'''
import json
import sys

import redis

# Leftover from the Alexa-list variant; unused here, since only the
# hard-coded test cases below are enqueued.
infile = sys.argv[1] if len(sys.argv) == 2 else 'top-1m.csv.bz2'
testcases = [
    # 'http://fre3ecreditreport.com',
# 'http://sphider.eu',
# 'http://qq.com',
# 'http://taobao.com',
# 'http://googleusercontent.com',
# 'http://mail.ru',
# 'http://tumblr.com',
    'http://pinterest.com',
# 'http://google.com.br',
# 'http://fc2.com',
# 'http://google.ru',
# 'http://conduit.com',
# 'http://paypal.com',
# 'http://xvideos.com',
# 'http://amazon.co.jp',
# 'http://ask.com',
# 'http://babylon.com',
    # 'http://craigslist.org',
# 'http://localhost:9999',
# 'http://ckanich.uicbits.net/jsredir-start',
# 'http://www.searchtools.com/test/redirect/meta-refresh-1.html',
# 'http://www.searchtools.com/test/redirect/meta-refresh-10.html'
]
r = redis.StrictRedis(host='localhost', port=6379, db=0)
r.delete('resque:crawlqueue')  # start from an empty queue
for url in testcases:
    # Queue entries are JSON objects with a single 'url' key.
    insertme = json.dumps({'url': url})
    r.lpush('resque:crawlqueue', insertme)
    print("inserted", url)
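# --- Not part of the original file: a minimal sketch of the consumer
# side, assuming a worker process pops JSON entries from the same Redis
# list. The actual crawler is not shown in this repo's file; 'crawling'
# below is a hypothetical placeholder for the real crawl logic.
#
#   import json
#   import redis
#
#   r = redis.StrictRedis(host='localhost', port=6379, db=0)
#   while True:
#       # BRPOP blocks until an entry is available and returns a
#       # (key, value) pair; popping from the right of a list filled
#       # with LPUSH yields FIFO order.
#       _, raw = r.brpop('resque:crawlqueue')
#       job = json.loads(raw)
#       print("crawling", job['url'])  # replace with real crawl logic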