forked from gaotianliuyun/gao
-
Notifications
You must be signed in to change notification settings - Fork 8
/
getsearchtxt.py
118 lines (111 loc) · 3.65 KB
/
getsearchtxt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
import sys
import re
import json
import requests
import time
import traceback
import gzip
# Captures whatever follows "/s/" in a share link (e.g. "<share_id>/<file_id>").
p = re.compile(r'.*/s/(.*)')

# Listing rows that mention cover / screen / 频道 (case-insensitive) are
# dropped by getlist() before they are parsed or written.
skipp = re.compile(r'.*(cover|screen|频道).*', flags=re.IGNORECASE)

# Running count of listing requests; getlist() increments it before each
# request and logs progress on every fifth one.
reqcount = 1

# "share_id/file_id" keys read back from a previous ".raw" output file, used
# to avoid crawling or writing the same entry twice.
sharedict = set()
def getlist(w, shareid, fileid, morepage, timeout=60):
    """Crawl one share folder through the local pikpak proxy and append rows to *w*.

    Each response is treated as newline-separated rows of tab-separated
    fields.  Rows with more than two fields are parsed: field 0 is a share
    path (optionally a link containing "/s/"), field 2 is compared with
    "folder".  Rows not already recorded in ``sharedict`` are written to *w*,
    and rows marked "folder" are crawled recursively.  After a page has been
    processed, another request is issued with ``morepage=True``; crawling of
    this listing stops at the first response that does not contain the text
    "folder" and splits into at most four lines.

    Args:
        w: Writable text stream; every accepted row is written and flushed.
        shareid: Value sent as the ``share_id`` query parameter.
        fileid: Value sent as the ``file_id`` query parameter.
        morepage: Value sent as the ``morepage`` query parameter (``False``
            for the first page of a folder, ``True`` for follow-up pages).
        timeout: Seconds to wait for the proxy before ``requests`` raises
            ``requests.exceptions.Timeout``.  Without a timeout a stalled
            proxy connection would block the crawl forever.

    Raises:
        requests.exceptions.RequestException: If the proxy cannot be reached
            or does not answer within *timeout* seconds.
    """
    global reqcount  # the only module-level name that is rebound here

    # Follow-up pages are fetched in a loop rather than by self-recursion so
    # that a long listing does not deepen the call stack page by page.
    while True:
        reqcount += 1
        if reqcount % 5 == 0:
            print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}", file=sys.stderr)

        url = f'http://192.168.101.188:9978/proxy?do=pikpak&type=list&share_id={shareid}&file_id={fileid}&pass_code=&morepage={morepage}'
        print(f"url: {url}", file=sys.stderr)
        resp = requests.get(url, timeout=timeout)
        content = resp.content.decode('utf-8')
        lines = content.split("\n")

        # A short response with no folder entry ends this listing.
        if "folder" not in content and len(lines) <= 4:
            return

        # str.split always yields at least one element, so lines[0] exists.
        print(f"first line:{lines[0]}", file=sys.stderr)

        for line in lines:
            if skipp.match(line):
                continue
            linearr = line.split('\t')
            if len(linearr) <= 2:
                continue

            m = p.match(linearr[0])
            arr = (m.group(1) if m else linearr[0]).split("/")
            # NOTE(review): these assignments rebind the function's own
            # arguments, so the next-page request below is made with the ids
            # of the last parsed row rather than the ids this call started
            # with.  Kept as in the original; confirm which ids the proxy
            # expects for morepage=True.
            shareid = arr[0]
            fileid = arr[1] if len(arr) > 1 else ""

            if shareid + "/" + fileid in sharedict:
                print(f"skip shareid{shareid} fileid:{fileid}", file=sys.stderr)
                continue

            w.write(line + "\n")
            w.flush()  # keep the output file current if the crawl is interrupted
            if linearr[2] == "folder":
                getlist(w, shareid, fileid, False, timeout)

        # Ask the proxy for the next page on the following iteration.
        morepage = True
def _split_share_path(text):
    """Return the "/"-separated parts of a share path, dropping any ".../s/" link prefix."""
    m = p.match(text)
    return (m.group(1) if m else text).split("/")


def _restore_raw_from_gz(gz_path, raw_path):
    """Best-effort: rewrite *raw_path* from *gz_path*, one stripped line per row."""
    try:
        with gzip.open(gz_path, mode="rt", encoding="utf-8") as gz:
            print(f"found gz raw file:{gz_path}, extract it", file=sys.stderr)
            with open(raw_path, "w", encoding="utf-8") as out:
                for line in gz:
                    out.write(line.strip() + "\n")
    except FileNotFoundError:
        # The archive is optional; its absence is not an error.
        print(f"no gz raw file:{gz_path}", file=sys.stderr)
    except Exception:
        # A damaged archive must not stop the crawl: report it and continue
        # with whatever raw file is on disk.
        traceback.print_exc()


def _load_seen_ids(raw_path):
    """Add every "share_id/file_id" found in *raw_path* to ``sharedict``.

    Returns True if the file could be opened, False otherwise.
    """
    try:
        old = open(raw_path, "r", encoding="utf-8")
    except OSError:
        return False
    print("found old raw file")
    with old:
        for line in old:
            parts = _split_share_path(line.split("\t")[0])
            # Only rows that carry both a share id and a file id are recorded.
            if len(parts) > 1:
                sharedict.add(parts[0] + "/" + parts[1])
    print(f"old raw file record:{len(sharedict)}")
    return True


def main():
    """Crawl every share listed in the JSON file named by ``sys.argv[1]``.

    Steps:
      1. If ``<argv1>.raw.gz`` exists, ``<argv1>.raw`` is overwritten with
         its decompressed, whitespace-stripped lines.
      2. Entries already present in ``<argv1>.raw`` are loaded into
         ``sharedict`` so they are not fetched again.
      3. The JSON file is loaded and iterated; each element's ``type_id``
         is parsed into a share id and an optional file id, and
         ``getlist`` appends the crawl results to ``<argv1>.raw``.

    Raises:
        SystemExit: If no input file was given on the command line.
    """
    if len(sys.argv) < 2:
        raise SystemExit("usage: getsearchtxt.py <json-file>")

    src = sys.argv[1]
    raw_path = src + ".raw"

    _restore_raw_from_gz(raw_path + ".gz", raw_path)

    if not _load_seen_ids(raw_path):
        print("no old raw file")

    # "a+" keeps earlier results and creates the file when it does not exist.
    with open(raw_path, "a+", encoding="utf-8") as w, \
            open(src, "r", encoding="utf-8") as f:
        entries = json.load(f)
        for entry in entries:
            type_id = entry.get("type_id")
            if not isinstance(type_id, str):
                # Without a string type_id there is nothing to crawl.
                print(f"skip entry without type_id:{entry}", file=sys.stderr)
                continue
            parts = _split_share_path(type_id)
            shareid = parts[0]
            fileid = parts[1] if len(parts) > 1 else ""
            if shareid + "/" + fileid in sharedict:
                continue
            getlist(w, shareid, fileid, False)
# Run the crawl only when executed as a script, so that importing this module
# (for reuse or inspection) does not start network requests and file writes.
if __name__ == "__main__":
    main()