-
Notifications
You must be signed in to change notification settings - Fork 26
/
getmd.py
53 lines (45 loc) · 1.48 KB
/
getmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import requests, html2text, re
# Request headers: present a desktop-browser User-Agent string to the server.
headers = {
    'User-Agent': ' '.join((
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; ServiceUI 14)',
        'AppleWebKit/537.36 (KHTML, like Gecko)',
        'Chrome/70.0.3538.102',
        'Safari/537.36',
        'Edge/18.18363',
    )),
}
def url_to_markdown(url, timeout=30):
    """Download *url* and return the page converted from HTML to Markdown.

    Args:
        url: Address of the page to fetch.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument (seconds). Without it, requests waits indefinitely on
            a server that never answers.

    Returns:
        The Markdown text produced by ``html2text.html2text``.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (for example when the timeout expires).
    """
    # Send the request using the module-level browser-like headers.
    r = requests.get(url=url, headers=headers, timeout=timeout)
    # Decode with the encoding detected from the body to avoid garbled text;
    # background: https://blog.csdn.net/lilongsy/article/details/122140098
    r.encoding = r.apparent_encoding
    # Convert the HTML document to Markdown.
    return html2text.html2text(r.text)
def pull_urls(urls_list):
    """Fetch every URL in *urls_list* and return the Markdown texts joined.

    Args:
        urls_list: Iterable of URLs; each one is passed to
            ``url_to_markdown``.

    Returns:
        One string holding the converted pages back to back, in input
        order, with no separator. An empty input yields ``''``.
    """
    # str.join assembles the result in one pass instead of re-copying the
    # accumulated text on every ``+=``.
    return ''.join(url_to_markdown(url) for url in urls_list)
# Find a URL inside a line of text with a regular expression.
def get_url(line):
    """Return the match for the first URL found in *line*, or ``None``.

    The pattern accepts the schemes ``http``, ``https``, ``ftp`` and
    ``file``. On the returned ``re.Match`` object, group 0 is the whole
    URL and group 1 is the scheme. The final character of a match cannot
    be one of ``? ! : , . ;``, so trailing sentence punctuation is left out.
    """
    return re.search(
        r'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]',
        line,
    )
# Collect the URLs found in a multi-line block of text.
def check_urls(urls):
    """Return the distinct URLs found in *urls*, one candidate per line.

    Args:
        urls: Text split on ``'\\n'``; each line is searched with
            ``get_url`` and contributes at most its first URL.

    Returns:
        A list of unique URL strings in the order they first appear.
        Lines without a URL are ignored.
    """
    seen = set()
    unique = []
    for line in urls.split('\n'):
        match = get_url(line)
        if match is None:
            continue
        found = match[0]
        # De-duplicate while keeping first-seen order; converting through a
        # set would return the URLs in an arbitrary, run-dependent order.
        if found not in seen:
            seen.add(found)
            unique.append(found)
    return unique
def urls_lines(urls_list):
    """Join the strings in *urls_list* into one text, one item per line.

    Items are separated by ``'\\n'``; no trailing newline is added.
    """
    return '\n'.join(urls_list)
import base64 , hashlib, time
# Build the PASSKEY.
def make_passkey(str=''):
    """Return an 8-character key derived from the current hour and *str*.

    The local time formatted as ``%Y%m%d-%H`` is concatenated with *str*,
    hashed with SHA-256, Base64-encoded, and characters 8..15 of that
    encoding are returned. The result therefore changes whenever the local
    hour (or *str*) changes.
    """
    hour_stamp = time.strftime("%Y%m%d-%H", time.localtime())
    digest = hashlib.sha256((hour_stamp + str).encode("utf-8")).digest()
    return base64.b64encode(digest).decode("utf-8")[8:16]