-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathranking_detect_jpn.py
69 lines (61 loc) · 2.42 KB
/
ranking_detect_jpn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import csv
import datetime
import os
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Scrape the ranking table for each world from the official site and store
# one CSV snapshot per (date, world) under ``const_dir``. Snapshots that are
# just over a year old are deleted so the repository does not grow unbounded.
#
# TODO: the new official site might allow this to be automated?
# On GitHub the timezone is UTC.
# now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
now = datetime.datetime.now()
today_str = now.strftime("%Y%m%d")
const_dir = 'out/jpn_detective/'
Path(const_dir).mkdir(parents=True, exist_ok=True)

# Seconds to wait for the ranking server before giving up. Without a timeout
# requests would block forever on a stalled connection.
_REQUEST_TIMEOUT_SEC = 30


def _fetch_ranking_html(url):
    """Download *url* and return the response body as text.

    Raises:
        requests.exceptions.RequestException: on connection problems, on a
            timeout, or (via ``raise_for_status``) on an HTTP error status.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SEC)
    response.raise_for_status()
    return response.text


def _parse_ranking_table(html):
    """Convert the first ``<table>`` found in *html* into rows of cell text.

    Returns:
        A list of rows. The first row holds the ``<th>`` texts of the first
        ``<tr>`` inside ``<thead>``; each following row holds the ``<td>``
        texts of one ``<tr>`` inside ``<tbody>``.

    Raises:
        RuntimeError: if the page has no ``<table>``, or the table lacks the
            ``<thead>``/``<tr>``/``<tbody>`` elements this parser relies on.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # FIXME: pick the table after inspecting the actual page structure.
    table = soup.find('table')
    # table = soup.find('table', {'data-v-d31fa134': ''})
    print(table)
    if table is None:
        raise RuntimeError('ranking page contains no <table> element')

    thead = table.find('thead')
    tbody = table.find('tbody')
    if thead is None or thead.tr is None or tbody is None:
        raise RuntimeError('ranking table is missing <thead>/<tr>/<tbody>')

    # Header row first, then one row per <tr> of the body.
    rows = [[th.text for th in thead.tr.find_all('th')]]
    rows.extend([td.text for td in tr.find_all('td')]
                for tr in tbody.find_all('tr'))
    return rows


def _write_csv(path, rows):
    """Write *rows* to *path* as UTF-8 CSV with ``\\n`` line endings.

    The csv module quotes cells that contain commas, quotes or newlines, so
    such cells no longer break the column layout. ``newline=''`` is what the
    csv module requires of the file object; ``lineterminator='\\n'`` keeps
    the line endings the same on every platform.
    """
    with open(path, "w", encoding='utf-8', newline='') as f:
        csv.writer(f, lineterminator='\n').writerows(rows)


# Date part of the snapshot file that is just over one year old. It depends
# only on ``now``, so it is computed once instead of on every iteration.
last_year_str = (now + datetime.timedelta(days=-367)).strftime("%Y%m%d")

# It looks like about 6 days can be traced back. The script is written so
# that running it every day still works.
for dr in range(0, 6):
    today_str = (now + datetime.timedelta(days=-dr)).strftime("%Y%m%d")
    print(today_str)
    for sn in range(1, 4):
        # NOTE(review): the `d=` query parameter is left empty, so every
        # value of `dr` requests the same URL and only the output file name
        # differs. Presumably a date was meant to be passed here -- confirm
        # against the site before changing it.
        url = 'https://www.redstoneonline.jp/rank?t=2&s=WORLD0' + str(sn) + '&c=&d='
        print(url)
        html_ranking = _fetch_ranking_html(url)
        print(html_ranking)

        mat = _parse_ranking_table(html_ranking)  # rows to be saved

        # Output: one file per date and world, e.g. 20240101_1.csv
        t_server = today_str + '_' + str(sn)
        _write_csv(const_dir + t_server + ".csv", mat)

        # GitHub complains when the number of files grows too large, so the
        # snapshot from a bit more than a year ago is deleted. The reasoning:
        # older data can still be recovered from the git commit log.
        tl_server = last_year_str + '_' + str(sn)
        last_year_file = const_dir + tl_server + ".csv"
        print("delete:" + last_year_file)
        try:
            os.remove(last_year_file)
        except FileNotFoundError:
            # Nothing to clean up for this world/date.
            pass