-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathsparks_tracks.py
100 lines (74 loc) · 2.86 KB
/
sparks_tracks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# This still works, so I'm not touching it.
# It still works; don't touch it.
import logging
import requests
import os
from datetime import datetime
from bot import constants
# Constants
# GitHub repository that hosts the tracked file, and the file's path inside it.
REPO_OWNER = "FNLookup"
REPO_NAME = "data"
FILE_PATH = "festival/spark-tracks.json"
# Base REST API URL for the repository and its commit-listing endpoint.
API_URL = f"https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}"
COMMITS_URL = f"{API_URL}/commits"
# Local directory where each downloaded revision of the file is stored.
DOWNLOAD_DIR = "./json"
# Sent as a token Authorization header when truthy; otherwise requests are
# made without authentication.
GITHUB_TOKEN = constants.GITHUB_PAT
def get_commit_history():
    """Return every commit that touched ``FILE_PATH``, across all result pages.

    Queries ``COMMITS_URL`` one page at a time (100 commits per page) and
    stops at the first empty page. When ``GITHUB_TOKEN`` is truthy it is sent
    as a token ``Authorization`` header.

    Returns:
        list: The decoded JSON commit objects, in the order the API
        returned them.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        requests.Timeout: If a request does not complete within the timeout.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urlencode

    headers = {}
    if GITHUB_TOKEN:
        headers["Authorization"] = f"token {GITHUB_TOKEN}"

    params = {"path": FILE_PATH, "per_page": 100}
    all_commits = []
    page = 1
    while True:
        params["page"] = page
        # Percent-encode the query values instead of joining them by hand;
        # "/" stays literal so the file path remains readable in the log.
        query = urlencode(params, safe="/")
        url = f"{COMMITS_URL}?{query}"
        logging.debug(f'[GET] {url}')
        # requests never times out by default, so a stalled connection would
        # block the caller forever without an explicit limit.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        commits = response.json()
        if not commits:
            # An empty page means the history has been exhausted.
            break
        all_commits.extend(commits)
        page += 1
    return all_commits
def format_commit_timestamp(commit_timestamp):
    """Convert a commit date string into a file-name-friendly timestamp.

    The colons in the time part are replaced with dots. Two input layouts
    are accepted:

    * ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` -> ``YYYY-MM-DDTHH.MM.SS.fff``
      (fraction truncated to milliseconds)
    * ``YYYY-MM-DDTHH:MM:SS<offset>`` -> ``YYYY-MM-DDTHH.MM.SS``
      (the UTC offset is parsed but not written out)

    Raises:
        ValueError: If the string matches neither layout.
    """
    fractional_layout = "%Y-%m-%dT%H:%M:%S.%fZ"
    offset_layout = "%Y-%m-%dT%H:%M:%S%z"
    try:
        moment = datetime.strptime(commit_timestamp, fractional_layout)
        with_micros = moment.strftime("%Y-%m-%dT%H.%M.%S.%f")
        # %f always renders six digits; dropping three leaves milliseconds.
        return with_micros[:-3]
    except ValueError:
        # No fractional seconds: fall back to the offset-aware layout.
        moment = datetime.strptime(commit_timestamp, offset_layout)
        return moment.strftime("%Y-%m-%dT%H.%M.%S")
def download_file_at_commit(commit_sha, commit_timestamp):
    """Download ``FILE_PATH`` as of one commit and store it in ``DOWNLOAD_DIR``.

    The file is saved as ``spark-tracks_<timestamp>.json``, where the
    timestamp is ``commit_timestamp`` passed through
    ``format_commit_timestamp``. When ``GITHUB_TOKEN`` is truthy it is sent
    as a token ``Authorization`` header.

    Args:
        commit_sha: Commit SHA used in the raw-content URL.
        commit_timestamp: Commit date string, used only to build the file
            name.

    Raises:
        requests.HTTPError: If the download responds with an error status.
        requests.Timeout: If the request does not complete within the
            timeout.
        OSError: If the directory or file cannot be created or written.
    """
    headers = {}
    if GITHUB_TOKEN:
        headers["Authorization"] = f"token {GITHUB_TOKEN}"

    raw_url = f"https://raw.githubusercontent.com/{REPO_OWNER}/{REPO_NAME}/{commit_sha}/{FILE_PATH}"
    logging.debug(f'[GET] {raw_url}')
    # requests never times out by default; bound the wait explicitly.
    response = requests.get(raw_url, headers=headers, timeout=30)
    response.raise_for_status()

    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    formatted_timestamp = format_commit_timestamp(commit_timestamp)
    file_name = f"spark-tracks_{formatted_timestamp}.json"
    file_path = os.path.join(DOWNLOAD_DIR, file_name)

    # The final file's existence is what marks a commit as downloaded, so a
    # half-written file must never appear under that name. Write to a
    # temporary name first and rename it into place once complete.
    temp_path = f"{file_path}.part"
    try:
        with open(temp_path, 'w', encoding='utf-8') as f:
            f.write(response.text)
        os.replace(temp_path, file_path)
    except BaseException:
        # Best-effort cleanup of the partial file; the original error is
        # what the caller needs to see.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise

    logging.debug(f"Downloaded: {file_name}")
def already_downloaded(commit_timestamp):
    """Return True if the file for this commit timestamp exists in ``DOWNLOAD_DIR``.

    The expected name is ``spark-tracks_<timestamp>.json``, with the
    timestamp produced by ``format_commit_timestamp``.
    """
    stamp = format_commit_timestamp(commit_timestamp)
    expected_path = os.path.join(DOWNLOAD_DIR, f"spark-tracks_{stamp}.json")
    return os.path.exists(expected_path)
def main():
    """Download every revision of the tracked file that is not stored yet.

    Walks the commit history and, for each commit whose file is not already
    present locally, downloads the file as of that commit.
    """
    for entry in get_commit_history():
        sha, committed_at = entry["sha"], entry["commit"]["committer"]["date"]
        if not already_downloaded(committed_at):
            download_file_at_commit(sha, committed_at)


# Run the sync only when executed as a script, not when imported.
if __name__ == "__main__":
    main()