class Git2S3:
    def __init__(self, **kwargs):
        self.env = models.EnvConfig(**kwargs)
-        self.logger = kwargs.get("logger", config.default_logger(self.env.log))
+        self.logger = kwargs.get("logger", config.default_logger(self.env))
        self.repo = git.Repo()
        self.session = requests.Session()
        self.session.headers = {
-            'Accept': 'application/vnd.github+json',
-            'Authorization': f'Bearer {self.env.git_token}',
-            'X-GitHub-Api-Version': '2022-11-28',
-            'Content-Type': 'application/x-www-form-urlencoded',
+            "Accept": "application/vnd.github+json",
+            "Authorization": f"Bearer {self.env.git_token}",
+            "X-GitHub-Api-Version": "2022-11-28",
+            "Content-Type": "application/x-www-form-urlencoded",
        }

    def get_all_repos(self) -> Generator[Dict[str, str]]:
@@ -30,10 +30,9 @@ def get_all_repos(self) -> Generator[Dict[str, str]]:
            Generator[Dict[str, str]]:
            Yields a dictionary of each repo's information.
        """
-        # todo: Add debug level logging for each API call, along with number of repos fetched and in break statements
-        # Add .pre-commit config and pyproject.toml
        idx = 1
        while True:
+            self.logger.debug("Fetching repos from page %d", idx)
            try:
                response = self.session.get(
                    url=f"{self.env.git_api_url}orgs/{self.env.git_owner}/repos?per_page=1&page={idx}"
@@ -42,9 +41,13 @@ def get_all_repos(self) -> Generator[Dict[str, str]]:
                    break
                json_response = response.json()
                if json_response:
+                    self.logger.debug(
+                        "Repositories in page %d: %d", idx, len(json_response)
+                    )
                    yield from json_response
                    idx += 1
                else:
+                    self.logger.debug("No repos found in page: %d, ending loop.", idx)
                    break

    def worker(self, repo: Dict[str, str]):
@@ -53,15 +56,15 @@ def worker(self, repo: Dict[str, str]):
        Args:
            repo: Repository information as JSON payload.
        """
-        self.logger.info("Cloning %s", repo.get('name'))
-        repo_dest = os.path.join(self.env.clone_dir, repo.get('name'))
+        self.logger.info("Cloning %s", repo.get("name"))
+        repo_dest = os.path.join(self.env.clone_dir, repo.get("name"))
        if not os.path.isdir(repo_dest):
            os.makedirs(repo_dest)
        try:
-            self.repo.clone_from(repo.get('clone_url'), str(repo_dest))
+            self.repo.clone_from(repo.get("clone_url"), str(repo_dest))
        except GitCommandError as error:
            msg = error.stderr or error.stdout or ""
-            msg = msg.strip().replace('\n', '').replace("'", "").replace('"', '')
+            msg = msg.strip().replace("\n", "").replace("'", "").replace('"', "")
            self.logger.error(msg)
            # Raise an exception to indicate that the thread failed
            raise Exception(msg)
@@ -76,15 +79,15 @@ def cloner(self):
        with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            for repo in self.get_all_repos():
                future = executor.submit(self.worker, repo)
-                futures[future] = repo.get('name')
+                futures[future] = repo.get("name")
            for future in as_completed(futures):
                if future.exception():
                    self.logger.error(
                        "Thread processing for '%s' received an exception: %s",
                        futures[future],
-                        future.exception()
+                        future.exception(),
                    )


-if __name__ == '__main__':
+if __name__ == "__main__":
    Git2S3().cloner()
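Note on the constructor change above: since `self.logger` is resolved via `kwargs.get("logger", ...)`, a caller can inject a pre-configured logger instead of the package default, and the new page-level `debug` calls will flow through it. A minimal usage sketch, assuming the logger name and `basicConfig` setup below (they are illustrative, not part of this PR), with the remaining `EnvConfig` fields (git_token, git_owner, clone_dir, ...) supplied however `models.EnvConfig(**kwargs)` sources them:

import logging

# Surface the new debug-level logs added in this PR (assumed setup, not from the PR)
logging.basicConfig(level=logging.DEBUG)
custom_logger = logging.getLogger("git2s3")

# Passing a logger overrides config.default_logger(self.env)
Git2S3(logger=custom_logger).cloner()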