1
+ """fetcher is responsible for downloading data."""
1
2
import asyncio
2
3
import aiohttp
3
4
import os
6
7
import requests
7
8
from itertools import repeat
8
9
9
-
10
10
# Resolve the data home directory once at import time.
# NEONWRANGLER_HOME overrides the default of ~/.neonwranglerpy.
if 'NEONWRANGLER_HOME' in os.environ:
    fury_home = os.environ['NEONWRANGLER_HOME']
else:
    # NOTE(review): the original called `pjoin`, which is not imported in any
    # visible line of this module; os.path.join is the intended behavior.
    fury_home = os.path.join(os.path.expanduser('~'), '.neonwranglerpy')
14
14
15
15
16
16
async def _request (session , url ):
17
- """An asynchronous function to get the request data as json.
17
+ """Asynchronous function to get the request data as json.
18
18
19
19
Parameters
20
20
----------
@@ -35,8 +35,8 @@ async def _request(session, url):
35
35
return await response .json ()
36
36
37
37
38
- async def _download (session , url , filename , sem ,month , size = None ):
39
- """An asynchronous function to download file from url.
38
+ async def _download (session , url , filename , sem , month , size = None ):
39
+ """Asynchronous function to download file from url.
40
40
41
41
Parameters
42
42
----------
@@ -46,6 +46,8 @@ async def _download(session, url, filename, sem,month, size=None):
46
46
The URL of the downloadable file
47
47
filename : string
48
48
Name of the downloaded file (e.g. BoxTextured.gltf)
49
+ sem: asyncio.Semaphore
50
+ It keeps tracks number of requests.
49
51
size : int, optional
50
52
Length of the content in bytes
51
53
"""
@@ -68,12 +70,11 @@ async def _fetcher(data, rate_limit, headers, files_to_stack_path="filesToStack"
68
70
"""Fetcher for downloading files."""
69
71
sem = asyncio .Semaphore (rate_limit )
70
72
data = data ['data' ]
71
- dir_name = '.' .join ([
72
- 'NEON' , data ['productCode' ], data ['siteCode' ], data ['month' ], data ['release' ]
73
- ])
74
- print (f"{ data ['siteCode' ]} " + "-" + f"{ data ['month' ]} " )
73
+ dir_name = '.' .join (
74
+ ['NEON' , data ['productCode' ], data ['siteCode' ], data ['month' ], data ['release' ]])
75
75
zip_dir_path = os .path .join (files_to_stack_path , f'{ dir_name } ' )
76
- os .mkdir (zip_dir_path )
76
+ if not os .path .isdir (zip_dir_path ):
77
+ os .mkdir (zip_dir_path )
77
78
78
79
d_urls = [f ['url' ] for f in data ["files" ]]
79
80
sizes = [f ['size' ] for f in data ["files" ]]
@@ -91,11 +92,13 @@ async def _fetcher(data, rate_limit, headers, files_to_stack_path="filesToStack"
91
92
92
93
93
94
async def vst_fetcher(item, rate_limit, headers, files_to_stack_path="filesToStack"):
    """Retrieve the vst product metadata for *item* and download its files.

    Parameters
    ----------
    item : string
        API URL returning the vst product JSON.
    rate_limit : int
        Maximum number of concurrent download requests.
    headers : dict
        HTTP headers forwarded to the downloader.
    files_to_stack_path : string, optional
        Directory under which the downloaded files are stacked.
    """
    # The metadata request is synchronous; only the downloads are async.
    product_json = requests.get(item).json()
    await _fetcher(product_json, rate_limit, headers, files_to_stack_path)
96
98
97
99
98
100
def fetcher (batch , data_type , rate_limit , headers , files_to_stack_path ):
101
+ """Fetcher calls the vst/aop fetcher according to use case."""
99
102
try :
100
103
if data_type == 'vst' :
101
104
asyncio .run (vst_fetcher (batch , rate_limit , headers , files_to_stack_path ))
@@ -106,13 +109,21 @@ def fetcher(batch, data_type, rate_limit, headers, files_to_stack_path):
106
109
print (f"Error processing URLs: { e } " )
107
110
108
111
109
def run_threaded_batches(batches,
                         data_type,
                         rate_limit,
                         headers=None,
                         savepath='/filesToStack'):
    """Distribute *batches* across worker threads and run the fetchers.

    Parameters
    ----------
    batches : list
        Work items; each element is passed to ``fetcher`` as its ``batch``.
    data_type : string
        Either 'vst' or 'aop'; forwarded to ``fetcher``.
    rate_limit : int
        Maximum number of concurrent requests per fetcher.
    headers : dict, optional
        HTTP headers forwarded to the fetchers.
    savepath : string, optional
        Directory where downloaded files are stacked.
    """
    if not batches:
        # Nothing to do; also avoids ThreadPoolExecutor(max_workers=0),
        # which raises ValueError.
        return
    # os.cpu_count() may return None on exotic platforms; fall back to 1.
    num_cores = os.cpu_count() or 1
    # Limit threads to CPU cores or the number of batches, whichever is smaller.
    num_threads = min(num_cores, len(batches))

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        for i in range(num_threads):
            # Round-robin slice distributes the batches evenly among threads.
            batch = batches[i::num_threads]
            executor.map(fetcher, batch, repeat(data_type), repeat(rate_limit),
                         repeat(headers), repeat(savepath))
0 commit comments