@@ -3,7 +3,10 @@
 import os
 from concurrent.futures import ThreadPoolExecutor
 from os.path import join as pjoin
+import requests
 from itertools import repeat
+
+
 if 'NEONWRANGLER_HOME' in os.environ:
     fury_home = os.environ['NEONWRANGLER_HOME']
 else:
@@ -32,7 +35,7 @@ async def _request(session, url):
         return await response.json()


-async def _download(session, url, filename, sem, size=None):
+async def _download(session, url, filename, sem, month, size=None):
     """An asynchronous function to download file from url.

     Parameters
@@ -46,8 +49,8 @@ async def _download(session, url, filename, sem, size=None):
     size : int, optional
         Length of the content in bytes
     """
+    # print(month)
     if not os.path.exists(filename):
-        print(f'Downloading: {filename}')
         async with sem:
             async with session.get(url) as response:
                 size = response.content_length if not size else size
@@ -61,37 +64,55 @@ async def _download(session, url, filename, sem, size=None):
                     # update_progressbar(progress, size)


-async def _fetcher(batch, rate_limit, headers):
+async def _fetcher(data, rate_limit, headers, files_to_stack_path="filesToStack"):
     """Fetcher for downloading files."""
     sem = asyncio.Semaphore(rate_limit)
+    data = data['data']
     dir_name = '.'.join([
-        'NEON', batch['productCode'], batch['siteCode'], batch['month'], batch['release']
+        'NEON', data['productCode'], data['siteCode'], data['month'], data['release']
     ])
-    d_urls = [file['url'] for file in batch["files"]]
-    sizes = [file['size'] for file in batch["files"]]
-    f_names = [file['name'] for file in batch["files"]]
-    f_paths = [pjoin(dir_name, name) for name in f_names]
+    print(f"{data['siteCode']}-{data['month']}")
+    zip_dir_path = os.path.join(files_to_stack_path, f'{dir_name}')
+    os.makedirs(zip_dir_path, exist_ok=True)  # tolerate re-runs over a partial download
+
+    d_urls = [f['url'] for f in data["files"]]
+    sizes = [f['size'] for f in data["files"]]
+    f_names = [f['name'] for f in data["files"]]
+    f_paths = [pjoin(zip_dir_path, name) for name in f_names]
+    month = [data['month']]
     zip_url = zip(d_urls, f_paths, sizes)
     async with aiohttp.ClientSession() as session:
         tasks = []
         for url, name, sz in zip_url:
-            task = asyncio.create_task(_download(session, url, name, sem, sz))
+            task = asyncio.create_task(_download(session, url, name, sem, month, sz))
             tasks.append(task)

         await asyncio.gather(*tasks)


-def fetcher(batch, rate_limit, headers):
+async def vst_fetcher(item, rate_limit, headers, files_to_stack_path="filesToStack"):
+    data = requests.get(item).json()
+    await _fetcher(data, rate_limit, headers, files_to_stack_path)
+
+
+def fetcher(batch, data_type, rate_limit, headers, files_to_stack_path):
     try:
-        asyncio.run(_fetcher(batch, rate_limit, headers))
+        if data_type == 'vst':
+            asyncio.run(vst_fetcher(batch, rate_limit, headers, files_to_stack_path))
+        elif data_type == 'aop':
+            asyncio.run(_fetcher(batch, rate_limit, headers, files_to_stack_path))
+
     except Exception as e:
         print(f"Error processing URLs: {e}")


-def run_threaded_batches(batches, batch_size, rate_limit, headers=None):
-    max_thread = 2
-    num_threads = (len(batches) + batch_size - 1) // batch_size
-    with ThreadPoolExecutor(max_workers=max_thread) as executor:
+def run_threaded_batches(batches, data_type, rate_limit, headers=None, savepath='filesToStack'):
+    num_cores = os.cpu_count() or 1  # cpu_count() can return None on some platforms
+    num_threads = min(num_cores, len(batches))  # no more threads than cores or batches
+
+    with ThreadPoolExecutor(max_workers=num_threads) as executor:
         for i in range(num_threads):
-            batch = batches[i * batch_size:min((i + 1) * batch_size, len(batches))]
-            executor.map(fetcher, batch, repeat(rate_limit), repeat(headers))
+            # Distribute the batches evenly among threads
+            batch = batches[i::num_threads]
+            # executor.submit(fetcher, batch, rate_limit, headers)
+            executor.map(fetcher, batch, repeat(data_type), repeat(rate_limit), repeat(headers), repeat(savepath))
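
The rewritten `run_threaded_batches` swaps contiguous slices for stride slicing: `batches[i::num_threads]` deals the batches out round-robin, so each batch is claimed by exactly one thread and the shares stay within one item of each other. A quick illustration of the slicing alone:

    batches = ['b0', 'b1', 'b2', 'b3', 'b4']
    num_threads = 2
    print(batches[0::num_threads])  # ['b0', 'b2', 'b4']
    print(batches[1::num_threads])  # ['b1', 'b3']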
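
For reference, a call under the new signature might look like the sketch below. The URL and values are hypothetical; in the package the batch items are built from the NEON data API elsewhere. With `data_type='vst'` each item is an API URL that `vst_fetcher` resolves via `requests.get(item).json()`; with `data_type='aop'` each item is the already-fetched response dict:

    headers = {"accept": "application/json"}
    vst_batches = [
        # hypothetical NEON data-API endpoint (productCode/siteCode/month)
        "https://data.neonscience.org/api/v0/data/DP1.10098.001/ABBY/2021-07",
    ]
    run_threaded_batches(vst_batches, data_type='vst', rate_limit=4,
                         headers=headers, savepath='filesToStack')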