diff --git a/modules/csv_downloader.py b/modules/csv_downloader.py index 7b2c29d..e04e4cb 100644 --- a/modules/csv_downloader.py +++ b/modules/csv_downloader.py @@ -18,6 +18,9 @@ def TTV(csv_dir, name_file, args_y): ''' CSV = os.path.join(csv_dir, name_file) error_csv(name_file, csv_dir, args_y) + + # Let the user know what's going on. Some of these files have over a GB and it takes a while to load them. + print("Loading CSV file...") df_val = pd.read_csv(CSV) return df_val @@ -37,7 +40,7 @@ def error_csv(file, csv_dir, args_y): else: ans = input(bc.OKBLUE + "Do you want to download the missing file? [Y/n] " + bc.ENDC) - if ans.lower() == 'y': + if ans.lower() == 'y' or ans == '': folder = str(os.path.basename(file)).split('-')[0] if folder != 'class': FILE_URL = str(OID_URL + folder + '/' + file) diff --git a/modules/downloader.py b/modules/downloader.py index c09f93c..f3f44d2 100644 --- a/modules/downloader.py +++ b/modules/downloader.py @@ -4,6 +4,7 @@ from modules.utils import images_options from modules.utils import bcolors as bc from multiprocessing.dummy import Pool as ThreadPool +import subprocess def download(args, df_val, folder, dataset_dir, class_name, class_code, class_list=None, threads = 20): ''' @@ -77,7 +78,7 @@ def download_img(folder, dataset_dir, class_name, images_list, threads): command = 'aws s3 --no-sign-request --only-show-errors cp s3://open-images-dataset/' + path commands.append(command) - list(tqdm(pool.imap(os.system, commands), total = len(commands) )) + list(tqdm(pool.imap(lambda c: subprocess.call(c, shell=True), commands), total = len(commands) )) print(bc.INFO + 'Done!' + bc.ENDC) pool.close()