From 3fce0a5dbd9e29f0ec87a036201f9f6176f62725 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Mon, 18 Nov 2024 06:50:21 +0200 Subject: [PATCH] refactor: generated_data as list Remove redundant conversions from None to empty list. Signed-off-by: Costa Shulyupin --- src/instructlab/sdg/generate_data.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/instructlab/sdg/generate_data.py b/src/instructlab/sdg/generate_data.py index d97cdc27..b1e60b92 100644 --- a/src/instructlab/sdg/generate_data.py +++ b/src/instructlab/sdg/generate_data.py @@ -409,7 +409,7 @@ def generate_data( "Synthesizing new instructions. If you aren't satisfied with the generated instructions, interrupt training (Ctrl-C) and try adjusting your YAML files. Adding more examples may help." ) - generated_data = None + generated_data = [] empty_sdg_leaf_nodes = [] for leaf_node in leaf_nodes.values(): is_knowledge = False @@ -444,11 +444,8 @@ def generate_data( empty_sdg_leaf_nodes.append(leaf_node_path) logger.warning("Empty dataset for qna node: %s", leaf_node_path) continue - generated_data = ( - [new_generated_data] - if generated_data is None - else generated_data + [new_generated_data] - ) + generated_data.append(new_generated_data) + logger.info("Generated %d samples", len(generated_data)) logger.debug("Generated data: %s", generated_data) @@ -469,9 +466,6 @@ def generate_data( use_legacy_pretraining_format, ) - if generated_data is None: - generated_data = [] - _gen_train_data( generated_data, os.path.join(output_dir, output_file_train),