Skip to content

Commit

Permalink
refactor: generated_data as list
Browse files: browse the repository at this point in the history.
Remove redundant conversions from None
to an empty list.

Signed-off-by: Costa Shulyupin <[email protected]>
  • Loading branch information
makelinux committed Nov 18, 2024
1 parent d382166 commit 03bdf9e
Showing 1 changed file with 3 additions and 9 deletions.
12 changes: 3 additions & 9 deletions src/instructlab/sdg/generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ def generate_data(
"Synthesizing new instructions. If you aren't satisfied with the generated instructions, interrupt training (Ctrl-C) and try adjusting your YAML files. Adding more examples may help."
)

generated_data = None
generated_data = []
empty_sdg_leaf_nodes = []
for leaf_node in leaf_nodes.values():
is_knowledge = False
Expand Down Expand Up @@ -444,11 +444,8 @@ def generate_data(
empty_sdg_leaf_nodes.append(leaf_node_path)
logger.warning("Empty dataset for qna node: %s", leaf_node_path)
continue
generated_data = (
[new_generated_data]
if generated_data is None
else generated_data + [new_generated_data]
)
generated_data.append(new_generated_data)

Check warning on line 447 in src/instructlab/sdg/generate_data.py

View workflow job for this annotation

GitHub Actions / pylint

W0101: Unreachable code (unreachable)

logger.info("Generated %d samples", len(generated_data))
logger.debug("Generated data: %s", generated_data)

Expand All @@ -469,9 +466,6 @@ def generate_data(
use_legacy_pretraining_format,
)

if generated_data is None:
generated_data = []

_gen_train_data(
generated_data,
os.path.join(output_dir, output_file_train),
Expand Down

0 comments on commit 03bdf9e

Please sign in to comment.