Skip to content

Commit

Permalink
Add --force-stop-iter option (#16)
Browse files Browse the repository at this point in the history
* add --force-stop-iter option

* Update megatron/training/training.py

Co-authored-by: Kouta Nakayama <[email protected]>

---------

Co-authored-by: Kouta Nakayama <[email protected]>
  • Loading branch information
odashi and k141303 authored Sep 5, 2024
1 parent 39135ac commit a4a0e22
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 2 additions & 0 deletions megatron/training/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -1688,5 +1688,7 @@ def _add_experimental_args(parser):
'`transformer_block.py`, or `transformer_layer.py`')
group.add_argument('--yaml-cfg', type=str, default=None,
help = 'Config file to add additional arguments')
+ group.add_argument('--force-stop-iter', type=int, default=None,
+ help="Stop training process at this iteration regardless of any other configs.")

return parser
2 changes: 1 addition & 1 deletion megatron/training/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ def track_e2e_metrics():
'validation_iterations_time_msecs_avg': validation_iterations_time_msecs_avg
})

- while iteration < args.train_iters:
+ while iteration < args.train_iters and (args.force_stop_iter is None or iteration < args.force_stop_iter):
if (
# train_data_iterator is not None
args.skip_train_iteration_range is not None
Expand Down

0 comments on commit a4a0e22

Please sign in to comment.