nvidia-cosmos
diff --git a/configs/cosmos-reason1/cosmos-reason1-7b-tp4-sft.toml b/configs/cosmos-reason1/cosmos-reason1-7b-fsdp2-sft.toml
rename from configs/cosmos-reason1/cosmos-reason1-7b-tp4-sft.toml
rename to configs/cosmos-reason1/cosmos-reason1-7b-fsdp2-sft.toml
Lines changed: 6 additions & 5 deletions
diff --git a/configs/cosmos-reason1/cosmos-reason1-7b-tp2-fsdp-sft.toml b/configs/cosmos-reason1/cosmos-reason1-7b-fsdp4-sft.toml
rename from configs/cosmos-reason1/cosmos-reason1-7b-tp2-fsdp-sft.toml
rename to configs/cosmos-reason1/cosmos-reason1-7b-fsdp4-sft.toml
Lines changed: 6 additions & 5 deletions
diff --git a/configs/cosmos-reason1/cosmos-reason1-7b-tp2-sft-profile.toml b/configs/cosmos-reason1/cosmos-reason1-7b-fsdp8-sft.toml
rename from configs/cosmos-reason1/cosmos-reason1-7b-tp2-sft-profile.toml
rename to configs/cosmos-reason1/cosmos-reason1-7b-fsdp8-sft.toml
Lines changed: 8 additions & 19 deletions
diff --git a/configs/cosmos-reason1/cosmos-reason1-7b-p-fsdp1-tp2-r-tp1-pp2-grpo.toml b/configs/cosmos-reason1/cosmos-reason1-7b-p-fsdp1-tp2-r-tp1-pp2-grpo.toml
Lines changed: 0 additions & 84 deletions
diff --git a/configs/cosmos-reason1/cosmos-reason1-7b-p-fsdp1-tp2-r-tp2-pp1-grpo-fp8.toml b/configs/cosmos-reason1/cosmos-reason1-7b-p-fsdp1-tp2-r-tp2-pp1-grpo-fp8.toml
Lines changed: 0 additions & 90 deletions
diff --git a/configs/cosmos-reason1/cosmos-reason1-7b-p-fsdp1-tp2-r-tp2-pp1-grpo.toml b/configs/cosmos-reason1/cosmos-reason1-7b-p-fsdp1-tp2-r-tp2-pp1-grpo.toml
Lines changed: 3 additions & 10 deletions
@@ -3,10 +3,10 @@ redis = "12800"
 [train]
 resume = false
 epoch = 1
-output_dir = "./outputs/cosmos-reason1-7b-tp4-sft"
+output_dir = "./outputs/cosmos-reason1-7b-fsdp2-sft"
 epsilon = 1e-6
 optm_name = "AdamW"
-optm_lr = 1e-6
+optm_lr = 2e-6
 optm_impl = "fused"
 optm_weight_decay = 0.01
 optm_betas = [ 0.9, 0.999,]
@@ -18,7 +18,7 @@ param_dtype = "bfloat16"
 fsdp_reduce_dtype = "float32"
 fsdp_offload = false
 fsdp_reshard_after_forward = "default"
-train_batch_per_replica = 4
+train_batch_per_replica = 32
 sync_weight_interval = 1
 enable_validation = true
 validation_step = 30
@@ -44,6 +44,7 @@ enable_dataset_cache = false
 dataloader_num_workers = 4
 dataloader_prefetch_factor = 4
 conversation_column_name = "conversations"
+mini_batch = 4
 
 [train.ckpt]
 enable_checkpoint = true
@@ -52,9 +53,9 @@ save_mode = "async"
 
 [policy.parallelism]
 n_init_replicas = 1
-tp_size = 4
+tp_size = 1
 cp_size = 1
-dp_shard_size = -1
+dp_shard_size = 2
 pp_size = 1
 dp_replicate_size = 1
 cp_rotate_method = "allgather"
@@ -3,10 +3,10 @@ redis = "12800"
 [train]
 resume = false
 epoch = 1
-output_dir = "./outputs/cosmos-reason1-7b-tp2-dpx-sft"
+output_dir = "./outputs/cosmos-reason1-7b-fsdp4-sft"
 epsilon = 1e-6
 optm_name = "AdamW"
-optm_lr = 1e-6
+optm_lr = 2e-6
 optm_impl = "fused"
 optm_weight_decay = 0.01
 optm_betas = [ 0.9, 0.999,]
@@ -18,7 +18,7 @@ param_dtype = "bfloat16"
 fsdp_reduce_dtype = "float32"
 fsdp_offload = false
 fsdp_reshard_after_forward = "default"
-train_batch_per_replica = 4
+train_batch_per_replica = 32
 sync_weight_interval = 1
 enable_validation = true
 validation_step = 30
@@ -44,6 +44,7 @@ enable_dataset_cache = false
 dataloader_num_workers = 4
 dataloader_prefetch_factor = 4
 conversation_column_name = "conversations"
+mini_batch = 4
 
 [train.ckpt]
 enable_checkpoint = true
@@ -52,9 +53,9 @@ save_mode = "async"
 
 [policy.parallelism]
 n_init_replicas = 1
-tp_size = 2
+tp_size = 1
 cp_size = 1
-dp_shard_size = -1
+dp_shard_size = 4
 pp_size = 1
 dp_replicate_size = 1
 cp_rotate_method = "allgather"
@@ -3,10 +3,10 @@ redis = "12800"
 [train]
 resume = false
 epoch = 1
-output_dir = "./outputs/cosmos-reason1-7b-tp2-sft-profile"
+output_dir = "./outputs/cosmos-reason1-7b-fsdp8-sft"
 epsilon = 1e-6
 optm_name = "AdamW"
-optm_lr = 1e-6
+optm_lr = 2e-6
 optm_impl = "fused"
 optm_weight_decay = 0.01
 optm_betas = [ 0.9, 0.999,]
@@ -18,10 +18,10 @@ param_dtype = "bfloat16"
 fsdp_reduce_dtype = "float32"
 fsdp_offload = false
 fsdp_reshard_after_forward = "default"
-train_batch_per_replica = 4
+train_batch_per_replica = 32
 sync_weight_interval = 1
 enable_validation = true
-validation_step = 100
+validation_step = 30
 validation_batch_per_replica = 2
 
 [policy]
@@ -34,17 +34,6 @@ logger = ['console', 'wandb']
 project_name = "cosmos_reason1"
 experiment_name = "cosmos-reason1-sft"
 
-[profiler]
-enable_profiler = true
-
-[profiler.sub_profiler_config]
-active_steps = 2
-rank_filter = [0]
-record_shape = true
-profile_memory = false
-with_stack = false
-with_modules = false
-
 [train.train_policy]
 type = "sft"
 dataset.name = "nvidia/Cosmos-Reason1-SFT-Dataset"
@@ -55,18 +44,18 @@ enable_dataset_cache = false
 dataloader_num_workers = 4
 dataloader_prefetch_factor = 4
 conversation_column_name = "conversations"
+mini_batch = 4
 
 [train.ckpt]
 enable_checkpoint = true
-save_freq = 100
-max_keep = 2
+save_freq = 30
 save_mode = "async"
 
 [policy.parallelism]
 n_init_replicas = 1
-tp_size = 2
+tp_size = 1
 cp_size = 1
-dp_shard_size = 1
+dp_shard_size = 8
 pp_size = 1
 dp_replicate_size = 1
 cp_rotate_method = "allgather"
@@ -2,7 +2,7 @@ redis = "12800"
 
 [train]
 resume = false
-epoch = 1 
+epoch = 80
 output_dir = "./outputs/cosmos-reason1-7b-p-fsdp1-tp2-r-tp2-pp1-grpo"
 epsilon = 1e-6
 optm_name = "AdamW"
@@ -18,7 +18,7 @@ param_dtype = "bfloat16"
 fsdp_reduce_dtype = "float32"
 fsdp_offload = false
 fsdp_reshard_after_forward = "default"
-train_batch_per_replica = 8
+train_batch_per_replica = 128
 sync_weight_interval = 1
 
 [rollout]
@@ -29,13 +29,6 @@ n_generation = 8
 batch_size = 4
 quantization = "none"
 
-
-[rollout.sampling_config]
-temperature = 0.6
-top_p = 0.95
-top_k = 50
-repetition_penalty = 1.05
-
 [policy]
 model_name_or_path = "nvidia/Cosmos-Reason1-7B"
 model_max_length = 10240
@@ -62,7 +55,7 @@ epsilon_high = 0.2
 kl_beta = 0.0
 mu_iterations = 1
 min_filter_prefix_tokens = 1
-mini_batch = 1
+mini_batch = 4
 
 [train.ckpt]
 enable_checkpoint = true