remove minibatch_size and minibatches_per_step #623

Open · wants to merge 1 commit into main
@@ -13,7 +13,6 @@ model:
 maxq_learning: true
 temperature: 1.0
 double_q_learning: true
-minibatches_per_step: 1
 num_atoms: 21
 qmin: 0
 qmax: 40
@@ -13,7 +13,6 @@ model:
 maxq_learning: true
 temperature: 1.0
 double_q_learning: true
-minibatches_per_step: 1
 optimizer:
   Adam:
     lr: 0.01
@@ -13,7 +13,6 @@ model:
 maxq_learning: true
 temperature: 1.0
 double_q_learning: true
-minibatches_per_step: 1
 num_atoms: 11
 optimizer:
   AdamW:
@@ -10,7 +10,6 @@ model:
 maxq_learning: true
 temperature: 1.0
 double_q_learning: true
-minibatches_per_step: 1
 optimizer:
   AdamW:
     lr: 0.001
@@ -14,7 +14,6 @@ model:
 maxq_learning: false
 temperature: 0.35
 double_q_learning: true
-minibatches_per_step: 1
 optimizer:
   Adam:
     lr: 0.05
@@ -15,7 +15,6 @@ model:
 maxq_learning: true
 temperature: 1.0
 double_q_learning: true
-minibatches_per_step: 1
 optimizer:
   Adam:
     lr: 0.05
@@ -20,7 +20,6 @@ model:
 temperature: 0.01
 q_network_loss: mse
 double_q_learning: true
-minibatches_per_step: 1
 optimizer:
   Adam:
     lr: 0.01
@@ -17,7 +17,6 @@ model:
 maxq_learning: true
 temperature: 10.0
 double_q_learning: true
-minibatches_per_step: 1
 optimizer:
   AdamW:
     lr: 0.005
@@ -31,8 +31,6 @@ train_model:
 maxq_learning: true
 q_network_loss: mse
 double_q_learning: true
-minibatch_size: 1024
-minibatches_per_step: 1
 optimizer:
   Adam:
     lr: 0.001
9 changes: 1 addition & 8 deletions reagent/gym/tests/test_gym.py
@@ -191,7 +191,7 @@ def run_test_replay_buffer(
     passing_score_bar: float,
     num_eval_episodes: int,
     use_gpu: bool,
-    minibatch_size: Optional[int] = None,
+    minibatch_size: int,
 ):
     """
     Run an online learning test with a replay buffer. The replay buffer is pre-filled, then the training starts.
@@ -212,13 +212,6 @@ def run_test_replay_buffer(
     )
     training_policy = manager.create_policy(trainer, serving=False)

-    if not isinstance(trainer, pl.LightningModule):
-        if minibatch_size is None:
-            minibatch_size = trainer.minibatch_size
-        assert minibatch_size == trainer.minibatch_size
-
-    assert minibatch_size is not None
-
     replay_buffer = ReplayBuffer(
         replay_capacity=replay_memory_size, batch_size=minibatch_size
    )
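With the trainer-side fallback removed, callers of run_test_replay_buffer must carry the batch size themselves. The following is a minimal sketch of the resulting calling pattern, not part of this diff: the import path and the example values are assumptions; only the ReplayBuffer keyword arguments are taken from the hunk above.

# Hedged sketch, not part of this PR: the batch size is now passed in
# explicitly instead of being read back from trainer.minibatch_size.
from reagent.replay_memory.circular_replay_buffer import ReplayBuffer  # assumed path

minibatch_size = 512          # e.g. taken from the test's YAML config (assumed)
replay_memory_size = 100_000  # illustrative capacity

replay_buffer = ReplayBuffer(
    replay_capacity=replay_memory_size, batch_size=minibatch_size
)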
7 changes: 0 additions & 7 deletions reagent/training/c51_trainer.py
@@ -28,8 +28,6 @@ def __init__(
         actions: List[str] = field(default_factory=list), # noqa: B008
         rl: RLParameters = field(default_factory=RLParameters), # noqa: B008
         double_q_learning: bool = True,
-        minibatch_size: int = 1024,
-        minibatches_per_step: int = 1,
         num_atoms: int = 51,
         qmin: float = -100,
         qmax: float = 200,
@@ -45,9 +43,6 @@ def __init__(
             rl (optional): an instance of the RLParameter class, which
                 defines relevant hyperparameters
             double_q_learning (optional): whether or not double Q learning, enabled by default,
-            minibatch_size (optional): the size of the minibatch
-            minibatches_per_step (optional): the number of minibatch updates
-                per training step
             num_atoms (optional): number of "canonical returns"in the discretized value distributions
             qmin (optional): minimum q-value
             qmax (optional): maximum q-value
@@ -56,8 +51,6 @@ def __init__(
         """
         super().__init__()
         self.double_q_learning = double_q_learning
-        self.minibatch_size = minibatch_size
-        self.minibatches_per_step = minibatches_per_step
         self._actions = actions
         self.q_network = q_network
         self.q_network_target = q_network_target
6 changes: 0 additions & 6 deletions reagent/training/dqn_trainer.py
@@ -43,8 +43,6 @@ def __init__(
         rl: RLParameters = field(default_factory=RLParameters), # noqa: B008
         double_q_learning: bool = True,
         bcq: Optional[BCQConfig] = None,
-        minibatch_size: int = 1024,
-        minibatches_per_step: int = 1,
         optimizer: Optimizer__Union = field( # noqa: B008
             default_factory=Optimizer__Union.default
         ),
@@ -62,8 +60,6 @@ def __init__(
             rl: RLParameters
             double_q_learning: boolean flag to use double-q learning
             bcq: a config file for batch-constrained q-learning, defaults to normal
-            minibatch_size: samples per minibatch
-            minibatches_per_step: minibatch updates per step
             optimizer: q-network optimizer
             evaluation: evaluation params, primarily whether to use CPE in eval or not
         """
@@ -75,8 +71,6 @@ def __init__(
         )
         assert self._actions is not None, "Discrete-action DQN needs action names"
         self.double_q_learning = double_q_learning
-        self.minibatch_size = minibatch_size
-        self.minibatches_per_step = minibatches_per_step or 1

         self.q_network = q_network
         self.q_network_target = q_network_target
2 changes: 0 additions & 2 deletions reagent/training/parametric_dqn_trainer.py
@@ -28,7 +28,6 @@ def __init__(
         # Start ParametricDQNTrainerParameters
         rl: rlp.RLParameters = field(default_factory=rlp.RLParameters), # noqa: B008
         double_q_learning: bool = True,
-        minibatches_per_step: int = 1,
         optimizer: Optimizer__Union = field( # noqa: B008
             default_factory=Optimizer__Union.default
         ),
@@ -38,7 +37,6 @@ def __init__(
         self.rl_parameters = rl

         self.double_q_learning = double_q_learning
-        self.minibatches_per_step = minibatches_per_step or 1

         self.q_network = q_network
         self.q_network_target = q_network_target
4 changes: 0 additions & 4 deletions reagent/training/qrdqn_trainer.py
@@ -37,8 +37,6 @@ def __init__(
         rl: RLParameters = field(default_factory=RLParameters), # noqa: B008
         double_q_learning: bool = True,
         num_atoms: int = 51,
-        minibatch_size: int = 1024,
-        minibatches_per_step: int = 1,
         optimizer: Optimizer__Union = field( # noqa: B008
             default_factory=Optimizer__Union.default
         ),
@@ -57,8 +55,6 @@ def __init__(
         )
         # TODO: check to ensure no rl parameter value is set that isn't actively used by class
         self.double_q_learning = double_q_learning
-        self.minibatch_size = minibatch_size
-        self.minibatches_per_step = minibatches_per_step
         self._actions = actions

         self.q_network = q_network
7 changes: 0 additions & 7 deletions reagent/training/td3_trainer.py
@@ -37,11 +37,9 @@ def __init__(
         actor_network_optimizer: Optimizer__Union = field( # noqa: B008
             default_factory=Optimizer__Union.default
         ),
-        minibatch_size: int = 64,
         noise_variance: float = 0.2,
         noise_clip: float = 0.5,
         delayed_policy_update: int = 2,
-        minibatches_per_step: int = 1,
     ) -> None:
         """
         Args:
@@ -54,20 +52,15 @@ def __init__(
             q_network_optimizer (optional): the optimizer class and
                 optimizer hyperparameters for the q network(s) optimizer
             actor_network_optimizer (optional): see q_network_optimizer
-            minibatch_size (optional): the size of the minibatch
             noise_variance (optional): the variance of action noise added to smooth
                 q-value estimates
             noise_clip (optional): the maximum absolute value of action noise added
                 to smooth q-value estimates
             delayed_policy_update (optional): the ratio of q network updates
                 to target and policy network updates
-            minibatches_per_step (optional, TODO: currently unused): the number of minibatch updates
-                per training step
         """
         super().__init__()
         self.rl_parameters = rl
-        self.minibatch_size = minibatch_size
-        self.minibatches_per_step = minibatches_per_step or 1

         self.q1_network = q1_network
         self.q1_network_target = copy.deepcopy(self.q1_network)
@@ -19,8 +19,6 @@ model:
 softmax_policy: false
 q_network_loss: mse
 double_q_learning: true
-minibatch_size: 512
-minibatches_per_step: 1
 optimizer:
   Adam:
     lr: 0.01
2 changes: 0 additions & 2 deletions serving/examples/ecommerce/training/contextual_bandit.yaml
@@ -17,8 +17,6 @@ model:
 softmax_policy: false
 q_network_loss: mse
 double_q_learning: true
-minibatch_size: 128
-minibatches_per_step: 1
 optimizer:
   Adam:
     lr: 0.01