combined
allennlp.training.learning_rate_schedulers.combined
CombinedLearningRateScheduler¶
@LearningRateScheduler.register("combined")
class CombinedLearningRateScheduler(LearningRateScheduler):
| def __init__(
| self,
| optimizer: torch.optim.Optimizer,
| schedulers: List[Tuple[int, Lazy[LearningRateScheduler]]],
| num_steps_per_epoch: Optional[int] = None,
| last_epoch: int = -1
| ) -> None
This LearningRateScheduler can be used to apply an arbitrary number of other schedulers
one after the other.
These schedulers are defined through the `schedulers` parameter, which takes
a list of `Tuple[int, Lazy[LearningRateScheduler]]`. The first field of each
tuple, the `int`, specifies how many epochs the corresponding scheduler is
used before the next scheduler takes its place.
While it usually makes sense for the sum
`sum(n_epochs for (n_epochs, _) in schedulers)`
to equal the total number of training epochs, it is not a requirement.
If training continues beyond the last defined scheduler, both `step()` and
`step_batch()` will be a no-op. In effect, this causes the learning rate to
stay constant.
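A minimal, standalone sketch of the epoch bookkeeping described above (not the library's actual implementation): given the `(num_epochs, scheduler)` pairs, the scheduler whose span covers the current epoch is the active one, and there is no active scheduler once training has passed the last span.

```python
from typing import List, Optional, Tuple


def active_scheduler_index(
    schedulers: List[Tuple[int, object]], epochs_completed: int
) -> Optional[int]:
    """Return the index of the scheduler covering the current epoch, or None
    if training has moved past the last defined scheduler."""
    epoch_boundary = 0
    for index, (num_epochs, _) in enumerate(schedulers):
        epoch_boundary += num_epochs
        if epochs_completed < epoch_boundary:
            return index
    return None  # beyond the last scheduler: step()/step_batch() become no-ops


# With the example config below, epochs 0-14 use index 0, epochs 15-29 use
# index 1, and epoch 30 onwards returns None (constant learning rate).
assert active_scheduler_index([(15, "poly"), (15, "noam")], 0) == 0
assert active_scheduler_index([(15, "poly"), (15, "noam")], 15) == 1
assert active_scheduler_index([(15, "poly"), (15, "noam")], 30) is None
```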
Example¶
Config for using the `CombinedLearningRateScheduler` learning rate scheduler
with the following arguments:

- Use `PolynomialDecay` for the first 15 epochs.
- Use `NoamLR` for the next 15 epochs.
- Use a constant LR for the remaining epochs.
```json
{
    ...
    "trainer": {
        ...
        "learning_rate_scheduler": {
            "type": "combined",
            "schedulers": [
                [
                    15, {
                        "type": "polynomial_decay",
                        "power": 2,
                        "warmup_steps": 50,
                        "end_learning_rate": 1e-10
                    }
                ],
                [
                    15, {
                        "type": "noam",
                        "warmup_steps": 1,
                        "model_size": 128,
                        "factor": 0.5
                    }
                ]
            ]
        },
        ...
    }
}
```
Note that you do not need to supply the `optimizer` key to the learning rate scheduler in the config; the trainer passes the optimizer to it automatically.
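For reference, the same scheduler can be built programmatically through the `FromParams` machinery. The snippet below is only a sketch: the dummy parameters, the `Adam` optimizer, and the `num_steps_per_epoch=100` value are placeholders standing in for your own training setup (the trainer normally supplies the optimizer and step count for you).

```python
import torch
from allennlp.common import Params
from allennlp.training.learning_rate_schedulers import LearningRateScheduler

# Placeholder optimizer over a dummy parameter, just to have something to schedule.
model_parameters = [torch.nn.Parameter(torch.randn(2, 2))]
optimizer = torch.optim.Adam(model_parameters, lr=1e-3)

scheduler = LearningRateScheduler.from_params(
    Params(
        {
            "type": "combined",
            "schedulers": [
                [
                    15,
                    {
                        "type": "polynomial_decay",
                        "power": 2,
                        "warmup_steps": 50,
                        "end_learning_rate": 1e-10,
                    },
                ],
                [
                    15,
                    {"type": "noam", "warmup_steps": 1, "model_size": 128, "factor": 0.5},
                ],
            ],
        }
    ),
    optimizer=optimizer,
    num_steps_per_epoch=100,  # placeholder; normally provided by the trainer
)
```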
current_scheduler¶
class CombinedLearningRateScheduler(LearningRateScheduler):
| ...
| @property
| def current_scheduler(self) -> Optional[LearningRateScheduler]
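The property returns the scheduler currently in charge, or `None` once training has moved past the last defined span. Continuing the construction sketch above, this can be handy for logging:

```python
active = scheduler.current_scheduler
if active is None:
    print("Past the last configured scheduler; learning rate stays constant.")
else:
    print(f"Active scheduler: {type(active).__name__}")
```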
state_dict¶
class CombinedLearningRateScheduler(LearningRateScheduler):
| ...
| def state_dict(self) -> Dict[str, Any]
load_state_dict¶
class CombinedLearningRateScheduler(LearningRateScheduler):
| ...
| def load_state_dict(self, state_dict: Dict[str, Any]) -> None
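These follow the usual PyTorch checkpointing pattern: the scheduler's state (so that the currently active sub-scheduler can pick up where it left off) round-trips through a plain dictionary. A sketch of saving and restoring alongside the optimizer, reusing the `optimizer` and `scheduler` placeholders from the construction sketch above:

```python
# Save (e.g. at the end of an epoch).
checkpoint = {
    "optimizer": optimizer.state_dict(),
    "lr_scheduler": scheduler.state_dict(),
}
torch.save(checkpoint, "training_state.pt")

# Restore when resuming training.
checkpoint = torch.load("training_state.pt")
optimizer.load_state_dict(checkpoint["optimizer"])
scheduler.load_state_dict(checkpoint["lr_scheduler"])
```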
get_values¶
class CombinedLearningRateScheduler(LearningRateScheduler):
| ...
| def get_values(self)
This should never be called directly.
step_batch¶
class CombinedLearningRateScheduler(LearningRateScheduler):
| ...
| def step_batch(self, batch_num_total: int = None) -> None
step¶
class CombinedLearningRateScheduler(LearningRateScheduler):
| ...
| def step(self, metric: float = None) -> None
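Inside AllenNLP's trainer these hooks are called for you; the loop below is only a sketch of the calling convention, not the trainer's actual code. `model`, `data_loader`, and `evaluate` are placeholders for your own objects. `step_batch()` is called after every optimizer update (for per-batch schedules such as warmup), and `step()` once per epoch, optionally with a validation metric that may be forwarded to sub-schedulers that use one.

```python
num_epochs = 40
for epoch in range(num_epochs):
    for batch in data_loader:            # placeholder iterable of batches
        loss = model(**batch)["loss"]    # placeholder forward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step_batch()           # advance per-batch schedules
    validation_metric = evaluate(model)  # placeholder evaluation
    scheduler.step(validation_metric)    # advance the per-epoch schedule
```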