allennlp.training.learning_rate_schedulers.combined


CombinedLearningRateScheduler

@LearningRateScheduler.register("combined")
class CombinedLearningRateScheduler(LearningRateScheduler):
 | def __init__(
 |     self,
 |     optimizer: torch.optim.Optimizer,
 |     schedulers: List[Tuple[int, Lazy[LearningRateScheduler]]],
 |     num_steps_per_epoch: Optional[int] = None,
 |     last_epoch: int = -1
 | ) -> None

This LearningRateScheduler can be used to apply an arbitrary number of other schedulers one after the other.

These schedulers are defined through the schedulers parameter, which takes a list of Tuple[int, Lazy[LearningRateScheduler]]. The first field of the tuple, the int, specifies how many epochs the corresponding scheduler will be used before the next scheduler takes its place.

While it usually makes sense for the sum

sum(n_epochs for (n_epochs, _) in schedulers)

to equal the total number of training epochs, it is not a requirement. If training continues beyond the last defined scheduler, both step() and step_batch() will be no-ops. In effect, this causes the learning rate to stay constant.
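
As an illustration of how these epoch counts partition training (the helper below is purely illustrative and not part of the library), the scheduler that owns a given epoch is found by walking the cumulative counts:

from typing import List, Optional, Tuple

def active_scheduler_index(schedulers: List[Tuple[int, object]], epoch: int) -> Optional[int]:
    # The first interval whose cumulative upper boundary exceeds `epoch`
    # owns that epoch.
    boundary = 0
    for index, (n_epochs, _) in enumerate(schedulers):
        boundary += n_epochs
        if epoch < boundary:
            return index
    # Past the last configured scheduler: step() and step_batch() become
    # no-ops and the learning rate stays where the final scheduler left it.
    return None

With the example config below, epochs 0-14 map to index 0 (polynomial_decay), epochs 15-29 to index 1 (noam), and epoch 30 onward returns None.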

Example

Config for using the CombinedLearningRateScheduler with the following behavior:

  • Use PolynomialDecay for the first 15 epochs.
  • Use NoamLR for the next 15 epochs.
  • Use a constant LR for the remaining epochs.

{
    ...
    "trainer": {
        ...
        "learning_rate_scheduler": {
            "type": "combined",
            "schedulers": [
                [
                    15, {
                        "type": "polynomial_decay",
                        "power": 2,
                        "warmup_steps": 50,
                        "end_learning_rate": 1e-10
                    }
                ],
                [
                    15, {
                        "type": "noam",
                        "warmup_steps": 1,
                        "model_size": 128,
                        "factor": 0.5
                    }
                ]
            ]
        },
        ...
    }
}
Note that you do NOT pass an optimizer key to the learning rate scheduler: the Trainer supplies its own optimizer when it constructs the scheduler, and the same applies to each nested scheduler in the schedulers list.

current_scheduler

class CombinedLearningRateScheduler(LearningRateScheduler):
 | ...
 | @property
 | def current_scheduler(self) -> Optional[LearningRateScheduler]
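
A small usage sketch (the scheduler variable is illustrative, not from the source): the property returns the sub-scheduler currently in use, or None once training has passed the last configured scheduler.

active = scheduler.current_scheduler
if active is None:
    # Past the last configured scheduler; the learning rate is held constant.
    print("no sub-scheduler active")
else:
    print(f"active sub-scheduler: {type(active).__name__}")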

state_dict

class CombinedLearningRateScheduler(LearningRateScheduler):
 | ...
 | def state_dict(self) -> Dict[str, Any]

load_state_dict

class CombinedLearningRateScheduler(LearningRateScheduler):
 | ...
 | def load_state_dict(self, state_dict: Dict[str, Any]) -> None
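
A minimal checkpointing sketch, assuming scheduler is an instance built as in the example above; torch.save and torch.load are standard PyTorch, and the file name is arbitrary.

import torch

# Persist the combined scheduler's state alongside the rest of the training state.
torch.save(scheduler.state_dict(), "lr_scheduler.th")

# Later, restore it into a freshly constructed CombinedLearningRateScheduler.
scheduler.load_state_dict(torch.load("lr_scheduler.th"))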

get_values

class CombinedLearningRateScheduler(LearningRateScheduler):
 | ...
 | def get_values(self)

This should never be called directly.

step_batch

class CombinedLearningRateScheduler(LearningRateScheduler):
 | ...
 | def step_batch(self, batch_num_total: int = None) -> None

step

class CombinedLearningRateScheduler(LearningRateScheduler):
 | ...
 | def step(self, metric: float = None) -> None
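
A minimal sketch of how a training loop would drive these two hooks; the loop, model, optimizer, and data_loader names are illustrative, and the standard AllenNLP Trainer calls these methods for you. step_batch() is called after each optimizer update, and step() at the end of each epoch, which is what moves training from one sub-scheduler to the next.

num_epochs = 40  # total training epochs (illustrative)
batch_num_total = 0
for epoch in range(num_epochs):
    for batch in data_loader:
        loss = model(**batch)["loss"]   # illustrative forward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        batch_num_total += 1
        scheduler.step_batch(batch_num_total)  # per-batch update of the active sub-scheduler
    scheduler.step()  # advances the epoch counter and may switch sub-schedulers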