polynomial_decay

[ allennlp.training.learning_rate_schedulers.polynomial_decay ]


PolynomialDecay Objects#

class PolynomialDecay(LearningRateScheduler):
 | def __init__(
 |     self,
 |     optimizer: torch.optim.Optimizer,
 |     total_steps,
 |     power=1.0,
 |     warmup_steps=0,
 |     end_learning_rate=0.0,
 |     last_epoch: int = -1
 | )

Implements polynomial decay learning rate scheduling. The learning rate is first linearly increased for the first warmup_steps training steps. It is then decayed over the remaining total_steps - warmup_steps steps from the initial learning rate to end_learning_rate using a polynomial of degree power.

Formally,

lr = (initial_lr - end_learning_rate) * ((total_steps - steps)/(total_steps - warmup_steps)) ** power + end_learning_rate
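For illustration, the schedule can be sketched as a standalone function. This is a re-implementation of the formula above for clarity, not the class's own code, and it assumes the linear warmup ramps from zero up to the initial learning rate:

```python
def polynomial_decay_lr(step, initial_lr, total_steps,
                        warmup_steps=0, power=1.0, end_learning_rate=0.0):
    # Linear warmup from zero up to initial_lr (assumed warmup behavior).
    if warmup_steps > 0 and step < warmup_steps:
        return initial_lr * step / warmup_steps
    # Polynomial decay from initial_lr down to end_learning_rate.
    frac = (total_steps - step) / (total_steps - warmup_steps)
    return (initial_lr - end_learning_rate) * frac ** power + end_learning_rate

# e.g. with initial_lr=0.1, total_steps=100, warmup_steps=10, power=1.0:
#   step 5   -> 0.05  (halfway through warmup)
#   step 55  -> 0.05  (halfway through the decay phase)
#   step 100 -> 0.0   (reaches end_learning_rate)
```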

Parameters

  • total_steps : int
    The total number of steps to adjust the learning rate for.
  • warmup_steps : int
    The number of steps to linearly increase the learning rate.
  • power : float, optional (default = 1.0)
    The power of the polynomial used for decaying.
  • end_learning_rate : float, optional (default = 0.0)
    Final learning rate to decay towards.
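As a minimal usage sketch, the scheduler can be constructed directly around any torch optimizer. The toy model and hyperparameter values below are illustrative only, and the import path assumes the class is re-exported from allennlp.training.learning_rate_schedulers:

```python
import torch
from allennlp.training.learning_rate_schedulers import PolynomialDecay

model = torch.nn.Linear(10, 2)                            # toy model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

scheduler = PolynomialDecay(
    optimizer,
    total_steps=1000,        # adjust the learning rate over 1000 steps
    warmup_steps=100,        # linear warmup for the first 100 steps
    power=2.0,               # quadratic decay
    end_learning_rate=1e-5,  # decay towards this value
)
```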

get_values#

 | @overrides
 | def get_values(self)

step#

 | @overrides
 | def step(self, metric: float = None) -> None

step_batch#

 | @overrides
 | def step_batch(self, batch_num_total: int = None) -> None
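Because the decay is defined per training step rather than per epoch, step_batch is the call that actually advances this schedule. A hedged sketch of a training loop driving it, continuing from the construction example above (the random inputs and summed output are stand-ins for real data and loss):

```python
batch_num_total = 0
for epoch in range(2):
    for _ in range(500):                       # 500 batches per epoch -> 1000 total steps
        inputs = torch.randn(32, 10)           # stand-in for a real batch
        optimizer.zero_grad()
        loss = model(inputs).sum()             # stand-in for a real loss
        loss.backward()
        optimizer.step()

        batch_num_total += 1
        scheduler.step_batch(batch_num_total)  # advance the schedule after each batch
```

Under this reading, get_values returns the current learning rate for each parameter group, while step is the per-epoch hook that this scheduler does not appear to rely on.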