Source code for kospeech.optim.lr_scheduler.transformer_lr_scheduler
# Copyright (c) 2020, Soohwan Kim. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from kospeech.optim.lr_scheduler.lr_scheduler import LearningRateScheduler


class TransformerLRScheduler(LearningRateScheduler):
    """
    Transformer Learning Rate Scheduler proposed in "Attention Is All You Need".

    The learning rate increases linearly from zero to ``peak_lr`` over
    ``warmup_steps`` updates, decays exponentially to ``peak_lr * final_lr_scale``
    over ``decay_steps`` updates, and is then held constant at ``final_lr``.

    Args:
        optimizer (torch.optim.Optimizer): optimizer whose learning rate is scheduled
        peak_lr (float): maximum learning rate, reached at the end of warmup
        final_lr (float): learning rate used after the decay stage ends
        final_lr_scale (float): ratio between the learning rate at the end of decay and ``peak_lr``
        warmup_steps (int): number of linear warmup steps
        decay_steps (int): number of exponential decay steps
    """
    def __init__(self, optimizer, peak_lr, final_lr, final_lr_scale, warmup_steps, decay_steps):
        assert isinstance(warmup_steps, int), "warmup_steps should be integer type"
        assert isinstance(decay_steps, int), "decay_steps should be integer type"
super(TransformerLRScheduler, self).__init__(optimizer, 0.0)
self.final_lr = final_lr
self.peak_lr = peak_lr
self.warmup_steps = warmup_steps
self.decay_steps = decay_steps
        self.warmup_rate = self.peak_lr / self.warmup_steps  # linear lr increment per warmup step
        # chosen so that exp(-decay_factor * decay_steps) == final_lr_scale
        self.decay_factor = -math.log(final_lr_scale) / self.decay_steps
        self.lr = self.init_lr
        self.update_step = 0

    def _decide_stage(self):
        # stage 0: linear warmup, stage 1: exponential decay, stage 2: constant final lr
        if self.update_step < self.warmup_steps:
            return 0, self.update_step
        if self.warmup_steps <= self.update_step < self.warmup_steps + self.decay_steps:
            return 1, self.update_step - self.warmup_steps
        return 2, None

def step(self):
self.update_step += 1
stage, steps_in_stage = self._decide_stage()
if stage == 0:
self.lr = self.update_step * self.warmup_rate
elif stage == 1:
self.lr = self.peak_lr * math.exp(-self.decay_factor * steps_in_stage)
elif stage == 2:
self.lr = self.final_lr
else:
raise ValueError("Undefined stage")
self.set_lr(self.optimizer, self.lr)
return self.lr
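

if __name__ == "__main__":
    # Usage sketch (illustrative, not part of the original module): drives the
    # scheduler through all three stages and prints the learning rate at a few
    # checkpoints. The model, optimizer, and hyperparameter values below are
    # assumptions chosen for demonstration only; forward/backward passes are
    # omitted since only the schedule itself is being exercised.
    import torch

    model = torch.nn.Linear(10, 10)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0)
    scheduler = TransformerLRScheduler(
        optimizer,
        peak_lr=1e-3,
        final_lr=1e-5,          # == peak_lr * final_lr_scale
        final_lr_scale=0.01,
        warmup_steps=4000,
        decay_steps=80000,
    )

    for step in range(1, 90001):
        lr = scheduler.step()   # warmup -> exponential decay -> constant final_lr
        if step in (1, 4000, 44000, 84000, 90000):
            print(f"step={step:6d}  lr={lr:.2e}")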