Source code for kospeech.optim.lr_scheduler.transformer_lr_scheduler

# Copyright (c) 2020, Soohwan Kim. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
from kospeech.optim.lr_scheduler.lr_scheduler import LearningRateScheduler


class TransformerLRScheduler(LearningRateScheduler):
    """
    Transformer Learning Rate Scheduler proposed in "Attention Is All You Need"
    """
    def __init__(self, optimizer, peak_lr, final_lr, final_lr_scale, warmup_steps, decay_steps):
        assert isinstance(warmup_steps, int), "warmup_steps should be integer type"
        assert isinstance(decay_steps, int), "decay_steps should be integer type"

        super(TransformerLRScheduler, self).__init__(optimizer, 0.0)
        self.final_lr = final_lr
        self.peak_lr = peak_lr
        self.warmup_steps = warmup_steps
        self.decay_steps = decay_steps

        # Per-step increment during warmup and exponential decay factor afterwards
        self.warmup_rate = self.peak_lr / self.warmup_steps
        self.decay_factor = -math.log(final_lr_scale) / self.decay_steps

        self.lr = self.init_lr
        self.update_step = 0

    def _decide_stage(self):
        # Stage 0: linear warmup, Stage 1: exponential decay, Stage 2: constant final lr
        if self.update_step < self.warmup_steps:
            return 0, self.update_step

        if self.warmup_steps <= self.update_step < self.warmup_steps + self.decay_steps:
            return 1, self.update_step - self.warmup_steps

        return 2, None

    def step(self):
        self.update_step += 1
        stage, steps_in_stage = self._decide_stage()

        if stage == 0:
            self.lr = self.update_step * self.warmup_rate
        elif stage == 1:
            self.lr = self.peak_lr * math.exp(-self.decay_factor * steps_in_stage)
        elif stage == 2:
            self.lr = self.final_lr
        else:
            raise ValueError("Undefined stage")

        self.set_lr(self.optimizer, self.lr)

        return self.lr
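
A minimal usage sketch, not part of the original module: it assumes a standard PyTorch optimizer and that the parent LearningRateScheduler's set_lr writes the new learning rate into the optimizer's param groups. The specific hyperparameter values below are illustrative only.

    import torch
    from kospeech.optim.lr_scheduler.transformer_lr_scheduler import TransformerLRScheduler

    model = torch.nn.Linear(80, 512)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0)

    scheduler = TransformerLRScheduler(
        optimizer,
        peak_lr=1e-4,        # lr reached at the end of warmup
        final_lr=1e-7,       # lr held after the decay stage
        final_lr_scale=0.05, # controls how fast the decay stage falls off
        warmup_steps=4000,
        decay_steps=80000,
    )

    for step in range(10):
        optimizer.step()
        current_lr = scheduler.step()  # linear warmup, then exponential decay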