EMA（指数移动平均）就是取前几个 epoch 的 weight 的加权平均值，可以缓解微调时的灾难性遗忘（因为新数据引导，模型权重逐渐偏离训练时学到的数据分布，忘记之前学好的先验知识）。
class EMA:
    """Exponential Moving Average (EMA) of a model's trainable weights.

    Keeps a decay-weighted running average ("shadow") of every parameter
    with ``requires_grad=True``. Evaluating with the shadow weights smooths
    out noise from recent updates and can mitigate catastrophic forgetting
    during fine-tuning.

    Typical usage:
        ema = EMA(model, 0.999); ema.register()
        # each step:  optimizer.step(); ema.update()
        # evaluation: ema.apply_shadow(); ...; ema.restore()
    """

    def __init__(self, model, decay):
        self.model = model
        self.decay = decay  # decay rate, e.g. 0.999; higher -> slower-moving average
        self.shadow = {}    # name -> EMA ("old") weight
        self.backup = {}    # name -> live ("new") weight, saved while evaluating

    def register(self):
        """Deep-copy the current trainable weights to initialize the shadow."""
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()

    def update(self):
        """Fold the live weights into the shadow after each optimizer step.

        shadow = decay * shadow + (1 - decay) * param
        """
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                new_average = (1.0 - self.decay) * param.data + self.decay * self.shadow[name]
                self.shadow[name] = new_average.clone()

    def apply_shadow(self):
        """Swap the shadow (EMA) weights into the model for evaluation.

        The live weights are saved in ``self.backup`` so ``restore`` can
        put them back afterwards.
        """
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                self.backup[name] = param.data
                param.data = self.shadow[name]

    def restore(self):
        """Restore the live weights saved by ``apply_shadow``."""
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}
# Initialization: wrap the model and snapshot its weights into the shadow.
ema = EMA(model, 0.999)
ema.register()
def train():
    """One training step: after the optimizer updates the live weights,
    fold them into the EMA shadow weights."""
    optimizer.step()
    ema.update()
def evaluate():
    """Evaluate with the EMA weights: apply the shadow weights before
    evaluation, then restore the model's original (live) weights after."""
    ema.apply_shadow()
    # ... run evaluation here ...
    ema.restore()