-
Notifications
You must be signed in to change notification settings - Fork 0
/
EMA.py
90 lines (73 loc) · 3.24 KB
/
EMA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# ---------------------------------------------------------------
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# This work is licensed under the NVIDIA Source Code License
# for Denoising Diffusion GAN. To view a copy of this license, see the LICENSE file.
# ---------------------------------------------------------------
"""
Codes adapted from https://github.com/NVlabs/LSGM/blob/main/util/ema.py
"""
import warnings
import torch
from torch.optim import Optimizer
class EMA(Optimizer):
def __init__(self, opt, ema_decay):
self.ema_decay = ema_decay
self.apply_ema = self.ema_decay > 0.0
self.optimizer = opt
self.state = opt.state
self.param_groups = opt.param_groups
def step(self, *args, **kwargs):
retval = self.optimizer.step(*args, **kwargs)
# stop here if we are not applying EMA
if not self.apply_ema:
return retval
ema, params = {}, {}
for group in self.optimizer.param_groups:
for i, p in enumerate(group["params"]):
if p.grad is None:
continue
state = self.optimizer.state[p]
# State initialization
if "ema" not in state:
state["ema"] = p.data.clone()
if p.shape not in params:
params[p.shape] = {"idx": 0, "data": []}
ema[p.shape] = []
params[p.shape]["data"].append(p.data)
ema[p.shape].append(state["ema"])
for i in params:
params[i]["data"] = torch.stack(params[i]["data"], dim=0)
ema[i] = torch.stack(ema[i], dim=0)
ema[i].mul_(self.ema_decay).add_(params[i]["data"], alpha=1.0 - self.ema_decay)
for p in group["params"]:
if p.grad is None:
continue
idx = params[p.shape]["idx"]
self.optimizer.state[p]["ema"] = ema[p.shape][idx, :]
params[p.shape]["idx"] += 1
return retval
def load_state_dict(self, state_dict):
super(EMA, self).load_state_dict(state_dict)
# load_state_dict loads the data to self.state and self.param_groups. We need to pass this data to
# the underlying optimizer too.
self.optimizer.state = self.state
self.optimizer.param_groups = self.param_groups
def swap_parameters_with_ema(self, store_params_in_ema):
"""This function swaps parameters with their ema values. It records original parameters in the ema
parameters, if store_params_in_ema is true."""
# stop here if we are not applying EMA
if not self.apply_ema:
warnings.warn("swap_parameters_with_ema was called when there is no EMA weights.")
return
for group in self.optimizer.param_groups:
for i, p in enumerate(group["params"]):
if not p.requires_grad:
continue
ema = self.optimizer.state[p]["ema"]
if store_params_in_ema:
tmp = p.data.detach()
p.data = ema.detach()
self.optimizer.state[p]["ema"] = tmp
else:
p.data = ema.detach()