diff --git a/torchbenchmark/models/dlrm/dlrm_s_pytorch.py b/torchbenchmark/models/dlrm/dlrm_s_pytorch.py index 584c7a53b2..59b24045d3 100644 --- a/torchbenchmark/models/dlrm/dlrm_s_pytorch.py +++ b/torchbenchmark/models/dlrm/dlrm_s_pytorch.py @@ -149,8 +149,7 @@ def create_mlp(self, ln, sigmoid_layer): mean = 0.0 # std_dev = np.sqrt(variance) std_dev = np.sqrt(2 / (m + n)) # np.sqrt(1 / m) # np.sqrt(1 / n) W = np.random.normal(mean, std_dev, size=(m, n)).astype(np.float32) - std_dev = np.sqrt(1 / m) # np.sqrt(2 / (m + 1)) - bt = np.random.normal(mean, std_dev, size=m).astype(np.float32) + bt = np.zeros(m).astype(np.float32) # see upstream PR at https://github.com/facebookresearch/dlrm/pull/358 # approach 1 LL.weight.data = torch.tensor(W, requires_grad=True) LL.bias.data = torch.tensor(bt, requires_grad=True)