-
Notifications
You must be signed in to change notification settings - Fork 4
/
multimodal_pathway.py
61 lines (49 loc) · 2.61 KB
/
multimodal_pathway.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import torch
import torch.nn as nn
import torch.nn.functional as F
class CrossModalReparamLinear(nn.Linear):
    """Linear layer whose effective weight is blended with an auxiliary weight.

    The forward pass uses ``weight + cross_modal_scale * aux_weight`` as the
    weight matrix. ``cross_modal_scale`` is a learnable one-element parameter
    initialised to zero, so a freshly built layer computes exactly what a
    plain linear layer with the same ``weight`` and ``bias`` would.

    Args:
        in_features: Size of each input sample (forwarded to ``nn.Linear``).
        out_features: Size of each output sample (forwarded to ``nn.Linear``).
        bias: Whether the layer has an additive bias.
        origin_layer: Optional layer whose ``weight`` (and ``bias``, when both
            layers have one) are copied into this layer.
        aux_weight: Tensor with the same shape as ``self.weight``. Required,
            despite the ``None`` default kept for signature compatibility.
            It is stored by reference, not copied.
        is_aux_trainable: Value passed to ``aux_weight.requires_grad_``.

    Raises:
        ValueError: If ``aux_weight`` is ``None``.
        AssertionError: If ``aux_weight`` and ``self.weight`` differ in shape.
    """

    def __init__(self, in_features, out_features, bias=True,
                 origin_layer=None,
                 aux_weight=None,
                 is_aux_trainable=True):
        super().__init__(in_features, out_features, bias)
        if aux_weight is None:
            # Fail with a clear message instead of an AttributeError raised
            # from inside the shape check below.
            raise ValueError('aux_weight must be provided')
        self.cross_modal_scale = nn.Parameter(torch.zeros(1))
        assert self.weight.size() == aux_weight.size(), 'Target weight and aux weight must have the same shape'
        # Stored by reference: the same tensor object is used in forward().
        self.aux_weight = aux_weight
        self.aux_weight.requires_grad_(is_aux_trainable)
        if origin_layer is not None:
            with torch.no_grad():
                self.weight.copy_(origin_layer.weight)
                # self.bias is None when the layer was built with bias=False;
                # there is nothing to copy into in that case.
                if self.bias is not None:
                    if origin_layer.bias is not None:
                        self.bias.copy_(origin_layer.bias)
                    else:
                        # A bias-free origin layer adds nothing to its output;
                        # zero our bias so this layer starts out equivalent.
                        self.bias.zero_()

    def forward(self, input):
        """Apply the linear map using the blended weight and this layer's bias."""
        weight = self.weight + self.cross_modal_scale * self.aux_weight
        return F.linear(input, weight, self.bias)
def build_cross_modal_reparam_linear(origin_layer, aux_layer):
    """Build a ``CrossModalReparamLinear`` from an origin layer and an aux layer.

    The new layer takes its feature sizes and bias presence from
    ``origin_layer``, is handed ``origin_layer`` itself as the layer to copy
    from, and receives ``aux_layer.weight`` as its auxiliary weight.

    Raises:
        AssertionError: If the two layers' weights differ in shape.
    """
    origin_shape = origin_layer.weight.size()
    aux_weight = aux_layer.weight
    assert origin_shape == aux_weight.size()
    return CrossModalReparamLinear(
        origin_layer.in_features,
        origin_layer.out_features,
        bias=origin_layer.bias is not None,
        origin_layer=origin_layer,
        aux_weight=aux_weight,
    )
def _get_attr_by_name(obj, attr_name):
    """Resolve a dotted attribute path such as ``'attn.qkv'`` starting at ``obj``.

    Args:
        obj: Object to start the lookup from.
        attr_name: Attribute name, or several names joined by ``'.'``.

    Returns:
        The object reached after following every component of the path.

    Raises:
        AttributeError: If any component of the path does not exist.
    """
    # The builtin getattr performs normal attribute lookup first and only then
    # falls back to a class's __getattr__ hook, so this works for ordinary
    # attributes and for objects that define no __getattr__ at all.
    for part in attr_name.split('.'):
        obj = getattr(obj, part)
    return obj
def _set_attr_by_name(obj, attr_name, attr_value):
    """Assign ``attr_value`` at a dotted attribute path starting at ``obj``.

    Every component except the last is looked up to find the owning object;
    the last component is then set on that owner.

    Args:
        obj: Object to start the lookup from.
        attr_name: Attribute name, or several names joined by ``'.'``.
        attr_value: Value to store at the final component.

    Raises:
        AttributeError: If an intermediate component of the path does not exist.
    """
    *parent_path, leaf = attr_name.split('.')
    owner = obj
    # Builtins getattr/setattr go through normal attribute access, so this
    # also works for objects that do not define __getattr__ themselves.
    for part in parent_path:
        owner = getattr(owner, part)
    setattr(owner, leaf, attr_value)
def change_original_linear_to_reparam(target_module, aux_module, layer_name):
    """Replace the layer at ``layer_name`` in ``target_module`` with a reparam layer.

    The layer at the dotted path ``layer_name`` in ``target_module`` is paired
    with the layer at the same path in ``aux_module``. The layer built from
    that pair by ``build_cross_modal_reparam_linear`` is then stored back at
    the same path in ``target_module``.
    """
    layer_pair = (
        _get_attr_by_name(target_module, layer_name),
        _get_attr_by_name(aux_module, layer_name),
    )
    _set_attr_by_name(
        target_module,
        layer_name,
        build_cross_modal_reparam_linear(*layer_pair),
    )
def reparameterize_aux_into_target_model(target_model, aux_model,
                                         layer_names=('attn.qkv', 'attn.proj', 'mlp.fc1','mlp.fc2'), main_body_name='blocks'):
    """Reparameterize the named layers of every block in ``target_model``.

    The block containers found at ``main_body_name`` in both models are walked
    in lockstep; iteration stops at the end of the shorter one. For each pair
    of blocks, every path in ``layer_names`` is replaced in the target block
    via ``change_original_linear_to_reparam``, using the layer at the same
    path in the aux block.

    Args:
        target_model: Model whose layers are replaced in place.
        aux_model: Model supplying the auxiliary layers.
        layer_names: Dotted attribute paths, relative to each block.
        main_body_name: Dotted attribute path of the iterable of blocks.
    """
    block_pairs = zip(
        _get_attr_by_name(target_model, main_body_name),
        _get_attr_by_name(aux_model, main_body_name),
    )
    for dst_block, src_block in block_pairs:
        for name in layer_names:
            change_original_linear_to_reparam(dst_block, src_block, name)