forked from LBH1024/CAN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
158 lines (133 loc) · 5.52 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import cv2
import yaml
import math
import torch
import numpy as np
from difflib import SequenceMatcher
def load_config(yaml_path):
    """Load the experiment configuration from a YAML file and fill in defaults.

    The file is first read with the platform's default text encoding. If that
    fails to decode or to parse, it is read again as UTF-8.

    Args:
        yaml_path: Path to the YAML configuration file.

    Returns:
        dict: The parsed configuration. ``train_parts`` and ``valid_parts``
        default to 1, ``valid_start`` defaults to 0 and
        ``attention.word_conv_kernel`` defaults to 1 when they are absent.

    Raises:
        SystemExit: With status -1 when ``experiment``, ``train_image_path``,
            ``train_label_path`` or ``word_path`` is missing or empty.
        KeyError: If the configuration has no ``attention`` section.
        OSError: If the file cannot be opened.
    """
    # NOTE(review): FullLoader can construct more than plain data types; this
    # is acceptable for local, trusted config files only. Consider
    # yaml.safe_load if configs can ever come from an untrusted source.
    try:
        with open(yaml_path, 'r') as f:
            params = yaml.load(f, Loader=yaml.FullLoader)
    except (UnicodeDecodeError, yaml.YAMLError):
        # Only decoding/parsing problems justify the UTF-8 retry. A missing
        # file or a KeyboardInterrupt must surface directly instead of being
        # masked by a misleading "trying UTF-8" message.
        print('尝试UTF-8编码....')
        with open(yaml_path, 'r', encoding='UTF-8') as f:
            params = yaml.load(f, Loader=yaml.FullLoader)

    # An empty YAML document parses to None; treat it as an empty mapping so
    # the required-key checks below report the problem.
    if params is None:
        params = {}

    # Required entries and the message printed when one is missing or empty.
    required = (
        ('experiment', '实验名不能为空!'),
        ('train_image_path', '训练图片路径不能为空!'),
        ('train_label_path', '训练label路径不能为空!'),
        ('word_path', 'word dict路径不能为空!'),
    )
    for key, message in required:
        # .get() makes an absent key behave like an empty one, so the user
        # sees the intended message rather than a KeyError traceback.
        if not params.get(key):
            print(message)
            raise SystemExit(-1)

    # Optional entries: only set when the user did not provide them.
    params.setdefault('train_parts', 1)
    params.setdefault('valid_parts', 1)
    params.setdefault('valid_start', 0)
    if 'word_conv_kernel' not in params['attention']:
        params['attention']['word_conv_kernel'] = 1
    return params
def update_lr(optimizer, current_epoch, current_step, steps, epochs, initial_lr):
    """Compute the learning rate for the current step and apply it in place.

    Schedule:
      * ``current_epoch < 1``: linear warm-up,
        ``initial_lr / steps * (current_step + 1)``.
      * ``1 <= current_epoch <= 200``: half-cosine decay whose horizon is a
        fixed 200 epochs.
      * otherwise: the same half-cosine decay with a horizon of ``epochs``.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts
            whose ``'lr'`` entry is overwritten.
        current_epoch: Index of the current epoch.
        current_step: Index of the current step inside the epoch.
        steps: Step count used to scale the schedule (presumably the number
            of steps per epoch -- confirm against the caller).
        epochs: Cosine horizon used once ``current_epoch`` exceeds 200.
        initial_lr: Peak learning rate.

    Returns:
        None. The optimizer is modified in place.
    """
    if current_epoch < 1:
        new_lr = initial_lr / steps * (current_step + 1)
    else:
        # Steps elapsed since the end of the warm-up epoch.
        elapsed = current_step + 1 + (current_epoch - 1) * steps
        horizon = 200 if current_epoch <= 200 else epochs
        cosine = math.cos(elapsed * math.pi / (horizon * steps))
        new_lr = 0.5 * (1 + cosine) * initial_lr

    for group in optimizer.param_groups:
        group['lr'] = new_lr
def save_checkpoint(model, optimizer, word_score, ExpRate_score, epoch, optimizer_save=False, path='checkpoints', multi_gpu=False, local_rank=0):
    """Serialize the model (and optionally the optimizer) to a ``.pth`` file.

    The file is written to
    ``<path>/<model.name>/<model.name>_WordRate-<w>_ExpRate-<e>_<epoch>.pth``
    with both scores formatted to four decimals. The target directory is
    created when it does not exist yet.

    Args:
        model: Object exposing ``name`` and ``state_dict()``.
        optimizer: Object exposing ``state_dict()``; only read when
            ``optimizer_save`` is true.
        word_score: Word-level score embedded in the file name.
        ExpRate_score: Expression-level score embedded in the file name.
        epoch: Epoch identifier embedded in the file name.
        optimizer_save: When true, the optimizer state is stored under the
            ``'optimizer'`` key next to ``'model'``.
        path: Root directory for checkpoints.
        multi_gpu: Accepted for interface compatibility; not used here.
        local_rank: Accepted for interface compatibility; not used here.

    Returns:
        str: The path of the file that was written.
    """
    filename = f'{os.path.join(path, model.name)}/{model.name}_WordRate-{word_score:.4f}_ExpRate-{ExpRate_score:.4f}_{epoch}.pth'

    # torch.save does not create missing parent directories, so a first run
    # would otherwise fail after a full epoch of training.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    state = {'model': model.state_dict()}
    if optimizer_save:
        state['optimizer'] = optimizer.state_dict()

    torch.save(state, filename)
    print(f'Save checkpoint: {filename}\n')
    return filename
def load_checkpoint(model, optimizer, path):
    """Restore model weights, and optimizer state when available, from a file.

    Args:
        model: Object exposing ``load_state_dict``; receives the entry stored
            under ``'model'``.
        optimizer: Object exposing ``load_state_dict``, or ``None``.
        path: Checkpoint file to read; tensors are mapped to the CPU.

    Returns:
        None. ``model`` (and possibly ``optimizer``) are updated in place.
    """
    checkpoint = torch.load(path, map_location='cpu')

    # The optimizer is restored only when the caller supplied one and the
    # checkpoint actually carries its state; otherwise a notice is printed.
    has_optimizer_state = 'optimizer' in checkpoint
    if optimizer is None or not has_optimizer_state:
        print('No optimizer in the pretrained model')
    else:
        optimizer.load_state_dict(checkpoint['optimizer'])

    model.load_state_dict(checkpoint['model'])
class Meter:
    """Collect a stream of values and expose a plain and a smoothed mean."""

    def __init__(self, alpha=0.9):
        """Create an empty meter.

        Args:
            alpha: Weight kept from the previous smoothed value on each
                update; the new value gets ``1 - alpha``.
        """
        self.alpha = alpha
        # Every value passed to add(), in insertion order.
        self.nums = []
        # Exponentially smoothed value; stays 0 until the first add().
        self.exp_mean = 0

    @property
    def mean(self):
        """Return ``np.mean`` over all values recorded so far."""
        return np.mean(self.nums)

    def add(self, num):
        """Record ``num`` and fold it into the smoothed mean."""
        if not self.nums:
            # Seed with the first sample so smoothing does not start from 0.
            self.exp_mean = num
        self.nums.append(num)
        decay = self.alpha
        self.exp_mean = decay * self.exp_mean + (1 - decay) * num
def cal_score(word_probs, word_label, mask):
    """Score a batch of predicted token sequences against their labels.

    For every sample the prediction (arg-max of ``word_probs`` over its last
    axis) and the label are truncated to the number of valid positions given
    by the sum of the sample's mask row, then compared with
    ``difflib.SequenceMatcher``.

    Args:
        word_probs: Tensor indexed as (sample, position, class); ``max(2)``
            is taken over the class axis.
        word_label: Tensor of label ids, one row per sample.
        mask: Tensor with one row per sample whose sum is the number of valid
            leading positions.

    Returns:
        tuple: ``(word_score, ExpRate)`` where ``word_score`` is the mean
        per-sample similarity (``numpy`` float) and ``ExpRate`` is the
        fraction of samples whose similarity is exactly 1.

    Raises:
        ValueError: If ``word_probs`` is ``None``. Previously this surfaced
            as an unrelated unbound-name error further down.
    """
    if word_probs is None:
        raise ValueError('word_probs must not be None')

    _, word_pred = word_probs.max(2)
    labels = word_label.cpu().detach().numpy()
    preds = word_pred.cpu().detach().numpy()
    masks = mask.cpu().detach().numpy()

    word_scores = []
    line_right = 0
    for label_row, pred_row, mask_row in zip(labels, preds, masks):
        # Compute the valid length and the slices once per sample.
        valid_len = int(np.sum(mask_row))
        target = label_row[:valid_len]
        guess = pred_row[:valid_len]

        ratio = SequenceMatcher(None, target, guess, autojunk=False).ratio()
        if len(target) == 0:
            # An all-zero mask leaves nothing to compare. SequenceMatcher
            # already rates two empty sequences as 1.0, so use that directly
            # instead of dividing by the zero label length.
            score = ratio
        else:
            score = ratio * (len(target) + len(guess)) / len(target) / 2

        word_scores.append(score)
        # A sample counts as fully correct only on an exact match.
        if score == 1:
            line_right += 1

    batch_size = len(word_scores)
    ExpRate = line_right / batch_size
    word_scores = np.mean(word_scores)
    return word_scores, ExpRate
def draw_attention_map(image, attention):
    """Overlay a min-max normalized attention heat map on a grayscale image.

    Args:
        image: 2-D array (unpacked as ``h, w``).
        attention: Array resized with ``cv2.resize`` to the image size.

    Returns:
        The blend ``0.4 * heatmap + 0.6 * image`` produced by
        ``cv2.addWeighted``, where the heat map uses the JET colour map and
        the image is replicated into three channels.
    """
    h, w = image.shape
    attention = cv2.resize(attention, (w, h))

    lowest = np.min(attention)
    span = np.max(attention) - lowest
    if span > 0:
        scaled = (attention - lowest) / span * 255
    else:
        # A constant map has no range to stretch; dividing by zero would
        # yield NaNs and an undefined uint8 cast. Render it as the lowest
        # intensity instead.
        scaled = np.zeros_like(attention)
    attention_heatmap = scaled.astype(np.uint8)
    attention_heatmap = cv2.applyColorMap(attention_heatmap, cv2.COLORMAP_JET)

    # Replicate the single channel so the image matches the 3-channel heat map.
    image_new = np.stack((image, image, image), axis=-1).astype(np.uint8)
    attention_map = cv2.addWeighted(attention_heatmap, 0.4, image_new, 0.6, 0.)
    return attention_map
def draw_counting_map(image, counting_attention):
    """Overlay a counting-attention heat map on a grayscale image.

    Args:
        image: 2-D array (unpacked as ``h, w``).
        counting_attention: Torch tensor; values are clamped to ``[0, 1]``
            before being scaled to 0-255.

    Returns:
        The blend ``0.4 * heatmap + 0.6 * image`` produced by
        ``cv2.addWeighted``, where the heat map uses the JET colour map and
        the image is replicated into three channels.
    """
    h, w = image.shape

    clamped = torch.clamp(counting_attention, 0.0, 1.0)
    # Tensor.numpy() refuses tensors that live on an accelerator or that
    # require grad, so detach and move to the CPU first. For a plain CPU
    # tensor this is a no-op.
    counting_attention = clamped.detach().cpu().numpy()
    counting_attention = cv2.resize(counting_attention, (w, h))

    counting_attention_heatmap = (counting_attention * 255).astype(np.uint8)
    counting_attention_heatmap = cv2.applyColorMap(counting_attention_heatmap, cv2.COLORMAP_JET)

    # Replicate the single channel so the image matches the 3-channel heat map.
    image_new = np.stack((image, image, image), axis=-1).astype(np.uint8)
    counting_map = cv2.addWeighted(counting_attention_heatmap, 0.4, image_new, 0.6, 0.)
    return counting_map
def cal_distance(word1, word2):
    """Return the Levenshtein edit distance between two sequences.

    Insertions, deletions and substitutions each cost 1. Elements are
    compared with ``!=``, so any pair of indexable sequences works (strings,
    lists of tokens, ...).

    Args:
        word1: First sequence.
        word2: Second sequence.

    Returns:
        int: Minimum number of single-element edits turning ``word1`` into
        ``word2``.
    """
    rows, cols = len(word1), len(word2)
    if not rows or not cols:
        # One side is empty: the distance is the length of the other side.
        return rows + cols

    # previous[j] holds the distance between the first i-1 elements of word1
    # and the first j elements of word2; only two rows are needed at a time.
    previous = list(range(cols + 1))
    for i, left in enumerate(word1, start=1):
        current = [i]
        for j, right in enumerate(word2, start=1):
            delete = previous[j] + 1
            insert = current[j - 1] + 1
            replace = previous[j - 1]
            if left != right:
                replace += 1
            current.append(min(delete, insert, replace))
        previous = current
    return previous[cols]
def compute_edit_distance(prediction, label):
    """Return the token-level edit distance between two strings.

    Both strings are stripped of surrounding whitespace and split on single
    space characters; the resulting token lists are compared with
    ``cal_distance``.

    Args:
        prediction: Predicted string of space-separated tokens.
        label: Reference string of space-separated tokens.

    Returns:
        int: Edit distance between the two token lists.
    """
    predicted_tokens, label_tokens = (
        text.strip().split(' ') for text in (prediction, label)
    )
    return cal_distance(predicted_tokens, label_tokens)