load_label.py
import numpy as np
import pandas as pd

def load_excel(dataset_name):
    """Load the annotation spreadsheet for CAS(ME)^2 or SAMM Long Videos and
    return it as a DataFrame with unified 'videoCode'/'subjectCode' columns."""
    if dataset_name == 'CASME_sq':
        xl = pd.ExcelFile(dataset_name + '/code_final.xlsx')  # Specify the directory of the excel file
        colsName = ['subject', 'video', 'onset', 'apex', 'offset', 'au', 'emotion', 'type', 'selfReport']
        codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName)  # Get the annotation data
        videoNames = []
        for videoName in codeFinal.iloc[:, 1]:
            videoNames.append(videoName.split('_')[0])
        codeFinal['videoName'] = videoNames
        naming1 = xl.parse(xl.sheet_names[2], header=None, converters={0: str})
        dictVideoName = dict(zip(naming1.iloc[:, 1], naming1.iloc[:, 0]))
        codeFinal['videoCode'] = [dictVideoName[i] for i in codeFinal['videoName']]
        naming2 = xl.parse(xl.sheet_names[1], header=None)
        dictSubject = dict(zip(naming2.iloc[:, 2], naming2.iloc[:, 1]))
        codeFinal['subjectCode'] = [dictSubject[i] for i in codeFinal['subject']]
    elif dataset_name == 'SAMMLV':
        xl = pd.ExcelFile(dataset_name + '/SAMM_LongVideos_V2_Release.xlsx')
        colsName = ['Subject', 'Filename', 'Inducement Code', 'Onset', 'Apex', 'Offset', 'Duration', 'Type', 'Action Units', 'Notes']
        codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName, skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        videoNames = []
        subjectName = []
        for videoName in codeFinal.iloc[:, 1]:
            videoNames.append(str(videoName).split('_')[0] + '_' + str(videoName).split('_')[1])
            subjectName.append(str(videoName).split('_')[0])
        codeFinal['videoCode'] = videoNames
        codeFinal['subjectCode'] = subjectName
    # Synchronize the column names with CAS(ME)^2
    codeFinal.rename(columns={'Type': 'type', 'Onset': 'onset', 'Offset': 'offset', 'Apex': 'apex'}, inplace=True)
    print('Data Columns:', codeFinal.columns)  # Final data columns
    return codeFinal
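
# A minimal usage sketch (not part of the original file; it assumes the dataset
# folders sit next to this script, as implied by the relative paths above):
#   codeFinal = load_excel('CASME_sq')   # reads CASME_sq/code_final.xlsx
#   codeFinal = load_excel('SAMMLV')     # reads SAMMLV/SAMM_LongVideos_V2_Release.xlsx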

def load_gt(dataset_name, expression_type, images, subjectsVideos, subjects, codeFinal):
    """Collect the ground-truth [onset, offset] frame intervals for each video and
    drop the subjects/videos that contain no sample of the requested expression type."""
    dataset_expression_type = expression_type
    if dataset_name == 'SAMMLV' and expression_type == 'micro-expression':
        dataset_expression_type = 'Micro - 1/2'
    elif dataset_name == 'SAMMLV' and expression_type == 'macro-expression':
        dataset_expression_type = 'Macro'
    vid_need = []
    vid_count = 0
    ground_truth = []
    for sub_video_each_index, sub_vid_each in enumerate(subjectsVideos):
        ground_truth.append([])
        for videoIndex, videoCode in enumerate(sub_vid_each):
            on_off = []
            for i, row in codeFinal.iterrows():
                if row['subjectCode'] == subjects[sub_video_each_index]:  # S15, S16... for CAS(ME)^2; 001, 002... for SAMMLV
                    if row['videoCode'] == videoCode:
                        if row['type'] == dataset_expression_type:  # Micro-expression or macro-expression
                            if row['offset'] == 0:  # Take the apex if the offset is 0
                                on_off.append([int(row['onset'] - 1), int(row['apex'] - 1)])
                            else:
                                if dataset_expression_type != 'Macro' or int(row['onset']) != 0:  # Ignore samples that are extremely long in SAMMLV
                                    on_off.append([int(row['onset'] - 1), int(row['offset'] - 1)])
            if len(on_off) > 0:
                vid_need.append(vid_count)  # Record the videos that are needed
            ground_truth[-1].append(on_off)
            vid_count += 1

    # Remove unused videos
    final_samples = []
    final_videos = []
    final_subjects = []
    count = 0
    for subjectIndex, subject in enumerate(ground_truth):
        final_samples.append([])
        final_videos.append([])
        for samplesIndex, samples in enumerate(subject):
            if count in vid_need:
                final_samples[-1].append(samples)
                final_videos[-1].append(subjectsVideos[subjectIndex][samplesIndex])
                final_subjects.append(subjects[subjectIndex])
            count += 1

    # Remove the empty entries from the arrays
    final_subjects = np.unique(final_subjects)
    final_videos = [ele for ele in final_videos if ele != []]
    final_samples = [ele for ele in final_samples if ele != []]
    final_images = [images[i] for i in vid_need]
    print('Total Videos:', len(final_images))
    return final_images, final_videos, final_subjects, final_samples
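
# For illustration (hypothetical values, not from the datasets): final_samples is
# nested per subject, then per video, as 0-indexed [onset, offset] frame pairs, e.g.
#   final_samples = [[[[12, 25]], [[40, 52], [80, 95]]],  # subject 1: two kept videos
#                    [[[7, 18]]]]                         # subject 2: one kept video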

def cal_k(dataset_name, expression_type, final_samples):
    """Return k, half of the average expression length (in frames) over all samples."""
    samples = [sample for subject in final_samples for video in subject for sample in video]
    total_duration = 0
    for sample in samples:
        total_duration += sample[1] - sample[0]
    N = total_duration / len(samples)
    k = int((N + 1) / 2)
    print('k (Half of average length of expression) =', k)
    return k
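
# End-to-end usage sketch (an assumption, not part of the original file): the frame
# data (images, subjectsVideos, subjects) is expected to come from a companion
# loader, named load_images() here purely for illustration.
#
#   dataset_name = 'CASME_sq'
#   expression_type = 'micro-expression'
#   codeFinal = load_excel(dataset_name)
#   images, subjectsVideos, subjects = load_images(dataset_name)  # hypothetical helper
#   final_images, final_videos, final_subjects, final_samples = load_gt(
#       dataset_name, expression_type, images, subjectsVideos, subjects, codeFinal)
#   k = cal_k(dataset_name, expression_type, final_samples)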