-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrsa.py
194 lines (140 loc) · 6.34 KB
/
rsa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import scipy
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
from scipy.spatial import distance
from itertools import combinations
from pingouin import partial_corr
from numpy import arctanh
def make_rdm_from_1darray(arr, metric='correlation'):
'''1d array to rdm'''
flat_distances = distance.pdist(np.atleast_2d(arr).T, metric=metric)
rdm = distance.squareform(flat_distances)
return rdm
def make_rdm_from_2darray(arr, metric='correlation'):
'''
2d array to rdm.
arr : 2D array (e.g. trials x sensors; trials x word2vec_features). RDMs over first dimension (trials)
(Currently using this by subsetting at a specific time point and getting rdm accross trials and sensors)
'''
flat_distances = distance.pdist(arr, metric=metric)
rdm = distance.squareform(flat_distances)
return rdm
def correlate_model_and_rdm_timecourses(rdm_tc, model_rdm):
'''
Correlate model RDM with timecourse of RDMs, at each time-point
rdm_tc : np.array
one rdm per time point, shape is (ntrials, ntrials, ntimes)
model_rdm : np.array (ntrials x ntrials)
returns
----------
corrs : Shape ntimes for orrelation coefficients of the rdms
pvals : Shape ntimes for pvals of the correlation coefficients of the rdms
'''
# calculate correlation accross words and images, at each time point
corrs = np.zeros(rdm_tc.shape[2])
pvals = np.zeros(rdm_tc.shape[2])
ntimes = rdm_tc.shape[2]
rdm_tc_flat = rdm_tc.reshape(-1,ntimes)
model_rdm_flat = model_rdm.reshape(-1)
for i in range(ntimes):
sub_rdm = rdm_tc_flat[:,i]
corrs[i], pvals[i] = pearsonr(sub_rdm, model_rdm_flat)
return corrs, pvals
def partial_correlation_model_and_rdm_timecourses(rdm_tc, model1_rdm, model2_rdm, method='pearson'):
'''
Partial correlation of model1 RDM with timecourse of RDMs, at each time-point
after partialling out model2 RDM.
rdm_tc : np.array
one rdm per time point, shape is (ntrials, ntrials, ntimes)
model1_rdm : np.array (ntrials x ntrials)
model2_rdm : np.array (ntrials x ntrials)
method: str
Correlation type (see pingouin.partial_corr)
returns
----------
corrs : Shape ntimes for orrelation coefficients of the rdms
pvals : Shape ntimes for pvals of the correlation coefficients of the rdms
'''
# calculate correlation accross words and images, at each time point
corrs = np.zeros(rdm_tc.shape[2])
pvals = np.zeros(rdm_tc.shape[2])
ntimes = rdm_tc.shape[2]
rdm_tc_flat = rdm_tc.reshape(-1,ntimes)
model1_rdm_flat = model1_rdm.reshape(-1)
model2_rdm_flat = model2_rdm.reshape(-1)
for i in range(ntimes):
sub_rdm = rdm_tc_flat[:,i]
df = pd.DataFrame({'sub_rdm':sub_rdm,
'model1_rdm_flat':model1_rdm_flat,
'model2_rdm_flat':model2_rdm_flat})
stats = partial_corr(df, x='sub_rdm', y='model1_rdm_flat', y_covar='model2_rdm_flat', method=method)
corrs[i] = stats['r']
pvals[i] = stats['p-val']
return corrs, pvals
def correlate_rdm_timecourses(rdm_tc1, rdm_tc2):
'''
Correlate 2 timecourses of RDMs, usually empirical RDMs from epochs/evokeds
rdm_tc1, rdm_tc2 : np.array
one rdm per time point, shape is (ntrials, ntrials, ntimes)
returns
----------
corrs : ntimes x ntimes correlation coefficients of the rdms
pvals : ntimes x ntimes pvals of the correlation coefficients of the rdms
'''
# calculate correlation accross words and images, at each time point
corrs = np.zeros([rdm_tc1.shape[2], rdm_tc2.shape[2]])
pvals = np.zeros([rdm_tc1.shape[2], rdm_tc2.shape[2]])
ntimes = rdm_tc1.shape[2]
rdm1_flat = rdm_tc1.reshape(-1,ntimes)
rdm2_flat = rdm_tc2.reshape(-1,ntimes)
for i in range(ntimes):
sub_rdm1 = rdm1_flat[:,i]
for j in range(ntimes):
sub_rdm2 = rdm2_flat[:,j]
corrs[i,j], pvals[i,j] = pearsonr(sub_rdm1, sub_rdm2)
return corrs, pvals
def rsa_anderson_etal_2016(data_rdm, model_rdm, verbose=True):
'''
Decoding using the RSA methods in:
Anderson, A. J., Zinszer, B. D., & Raizada, R. D. (2016). Representational similarity encoding for fMRI: Pattern-based synthesis to predict brain activity using stimulus-model-similarities. NeuroImage, 128, 44-53.
data_rdm : np.array of size (n_exemplars x n_exemplars)
model_rdm : np.array of size (n_exemplars x n_exemplars)
returns
--------
accuracy : float
Percentage accuracy over all possible pairs
'''
assert data_rdm.shape == model_rdm.shape, 'Model and data rdm do not have the same shape'
# Get all possible pairs of concepts
conc_pairs_idx = list(combinations(range(data_rdm.shape[0]), 2 ))
hits = [] # will count correctly decoded pairs
# Counter for printing progress bar
n_comparisons = len(conc_pairs_idx)
counter = 0
for idx, pair in enumerate(conc_pairs_idx):
counter += 1
if verbose: print(counter, '/', n_comparisons, end='\r')
# (1) ----- Extract brain and model vector from dsms
model_1 = model_rdm[:, pair[0]]
model_2 = model_rdm[:, pair[1]]
brain_1 = data_rdm[:, pair[0]]
brain_2 = data_rdm[:, pair[1]]
# extract labels, in case needed
#label_1 = exemplar_labels[pair[0]] # can also use lexical_stats.concept. Same order
#label_2 = exemplar_labels[pair[1]]
# (2) ----- Get rid of the element that represents the test items
model_1 = np.delete(model_1, pair)
model_2 = np.delete(model_2, pair)
brain_1 = np.delete(brain_1, pair)
brain_2 = np.delete(brain_2, pair)
# (3) Correlate the model and brain vectors.
# here a hit is when corr(brain1, model1) + corr(brain2, model2) > corr(brain1, model2) + corr(brain2, model1)
# Note the correlations are first z transformed using np.arctanh
true_pairing_corr = arctanh(pearsonr(brain_1, model_1))[0] + arctanh(pearsonr(brain_2, model_2))[0]
shuffled_pairing_corr = arctanh(pearsonr(brain_1, model_2))[0] + arctanh(pearsonr(brain_2, model_1))[0]
if true_pairing_corr > shuffled_pairing_corr:
hits.append(1)
# compute accuracy over all possible pairs
accuracy = sum(hits) / len(conc_pairs_idx)
return accuracy