-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathROI_TempCorr.R
executable file
·952 lines (839 loc) · 50.4 KB
/
ROI_TempCorr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
#!/usr/bin/env Rscript
# run like
# ./ROI_TempCorr.R -ts /Volumes/Phillips/WorkingMemory/11155/20130418/SpatialWM_v1_run1_768x720.5/nfswkmtd_functional.nii.gz -brainmask /Volumes/Phillips/gm_50mask.nii
printHelp <- function() {
cat("ROI_TempCorr is a script that computes temporal correlations among ROIs defined by an integer-valued mask (e.g., a set of numbered spheres).",
"",
"Required inputs are:",
" -ts <4D file>: The file containing time series of interest. Usually preprocessed resting-state data. Can be NIfTI or AFNI BRIK/HEAD",
" -rois <3D file>: The integer-valued mask file defining the set of ROIs. Computed correlation matrices will be nROIs x nROIs in size based on this mask.",
" -out_file <filename for output>: The file to be output containing correlations among ROIs.",
"",
"Optional arguments are:",
" -help: this message",
" -pcorr_method <pearson|spearman|kendall|adalasso.net|lasso.net|pls.net|ridge.net|pcor.shrink|spcor|semi:*>: Method to compute partial correlation",
" pearson, spearman, and kendall compute the corresponding partial correlations (pcor function in ppcor package). BEWARE: Uses pseudoinverse for rank-deficient matrices.",
" adalasso.net uses adaptive LASSO regression with cross-validation to compute partial correlations (adalasso.net function in parcor package)",
" lasso.net uses LASSO regression (no adaptation) with cross-validation to compute partial correlations (adalasso.net function in parcor package)",
" pls.net uses partial least squares regression models to estimate partial correlations (pls.net function in parcor package)",
" ridge.net uses optimal ridge regression models to estimate partial correlations (ridge.net function in parcor package)",
" N.B.1",
" to use semi-partial, prepend 'semi:' to method e.g.",
" -pcorr_method semi:person,kendall",
" for semi partial person, and partial kendal",
" N.B.2",
" output is not square! roiXroi matrices concatinated for estimate, p.value, statistic + constant columns n, gp, and method",
" -pcorr_cvsplit <10>: For adalasso.net, lasso.net, pls.net, and ridge.net methods, the number of splits for k-fold cross-validation.",
" -corr_method <pearson|spearman|kendall|robust|mcd|weighted|donostah|M|pairwiseQC|pairwiseGK|cor.shrink>: Method to compute correlations among time series.",
" Default: pearson is the standard Pearson correlation",
" spearman is Spearman correlation based on ranks",
" kendall is Kendall's tau correlation (also based on ranks, but with somewhat better statistical properties than Spearman)",
" robust uses the covRob function from the robust package with estim=\"auto\" to obtain robust estimate of correlation (reduce sensitivity to outliers)",
" mcd, weighted, donostah, M, pairwiseQC, pairwiseGK are different robust estimators of correlation. See ?covRob in the robust package for details.",
" cor.shrink computes a shrinkage estimate of the correlation matrix by shrinking correlations towards the identity matrix. See ?cor.shrink in the corpcor package.",
" -roi_reduce <pca|median|mean|huber>: Method to obtain a single time series for voxels within an ROI. Default: pca",
" pca takes the first eigenvector within the ROI, representing maximal shared variance",
" median uses the median within each ROI",
" mean uses the mean within each ROI",
" huber uses the huber robust estimate of the center of the distribution (robust to outliers)",
" -roi_vals <roi_nums_csv>: comma separated list of roi values to use. should be subset of values in -rois image. eg. -roi_vals 1,5,10",
" -brainmask <3D file>: A 0/1 mask file defining voxels in the brain. This will be applied to the ROI mask before computing correlations.",
" -dropvols <integer=0>: The number of initial volumes to drop from all voxel time series prior to computing correlations (e.g., for steady state problems)",
" -censor <1D file>: An AFNI-style 1D censor file containing a single column of 0/1 values where 0 represents volumes to be censored (e.g., for motion scrubbing)",
" -fisherz: Apply Fisher's z transformation (arctanh) to normalize correlation coefficients. Not applied by default.",
" -keep_constant_values: default is to discard ROIs with constant values with warning (see -no_badmsg). This NAs instead. PROBABLY A BAD IDEA",
" -njobs <n>: Number of parallel jobs to run when computing correlations. Default: 4.",
" -na_string: Character string indicating how to represent missing correlations in output file. Default NA.",
" -no_badmsg: dont warn for all badvoxes",
" -ts_out_file <filename for time series output>: Output a file containing the average time series for each region before computing correlations.",
" -ts_only: stop before running correlations. useful with -ts_out_file",
" -ts+: additional time series files to append together. alternative to 3dTcat-ing multile files into one for -ts. can use more than one -ts+: -ts rest.nii.gz -ts+ rest2.nii.gz -ts+ task_background.nii.gz",
" -prewhiten_arima <p> <d> <q>: prewhiten all time series by fitting an ARIMA model of order p (autoregressive), d (integrated), q (moving average) before computing correlation.",
" -detrend_ts <0,1,2>: Demean (0), linear detrending (1) or quadratic detrending (2) is applied to ROI aggregated time series before correlations are computed. Default: 2",
" -min_vox 5: min number of non-constant voxels in timeseries to include roi. default ROI w/less than 5 good voxel timeseries is excluded",
" -nuisance_regressors <matrix.txt>: If passed in, the nuisance regressors (columns) of this white space-separated file will be regressed out of ROI aggregated time series prior to correlations.",
" -bandpass_filter dt freq_low freq_high: apply a FIR1 bandpass filter to ROI averaged data and nuisance regressors (if specified).",
" dt is the repetition time in seconds, freq_low is the lowest frequency to pass in Hz, freq_high is the highest frequency to pass in Hz",
" -roi_diagnostics <file>: Create a file with information about the number of voxels per ROI in the mask versus the data, including the proportion missing.",
" -write_header 1/0: Whether to include a header row with variable names in the output. Variables are named according to their corresponding mask values. Default: 0",
"",
"If the -ts file does not match the -rois file, the -ts file will be resampled to match the -rois file using 3dresample. This requires that the images be coregistered,",
" in the same stereotactic space, and have the same grid size.",
"",
"Multiple correlation methods can be computed at once by passing a comma-separated list. Both partial and full correlations computation can also be combined.",
" Example: -corr_method pearson,cor.shrink -pcor_method pcor.shrink,adalasso.net",
"",
"The script depends on the following R libraries: foreach, doParallel, MASS, oro.nifti, parcor, ppcor, pracma, and robust. These can be installed using:",
" install.packages(c(\"foreach\", \"doParallel\", \"MASS\", \"oro.nifti\", \"ppcor\", \"pracma\", \"ppcor\", \"robust\", \"tictoc\", \"forecast\", \"lmtest\")); remotes::install_github(c('cran/ppls','cran/parcor'))",
sep="\n")
}
#read in command line arguments.
# 20240214: hack to use ARGS in interactive mode for debugging
if (sys.nframe() == 0) { # is run from command line
args <- commandArgs(trailingOnly = FALSE)
scriptpath <- dirname(sub("--file=", "", grep("--file=", args, fixed=TRUE, value=TRUE), fixed=TRUE))
quitfn <- function(...) quit(...)
} else { # is sourced
args <- ARGS
scriptpath <- dirname(parent.frame(2)$ofile) #scriptpath <- this.path::here()
cat("scriptpath:",scriptpath)
cat("ARGS:",ARGS, "\n")
cat("args:",args, "\n")
quitfn <- function(...) stop("interactive use! want to quit but stoping instead")
# ARGS <- c("--args", unlist(stringr::str_split("-njobs 1 -ts /tmp/short_amartest.nii.gz -rois /Volumes/Hera/preproc/7TBrainMech_rest/MHRest_nost_nowarp_nosmooth/10129_20180917/warps/2024/feb2024_mask_amyghpc_epires.nii.gz -out_file /Volumes/Hera/Amar/amyg_7T/output/roitempcorr/background/jan2024/10129_20180917_frontoamyg_mean_gsr_pearson_adj_wf.txt.gz -ts_out_file /Volumes/Hera/Amar/amyg_7T/output/roitempcorr/background/jan2024/10129_20180917_frontoamyg_mean_gsr_ts.txt -roi_reduce mean -corr_method pearson -fisherz -censor /tmp/short_censor.1D -write_header 1")))
# source(system('which ROI_TempCorr.R',intern=T), .GlobalEnv)
}
argpos <- grep("--args", args, fixed=TRUE)
if (length(argpos) > 0L) {
args <- args[(argpos+1):length(args)]
} else {
args <- c()
}
#contains runAFNICommand
source(file.path(scriptpath, "R_helper_functions.R"))
if (is.null(args) || length(args) == 0L) {
message("ROI_TempCorr expects at least -ts <4D file> -rois <3D file> -out_file <filename for output>.\n")
printHelp()
quitfn(save="no", 1, FALSE)
}
# help doesn't need a failed exit status
# but we shouldn't do anything else but show help
if(any(c("-h","-help","--help") %in% args)){
printHelp()
quitfn(save="no", 0, FALSE)
}
#defaults
njobs <- 4
out_file <- "corr_rois.txt"
ts_out_file <- ""
no_badmsg <- FALSE
keep_constant_vals <- FALSE
fname_censor1D <- NULL
fname_brainmask <- NULL
cm <- "pearson"
cm_partial <- FALSE #TRUE for partial correlation methods
pm <- c()
pm_partial <- c()
pm_semi <- c()
roi_reduce <- "mean" #see Varoquax and Craddock 2013 for why mean is a better choice than first eigenvariate
roi_diagnostics_fname <- NULL
fisherz <- FALSE
drop_vols <- 0
pcorr_cvsplit <- 10 #default 10-fold cross-validation for parcor functions
ts_only <- FALSE # only compute timeseries, not correlation matrix
write_header <- FALSE #whether to include mask value as a header row in outputs
na_string <- "NA"
clustersocketport <- 11290 #default port for parallel cluster
fit_p <- NULL #autoregressive order for ARIMA
fit_d <- NULL #differencing order for ARIMA
fit_q <- NULL #moving average order from ARIMA
delta_t <- NULL #time in seconds, used for bandpass filtering
freq_low <- NULL #filter low frequency cutoff in Hz
freq_high <- NULL #filter high frequency cutoff in Hz
nuisance_regressors <- NULL #filename of nuisance regressors to be removed from ROI time series
detrend_order <- 2 #deterministic detrending order (0, 1, or 2) to be projected from ROI time series
roi_arima_fits <- NULL #contains the fits for ARIMA models to each time series, if relevant
white_lags <- 6 #hard code for now: the number of lags to test with Breusch-Godfrey whiteness test in ARIMA
MASKVALS_ARG <- NULL # from -roi_vals. as.numeric of csv input list
fname_rsproc_add <- c() # combining timeseries
min_vox_varts <- 5 # 20240214. minimum number of non-constant voxels within ROI in oder to not discard whole ROI
#for testing
#fname_rsproc <- "/gpfs/group/mnh5174/default/MMClock/MR_Proc/11336_20141204/mni_nosmooth_aroma_hp/rest1/Abrnawuktm_rest1.nii.gz" #name of preprocessed fMRI data
#fname_rsproc <- "/gpfs/group/mnh5174/default/MMClock/MR_Proc/11336_20141204/mni_5mm_3ddespike/rest1/brnswudktm_rest1_5.nii.gz" #name of preprocessed fMRI data
#fname_rsproc <- "nawuktm_rest1.nii.gz" #name of preprocessed fMRI data
#fname_roimask <- "/gpfs/group/mnh5174/default/lab_resources/parcellation/final_combined/7_Networks/Schaefer421_full_final_revised2018_groupmask95.nii.gz"
#fname_roimask <- "Schaefer_422_final_jul2018.nii.gz"
#fname_brainmask <- "/gpfs/group/mnh5174/default/lab_resources/standard/mni_icbm152_nlin_asym_09c/mni_icbm152_t1_tal_nlin_asym_09c_mask_2.3mm.nii"
#fname_brainmask <- "MNI152_T1_2.3mm_brain_mask.nii"
#nuisance_regressors <- "/gpfs/group/mnh5174/default/MMClock/MR_Proc/11336_20141204/mni_5mm_3ddespike/rest1/nuisance_regressors.txt"
#nuisance_regressors <- "unfiltered_nuisance_regressors.txt" #name of preprocessed fMRI data
#nuisance_regressors <- NULL
#fname_censor1D <- "/gpfs/group/mnh5174/default/MMClock/MR_Proc/10637_20140304/mni_5mm_wavelet/clock1/motion_info/censor_union.1D"
#fit_p <- 3
#fit_q <- 2
#fit_d <- 0
#delta_t = 1
#freq_low = .009
#freq_high = 1
#detrend_order = 2
#roi_reduce = "mean"
argpos <- 1
while (argpos <= length(args)) {
if (args[argpos] == "-ts") {
fname_rsproc <- args[argpos + 1] #name of preprocessed fMRI data
stopifnot(file.exists(fname_rsproc))
argpos <- argpos + 2
} else if (args[argpos] == "-ts+") {
fname_rsproc_new<- args[argpos + 1]
stopifnot(file.exists(fname_rsproc_new))
fname_rsproc_add <- c(fname_rsproc_add, fname_rsproc_new)
argpos <- argpos + 2
} else if (args[argpos] == "-rois") {
fname_roimask <- args[argpos + 1] #name of integer-valued ROI mask file
argpos <- argpos + 2
stopifnot(file.exists(fname_roimask))
} else if (args[argpos] == "-out_file") {
out_file <- args[argpos + 1] #name of file to be written
argpos <- argpos + 2
} else if (args[argpos] == "-ts_out_file") {
ts_out_file <- args[argpos + 1] #name of file to be written
argpos <- argpos + 2
} else if (args[argpos] == "-censor") {
fname_censor1D <- args[argpos + 1] #name of censor file
argpos <- argpos + 2
stopifnot(file.exists(fname_censor1D))
} else if (args[argpos] == "-roi_vals") {
MASKVALS_ARG <- as.numeric(strsplit(args[argpos+1],split=",")[[1]])
argpos <- argpos + 2
stopifnot(!any(is.na(MASKVALS_ARG)))
stopifnot(length(MASKVALS_ARG)>0)
} else if (args[argpos] == "-brainmask") {
fname_brainmask <- args[argpos + 1] #mask file for brain voxels
argpos <- argpos + 2
stopifnot(file.exists(fname_brainmask))
} else if (args[argpos] == "-njobs") {
njobs <- as.integer(args[argpos + 1])
argpos <- argpos + 2
if (is.na(njobs)) { stop("-njobs must be an integer") }
} else if (args[argpos] == "-roi_diagnostics") {
roi_diagnostics_fname <- args[argpos + 1]
argpos <- argpos + 2
} else if (args[argpos] == "-roi_reduce") {
roi_reduce <- args[argpos + 1]
argpos <- argpos + 2
stopifnot(roi_reduce %in% c("pca", "mean", "median", "huber"))
} else if (args[argpos] == "-corr_method") {
cm <- strsplit(args[argpos + 1], ",")[[1]]
cm_partial <- rep(FALSE, length(cm))
argpos <- argpos + 2
stopifnot(all(cm %in% c("pearson", "spearman", "robust", "kendall", "mcd", "weighted", "donostah", "M", "pairwiseQC", "pairwiseGK", "cor.shrink")))
} else if (args[argpos] == "-pcorr_method") {
pm <- strsplit(args[argpos + 1], ",")[[1]]
pm_partial <- rep(TRUE, length(pm))
pm_semi <- grepl("^semi:", pm)
print(pm_semi)
pm <- gsub("^semi:", "", pm)
argpos <- argpos + 2
stopifnot(all(pm %in% c("pearson", "spearman", "kendall", "adalasso.net", "lasso.net", "pls.net", "ridge.net", "pcor.shrink")))
} else if (args[argpos] == "-pcorr_cvsplit") {
pcorr_cvsplit <- as.numeric(args[argpos + 1])
if (is.na(pcorr_cvsplit)) { stop("Could not understand argument ", args[argpos+1], "to -pcorr_cvsplit") }
argpos <- argpos + 2
} else if (args[argpos] == "-fisherz") {
fisherz <- TRUE
argpos <- argpos + 1
} else if (args[argpos] == "-ts_only") {
ts_only <- TRUE
argpos <- argpos + 1
} else if (args[argpos] == "-na_string") {
na_string <- args[argpos + 1]
argpos <- argpos + 2
} else if (args[argpos] == "-no_badmsg") {
no_badmsg <- TRUE
argpos <- argpos + 1
} else if (args[argpos] == "-keep_constant_values") { # 20240208 for Amar
keep_constant_vals <- TRUE
argpos <- argpos + 1
} else if (args[argpos] == "-dropvols") {
drop_vols <- as.numeric(args[argpos + 1]) #number of vols to drop
if (is.na(drop_vols)) { stop("Could not understand argument ", args[argpos+1], "to -dropvols") }
argpos <- argpos + 2
} else if (args[argpos] == "-port") {
clustersocketport <- as.integer(args[argpos + 1])
argpos <- argpos + 2
if (is.na(njobs)) { stop("-port must be an integer") }
} else if (args[argpos] == "-prewhiten_arima") {
fit_p <- as.integer(args[argpos + 1])
fit_d <- as.integer(args[argpos + 2])
fit_q <- as.integer(args[argpos + 3])
if (any(is.na(c(fit_p, fit_d, fit_q)))) { stop ("You must pass three integer arguments (p, d, q) to -prewhiten_arima") }
argpos <- argpos + 4
} else if (args[argpos] == "-nuisance_regressors") {
nuisance_regressors <- args[argpos + 1]
stopifnot(file.exists(nuisance_regressors))
argpos <- argpos + 2
} else if (args[argpos] == "-bandpass_filter") {
delta_t <- as.numeric(args[argpos + 1]) #repetition time (sampling rate) in seconds
f_low <- as.numeric(args[argpos + 2]) #low frequency cutoff in Hz
f_high <- as.integer(args[argpos + 3]) #high frequency cutoff in Hz
if (any(is.na(c(delta_t, f_low, f_high)))) { stop ("You must pass three numeric arguments (dt, freq_low, freq_high) to -bandpass_filter") }
argpos <- argpos + 4
} else if (args[argpos] == "-detrend_ts") {
detrend_order <- as.integer(args[argpos + 1])
if (is.na(detrend_order) || ! detrend_order %in% c(1,2)) { stop ("-detrend_ts order must be an integer (1 or 2)") }
argpos <- argpos + 2
} else if (args[argpos] == "-write_header") {
write_header <- as.integer(args[argpos + 1])
argpos <- argpos + 2
if (is.na(write_header) || (!write_header %in% c(0,1))) { stop("-write_header must be 1 or 0")
} else { write_header <- as.logical(write_header) }
} else if (args[argpos] == "-min_vox") {
min_vox_varts <- as.integer(args[argpos + 1]) # 2024-09-16: finish change on 20240214 for AO.
argpos <- argpos+2
stopifnot(min_vox_varts>0)
} else {
stop("Not sure what to do with argument: ", args[argpos])
}
}
corr_method <- c(cm, pm) #put together full and partial methods
semi <- c(cm_partial, pm_semi) # cm_partial is all false
partial <- c(cm_partial, pm_partial)
#robust package uses "auto" to choose best robust estimator given problem complexity (matrix size)
corr_method <- ifelse(corr_method == "robust", "auto", corr_method)
# check for input sanity before spending time loading things in
if (ts_only && nchar(ts_out_file) == 0L) {
stop('-ts_only is set without -ts_out_file! No point in running.')
}
# handle package dependencies
for (pkg in c("methods", "foreach", "doParallel", "oro.nifti", "MASS", "corpcor", "tictoc", "pracma", "forecast", "lmtest")) {
if (!suppressMessages(require(pkg, character.only=TRUE))) {
message("Installing missing package dependency: ", pkg)
install.packages(pkg)
suppressMessages(require(pkg, character.only=TRUE))
}
}
# 20210128WF - parcor is not in CRAN for R4.0 b/c dependency 'ppls' has been orphaned
# both by Kraemer and origiginally packaged 2014-09-04
# X-CRAN-Comment: Orphaned and corrected on 2018-07-20 as check problems
# were not corrected despite reminders.
# https://cran-archive.r-project.org/web/checks/2018/2018-07-20_check_results_ppls.html
if (!suppressMessages(require(parcor))) {
if (! "remotes" %in% installed.packages()[,1]) install.packages('remotes')
remotes::install_github("cran/ppls") # Penalized Partial Least Squares
remotes::install_github("cran/parcor") # Regularized estimation of partial correlation matrices
}
if (!is.null(fname_censor1D)) {
stopifnot(file.exists(fname_censor1D))
censor1D <- read.table(fname_censor1D, header=FALSE)$V1
censor_vols <- which(censor1D == 0.0)
} else {
censor_vols <- c()
}
if (!is.null(nuisance_regressors)) {
message("Reading white space-separated nuisance regressors from: ", nuisance_regressors)
nuisance_df <- as.matrix(read.table(nuisance_regressors, header=FALSE))
message("Nuisance file has: ", nrow(nuisance_df), " timepoints (rows) and ", ncol(nuisance_df), " regressors (columns)")
}
#generate (robust or partial) correlation matrix given a set of time series.
genCorrMat <- function(roits, method="auto", fisherz=FALSE, partial=FALSE, roi_arima_fits=NULL, semi=FALSE) {
#roits should be an time x roi data.frame
suppressMessages(require(robust))
#assume that parallel has been setup upstream
njobs <- getDoParWorkers()
#sapply only works for data.frame
if (!inherits(roits, "data.frame")) stop("genCorrMat only works properly with data.frame objects.")
#remove missing ROI columns for estimating correlation
nacols <- which(sapply(roits, function(col) all(is.na(col))))
if (length(nacols) > 0) nona <- roits[,nacols*-1]
else nona <- roits
#All partial correlation methods are necessarily full matrix methods (since partial correlation is conditioned on remaining variables)
#Standard correlations are also computed on full matrices. Only the robust estimators of correlation blow up on singular matrices
if (!is.null(roi_arima_fits)) {
pairwise <- TRUE
} else if (method %in% c("pearson", "spearman", "kendall", "cor.shrink", "pcor.shrink", "adalasso.net", "lasso.net", "pls.net", "ridge.net")) {
pairwise <- FALSE
} else {
#estimators using covRob from robust package tend to expect positive definite matrix
pairwise <- TRUE
}
if (!pairwise) {
if (method=="cor.shrink") {
message("Estimating shrinkage estimates (always positive definite) of correlation matrix using cor.shrink")
rcorMat <- corpcor::cor.shrink(as.matrix(nona)) #omit lambda to estimate shrinkage using analytic formula
} else if (method=="pcor.shrink") {
message("Estimating shrinkage estimates (always positive definite) of partial correlation matrix using pcor.shrink")
rcorMat <- corpcor::pcor.shrink(as.matrix(nona)) #omit lambda to estimate shrinkage using analytic formula
} else if (method=="adalasso.net") {
rcorMat <- parcor::adalasso.net(as.matrix(nona), k=pcorr_cvsplit, both=TRUE)$pcor.adalasso
} else if (method=="lasso.net") {
rcorMat <- parcor::adalasso.net(as.matrix(nona), k=pcorr_cvsplit, both=FALSE)$pcor.lasso
} else if (method=="pls.net") {
rcorMat <- parcor::pls.net(as.matrix(nona), k=pcorr_cvsplit)$pcor
} else if (method=="ridge.net") {
rcorMat <- parcor::ridge.net(as.matrix(nona), k=pcorr_cvsplit)$pcor
} else if (partial) {
stopifnot(require(ppcor))
if (semi) {
message("ppcor::spcor func")
rcorMat <- ppcor::spcor(as.matrix(nona), method=method)
}else{
message("ppcor pcor func")
rcorMat <- ppcor::pcor(as.matrix(nona), method=method)
}
} else {
#conventional cor using specified method
rcorMat <- cor(as.matrix(nona), method=method)
}
#adalasso.net(X, k = 10,use.Gram=FALSE,both=TRUE,verbose=FALSE,intercept=TRUE)
#ridge.net(X, lambda, plot.it = FALSE, scale = TRUE, k = 10,verbose=FALSE)
#pls.net(X, scale = TRUE, k = 10, ncomp = 15,verbose=FALSE)
#come back to this: diagnostics on partial correlation?
#if (partial) {
# require(parcor)
# #at the moment, GeneNet does not expose ggm.test.edges, so set fdr=FALSE to avoid crash
# perf <- parcor::performance.pcor(rcorMat, true.pcor=NULL, fdr=FALSE, cutoff.ggm=0.8, verbose=FALSE, plot.it=FALSE)
# print(perf)
#}
} else {
#Due to rank degeneracy of many RS-fcMRI roi x time matrices, correlations are sometimes filled in pairwise.
#This is slow, of course, but necessary.
rcorMat <- matrix(NA, nrow=ncol(nona), ncol=ncol(nona))
diag(rcorMat) <- 1
#indices of lower triangle
lo.tri <- which(lower.tri(rcorMat), arr.ind=TRUE)
# how many chucks per core
# for small datasets, need to make sure we didn't pick too high a number
chunksPerProcessor <- 8
# if we only have one job, we want all the chunks together
# that is we want chunksize == nrow(lo.tri)
if(njobs == 1) { chunksPerProcessor <- 1 }
repeat {
chunksize <- floor(nrow(lo.tri)/njobs/chunksPerProcessor)
if(chunksize >= 1) break
# decrease chunk size and check we can still go lower
chunksPerProcessor <- chunksPerProcessor - 1
cat('WARNING: job is small, decreasing chunks to',chunksPerProcessor,'\n')
if (chunksPerProcessor < 1) { stop('too many jobs for too little work, lower -n') }
}
# let us know when we are asking for a lot of work
# the threshold is less for non-full cor types
chunksizewarn <- 2000
if (chunksize > chunksizewarn) message(sprintf('Lots of datapoints (%d) going to each processor. Consider increasing -njobs if this is slow',chunksize))
corrpair <- function(ts1, ts2, method="pearson") {
if (method %in% c("robust", "mcd", "weighted", "donostah", "M", "pairwiseQC", "pairwiseGK")) {
robust::covRob(na.omit(cbind(ts1, ts2)), estim=method, corr=TRUE)$cov[1,2]
} else {
cor(na.omit(cbind(ts1, ts2)), method=method)[1,2]
}
}
#do manual chunking: divide correlations across processors, where each processor handles 10 chunks in total (~350 corrs per chunk)
if (method %in% c("robust", "mcd", "weighted", "donostah", "M", "pairwiseQC", "pairwiseGK")) {
tictoc::tic("Estimating pairwise robust correlation estimates")
corrvec <- foreach(pair=iter(lo.tri, by="row", chunksize=chunksize), .inorder=TRUE, .combine=c, .multicombine=TRUE, .packages="robust") %dopar% {
#iter will pass entire chunk of lo.tri, use apply to compute row-wise corrs
apply(pair, 1, function(x) { corrpair(nona[,x[1]], nona[,x[2]]) })
}
tictoc::toc()
} else if (!is.null(roi_arima_fits)) {
#prewhiten ROI A based on ROI B
tictoc::tic("Estimating pairwise correlation estimates after prewhitening using ARIMA")
corrvec <- foreach(pair=iter(lo.tri, by="row", chunksize=chunksize), .inorder=TRUE, .combine=cbind, .multicombine=TRUE) %dopar% {
#iter will pass entire chunk of lo.tri, use apply to compute row-wise corrs
apply(pair, 1, function(x) {
cvec <- rep(0, 2) #filter with target as x, y, or just regular old correlation
for (filter_direction in 1:2) {
response <- ifelse(filter_direction==1, 1, 2)
input <- ifelse(filter_direction==1, 2, 1)
filter_model <- roi_arima_fits[[ x[response] ]]
ts_response <- residuals(filter_model)
mcoefs <- coef(filter_model)
armanames <- grep("(ar\\d+|ma\\d+)", names(mcoefs), value=TRUE)
newcoefs <- setNames(rep(NA_real_, length(mcoefs)), names(mcoefs))
newcoefs[armanames] <- mcoefs[armanames]
#fix the ARMA coefficients, leave the rest of the model freely estimated.
#note that include.drift the first time around will add a column called 'drift' to $xreg.
xreg <- filter_model$xreg
if ("drift" %in% colnames(xreg)) {
xreg <- xreg[,grep("^drift$", colnames(xreg), value=TRUE, invert=TRUE)]
if (ncol(xreg) == 0) { xreg = NULL } #if drift was the only column (no exogenous covariates)
}
input_filtered <- tryCatch(forecast::Arima(nona[, x[input]], order=forecast::arimaorder(filter_model), fixed=newcoefs, xreg=xreg, include.mean=TRUE, include.drift=TRUE, method="CSS-ML"),
error=function(e) { print(e); forecast::Arima(nona[, x[input]], order=forecast::arimaorder(filter_model), fixed=newcoefs, xreg=xreg, include.mean=TRUE, include.drift=TRUE, method="ML") })
ts_input <- residuals(input_filtered)
cvec[filter_direction] <- corrpair(residuals(input_filtered), ts_response, method=method)
#cvec[filter_direction] <- cor(residuals(input_filtered), ts_response)
}
return(cvec)
})
}
tictoc::toc()
#for now, average the estimates of correlations from the two filter directions
message("For ARIMA filtering, we prewhiten in each direction (i.e., alternating input and response series), then average the lag-0 cross-correlation")
message("Similarity of cross-correlations alternating the input and response ARIMA filters: r = ", round(cor(corrvec[1,], corrvec[2,]),3))
corrvec <- apply(corrvec, 2, mean)
}
stopifnot(length(corrvec)==nrow(lo.tri)) #be afraid if we haven't estimated every cell in the correlation matrix
rcorMat[lo.tri] <- corrvec
#duplicate the lower triangle to upper
rcorMat[upper.tri(rcorMat)] <- t(rcorMat)[upper.tri(rcorMat)] #transpose flips filled lower triangle to upper
}
#populate lower triangle of correlation matrix
if (fisherz == TRUE) {
message("Applying the Fisher z transformation to correlation coefficients.")
rcorMat <- atanh(rcorMat)
diag(rcorMat) <- 15 #avoid Inf in output by a large value. tanh(15) is ~1
}
#add back in NA cols
if (length(nacols) > 0) {
processedCorrs <- matrix(NA, nrow=ncol(roits), ncol=ncol(roits))
#fill in all non-NA cells row-wise
processedCorrs[!1:ncol(roits) %in% nacols, !1:ncol(roits) %in% nacols] <- rcorMat
} else { processedCorrs <- rcorMat }
processedCorrs #return processed correlations
}
### BEGIN DATA PROCESSING
message("Reading roi mask: ", fname_roimask)
if (grepl("^.*\\.(HEAD|BRIK|BRIK.gz)$", fname_roimask, perl=TRUE)) {
roimask <- readAFNI(fname_roimask, vol=1)
#afni masks tend to read in as 4D matrix with singleton 4th dimension. Fix this
if (length(dim(roimask)) == 4L) {
roimask@.Data <- roimask[,,,,drop=T]
}
} else {
roimask <- readNIfTI(fname_roimask, reorient=FALSE)
}
#optional: apply brain mask
if (!is.null(fname_brainmask)) {
message("Applying brain mask to ROIs: ", fname_brainmask)
stopifnot(file.exists(fname_brainmask))
if (grepl("^.*\\.(HEAD|BRIK|BRIK.gz)$", fname_brainmask, perl=TRUE)) {
brainmask <- readAFNI(fname_brainmask)
} else {
brainmask <- readNIfTI(fname_brainmask, reorient=FALSE)
}
#brain mask and roi mask must be of same dimension
stopifnot(identical(dim(brainmask)[1:3], dim(roimask)[1:3]))
roimask_prebrainmask <- roimask #for tracking filtering
message(" ROI voxels before applying brainmask: ", sum(roimask > 0, na.rm=TRUE))
#remove non-brain voxels
roimask[which(brainmask == 0.0)] <- NA_real_
message(" ROI voxels after applying brainmask: ", sum(roimask > 0, na.rm=TRUE))
}
message("Reading in 4D file: ", fname_rsproc)
#read in processed resting-state data
if (grepl("^.*\\.(HEAD|BRIK|BRIK.gz)$", fname_rsproc, perl=TRUE)) {
rsproc <- readAFNI(fname_rsproc)
} else {
rsproc <- readNIfTI(fname_rsproc, reorient=FALSE)
}
if(length(fname_rsproc_add)>0L){
# TODO: error if brik/head?
# TODO: check dims for better error message
rsproc_plus <- lapply(fname_rsproc_add,readNIfTI, reorient=F)
rsproc <- do.call(abind::abind, c(list(rsproc),rsproc_plus))
}
if (!identical(dim(rsproc)[1:3], dim(roimask)[1:3])) {
if(length(fname_rsproc_add)>0L) stop("time series and mask dims do not match! ROI_TempCorr wont resample -ts+; quitting instead")
message("Resampling rs proc file from: ", paste(dim(rsproc)[1:3], collapse="x"), " to: ", paste(dim(roimask)[1:3], collapse="x"), " using nearest neighbor")
message("This assumes that the files are in the same space and have the same grid size. Make sure this is what you want!!")
runAFNICommand(paste0("3dresample -overwrite -inset ", fname_rsproc, " -rmode NN -master ", fname_roimask, " -prefix tmpResamp.nii.gz"))
stopifnot(file.exists("tmpResamp.nii.gz"))
rsproc <- readNIfTI("tmpResamp.nii.gz", reorient=FALSE)
unlink("tmpResamp.nii.gz")
}
#obtain vector of mask values
maskvals <- sort(unique(as.vector(roimask)))
maskvals <- maskvals[which(maskvals != 0)] #omit zero
if (length(maskvals) > 1050) {
warning(sprintf(">1050 putative ROIs identified in mask file '%s'. Is -roi file a mask or a timeseries!? n=%d; vals: %.02f-%.02f",
fname_roimask, length(maskvals), min(maskvals), max(maskvals)))
}
# 20220315 - possible to give comma seperated roi values to subset mask. useful for subseting freesurfer
if(!is.null(MASKVALS_ARG)){
missing_roi_vals <- ! MASKVALS_ARG %in% maskvals
if(any(missing_roi_vals)) warning("Not all -roi_vals are in roimask:", paste(collapse=",", MASKVALS_ARG[missing_roi_vals]))
maskvals <- MASKVALS_ARG
}
if(length(maskvals) < 1) stop("no roi values! cannot run. use -rois or -roi_vals")
if (njobs > 1) {
clusterobj <- makePSOCKcluster(njobs, master="localhost", port=clustersocketport)
registerDoParallel(clusterobj)
} else {
registerDoSEQ()
}
#to reduce RAM overhead of having to copy rsproc_censor to each worker, obtain list of vox x time mats for rois
#even though this seems more elegant, it is much slower (400x!) than the use of 4d lookup and reshape below
#system.time(roimats <- lapply(maskvals, function(v) {
# apply(rsproc, 4, '[', which(roimask==v, arr.ind=TRUE))
# }))
#generate a 4d mat of indices
message("# num rois:", length(maskvals), "; 4D timeseries size:", paste(collapse=",", dim(rsproc)))
rsproc_ndim <- length(dim(rsproc))
if(rsproc_ndim !=4) stop("timeseries is ",rsproc_ndim,"D. should be 4D")
roimats <- lapply(maskvals, function(v) {
mi <- which(roimask==v, arr.ind=TRUE)
nvox <- nrow(mi)
#print(paste0("## voxels=",v,"=",nvox))
nvol <- dim(rsproc)[4]
mi4d <- cbind(pracma::repmat(mi, nvol, 1), rep(1:nvol, each=nvox))
mat <- matrix(rsproc[mi4d], nrow=nvox, ncol=nvol) #need to manually reshape into matrix from vector
attr(mat, "maskval") <- v #add mask value as attribute so that information about bad ROIs can be printed below
t(mat) #transpose matrix so that it is time x voxels
})
#add numeric index of mask (1:max) for keeping track of number of good voxels inside roimats loop
#this is necessary if there is discontinuity in the mask values (e.g., jumps between 10 and 12)
roimats <- lapply(1:length(roimats), function(i) {
attr(roimats[[i]], "maskindex") <- i
return(roimats[[i]])
})
rm(rsproc) #clear imaging file from memory now that we have obtained the roi time series
if (!is.null(roi_diagnostics_fname)) {
nvox_observed_per_roi <- sapply(roimats, ncol)
nvox_good_per_roi <- rep(NA, length(nvox_observed_per_roi))
}
# check roi size. need at least 4 good voxels in badvox check
roi_sizes <- sapply(roimats,dim)[2,]
roi_too_small <- roi_sizes < min_vox_varts
if(any(roi_too_small) )
message("WARNING: have ROIs that are too small (<",min_vox_varts,"vox); will always be removed from correlation (NA) b/c TS has < 5 good voxels",
paste(collapse=" ", sprintf("#%d (%d voxs)",
sapply(roimats,attr,'maskval')[roi_too_small],
roi_sizes[roi_too_small])))
message("Obtaining a single time series within each ROI using: ", roi_reduce)
roiavgmat <- foreach(roivox=iter(roimats), .packages=c("MASS"), .combine=cbind, .noexport=c("rsproc")) %do% { #minimal time savings from dopar here, and it prevents message output
##roivox is a time x voxels matrix for a single ROI
##data cleaning steps: remove voxels that are 1) partially or completely missing; 2) all 0; 3) variance = 0 (constant)
##leave out variance > mean check because bandpass-filtered data are demeaned
badvox <- apply(roivox, 2, function(voxts) {
if (any(is.na(voxts))) TRUE #any missing values
else if (all(voxts == 0.0)) TRUE #all zeros
else if (var(voxts) == 0.0) TRUE #constant time series
##else if (var(voxts) > mean(voxts)) TRUE #variance exceeds mean (very unstable heuristic)
else FALSE #good voxel
})
if (!is.null(roi_diagnostics_fname)) { nvox_good_per_roi[attr(roivox, "maskindex")] <- sum(!badvox) }
if (sum(!badvox) < min_vox_varts) {
##only reduce if there are at least 5 (defaul of min_vox_varts) voxels to average over after reduction above
##otherwise return NA time series
##cat(" ROI ", attr(roivox, "maskval"), ": fewer than 5 voxels had acceptable time series. Removing this ROI from correlations.\n", file=".roilog", append=TRUE)
if(!no_badmsg)
message(" ROI ", attr(roivox, "maskval"), ": fewer than ", min_vox_varts, " voxels of ", dim(roivox)[2],
" had acceptable time series. Removing this ROI from correlations.")
ts <- rep(NA_real_, nrow(roivox))
#NB TODO BUG!! colnames using maskvals will fail (w/-write_header). will get "V1" instead of "roi1"
} else {
if (sum(badvox) > 0) {
msg <- c(" ROI ", attr(roivox, "maskval"), ": ", sum(badvox),
" voxels had bad time series (e.g., constant) ")
# 20240208 - AO needs ncol of matrix output to be same
# but the problem was actually in the input mask!
if(keep_constant_vals) {
roivox[,badvox] <- NA_real_
msg <- c(msg, "and were NA'ed")
} else {
roivox <- roivox[,!badvox] #remove bad voxels (columns)
# 20240917: if only have 1 good voxel (w/ '-min_vox 1'), roivox becomes vector instead of matrix
# to avoid error "dim(X) must have a positive length", make matrix again
if(sum(!badvox) == 1) roivox <- as.matrix(roivox,ncol=1)
msg <- c(msg, "and were removed prior to ROI averaging. (n=", paste(collapse="x",dim(roivox)),")")
}
if(!no_badmsg) message(msg)
}
if (roi_reduce == "pca") {
ts <- prcomp(roivox, scale.=TRUE)$x[,1] #first eigenvector
tsmean <- apply(roivox, 1, mean, na.rm=TRUE)
#flip sign of component to match observed data (positive correlation)
if (cor(ts, tsmean) < 0) { ts <- -1*ts }
} else if (roi_reduce == "mean") {
ts <- apply(roivox, 1, mean, na.rm=TRUE) #mean time series across voxels
} else if (roi_reduce == "median") {
ts <- apply(roivox, 1, median, na.rm=TRUE)
} else if (roi_reduce == "huber") {
ts <- apply(roivox, 1, getRobLocation)
}
}
return(ts)
}
## check censor makes sense
if (!is.null(fname_censor1D)) {
ncen <- length(censor1D)
nts <- nrow(roiavgmat)
if(ncen != nts) stop(sprintf("censor length %d != roi ts length %d!", ncen, nts))
}
##drop initial volumes if requested
##this should be implemented before censoring so that ARIMA models and bandpass filtering are applied on the truncated data
##ARIMA estimation may be thrown off if we hand it nonstationary data, such as may exist prior to steady state magnetization
if (drop_vols > 0) {
message("Dropping ", drop_vols, " volumes from ROI time series prior to correlation.")
roiavgmat <- roiavgmat[-1*(1:drop_vols),]
if (!is.null(nuisance_regressors)) { nuisance_df <- nuisance_df[-1*(1:drop_vols),] }
if (length(censor_vols) > 0L) {
censor_vols <- censor_vols - drop_vols #shift censor vector based on the number dropped
censor_vols <- censor_vols[censor_vols > 0] #omit any censored volumes that may have fallen in the truncated period
}
}
#We need to detrend before bandpass filtering, if requested
#note that we could incorporate these as additional columns of the nuisance regressors if there was no temporal filter
#then apply those regressors at the stage of ARIMA or OLS removal. But then we start
if (!is.null(detrend_order)) {
message("Applying detrending of order: ", detrend_order, " to ROI aggregated time series.")
if(is.null(dim(roiavgmat))){
warning("only one surviving roi!? forcing roi ts vector back to matrix")
roiavgmat <- as.matrix(roiavgmat,ncol=1)
}
roiavgmat <- apply(roiavgmat, 2, function(ts) { detrendts(ts, order=detrend_order) })
if (!is.null(nuisance_regressors)) {
message("Applying detrending of order: ", detrend_order, " to nuisance regressors.")
nuisance_df <- apply(nuisance_df, 2, function(ts) { detrendts(ts, order=detrend_order) })
}
}
##NB. I have developed significant trepidation about temporal filtering when computing correlations among regions.
##For reasoning, see Bright & Murphy 2017 NeuroImage; Arbabsharani et al., 2014 NeuroImage; Davey et al., 2013, NeuroImage
##Minimally, temporal filtering reduces degrees of freedom and induces autocorrelation (see Fig2a,b in Bright)
##But this becomes even more difficult if we attempt to model the autoregressive structure of the data through prewhitening prior to correlation
##In this case, the ARIMA model itself acts as a temporal filter on the data.
##Thus, if requested, we filter first, then apply ARIMA, but with a scary warning!
if (!is.null(delta_t)) {
message("Bandpass filtering ROI time series prior to computing correlations using FIR1 filter.")
message("Sampling frequency: dt = ", delta_t, "s, freq_low = ", freq_low, "Hz, freq_high = ", freq_high, "Hz")
message("Be cautious about correlations among time series after temporal filtering. See Davey et al., 2013 and Bright and Murphy 2017")
if (!is.null(fit_p)) {
message("Be *really* cautious about combining temporal filtering and AR(I)MA modeling!")
message("We will filter first, then fit ARIMA models after, but AR(I)MA is itself a filter.")
message("See Arbabshirani et al., 2014 and Bright and Murphy 2017 for warnings")
}
filter_order <- min(length(ts), 300) #don't have more than n filter frequencies (doesn't do anything bad, but it has to be overkill)
tictoc::tic("Filtering ROI time series")
#roiavgmat <- apply(roiavgmat, 2, function(ts) {
roiavgmat <- foreach(ts=iter(roiavgmat, by="column"), .inorder=TRUE, .combine=cbind, .noexport="roiavgmat") %dopar% {
fir1Bandpass(ts, TR=delta_t, low=freq_low, high=freq_high, n=filter_order, padx=100, detrend=NULL)
}
tictoc::toc()
if (!is.null(nuisance_regressors)) {
message("We will apply the same filter to nuisance regressors prior to their removal.")
nuisance_df <- apply(nuisance_df, 2, function(ts) {
fir1Bandpass(ts, TR=delta_t, low=freq_low, high=freq_high, n=filter_order, padx=100, detrend=NULL)
})
}
}
##fit ARIMA model to each time series to prewhiten, if requested
##z-score nuisance regressors to help with numerical optimization (we don't care about parameter estimates)
xreg <- if (is.null(nuisance_regressors)) { NULL } else { scale(nuisance_df) }
#TODO: implement regression with spike regressors instead of chopping time points below.
if (!is.null(fit_p)) {
message("Fitting ARIMA model to each ROI time series prior to computing correlations.")
message("Model order: p = ", fit_p, ", d = ", fit_d, ", q = ", fit_q)
message("Note that ARIMA model is fit prior to censoring or truncation given the importance of temporal continguity.")
message("Mean and linear drift terms are included in ARIMA model by default.")
#NB. The correct approach for estimating cross-correlation on prewhitened time series is to estimate an ARIMA model on one
#series (x), then apply that model to the other series (y), rather than fitting a new model on the second series (y).
#http://finzi.psych.upenn.edu/R/library/TSA/html/prewhiten.html
#https://stats.stackexchange.com/questions/191131/testing-significance-of-cross-correlated-series/
#https://stats.stackexchange.com/questions/194130/cross-correlation-of-two-autocorrelated-signals-removing-autocorrelation-with-a
#https://onlinecourses.science.psu.edu/stat510/node/75/
#http://support.sas.com/documentation/cdl/en/etsug/63348/HTML/default/viewer.htm#etsug_arima_sect033.htm
#but, in the fMRI literature, people often apply a unique ARIMA to each voxel/unit and keep the residuals. When examining
#the CCF, this means the input and response series will not have the same filter applied, which is conceptually problematic.
#Thus, return a set of models here for each ROI, and apply them appropriately during cross-correlation (essentially cell by cell)
tictoc::tic("Fitting ARIMA models to ROI time series")
#roi_arima_fits <- apply(roiavgmat, 2, function(ts) {
roi_arima_fits <- foreach(ts=iter(roiavgmat, by="column"), .inorder=TRUE, .noexport="roiavgmat") %dopar% {
tryCatch(forecast::Arima(as.vector(ts), order=c(fit_p, fit_d, fit_q), include.drift=TRUE, include.mean=TRUE, xreg=xreg, method="CSS-ML"), #fit model of specified order
error=function(e) { print(e); forecast::Arima(as.vector(ts), order=c(fit_p, fit_d, fit_q), include.drift=TRUE, include.mean=TRUE, xreg=xreg, method="ML") }) #fall back to full ML
}
tictoc::toc()
#There is a challenge of how to handle prewhitening in the presence of exogenous covariates (xreg) since simply passing
#the model argument to Arima will fit the other time series with the exact coefficients for both the ARMA part and the
#covariates. This seems problematic if the other time series has a substantially different trend or effect of the nuisance signals.
#In prewhitening the premise is that the ARMA coefficients act as a filter on both Y and X (see Shumway & Stoffer 2017, p. 267),
#not that the covariates are part of the filter.
#The most principled thing I can think of is that we wish to estimate the structure of the *noise* in one series, which means that the ARMA
#coefficients must be derived conditioned on the external covariates. This is a regression model with ARMA errors.
#When we wish to examine the cross-correlation, however, the ARMA coefficients are the *filter* that changes the temporal structure (and ensures)
#that cross-correlation is not due to the autoregressive properties of the input. BUT, we also wish to derive unique estimates of the
#response series for intercept, drift, and exogenous covariates. Thus, we should fix the ARMA coefficients to equality between models, but
#uniquely estimate the external regressor effects in each.
#See bottom of this page:
#http://support.sas.com/documentation/cdl/en/etsug/63939/HTML/default/viewer.htm#etsug_arima_sect012.htm
#bg tests the null of no serial correlation *up to* the max order. Thus, no real need to test sequential lags, just the max.
message("Checking whiteness of ARIMA residuals using Breusch-Godfrey test.")
message("This tests the null hypothesis of serial correlation up to lag order: ", white_lags, " [hard coded for now]")
whitechecks <- sapply(roi_arima_fits, function(mout) {
#deprecate Ljung-Box; use Breusch-Godfrey
#https://stats.stackexchange.com/questions/148004/testing-for-autocorrelation-ljung-box-versus-breusch-godfrey
#test <- Box.test(mout$residuals, lag=lag, type="Ljung-Box")
#test$p.value
test <- lmtest::bgtest(mout$residuals ~ 1, order=white_lags)
test$p.value
})
message("The percentage of non-white residuals is: ", pct_nonwhite <- round(100*sum(whitechecks < .05)/length(whitechecks), 3), "%. This should be no more than ~5% given a nominal Type I error rate.")
if (pct_nonwhite > 5) { warning("Whiteness checks failed in ARIMA modeling. Consider changing (increasing) your model order!!")}
} else if (!is.null(nuisance_regressors)) {
message("Removing nuisance regressors through OLS regression. See Bright & Murphy 2017 for why this is probably suboptimal!")
roiavgmat <- apply(roiavgmat, 2, function(ts) {
residuals(lm(ts ~ xreg)) #remove columns in nuisance
})
}
#keep intelligent names on time x roi matrix
colnames(roiavgmat) <- paste0("roi", maskvals)
rownames(roiavgmat) <- paste0("vol", 1:nrow(roiavgmat))
#handle roi diagnostics output, adding whiteness p-value checks if relevant
if (!is.null(roi_diagnostics_fname)) {
tt <- as.data.frame(table(roimask_prebrainmask))
names(tt) <- c("maskval", "nvox_total")
tt$maskval <- as.numeric(as.character(tt$maskval))
tt <- subset(tt, maskval != 0)
roi_diagnostics <- data.frame(dataset=fname_rsproc, maskval=maskvals, nvox_good=nvox_good_per_roi, nvox_observed=nvox_observed_per_roi)
roi_diagnostics <- merge(roi_diagnostics, tt, by="maskval")
roi_diagnostics$prop_masked <- with(roi_diagnostics, 1 - nvox_observed/nvox_total)
roi_diagnostics$prop_missing <- with(roi_diagnostics, 1 - nvox_good/nvox_observed)
if (!is.null(roi_arima_fits)) { roi_diagnostics$bg_arima_white_pval <- whitechecks }
write.csv(file=roi_diagnostics_fname, roi_diagnostics, row.names=FALSE)
}
##apply censoring to resulting time series
censorvec <- rep(0, nrow(roiavgmat))
goodVols <- 1:nrow(roiavgmat)
if (length(censor_vols) > 0L) {
message("Censoring volumes ", paste0(censor_vols, collapse=", "), " based on ", fname_censor1D)
goodVols <- goodVols[-censor_vols]
censorvec[censor_vols] <- 1
}
roiavgmat_censored <- roiavgmat[goodVols,]
##output ts file if requested
if (nchar(ts_out_file) > 0L) {
message("Writing time series to: ", ts_out_file)
if(is.null(fname_censor1D)) {
df <- roiavgmat
} else {
df <- cbind(censor=censorvec, roiavgmat)
}
write.table(df, file=ts_out_file, col.names=write_header, row.names=FALSE)
#output individually prewhitened time series from ARIMA, if available
if (!is.null(roi_arima_fits)) {
pw_ts_file <- paste0(tools::file_path_sans_ext(ts_out_file, compression = TRUE), "_prewhitened", file_ext(ts_out_file))
message("Writing prewhitened time series to: ", pw_ts_file)
roiavgmat_whitened <- sapply(roi_arima_fits, residuals)
colnames(roiavgmat_whitened) <- paste0("roi", maskvals)
rownames(roiavgmat_whitened) <- paste0("vol", 1:nrow(roiavgmat_whitened))
if(is.null(fname_censor1D)) {
df <- roiavgmat_whitened
} else {
df <- cbind(censor=censorvec, roiavgmat_whitened)
}
write.table(df, file=pw_ts_file, col.names=write_header, row.names=FALSE)
}
}
#If we only want the timeseries, quit before computing correlations
if (ts_only) {
if (njobs > 1) { try(stopCluster(clusterobj)) }
quitfn(save="no", 0, FALSE)
}
pp <- ifelse(partial==TRUE, "_partial", "") #insert 'partial' into message and file name if relevant
pw <- ifelse(is.null(roi_arima_fits), "", "_prewhitened") #insert 'partial' into message and file name if relevant
pp <- ifelse(semi==TRUE, "_semipartial", pp) #insert 'semipartial' when semi
for (m in 1:length(corr_method)) {
message("Computing", sub("_", " ", pp[m]), " correlations among ROI times series using method: ", ifelse(corr_method[m]=="auto", "robust", corr_method[m]))
if (partial[m] == TRUE && !is.null(roi_arima_fits)) {
message("At present, cannot combine partial correlation and ARIMA-based prewhitening since there is no single time series matrix. Skipping this combination.")
next
}
cormat <- genCorrMat(as.data.frame(roiavgmat_censored), method=corr_method[m], fisherz=fisherz, partial=partial[m], roi_arima_fits=roi_arima_fits, semi=semi[m])
# make names match roivalue instead of just V1...Vn
# TODO: problem if dropped roi?
if(write_header){
cormat <- as.data.frame(cormat)
names(cormat) <- paste0("roi", maskvals) #sapply(roimats,attr,'maskval')
}
this_out_file <- paste0(tools::file_path_sans_ext(out_file, compression = TRUE), "_", corr_method[m], pp[m], pw, file_ext(out_file)) #add method-specific suffix to file
message("Writing correlations to: ", this_out_file)
if (grepl(".*\\.gz$", this_out_file, perl=TRUE)) {
##write compressed
gzf <- gzfile(this_out_file, "w")
write.table(cormat, file=gzf, col.names=write_header, row.names=FALSE, na=na_string)
close(gzf)
} else {
write.table(cormat, file=this_out_file, col.names=write_header, row.names=FALSE, na=na_string)
}
}
if (njobs > 1) { try(stopCluster(clusterobj)) }
quitfn(save="no", 0, FALSE)