-
Notifications
You must be signed in to change notification settings - Fork 3
/
qtl2_scan_engine.R
90 lines (70 loc) · 2.76 KB
/
qtl2_scan_engine.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
################################################################################
# Utility functions for mapping Attie mass spec data.
# Daniel Gatti
# dan.gatti@jax.org
# June 17, 2017
################################################################################
options(stringsAsFactors = F)
library(qtl2) # Loads qtl2geno, qtl2scan & qtl2plot.
library(qtl2convert)
library(RSQLite)
library(dplyr)
# Command line arguments.
# 1: full path to input file name.
# 2: output dir and file prefix.
# 3: chunk_size: integer that is the chunk size.
# 4: chunk_number: integer that is the chunk number to run.
# 5: rankz: boolean indicating whether to rankZ transform the phenotypes.
args = commandArgs(trailingOnly = TRUE)
input.file = args[1]
output.prefix = args[2]
chunk_size = as.numeric(args[3])
chunk_number = as.numeric(args[4])
should.rankz = as.logical(args[5])
print(paste("INPUT:", input.file))
print(paste("OUTPUT:", output.prefix))
print(paste("CHUNK_SIZE:", chunk_size))
print(paste("CHUNK_NUMBER:", chunk_number))
print(paste("RANKZ:", should.rankz))
rankZ = function(x) {
x = rank(x, na.last = "keep", ties.method = "average") / (sum(!is.na(x)) + 1)
return(qnorm(x))
} # rankZ()
#####################
# Load in the data. #
#####################
load(input.file)
stopifnot(c("pheno", "pheno.rz", "pheno.descr", "genoprobs", "K", "map") %in% ls())
######################
# Set up covariates. #
######################
covar.names = strsplit(pheno.descr$covar_list, ":")
covar.names = unique(unlist(covar.names))
# There may be NA's in the covar.names.
covar.names = covar.names[!is.na(covar.names)]
stopifnot(covar.names %in% colnames(pheno))
f = as.formula(paste("~", paste(covar.names, collapse = "+")))
addcovar = model.matrix(f, data = pheno)[,-1]
#######################################################
# Split out phenotypes and convert to numeric matrix. #
#######################################################
covar = pheno[,!pheno.descr$is_pheno]
pheno = as.matrix(pheno[,pheno.descr$is_pheno])
#########################################
# Calculate the phenotype range to run. #
#########################################
max_col = ncol(pheno)
pheno.rng = ((chunk_number - 1) * chunk_size + 1):(chunk_number * chunk_size)
if(pheno.rng[length(pheno.rng)] > max_col) {
pheno.rng = pheno.rng[1]:max_col
} # if(pheno.rng[length(pheno.rng)] > max_col)
print(paste("Mapping columns:", pheno.rng[1], "to", pheno.rng[length(pheno.rng)]))
if(should.rankz) {
pheno[,pheno.rng] = apply(pheno[,pheno.rng,drop = F], 2, rankZ)
} # if(should.rankz)
#########
# Scan1 #
#########
qtl = scan1(genoprobs = genoprobs, pheno = pheno[,pheno.rng,drop = F], kinship = K,
addcovar = addcovar, cores = 1)
saveRDS(qtl, file = paste0(output.prefix, "chunk_", chunk_number, "_QTL.rds"))