The goal of GWAS2 is to perform Haplotype-based GWAS.
You can install the development version of GWAS2 from GitHub with:
# install.packages("devtools")
devtools::install_github("qj009/GWAS2")
This is a basic example which shows you how to solve a common problem:
library(GWAS2)
library(googledrive)
# genotype data
GEN_ID <- drive_get(as_id("1VPLHa9QWiiey4N5jaUOc516xXo22_fVi"))
drive_download(GEN_ID, overwrite = TRUE)
(load(file="GEN.rda"))
# phenotype data
Y_ID <- drive_get(as_id("1AF3XGQr-MwsR928NRLM5X6SwYx20NcUA"))
drive_download(Y_ID, overwrite = TRUE)
(load(file="Y.rda"))
Generate biallelic genotype matrix from original dataset:
GEN.GG <- GEN[,-(1:2)]
gg<-GG(GEN.GG)
Generate numerical format genotype matrix from biallelic data:
xx<-GEN.CODE(gg)
Calculate kinship matrix:
kin<-KIN(xx)
Generate SNP map file
CP<-matrix(as.numeric(GEN[,1:2]),nrow(GEN),2)
Generate genotype matrix used for GWAS
gen<-cbind(CP,gg)
Generate phenotype matrix used for GWAS:
YFIX <- as.matrix(Y[,2:3])
Define association model:
method<-"RANDOM"
Calculate initial parameters for association test
PAR<-TEST.SCAN(YFIX,NULL,KIN=kin,method,NULL)
snp_scan <- SEL.SNP(gen, YFIX, KIN=kin, method, PAR=PAR)
CN <- 16471 # Effective number of markers
P.threshold = 1/CN
RR.MULTI<-SEL.HAP(MAP.S=NULL, POS.S=NULL,GEN=gen,
YFIX=YFIX, KIN=kin, nHap=2,method=method,
p.threshold=P.threshold, RR0=NULL,TEST=c(1,2),PAR=PAR)
Initial haplotype test result:
WALD.HapInitial<-RR.MULTI[[2]][[1]]
LRT.HapInitial<-RR.MULTI[[2]][[2]]
Haplotype final result:
WALD.FINAL<-RR.MULTI[[1]][[1]]
LRT.FINAL<-RR.MULTI[[1]][[2]]
Here we take results based on Wald test as an example.
Generate map file of SNP and Haplotype-based GWAS result:
SNP
snp <- as.data.frame(snp_scan[[1]])
snp_map <- snp %>% dplyr::select(X1,X2,X5) %>% mutate(id = paste0("chr",X1,":",X2))
colnames(snp_map) <- c("chr", "pos","p","id")
Initial Haplotype
hapi <- WALD.HapInitial
hapi_map <- hapi %>% dplyr::select(X1,X4,X5,X8) %>% mutate(id = paste0("chr",X1,":",X4,"_",X5))
colnames(hapi_map) <- c("chr", "start","end","p","id")
Final Haplotype
hap <-WALD.FINAL
# generate haplotype length which means the number of SNPs this haplotype contains.
hap_map <- hap %>% mutate(Hap_length = X3-X2+1) %>% select(X1,X4,X5,X8,Hap_length) %>% mutate(id = paste0("chr",X1,":",X4,"_",X5))
colnames(hap_map) <- c("chr", "start","end","p","Hap_length","id")
# remove di-SNPs (initial result) in final FINAL haplotype result
hap_map <- hap_map %>% dplyr::filter(Hap_length>=3)%>% dplyr::select(-Hap_length)
Generate combined Manhattan plot:
T.Name = "LD"
sig_line = 3E-06
ylim = c(0,9)
vis(T.Name, snp_map, hapi_map, hap_map, sig_line=sig_line, ylim)