Skip to content
Xiuwen Zheng edited this page Jul 23, 2014 · 1 revision

Examples

  • The pre-fit HIBAG models for HLA genotype imputation:
library(HIBAG)
 
# Load the published parameter estimates from European ancestry
model.list <- get(load("European-HLA4.RData"))
 
#########################################################################
# Import your PLINK BED file
#
yourgeno <- hlaBED2Geno(bed.fn=".bed", fam.fn=".fam", bim.fn=".bim")
summary(yourgeno)
 
# HLA imputation at HLA-A
hla.id <- "A"
model <- hlaModelFromObj(model.list[[hla.id]])
summary(model)
# HLA allele frequencies
cbind(frequency = model$hla.freq)
 
# SNPs in the model
head(model$snp.id)
# "rs2523442" "rs9257863" "rs2107191" "rs4713226" "rs1362076" "rs7751705"
head(model$snp.position)
# 29525796 29533563 29542274 29542393 29549148 29549597
 
# best-guess genotypes and all posterior probabilities
pred.guess <- predict(model, yourgeno, type="response+prob")
summary(pred.guess)
pred.guess$value
pred.guess$postprob
  • Build a HIBAG model for HLA genotype imputation:
library(HIBAG)
 
# Import your PLINK BED file
geno <- hlaBED2Geno(bed.fn=".bed", fam.fn=".fam", bim.fn=".bim")
summary(geno)
 
# The HLA type of the first individual is 01:02/02:01, the second is 05:01/03:01, ...
train.HLA <- hlaAllele(geno$sample.id, H1=c("01:02", "05:01", ...),
    H2=c("02:01", "03:01", ...), locus="A")

# Or the HLA types are saved in a text file "YourHLATypes.txt":
#   SampleID Allele1 Allele2
#   NA001101 01:02 02:01
#   NA001201 05:01 03:01
#   ...
D <- read.table("YourHLATypes.txt", header=TRUE, stringsAsFactors=FALSE)
train.HLA <- hlaAllele(D$SampleID, H1=D$Allele1, H2=D$Allele2, locus="A")

summary(train.HLA)
 
# Selected SNPs, two options:
# 1) the flanking region of 500kb on each side,
#    or an appropriate flanking size without sacrificing predictive accuracy
  snpid <- hlaFlankingSNP(geno$snp.id, geno$snp.position, "A", 500*1000)
# 2) the SNPs in our pre-fit models
  model.list <- get(load("European-HLA4.RData"))
  snpid <- model.list[["A"]]$snp.id
# Subset training SNP genotypes
train.geno <- hlaGenoSubset(geno, snp.sel=match(snpid, geno$snp.id))
 
# Building ...
set.seed(1000)
model <- hlaAttrBagging(train.HLA, train.geno, nclassifier=100, verbose.detail=TRUE)
summary(model)
 
# Save your model
model.obj <- hlaModelToObj(model)
save(model.obj, file="your_model.RData")
 
# Predict ...
model.obj <- get(load("your_model.RData"))
model <- hlaModelFromObj(model.obj)
summary(model)
# best-guess genotypes and all posterior probabilities
pred.guess <- predict(model, newgeno, type="response+prob")
summary(pred.guess)
pred.guess$value
pred.guess$postprob
  • Build and predict in parallel:
library(parallel)
library(HIBAG)
 
# Import your PLINK BED file
geno <- hlaBED2Geno(bed.fn=".bed", fam.fn=".fam", bim.fn=".bim")
summary(geno)
 
# The HLA type of the first individual is 01:02/02:01, the second is 05:01/03:01, ...
train.HLA <- hlaAllele(geno$sample.id, H1=c("01:02", "05:01", ...),
    H2=c("02:01", "03:01", ...), locus="A")

# Or the HLA types are saved in a text file "YourHLATypes.txt":
#   SampleID Allele1 Allele2
#   NA001101 01:02 02:01
#   NA001201 05:01 03:01
#   ...
D <- read.table("YourHLATypes.txt", header=TRUE, stringsAsFactors=FALSE)
train.HLA <- hlaAllele(D$SampleID, H1=D$Allele1, H2=D$Allele2, locus="A")

summary(train.HLA)
 
# Create an environment with an appropriate cluster size
cl <- makeCluster(8)
 
# Building ...
set.seed(1000)
hlaParallelAttrBagging(cl, train.HLA, geno, nclassifier=100, auto.save="AutoSaveModel.RData")
model.obj <- get(load("AutoSaveModel.RData"))
model <- hlaModelFromObj(model.obj)
summary(model)
 
# best-guess genotypes and all posterior probabilities
pred.guess <- predict(model, yourgeno, type="response+prob", cl=cl)
summary(pred.guess)
pred.guess$value
pred.guess$postprob
Clone this wiki locally