-
Notifications
You must be signed in to change notification settings - Fork 8
/
countSingletonsMaize_downsample.R
62 lines (48 loc) · 2.23 KB
/
countSingletonsMaize_downsample.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
################################################################################
### This script will load angsd mafs output and count singletons ###
### It should be run on slurm because mafs files are several gigs. ###
################################################################################
### Tim Beissinger
### 5-19-2014
### Load data
library(data.table)
mafsMaize <- fread("zcat ../WholeGenomeFolded2/OUTS/BKN_WholeGenome_windows.mafs.gz",header=T,stringsAsFactors=F, showProgress=T)
### Remove sites with few individuals
mafsMaize <- mafsMaize[which(mafsMaize$nInd >= 0.8*23),] #Only include sites with 80% of individuals sequenced
### Downsample
downsamp <- function(x) sum(sample(c(1,0),size=13,replace=T,prob=c(x,1-x)))
downed <- sapply(mafsMaize$knownEM,downsamp) # do downsampling
mafsMaize$downSampled <- downed
### Split on chromosomes
maizeList <- list()
for(i in 1:10){
print(i)
maizeList[[i]] <- mafsMaize[which(mafsMaize$chromo == i),]
}
### Make storage list
maizeResults <- list()
for(i in 1:10){
print(i)
chr <- rep(i,ceiling(max(maizeList[[i]]$position)/1000))
start <- seq(1,max(maizeList[[i]]$position),1000)
end <- c(seq(1000,max(maizeList[[i]]$position),1000),max(maizeList[[i]]$position))
center <- (start+end)/2
tF <- rep(NA,length(center))
nSites <- rep(NA,length(center))
maizeResults[[i]] <- data.frame(cbind(chr,start,end,center,tF,nSites),stringsAsFactors=F)
}
rm(i)
###Analyze
Singletons <- function(x){length(which(x == 1))} # maizemax = 23
for(chr in 1:10){
print(chr)
cuts <- cut(maizeList[[chr]]$position,breaks=c(seq(1,max(maizeList[[chr]]$position),1000),max(maizeList[[chr]]$position)),right=F)
split <- split(maizeList[[chr]]$downSampled,cuts)
maizeResults[[chr]]$nSites <- unlist(sapply(split,length))
maizeResults[[chr]]$tF <- unlist(sapply(split,Singletons))
}
### Combine maize results
allChromosomes <- rbind(maizeResults[[1]],maizeResults[[2]],maizeResults[[3]],maizeResults[[4]],maizeResults[[5]],maizeResults[[6]],maizeResults[[7]],maizeResults[[8]],maizeResults[[9]],maizeResults[[10]])
### Write results
write.table(allChromosomes,file="tF_maize_downsampled.txt")
save(allChromosomes,file="tF_maize_downsampled.Robj")