-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfigureS6.R
110 lines (90 loc) · 4.92 KB
/
figureS6.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
##########################################################################
## Codes for the paper:
## Global determinants of freshwater and marine fish genetic diversity
## Authors :
## Stephanie Manel, Pierre-Edouard Guerin, David Mouillot,
## Simon Blanchet, Laure Velez, Camille Albouy, Loic Pellissier
##
## Montpellier, 2017-2019
## Submited to Nature communications, 2019
##
## functions used to generate figures
## Supplementary Figure S6.
## Taxonomic coverage of the sequences used by the model
## (a) Number of species per order
## (b) Number of sequences per order
##
##
##########################################################################
## Libraries
## no library required
##########################################################################
## functions
## sum of number of element by 2 columns moF moM
## according to a category defined by the third column moCategory
aggregateBy2Colons <- function(moF,moM,moCategory){
numberSpecModT=aggregate(cbind(moF,moM), by=list(Category=moCategory), FUN=sum)
numbSpecMoSumColons=data.frame(category=numberSpecModT$Category, numberOfElements=numberSpecModT$moM+numberSpecModT$moF)
return(numbSpecMoSumColons)
}
##########################################################################
## load data
tc=read.csv("11-sequences_taxonomy_habitat/watertype_all_modeles_effectives_family.csv",header=T,sep=",")
## correct wrong size of family in NCBI taxonomy
tc[which(tc$taxonFamily == "Citharinidae"),]$ncbiNumberOfSpecies = tc[which(tc$taxonFamily == "Citharinidae"),]$ncbiNumberOfSpecies + 91
tc[which(tc$taxonFamily == "Gadidae"),]$ncbiNumberOfSpecies = 53
## number of species by family in two model(marine+freshwater)
monModelMARINENumbSpec=tc$dcMNumberOfSpecies
monModelFRESHWATERNumbSpec=tc$dcF346FreshwaterNumberOfSpecies
## number of sequences by family in two model(marine+freshwater)
monModelMARINENumbSeq=tc$dcMNumberOfSequences
monModelFRESHWATERNumbSeq=tc$dcF346FreshwaterNumberOfSequences
## models with at least 4 sequences by species and at least 2 species by cell
#monModelMARINENumbSpec=tc$S2I4C29MNumberOfSpecies
#monModelFRESHWATERNumbSpec=tc$S2I5C32FNumberOfSpecies
## models with at least 3 sequences by species and at least 2 species by cell
#monModelMARINENumbSpec=tc$S2I3C66MNumberOfSpecies
#monModelFRESHWATERNumbSpec=tc$S2I3C75FNumberOfSpecies
## models with at least 2 sequences by species and at least 8 species by cell
#monModelMARINENumbSpec=tc$S8I2C36MNumberOfSpecies
#monModelFRESHWATERNumbSpec=tc$S8I2C34FNumberOfSpecies
## models with at least 2 sequences by species and at least 3 species by cell
#monModelMARINENumbSpec=tc$S2I3C66MNumberOfSpecies
#monModelFRESHWATERNumbSpec=tc$S3I2C75FNumberOfSpecies
## number of species by family in two model(marine+freshwater)
NSpeF<-aggregateBy2Colons(monModelMARINENumbSpec,monModelFRESHWATERNumbSpec,tc$taxonFamily)
TotSpeFamilyNumbSeq<-aggregate(tc$ncbiNumberOfSpecies, by=list(Category=tc$taxonFamily), FUN=sum)
propFam=data.frame(proportion=(NSpeF$numberOfElements/TotSpeFamilyNumbSeq$x)*100, family=as.vector(TotSpeFamilyNumbSeq$Category))
## number of species by ORDER in two model(marine+freshwater)
NSpeO<-aggregateBy2Colons(monModelMARINENumbSpec,monModelFRESHWATERNumbSpec,tc$taxonOrder)
## Number of species order total
TotSpeOrderNumbSeq<-aggregate(tc$ncbiNumberOfSpecies, by=list(Category=tc$taxonOrder), FUN=sum)
propOrder=data.frame(proportion=(NSpeO$numberOfElements/TotSpeOrderNumbSeq$x)*100,
order=as.vector(TotSpeOrderNumbSeq$Category))
## number of sequences by family in two model(marine+freshwater)
NSeqF<-aggregateBy2Colons(monModelMARINENumbSeq,monModelFRESHWATERNumbSeq,tc$taxonFamily)
## number of sequences by order in two model(marine+freshwater)
NSeqO<-aggregateBy2Colons(monModelMARINENumbSeq,monModelFRESHWATERNumbSeq,tc$taxonOrder)
##########################################################################
## Quartile of the proportion of species and number of sequences
## for the 63 orders and 480 families
summary(propFam)
summary(propOrder)
##########################################################################
## barplots
## Number of species per family
#barplot(propFam$proportion, las=2,names.arg=propFam$family,ylab="Prop species per order",main ="full models",cex.names = 0.5 )
## Number of species per order
## Number of sequences by family
#barplot(NSeqF$numberOfElements, las=2,names.arg=NSeqF$category,ylab="Number sequences per order",cex.names = 0.5 )
## Number of sequences by order
##########################################################################
## write pdf files
pdf("10-figures/figureS6.pdf",width=12,height=12,paper='special')
layout(matrix(c(1,2),nrow=2))
par(mar=c(6,4,4,4)) # c(bottom, left, top, right)
barplot(propOrder$proportion,las=2,names.arg=propOrder$order,ylab="Proportion species per order",cex.names = 0.6 )
title("(a)", adj = 0)
barplot(NSeqO$numberOfElements, las=2,names.arg=NSeqO$category,ylab="Number sequences per order",cex.names = 0.6 )
title("(b)", adj = 0)
dev.off()