Skip to content

Latest commit

 

History

History
2011 lines (1477 loc) · 39.2 KB

2_Netgrowth.md

File metadata and controls

2011 lines (1477 loc) · 39.2 KB

2. Immigration and netgrowth

source("R/functions.R")
## Loading required package: phyloseq
## Loading required package: ade4
## Loading required package: picante
## Loading required package: ape
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.0-10
## 
## Attaching package: 'vegan'
## 
## The following object is masked from 'package:ade4':
## 
##     cca
## 
## Loading required package: nlme
## Note: the specification for S3 class "AsIs" in package 'BiocGenerics' seems equivalent to one from package 'RJSONIO': not turning on duplicate class definitions for this class.
## Note: the specification for S3 class "connection" in package 'BiocGenerics' seems equivalent to one from package 'RJSONIO': not turning on duplicate class definitions for this class.
## Note: the specification for S3 class "file" in package 'BiocGenerics' seems equivalent to one from package 'RJSONIO': not turning on duplicate class definitions for this class.
## Note: the specification for S3 class "pipe" in package 'BiocGenerics' seems equivalent to one from package 'RJSONIO': not turning on duplicate class definitions for this class.
## Note: the specification for S3 class "textConnection" in package 'BiocGenerics' seems equivalent to one from package 'RJSONIO': not turning on duplicate class definitions for this class.
## Loading required package: gridExtra
## Loading required package: grid
## 
## Attaching package: 'dplyr'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     n
## 
## The following object is masked from 'package:nlme':
## 
##     collapse
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, desc, failwith, id, mutate, summarise
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
biomfname = "data/otu97/netgrowth_seqs_97_otutable.biom"
netgrowthdata <- LoadData(biompath = biomfname, mapfpath = "data/mapfile.txt")
## Warning: No greengenes prefixes were found. 
## Consider using parse_taxonomy_default() instead if true for all OTUs. 
## Dummy ranks may be included among taxonomic ranks now.
## Warning: No greengenes prefixes were found. 
## Consider using parse_taxonomy_default() instead if true for all OTUs. 
## Dummy ranks may be included among taxonomic ranks now.
data.k <- SampleEvenDepth(mydata = netgrowthdata, depth = 20000, seed = 1234)
## [1] "sample seed = 1234"
## [1] "subsampled at 20000 reads"
## `set.seed(1234)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1234); .Random.seed` for the full vector
## ...
## 450 OTUs were removed because they are no longer 
##  present in any sample after random subsampling
## ...
print(sample_sums(data.k))
## AMPA636 AMPA645 AMPA649 AMPA642 AMPA648 AMPA609 AMPA635 AMPA644 AMPA610 
##   20000   20000   20000   20000   20000   20000   20000   20000   20000 
## AMPA640 AMPA639 AMPA647 
##   20000   20000   20000

Investigate/visualise the influent data. Compare to sludge. Compare to McLellan.

Calculate the net growth of each OTU

Compare net growth to abundance and find out which of the OTUs' growth is overestimated by their abundance.

Can we find single sequences that are abundant in the influent and sludge.

ord.cca <- ordinate(physeq = data.k, method = "CCA", distance = "bray")
plot1 <- plot_ordination(physeq = data.k, ordination = ord.cca, shape = "plant", 
    type = "samples", color = "sample_type") + geom_text(aes(label = pair), 
    color = "black", size = 5, vjust = 2) + geom_point(size = 5)

ord.rda <- ordinate(physeq = data.k, method = "RDA", distance = "euclidian")
plot2 <- plot_ordination(physeq = data.k, ordination = ord.rda, type = "samples", 
    shape = "plant") + geom_text(aes(label = pair, color = sample_type, vjust = 2)) + 
    geom_point(size = 5)

ggsave(path = "figs", filename = "CCA_AS_vs_WW.png", plot = plot1, dpi = 400)
## Saving 7 x 7 in image

The euclidian PCA splits the samples by sample type in the first PC and roughly by plant in the second.

The CCA has the same trend but splits the plants Aalborg and Hjørring on the second axis more clearly.

ids <- as.character(c(94, 97, 99))
fname_template <- "data/otuXX/netgrowth_seqs_XX_otutable.biom"
fnames <- sapply(ids, function(id) gsub("XX", id, fname_template))

datasets <- lapply(ids, function(id) LoadData(biompath = fnames[id], mapfpath = "data/mapfile.txt"))
## Warning: No greengenes prefixes were found. 
## Consider using parse_taxonomy_default() instead if true for all OTUs. 
## Dummy ranks may be included among taxonomic ranks now.
## Warning: No greengenes prefixes were found. 
## Consider using parse_taxonomy_default() instead if true for all OTUs. 
## Dummy ranks may be included among taxonomic ranks now.
## Warning: No greengenes prefixes were found. 
## Consider using parse_taxonomy_default() instead if true for all OTUs. 
## Dummy ranks may be included among taxonomic ranks now.
ids <- sapply(ids, function(id) paste0("otu", id))
names(datasets) <- ids

sapply(datasets, function(dataset) sum(sample_sums(dataset)))
##  otu94  otu97  otu99 
## 811161 811161 811161
sapply(datasets, function(dataset) sum(nsamples(dataset)))
## otu94 otu97 otu99 
##    12    12    12
sapply(datasets, function(dataset) sum(ntaxa(dataset)))
## otu94 otu97 otu99 
##  2214  3609  5584
depth <- 40000

rarefyDataset <- function(mydata, depth = FALSE, seed = FALSE) {
    mydata <- subset_samples(mydata, type == "WW")
    mydata <- rarefy_even_depth(mydata, rngseed = seed, sample.size = depth, 
        trimOTUs = TRUE)
    mydata
}

WWDatasets <- lapply(ids, function(id) rarefyDataset(datasets[[id]], depth, 
    1234))
## `set.seed(1234)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1234); .Random.seed` for the full vector
## ...
## 1058 OTUs were removed because they are no longer 
##  present in any sample after random subsampling
## ...
## `set.seed(1234)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1234); .Random.seed` for the full vector
## ...
## 1728 OTUs were removed because they are no longer 
##  present in any sample after random subsampling
## ...
## `set.seed(1234)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1234); .Random.seed` for the full vector
## ...
## 2560 OTUs were removed because they are no longer 
##  present in any sample after random subsampling
## ...
names(WWDatasets) <- ids

lapply(WWDatasets, function(dataset) printDatasetStats(dataset))
## [1] "number of plants: 3"
##      
##       2012 2013
##   AAE    0    2
##   AAW    1    1
##   HJO    0    2
## [1] 240000
## [1] "total reads: 240000"
## [1] "number of plants: 3"
##      
##       2012 2013
##   AAE    0    2
##   AAW    1    1
##   HJO    0    2
## [1] 240000
## [1] "total reads: 240000"
## [1] "number of plants: 3"
##      
##       2012 2013
##   AAE    0    2
##   AAW    1    1
##   HJO    0    2
## [1] 240000
## [1] "total reads: 240000"
## $otu94
## [1] "total reads: 240000"
## 
## $otu97
## [1] "total reads: 240000"
## 
## $otu99
## [1] "total reads: 240000"
source("R/core_community.R")

coredataframe <- calcData(WWDatasets, ids, core_cutoff = 1)
## Error: could not find function "calcData"
OTUtotals <- ddply(coredataframe, "identities", function(id) c(totalOTUs = sum(id$taxsum), 
    coreOTUs = id$taxsum[id$corestatus == "core"], percentcoreOTUs = round(id$taxsum[id$corestatus == 
        "core"]/sum(id$taxsum) * 100, 1), percentreads = round(id$readprop[id$corestatus == 
        "core"] * 100, 1)))
## Error: object 'coredataframe' not found
print(OTUtotals)
## Error: object 'OTUtotals' not found
p <- plotCore(coredataframe, fig_fname = "figs/infeff_conservation40k.png", 
    cumulative = FALSE)
## Loading required package: RColorBrewer
## Error: object 'coredataframe' not found
otuplot <- p[[1]]
## Error: object 'p' not found
readsplot <- p[[2]]
## Error: object 'p' not found
pdf(file = "figs/ww_core40k.pdf")
grid.arrange(otuplot, readsplot, nrow = 2)
## Error: object 'otuplot' not found
dev.off()
## pdf 
##   2
grid.arrange(otuplot, readsplot, nrow = 2)
## Error: object 'otuplot' not found

Compare the top OTUs in sludge and influent

source("R//net_growth_rate.R")
## Warning: cannot open file 'scr/functions.R': No such file or directory
## Error: cannot open the connection
plot3 <- compareTop20AsWw(data.k)
## Error: could not find function "compareTop20AsWw"
ggsave(path = "figs", filename = "top20OTUsWW_vs_AS.png", plot = plot3, width = 8, 
    dpi = 600)
## Error: object 'plot3' not found
# what percentage of reads are from the top genera?
ds <- WWDatasets[["otu94"]]
ds.p <- transform_sample_counts(ds, function(x) x/sum(x))
n <- 20

topNww_otus <- names(sort(taxa_sums(ds.p), decreasing = TRUE)[1:n])
topN.ww <- subset_taxa(ds, taxa_names(ds) %in% topNww_otus)
df.rawdata <- as.data.frame(otu_table(topN.ww))
otus <- row.names(df.rawdata)
df.outdata <- data.frame(OTU = otus, as.data.frame(tax_table(topN.ww)))
row.names(df.outdata) <- 1:n
df.outdata$median <- apply(df.rawdata, 1, median)
df.outdata$geomean <- apply(df.rawdata, 1, function(x) round(exp(mean(log(x))), 
    1))
df.outdata$max <- apply(df.rawdata, 1, max)
df.outdata$min <- apply(df.rawdata, 1, min)
df.outdata <- df.outdata[order(df.outdata$median, decreasing = TRUE), ]
df.outdata
##     OTU  Kingdom         Phylum                 Class             Order
## 6   650 Bacteria Proteobacteria Epsilonproteobacteria Campylobacterales
## 18 2009 Bacteria Proteobacteria    Betaproteobacteria   Burkholderiales
## 3    56 Bacteria     Firmicutes               Bacilli   Lactobacillales
## 5   643 Bacteria Proteobacteria   Gammaproteobacteria   Pseudomonadales
## 19 2046 Bacteria Proteobacteria Epsilonproteobacteria Campylobacterales
## 15 1514 Bacteria Proteobacteria   Gammaproteobacteria     Aeromonadales
## 17 1913 Bacteria  Bacteroidetes         Flavobacteria  Flavobacteriales
## 11  954 Bacteria  Bacteroidetes           Bacteroidia     Bacteroidales
## 16 1691 Bacteria Proteobacteria   Gammaproteobacteria   Pseudomonadales
## 2    40 Bacteria Proteobacteria   Gammaproteobacteria   Pseudomonadales
## 9   796 Bacteria  Bacteroidetes           Bacteroidia     Bacteroidales
## 13 1203 Bacteria     Firmicutes            Clostridia     Clostridiales
## 12  981 Bacteria     Firmicutes            Clostridia     Clostridiales
## 4    62 Bacteria  Bacteroidetes           Bacteroidia     Bacteroidales
## 1    20 Bacteria Proteobacteria    Betaproteobacteria     Rhodocyclales
## 7   677 Bacteria     Firmicutes            Clostridia     Clostridiales
## 8   700 Bacteria   Fusobacteria          Fusobacteria   Fusobacteriales
## 14 1509 Bacteria     Firmicutes               Bacilli   Lactobacillales
## 10  830 Bacteria Proteobacteria   Gammaproteobacteria   Pseudomonadales
## 20 2139 Bacteria Proteobacteria   Gammaproteobacteria   Pseudomonadales
##                Family              Genus median geomean   max  min
## 6  Campylobacteraceae         Arcobacter 5947.5  5683.1  7036 4162
## 18     Comamonadaceae Limnohabitans_etal 3896.0  4241.8 10930 2270
## 3   Carnobacteriaceae               <NA> 3187.0  3368.6  5438 2594
## 5       Moraxellaceae      Acinetobacter 3091.0  2589.6  4565 1193
## 19 Campylobacteraceae         Arcobacter 1326.5  1204.7  1551  894
## 15     Aeromonadaceae          Aeromonas  972.0   931.0  1427  583
## 17  Flavobacteriaceae     Flavobacterium  898.0   834.2  1482  407
## 11     Bacteroidaceae        Bacteroides  671.5   601.8   875  261
## 16   Pseudomonadaceae        Pseudomonas  621.0   559.7  1904  154
## 2    Pseudomonadaceae               <NA>  589.5   582.9  5542  147
## 9      Prevotellaceae         Prevotella  544.5   466.8   703  173
## 13    Lachnospiraceae            Blautia  533.5   525.6   893  268
## 12    Ruminococcaceae   Faecalibacterium  515.0   484.3   709  219
## 4      Bacteroidaceae        Bacteroides  443.5   384.0   527  149
## 1      Rhodocyclaceae      Dechloromonas  435.0   347.3  1030  109
## 7     Lachnospiraceae          Roseburia  378.0   363.2   483  194
## 8    Fusobacteriaceae               <NA>  366.0   344.6   947  130
## 14   Streptococcaceae        Lactococcus  264.5   712.1  8160  149
## 10      Moraxellaceae      Psychrobacter  189.0   268.9  1193  148
## 20      Moraxellaceae      Enhydrobacter  109.0   108.3  1145    8
source("R/net_growth_rate.R")
## Warning: cannot open file 'scr/functions.R': No such file or directory
## Error: cannot open the connection
df.netgrowth <- calcNetGrowthData(data.k, k_lower_bound = -0.2)
## Error: could not find function "calcNetGrowthData"
plist <- plotNetGrowthDistribution(df.netgrowth)
## Error: could not find function "plotNetGrowthDistribution"
print(plist[[1]])
## Error: object 'plist' not found
print(plist[[2]])
## Error: object 'plist' not found
grid.arrange(plist[[1]], plist[[2]], nrow = 2)
## Error: object 'plist' not found
dev.off()
## null device 
##           1
ggsave(path = "figs", filename = "cumabun_by_netgrowth.pdf", plot = plist[[2]], 
    width = 8, height = 6, units = "cm")
## Error: object 'plist' not found
ggplot(data = df.netgrowth, aes(x = km, fill = class)) + geom_histogram(binwidth = 0.05) + 
    # coord_cartesian(ylim=c(0, 500)) +
scale_fill_brewer(palette = "Set1") + labs(x = NULL, y = "OTU count") + theme(legend.position = "none")
## Error: object 'df.netgrowth' not found
df.netgrowth$km_class <- cut(df.netgrowth$km, breaks = c(-0.22, -0.1, 0, 0.03, 
    10), labels = c("inactive", "low", "active", "max"))
## Error: object 'df.netgrowth' not found
df.netgrowth[(df.netgrowth$prop_ww == 0), 11] = "max"
## Error: object 'df.netgrowth' not found
as_by_class <- with(df.netgrowth, aggregate(prop_as * 100, by = list(pair, km_class), 
    FUN = length))
## Error: object 'df.netgrowth' not found
names(as_by_class) = c("pair", "class", "n_otus")
## Error: object 'as_by_class' not found
df.netgrowth.abun <- subset(df.netgrowth, df.netgrowth$prop_as > 0.001)
## Error: object 'df.netgrowth' not found
res <- with(df.netgrowth.abun, aggregate(prop_as * 100, by = list(pair, km_class), 
    FUN = length))
## Error: object 'df.netgrowth.abun' not found
as_by_class$n_abun_otus <- res$x
## Error: object 'res' not found
res <- with(df.netgrowth, aggregate(prop_as * 100, by = list(pair, km_class), 
    FUN = sum))
## Error: object 'df.netgrowth' not found
as_by_class$read_percent <- res$x
## Error: object 'res' not found
res <- with(df.netgrowth.abun, aggregate(prop_as * 100, by = list(pair, km_class), 
    FUN = sum))
## Error: object 'df.netgrowth.abun' not found
as_by_class$abun_read_percent <- res$x
## Error: object 'res' not found
as_by_class$class_per_abun <- round(as_by_class$abun_read_percent/as_by_class$read_percent * 
    100, 0)
## Error: object 'as_by_class' not found
ggplot(data = as_reads_by_class, aes(x = pair, y = read_percent, fill = class)) + 
    geom_bar(stat = "identity")
## Error: object 'as_reads_by_class' not found
n_per_kmclass <- with(as_reads_by_class, aggregate(read_percent, by = list(class), 
    FUN = sum))
## Error: object 'as_reads_by_class' not found
mean_per_kmclass <- with(as_reads_by_class, aggregate(read_percent, by = list(class), 
    FUN = mean))
## Error: object 'as_reads_by_class' not found
sd_per_kmclass <- with(as_reads_by_class, aggregate(read_percent, by = list(class), 
    FUN = sd))
## Error: object 'as_reads_by_class' not found
# How many reads are active (km > 0)
active <- subset(as_by_class, class %in% c("active", "max"))
## Error: object 'as_by_class' not found
act_totals <- with(active, aggregate(read_percent, by = list(pair), FUN = sum))
## Error: object 'active' not found
round(mean(act_totals$x), 0)
## Error: object 'act_totals' not found
sd(act_totals$x)
## Error: object 'act_totals' not found
act_totals <- with(active, aggregate(n_abun_otus, by = list(pair), FUN = sum))
## Error: object 'active' not found
round(mean(act_totals$x), 0)
## Error: object 'act_totals' not found
sd(act_totals$x)
## Error: object 'act_totals' not found
# How many reads are inactive (km < 0)
inactive <- subset(as_by_class, class %in% c("low", "inactive"))
## Error: object 'as_by_class' not found
inact_totals <- with(inactive, aggregate(read_percent, by = list(pair), FUN = sum))
## Error: object 'inactive' not found
round(mean(inact_totals$x), 0)
## Error: object 'inact_totals' not found
sd(inact_totals$x)
## Error: object 'inact_totals' not found
inact_totals <- with(inactive, aggregate(n_otus, by = list(pair), FUN = sum))
## Error: object 'inactive' not found
round(mean(inact_totals$x), 0)
## Error: object 'inact_totals' not found
sd(inact_totals$x)
## Error: object 'inact_totals' not found
sum(inactive$n_abun_otus)
## Error: object 'inactive' not found
inact_totals <- with(inactive, aggregate(n_abun_otus, by = list(pair), FUN = sum))
## Error: object 'inactive' not found
round(mean(inact_totals$x), 0)
## Error: object 'inact_totals' not found
sd(inact_totals$x)
## Error: object 'inact_totals' not found
df.netgrowth.abuninact <- subset(df.netgrowth.abun, df.netgrowth.abun$km < 0)
## Error: object 'df.netgrowth.abun' not found
length(unique(df.netgrowth.abuninact$OTU))
## Error: object 'df.netgrowth.abuninact' not found
length(unique(df.netgrowth.abuninact$tax_name))
## Error: object 'df.netgrowth.abuninact' not found
barchart(table(df.netgrowth.abuninact$tax_name))
## Error: object 'df.netgrowth.abuninact' not found
hist(table(df.netgrowth.abuninact$OTU))
## Error: object 'df.netgrowth.abuninact' not found
abun_by_taxon <- with(df.netgrowth.abuninact, aggregate(prop_as, by = list(tax_name), 
    FUN = sum))
## Error: object 'df.netgrowth.abuninact' not found
abun_by_taxon$x <- round(abun_by_taxon$x * 100/6, 1)
## Error: object 'abun_by_taxon' not found
abun_by_taxon <- abun_by_taxon[order(abun_by_taxon$x), ]
## Error: object 'abun_by_taxon' not found
names(abun_by_taxon) <- c("name", "mean")
## Error: object 'abun_by_taxon' not found
per_plant_tax_counts <- with(df.netgrowth.abuninact, aggregate(prop_as * 100, 
    by = list(pair, tax_name), FUN = max))
## Error: object 'df.netgrowth.abuninact' not found
names(per_plant_tax_counts) <- c("pair", "taxon", "max_count")
## Error: object 'per_plant_tax_counts' not found
per_plant_tax_counts <- per_plant_tax_counts[order(per_plant_tax_counts$max_count), 
    ]
## Error: object 'per_plant_tax_counts' not found
#### 

df.netgrowth$tax_name <- tax_table(data.k)[df.netgrowth$OTU, "Genus"]
## Error: error in evaluating the argument 'i' in selecting a method for function '[': Error: object 'df.netgrowth' not found
df.netgrowth$taxlab <- ifelse((df.netgrowth$prop_ww > 0.01) & (df.netgrowth$km > 
    -0.2) & (df.netgrowth$km < 0.01), as.character(df.netgrowth$tax_name), "")
## Error: object 'df.netgrowth' not found
# percent reads near k max
cumsum(df.netgrowth$prop_as/6)[sum(df.netgrowth$k > 0.02857)]
## Error: object 'df.netgrowth' not found
high_growth_rate <- df.netgrowth[df.netgrowth$k > 0.02857, ]
## Error: object 'df.netgrowth' not found
max(high_growth_rate$prop_ww)
## Error: object 'high_growth_rate' not found
# percent of reads with k > -0.15
cumsum(df.netgrowth$prop_as/6)[sum(df.netgrowth$km < -0)]
## Error: object 'df.netgrowth' not found

The assumption is that the net growth rate at steady state will be equal to the max net growth rate (1/35 = 0.0286).

There are a lot of OTUs that are at or near the upper limit.

printNetGrowthStats(df.netgrowth)
## Error: could not find function "printNetGrowthStats"

so the are on average 64% of the reads in the sludge detected in the influent, but this number is highly variable probably because the sampling of the sludge taxa in the nfluent is near the level of detection.

Can I recalculate the data resampling the wastewater at a higher cutoff.

data.ww <- subset_samples(netgrowthdata, type == "WW")
sample_sums(data.ww)
## AMPA649 AMPA635 AMPA644 AMPA610 AMPA639 AMPA647 
##   74369   71700   62439   67449   75525   58297
data.k.ww <- SampleEvenDepth(mydata = data.ww, depth = 58000, seed = 1234)
## [1] "sample seed = 1234"
## [1] "subsampled at 58000 reads"
## `set.seed(1234)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1234); .Random.seed` for the full vector
## ...
## 1651 OTUs were removed because they are no longer 
##  present in any sample after random subsampling
## ...
data.as <- subset_samples(netgrowthdata, type == "AS")
data.k.as <- SampleEvenDepth(mydata = data.as, depth = 40000, seed = 1234)
## [1] "sample seed = 1234"
## [1] "subsampled at 40000 reads"
## `set.seed(1234)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(1234); .Random.seed` for the full vector
## ...
## 1052 OTUs were removed because they are no longer 
##  present in any sample after random subsampling
## ...
data.k.uneven <- merge_phyloseq(data.k.ww, data.k.as)
df.netgrowth.uneven <- calcNetGrowthData(data.k.uneven)
## Error: could not find function "calcNetGrowthData"
printNetGrowthStats(df.netgrowth.uneven)
## Error: could not find function "printNetGrowthStats"

This did not make much difference...

myBreaks <- c(1, 10)

ggplot(data = df.netgrowth, aes(x = prop_as * 100, y = km, color = prop_ww * 
    100, label = taxlab)) + geom_point() + geom_text(size = 3, color = "black", 
    vjust = 1, hjust = 1) + scale_x_log10(breaks = c(0.01, 0.1, 1, 10), limits = c(0.01, 
    15)) + scale_colour_gradient(trans = "log", low = "white", high = "red", 
    breaks = myBreaks, labels = myBreaks) + facet_wrap(pair ~ plant) + labs(y = "bounded net growth rate", 
    x = "biomass abundance (%)", colour = "ww abundance (%)")
## Error: object 'df.netgrowth' not found
ggsave(filename = "figs/sludge_vs_k_all.png")
## Saving 7 x 7 in image
myBreaks <- c(1, 10)
df.netgrowth$tax_name <- makeTaxLabels(df.netgrowth$OTU, data.k)
## Error: object 'df.netgrowth' not found
df.netgrowth$taxlab <- NA
## Error: object 'df.netgrowth' not found
df.netgrowth$taxlab <- with(df.netgrowth, ifelse((prop_ww >= 0.01) & (prop_as >= 
    0.0012), tax_name, taxlab))
## Error: object 'df.netgrowth' not found
p <- plot_netgrowth(mydata = df.netgrowth, value = "AAE-2")
## Error: could not find function "plot_netgrowth"
p
## Error: object 'p' not found
info <- c(as.character(samData(data.k)$pair), "all")
per_plant_plots <- lapply(info, FUN = plot_netgrowth)
## Error: object 'plot_netgrowth' not found
names(per_plant_plots) <- info
## Error: object 'per_plant_plots' not found
class_by_pair <- group_by(df.netgrowth, pair, class)
## Error: object 'df.netgrowth' not found
d <- summarise(class_by_pair, total = n()) %.% arrange(class, pair)
## Error: object 'class_by_pair' not found
d1 <- d[d$class == "Both", "total"]/d[d$class == "AS", "total"] * 100
## Error: object 'd' not found
names(d1) <- unique(d$pair)
## Error: object 'd' not found
# result per pair
d1
## Error: object 'd1' not found
by_class <- group_by(df.netgrowth, class)
## Error: object 'df.netgrowth' not found
d2 <- summarise(by_class, total = n()) %.% arrange(class)
## Error: object 'by_class' not found
d2
## Error: object 'd2' not found
# overall fraction
d2[d2$class == "Both", "total"]/d2[d2$class == "AS", "total"] * 100
## Error: object 'd2' not found
source("R/net_growth_rate.R")
## Warning: cannot open file 'scr/functions.R': No such file or directory
## Error: cannot open the connection
data.genus <- tax_glom(physeq = data.k, taxrank = "Genus", NArm = TRUE)
df.genus.k <- calcNetGrowthData(data.genus, k_lower_bound = -0.2)
## Error: could not find function "calcNetGrowthData"
plotlist <- plotNetGrowthDistribution(df.genus.k)
## Error: could not find function "plotNetGrowthDistribution"
plotlist[[1]]
## Error: object 'plotlist' not found
plotlist[[2]]
## Error: object 'plotlist' not found
dev.off()
## null device 
##           1
myBreaks <- c(1, 10)
df.genus.k$tax_name <- makeTaxLabels(df.genus.k$OTU, data.k)
## Error: object 'df.genus.k' not found
df.genus.k$taxlab <- NA
## Error: object 'df.genus.k' not found
df.genus.k$taxlab <- with(df.genus.k, ifelse((prop_ww >= 0.01) & (prop_as >= 
    0.0012), tax_name, taxlab))
## Error: object 'df.genus.k' not found
df.genus.k$taxlab <- with(df.genus.k, ifelse((prop_as >= 0.02), tax_name, taxlab))
## Error: object 'df.genus.k' not found
p <- plot_netgrowth(mydata = df.genus.k, value = "AAE-2")
## Error: could not find function "plot_netgrowth"
p
## Error: object 'p' not found

Which taxa have a strong negative growth rate?

df.netgrowth.neg <- subset(df.netgrowth, (k <= -0) & (prop_ww > 0.001)  )
## Error: object 'df.netgrowth' not found
neg_otus         <- unique(df.netgrowth.neg$OTU)
## Error: object 'df.netgrowth.neg' not found
neg_taxnames     <- as.data.frame(tax_table(data.k)[neg_otus])[1:6]
## Error: error in evaluating the argument 'i' in selecting a method for function '[': Error: object 'neg_otus' not found
neg_taxnames$OTU <- row.names(neg_taxnames)
## Error: object 'neg_taxnames' not found
stats            <- ddply(df.netgrowth.neg, "OTU", function (otu) 
                                        c("max" = max(otu$k),
                                          "min" = min(otu$k),
                                         "range"= max(otu$k) -  min(otu$k),
                                    "total_as" = sum(otu$prop_as),
                                     "total_w" = sum(otu$prop_ww)))
## Error: object 'df.netgrowth.neg' not found
neg_taxa <- merge(neg_taxnames, stats)
## Error: object 'neg_taxnames' not found
neg_taxa.min <- neg_taxa[ order(neg_taxa$min), ]
## Error: object 'neg_taxa' not found
write.table(neg_taxa.min, file="exp/neg_taxa.tsv", sep="\t", 
              quote=FALSE, row.names=FALSE, col.names=TRUE)
## Error: object 'neg_taxa.min' not found
pl <- ggplot(data= df.netgrowth.neg, 
             aes(y=abs(k), 
             x =reorder(OTU, abs(k), order= TRUE, function(x) median(x))) ) +
    scale_y_reverse(name = "k") + 
    scale_x_discrete(name   = "highly negative OTUs", 
                  labels = function(x) 
                     df.netgrowth.neg[ df.netgrowth.neg$OTU == x, "taxname"]) +
    coord_cartesian(ylim = c(0, 0.5)) +
    geom_boxplot(  ) + # aes(fill = tax_name)
    theme(axis.text.x  = element_text(size = 8,  hjust = 1, vjust = 1, 
                                      angle = 45)) +
    theme(axis.title.x = element_text(size = 12)) + 
    theme(axis.title.y = element_text(size = 12, vjust = 0.2)) +  
    theme(axis.text.y  = element_text(size = 12)) +
    theme(legend.text  = element_text(size = 10)) + 
    theme(legend.title = element_text(size = 12)) +
    theme(legend.justification = c(1, 0))
## Error: object 'df.netgrowth.neg' not found
#+    theme(legend.position      = "none")
  
pl
## Error: object 'pl' not found
df.netgrowth.asneg <- subset(df.netgrowth, k <= -0.1)
## Error: object 'df.netgrowth' not found
stats <- ddply(df.netgrowth.asneg, .(pair), summarize, 
               sumas=sum(prop_as),
               sumww=sum(prop_ww))
## Error: object 'df.netgrowth.asneg' not found
mean(stats$sumas) * 100
## Error: object 'stats' not found
sd  (stats$sumas) * 100
## Error: object 'stats' not found
df.netgrowth.asneg.abun <- subset(df.netgrowth, (k <= -0.1) & (prop_as > 0.005))
## Error: object 'df.netgrowth' not found
stats <- ddply(df.netgrowth.asneg.abun, .(tax_name, pair), summarize, 
               sumas=round(sum(prop_as * 100), 1),
               sumww=round(sum(prop_ww * 100), 1))
## Error: object 'df.netgrowth.asneg.abun' not found
stats
## Error: object 'stats' not found

The two big players in the neg k taxa are Clostridia and Bacteroidales. These are typically anaerobes and therefore these are likely inactive.

The Bacteroides had a k of -0.2 which is considerably less than the the -0.04 we have used as a lower bound.

THere are some OTUs of known sludge genera that actually have a low k.

Thauera, Defluvicoccus, Thiothrix

TODO: Move the lower bound to say -0.2 and look at the

What is the range of the OTUs?

Are there some in the higher ranges with a larger range?

not sure what I am doing here...

calcKSummary <- function(ps, df) {
    result <- as.data.frame(tax_table(ps))[1:6]
    result$OTU <- row.names(result)
    stats <- ddply(df, "OTU", function(otu) c(max = max(otu$k), min = min(otu$k), 
        range = max(otu$k) - min(otu$k), total_as = sum(otu$prop_as), total_w = sum(otu$prop_ww)))
    result <- merge(result, stats)
}


plotKSummary <- function(df) {
    a <- ggplot(data = df, aes(x = k)) + geom_histogram(binwidth = 0.005) + 
        coord_cartesian(ylim = c(0, 50), xlim = c(-0.5, 0.03)) + scale_fill_brewer(palette = "Set1") + 
        labs(x = NULL, y = "OTU count") + theme(legend.position = "none")
    a
    
    df <- df[order(df$km, decreasing = TRUE), ]
    prop_at_zerok <- cumsum(df$prop_as/6)[sum(df$km > 0)]
    df <- df[order(df$km, decreasing = FALSE), ]
    
    b <- ggplot(data = df, aes(x = km, y = cumsum(prop_as/6) * 100)) + scale_x_continuous(limit = c(-0.041, 
        0.03), breaks = seq(-0.04, 0.03, 0.01)) + coord_cartesian(ylim = c(0, 
        10)) + geom_step() + geom_hline(yintercept = (1 - prop_at_zerok) * 100, 
        color = "red") + labs(x = "bounded Net growth rate (k)", y = "% cumulative abundance")
    b
    list(a, b)
}


Lachnospir <- subset_taxa(data.k, Family == "Lachnospiraceae")

Lachnospir.otus <- taxa_names(Lachnospir)
df.Lachnospir <- df.netgrowth[df.netgrowth$OTU %in% Lachnospir.otus, ]
## Error: object 'df.netgrowth' not found
stats <- ddply(df.Lachnospir, .(tax_name, pair), summarize, sumas = round(sum(prop_as * 
    100), 1), sumww = round(sum(prop_ww * 100), 1), kmean = round(mean(k), 3))
## Error: object 'df.Lachnospir' not found
stats
## Error: object 'stats' not found
Lach.summary <- calcKSummary(data.k, df.Lachnospir)
## Error: object 'df.Lachnospir' not found
Lach.plots <- plotKSummary(df.Lachnospir)
## Error: object 'df.Lachnospir' not found
a <- subset(df.Lachnospir, (df$prop_as > 0.001) & (df$prop_ww > 0.001))
## Error: object 'df.Lachnospir' not found
ggplot(data = a, aes(x = k)) + geom_histogram(binwidth = 0.005) + coord_cartesian(ylim = c(0, 
    50), xlim = c(-0.15, 0.03)) + scale_fill_brewer(palette = "Set1") + labs(x = NULL, 
    y = "OTU count") + theme(legend.position = "none")
## Error: object 'a' not found
Bacteroides <- subset_taxa(data.k, Family == "Bacteroidaceae")
Bacteroides.otus <- taxa_names(Bacteroides)
df.Bacteroides <- df.netgrowth[df.netgrowth$OTU %in% Bacteroides.otus, ]
## Error: object 'df.netgrowth' not found
stats <- ddply(df.Bacteroides, .(tax_name, pair), summarize, sumas = round(sum(prop_as * 
    100), 1), sumww = round(sum(prop_ww * 100), 1), kmean = round(mean(k), 3))
## Error: object 'df.Bacteroides' not found
stats
## Error: object 'stats' not found
Bacteroides.summary <- calcKSummary(data.k, df.Bacteroides)
## Error: object 'df.Bacteroides' not found
Bacteroides.plots <- plotKSummary(df.Bacteroides)
## Error: object 'df.Bacteroides' not found
a <- subset(df.Bacteroides, (df$prop_as > 0.001) & (df$prop_ww > 0.001))
## Error: object 'df.Bacteroides' not found
ggplot(data = a, aes(x = k)) + geom_histogram(binwidth = 0.005) + coord_cartesian(ylim = c(0, 
    50), xlim = c(-0.15, 0.03)) + scale_fill_brewer(palette = "Set1") + labs(x = NULL, 
    y = "OTU count") + theme(legend.position = "none")
## Error: object 'a' not found
df.Bacteroides <- calcNetGrowthData(Bacteroides)
## Error: could not find function "calcNetGrowthData"
Bact.summary <- calcKSummary(data.k, df.Bacteroides)
## Error: object 'df.Bacteroides' not found
Bact.plots <- plotKSummary(df.Bacteroides)
## Error: object 'df.Bacteroides' not found
Bact.plots
## Error: object 'Bact.plots' not found
Arcobacter <- subset_taxa(data.k, Genus == "Arcobacter")
Arco.otus <- taxa_names(Arcobacter)
df.Arcobacter <- df.netgrowth[df.netgrowth$OTU %in% Arco.otus, ]
## Error: object 'df.netgrowth' not found
Arco.summary <- calcKSummary(data.k, df.Arcobacter)
## Error: object 'df.Arcobacter' not found
Arco.plots <- plotKSummary(df.Arcobacter)
## Error: object 'df.Arcobacter' not found
Arco.plots
## Error: object 'Arco.plots' not found
Accumulibacter <- subset_taxa(data.k, Genus == "CandidatusAccumulibacter")
Accumulibacter.otus <- taxa_names(Accumulibacter)
df.Accumulibacter <- df.netgrowth[df.netgrowth$OTU %in% Accumulibacter.otus, 
    ]
## Error: object 'df.netgrowth' not found
ggplot(df.Accumulibacter, aes(y = prop_as, x = prop_ww, color = OTU)) + geom_point()
## Error: object 'df.Accumulibacter' not found
Accumulibacter.summary <- calcKSummary(data.k, df.Accumulibacter)
## Error: object 'df.Accumulibacter' not found
Accumulibacter.plots <- plotKSummary(df.Accumulibacter)
## Error: object 'df.Accumulibacter' not found
Accumulibacter.plots
## Error: object 'Accumulibacter.plots' not found
data.k.p <- transformSampleCounts(data.k, function(x) x/sum(x) * 100)
Tetrasphaera <- subset_taxa(data.k.p, Genus == "Tetrasphaera_etal")
Tetrasphaera.otus <- taxa_names(Tetrasphaera)
df.Tetrasphaera <- df.netgrowth[df.netgrowth$OTU %in% Tetrasphaera.otus, ]
## Error: object 'df.netgrowth' not found
p <- plot_bar(Tetrasphaera, "Genus", fill = "OTU", facet_grid = type ~ pair)
## Error: error in evaluating the argument 'j' in selecting a method for function '[': Error in apply(!apply(TT, 2, is.na), 2, any) : 
##   dim(X) must have a positive length
## Calls: which -> apply
p$data$OTU <- factor(p$data$OTU)
## Error: object 'p' not found
p
## Error: object 'p' not found
Tetrasphaera.summary <- calcKSummary(data.k, df.Tetrasphaera)
## Error: object 'df.Tetrasphaera' not found
Tetrasphaera.plots <- plotKSummary(df.Tetrasphaera)
## Error: object 'df.Tetrasphaera' not found
Tetrasphaera.plots
## Error: object 'Tetrasphaera.plots' not found
AOB <- subset_taxa(data.k, Family == "Nitrosomonadaceae")
AOB <- transform_sample_counts(physeq = AOB, function(x) x/sum(x))

p <- plot_bar(AOB, "Genus", fill = "OTU", facet_grid = type ~ pair)
p$data$OTU <- factor(p$data$OTU)
p
## Warning: Removed 7 rows containing missing values (position_stack).
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: position_stack requires constant width: output may be incorrect
## Warning: Removed 7 rows containing missing values (position_stack).
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-5

NOB in influent and effluent

data.p <- transform_sample_counts(physeq = data.k, function(x) x/sum(x) * 100)
NOB <- subset_taxa(data.p, (Genus == "Nitrotoga_etal") | (Genus == "Nitrospira"))
NOB.prop <- transform_sample_counts(physeq = NOB, function(x) x/sum(x) * 100)

p <- plot_bar(NOB, "Genus", fill = "OTU", facet_grid = type ~ pair)
p$data$OTU <- factor(p$data$OTU)
p$data$Genus <- gsub(p$data$Genus, pattern = "_etal", replacement = "")
p <- p + ylab("Absolute abundance") + xlab(label = "") + theme(axis.text.x = element_blank(), 
    legend.position = "none")

p2 <- plot_bar(NOB.prop, "Genus", fill = "OTU", facet_grid = type ~ pair)
p2$data$OTU <- factor(p2$data$OTU)
p2$data$Genus <- gsub(p2$data$Genus, pattern = "_etal", replacement = "")
p2 <- p2 + ylab("Relative abundance") + xlab(label = "") + theme(legend.position = "none")

pdf(file = "figs/NOB_influent_sludge.pdf")
grid.arrange(p, p2, nrow = 2)
## Warning: Removed 7 rows containing missing values (position_stack).
## Warning: no non-missing arguments to min; returning Inf
## Warning: no non-missing arguments to max; returning -Inf
## Warning: position_stack requires constant width: output may be incorrect
dev.off()
## pdf 
##   2
d <- select(p$data, OTU, pair, type, Abundance) %.% filter(Abundance > 0.1)
mutate(d, k = df.netgrowth$k[OTU])
## Error: binding not found: 'df.netgrowth'
d_group <- group_by(d, OTU)
dcast(d.group, OTU ~ type + pair, value.var = "Abundance", fun.aggregate = sum)
## Error: object 'd.group' not found