-
Notifications
You must be signed in to change notification settings - Fork 1
/
cite.bib
72 lines (68 loc) · 9.26 KB
/
cite.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
% SCRAPP: diversity assessment of microbial samples from phylogenetic placements.
% Early-view placeholders (volume/number "n/a", empty pages, no year) replaced
% by the final issue data; verify against the DOI record if in doubt.
@article{scrapp,
  author   = {Barbera, Pierre and Czech, Lucas and Lutteropp, Sarah and Stamatakis, Alexandros},
  title    = {{SCRAPP}: A tool to assess the diversity of microbial samples from phylogenetic placements},
  journal  = {Molecular Ecology Resources},
  volume   = {21},
  number   = {1},
  pages    = {340--349},
  year     = {2021},
  doi      = {10.1111/1755-0998.13255},
  url      = {https://onlinelibrary.wiley.com/doi/abs/10.1111/1755-0998.13255},
  eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1111/1755-0998.13255},
  keywords = {diversity, microbiome, phylogenetic placement, species delimitation},
  abstract = {Microbial ecology research is currently driven by the continuously decreasing cost of DNA sequencing and the improving accuracy of data analysis methods. One such analysis method is phylogenetic placement, which establishes the phylogenetic identity of the anonymous environmental sequences in a sample by means of a given phylogenetic reference tree. However, assessing the diversity of a sample remains challenging, as traditional methods do not scale well with the increasing data volumes and/or do not leverage the phylogenetic placement information. Here, we present scrapp, a highly parallel and scalable tool that uses a molecular species delimitation algorithm to quantify the diversity distribution over the reference phylogeny for a given phylogenetic placement of the sample. scrapp employs a novel approach to cluster phylogenetic placements, called placement space clustering, to efficiently perform dimensionality reduction, so as to scale on large data volumes. Furthermore, it uses the phylogeny-aware molecular species delimitation method mPTP to quantify diversity. We evaluated scrapp using both, simulated and empirical data sets. We use simulated data to verify our approach. Tests on an empirical data set show that scrapp-derived metrics can classify samples by their diversity-correlated features equally well or better than existing, commonly used approaches. scrapp is available at https://github.com/pbdas/scrapp.},
}
% mPTP: multi-rate Poisson tree processes for single-locus species delimitation.
% year/month record the online-first date as exported by the publisher.
@article{mptp,
  author   = {Kapli, P and Lutteropp, S and Zhang, J and Kobert, K and Pavlidis, P and Stamatakis, A and Flouri, T},
  title    = {Multi-rate {Poisson} tree processes for single-locus species delimitation under maximum likelihood and {Markov} chain {Monte Carlo}},
  journal  = {Bioinformatics},
  volume   = {33},
  number   = {11},
  pages    = {1630--1638},
  year     = {2017},
  month    = jan,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btx025},
  url      = {https://doi.org/10.1093/bioinformatics/btx025},
  eprint   = {https://academic.oup.com/bioinformatics/article-pdf/33/11/1630/25153573/btx025.pdf},
  abstract = {In recent years, molecular species delimitation has become a routine approach for quantifying and classifying biodiversity. Barcoding methods are of particular importance in large-scale surveys as they promote fast species discovery and biodiversity estimates. Among those, distance-based methods are the most common choice as they scale well with large datasets; however, they are sensitive to similarity threshold parameters and they ignore evolutionary relationships. The recently introduced ``Poisson Tree Processes'' (PTP) method is a phylogeny-aware approach that does not rely on such thresholds. Yet, two weaknesses of PTP impact its accuracy and practicality when applied to large datasets; it does not account for divergent intraspecific variation and is slow for a large number of sequences. We introduce the multi-rate PTP (mPTP), an improved method that alleviates the theoretical and technical shortcomings of PTP. It incorporates different levels of intraspecific genetic diversity deriving from differences in either the evolutionary history or sampling of each species. Results on empirical data suggest that mPTP is superior to PTP and popular distance-based methods as it, consistently yields more accurate delimitations with respect to the taxonomy (i.e., identifies more taxonomic species, infers species numbers closer to the taxonomy). Moreover, mPTP does not require any similarity threshold as input. The novel dynamic programming algorithm attains a speedup of at least five orders of magnitude compared to PTP, allowing it to delimit species in large (meta-) barcoding data. In addition, Markov Chain Monte Carlo sampling provides a comprehensive evaluation of the inferred delimitation in just a few seconds for millions of steps, independently of tree size. mPTP is implemented in C and is available for download at http://github.com/Pas-Kapli/mptp under the GNU Affero 3 license. A web-service is available at http://mptp.h-its.org. Supplementary data are available at Bioinformatics online.},
}
% ParGenes: massively parallel model selection and per-gene tree inference.
% year/month record the online-first date as exported by the publisher.
@article{pargenes,
  author   = {Morel, Benoit and Kozlov, Alexey M and Stamatakis, Alexandros},
  title    = {{ParGenes}: a tool for massively parallel model selection and phylogenetic tree inference on thousands of genes},
  journal  = {Bioinformatics},
  volume   = {35},
  number   = {10},
  pages    = {1771--1773},
  year     = {2018},
  month    = oct,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/bty839},
  url      = {https://doi.org/10.1093/bioinformatics/bty839},
  eprint   = {https://academic.oup.com/bioinformatics/article-pdf/35/10/1771/28604612/bty839.pdf},
  abstract = {Coalescent- and reconciliation-based methods are now widely used to infer species phylogenies from genomic data. They typically use per-gene phylogenies as input, which requires conducting multiple individual tree inferences on a large set of multiple sequence alignments (MSAs). At present, no easy-to-use parallel tool for this task exists. Ad hoc scripts for this purpose do not only induce additional implementation overhead, but can also lead to poor resource utilization and long times-to-solution. We present ParGenes, a tool for simultaneously determining the best-fit model and inferring maximum likelihood (ML) phylogenies on thousands of independent MSAs using supercomputers. ParGenes executes common phylogenetic pipeline steps such as model-testing, ML inference(s), bootstrapping and computation of branch support values via a single parallel program invocation. We evaluated ParGenes by inferring $>$ 20 000 phylogenetic gene trees with bootstrap support values from Ensembl Compara and VectorBase alignments in 28 h on a cluster with 1024 nodes. GNU GPL at https://github.com/BenoitMorel/ParGenes. Supplementary material is available at Bioinformatics online.},
}
% RAxML-NG: maximum likelihood phylogenetic inference tool.
% volume/number/pages added from the final issue; year/month keep the
% online-first date as exported by the publisher. Verify against the DOI record.
@article{raxmlng,
  author   = {Kozlov, Alexey M and Darriba, Diego and Flouri, Tom{\'a}{\v{s}} and Morel, Benoit and Stamatakis, Alexandros},
  title    = {{RAxML-NG}: A fast, scalable, and user-friendly tool for maximum likelihood phylogenetic inference},
  journal  = {Bioinformatics},
  volume   = {35},
  number   = {21},
  pages    = {4453--4455},
  year     = {2019},
  month    = may,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btz305},
  url      = {https://doi.org/10.1093/bioinformatics/btz305},
  eprint   = {https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btz305/28574204/btz305.pdf},
  abstract = {Phylogenies are important for fundamental biological research, but also have numerous applications in biotechnology, agriculture, and medicine. Finding the optimal tree under the popular maximum likelihood (ML) criterion is known to be NP-hard. Thus, highly optimized and scalable codes are needed to analyze constantly growing empirical datasets. We present RAxML-NG, a from scratch re-implementation of the established greedy tree search algorithm of RAxML/ExaML. RAxML-NG offers improved accuracy, flexibility, speed, scalability, and usability compared to RAxML/ExaML. On taxon-rich datasets, RAxML-NG typically finds higher-scoring trees than IQTree, an increasingly popular recent tool for ML-based phylogenetic inference (although IQ-Tree shows better stability). Finally, RAxML-NG introduces several new features, such as the detection of terraces in tree space and a the recently introduced transfer bootstrap support metric. The code is available under GNU GPL at https://github.com/amkozlov/raxml-ng. RAxML-NG web service (maintained by Vital-IT) is available at https://raxml-ng.vital-it.ch/. Supplementary data are available at Bioinformatics online.},
}
% Genesis (library) and Gappa (command-line tool) for phylogenetic placement data.
% volume/number/pages added from the final issue; year/month keep the
% online-first date as exported by the publisher. Verify against the DOI record.
@article{genesisgappa,
  author   = {Czech, Lucas and Barbera, Pierre and Stamatakis, Alexandros},
  title    = {{Genesis} and {Gappa}: processing, analyzing and visualizing phylogenetic (placement) data},
  journal  = {Bioinformatics},
  volume   = {36},
  number   = {10},
  pages    = {3263--3265},
  year     = {2020},
  month    = feb,
  issn     = {1367-4803},
  doi      = {10.1093/bioinformatics/btaa070},
  url      = {https://doi.org/10.1093/bioinformatics/btaa070},
  eprint   = {https://academic.oup.com/bioinformatics/advance-article-pdf/doi/10.1093/bioinformatics/btaa070/32522527/btaa070.pdf},
  note     = {btaa070},
  abstract = {We present genesis, a library for working with phylogenetic data, and gappa, an accompanying command-line tool for conducting typical analyses on such data. The tools target phylogenetic trees and phylogenetic placements, sequences, taxonomies and other relevant data types, offer high-level simplicity as well as low-level customizability, and are computationally efficient, well-tested and field-proven. Both genesis and gappa are written in modern C++11, and are freely available under GPLv3 at http://github.com/lczech/genesis and http://github.com/lczech/gappa. Supplementary data are available at Bioinformatics online.},
}