From e238ffe709d34dd1203359b17dfe77c0d2e2c0cf Mon Sep 17 00:00:00 2001 From: <> Date: Thu, 25 Apr 2024 11:03:03 +0000 Subject: [PATCH] Deployed cade16e with MkDocs version: 1.6.0 --- .nojekyll | 0 404.html | 815 ++ analyse/index.html | 1128 +++ assets/_mkdocstrings.css | 0 assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.dd8806f2.min.js | 29 + assets/javascripts/bundle.dd8806f2.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + 
assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.b8dbb3d2.min.js | 42 + .../workers/search.b8dbb3d2.min.js.map | 7 + assets/stylesheets/main.66ac8b77.min.css | 1 + assets/stylesheets/main.66ac8b77.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + climb-tre.pdf | Bin 0 -> 265055 bytes common/index.html | 1010 +++ index.html | 863 +++ mscape-analysis/index.html | 1298 ++++ mscape-examples/index.html | 1015 +++ mscape/index.html | 1226 +++ onyx_client_installation_guide/index.html | 892 +++ pathsafe-analysis/index.html | 1084 +++ pathsafe/index.html | 1153 +++ search/search_index.json | 1 + sitemap.xml | 53 + sitemap.xml.gz | Bin 0 -> 291 bytes upload/index.html | 1062 +++ zymo-comparison.png | Bin 0 -> 39096 bytes 61 files changed, 18890 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 analyse/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.dd8806f2.min.js create mode 100644 assets/javascripts/bundle.dd8806f2.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js.map create mode 100644 assets/stylesheets/main.66ac8b77.min.css create mode 100644 assets/stylesheets/main.66ac8b77.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 climb-tre.pdf create mode 100644 common/index.html create mode 100644 index.html create mode 100644 mscape-analysis/index.html create mode 100644 mscape-examples/index.html 
create mode 100644 mscape/index.html create mode 100644 onyx_client_installation_guide/index.html create mode 100644 pathsafe-analysis/index.html create mode 100644 pathsafe/index.html create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 upload/index.html create mode 100644 zymo-comparison.png diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..e826e70 --- /dev/null +++ b/404.html @@ -0,0 +1,815 @@ + + + +
+ + + + + + + + + + + + + + + + +Once data and metadata have been ingested into the Onyx database, you
+can query it using the Onyx client, which provides a command line interface (CLI)
+and Python API. This short example
+demonstrates a few principal functions. More are described in the
+onyx-client
documentation.
This guide also assumes that you're using a Notebook Server on CLIMB, +so that once installed, the Onyx client will automatically be configured.
+First, let's install the Onyx client, which is available through the
+conda-forge package
+climb-onyx-client
and can thus be installed
+with conda
. As advised in the CLIMB docs on installing
+software,
+you should install the client in a new Conda environment.
+I'll name my environment onyx
and install climb-onyx-client
, as well as ipykernel
(so that the client is available in my Jupyter Notebooks).
+
mscape
listed.
+As an example task, we'll see if we can find any sequencing data generated +from ZymoBIOMICS sources. These are designed with +a particular specification +of DNA from eight bacteria and two yeasts. We can use these to see if our protocol +correctly recovers the DNA fractions, i.e. whether our protocol is biased.
+From the command line, the main route to querying Onyx is via the filter
command.
+On its own, this queries the database with no filters. The command
+
(onyx) jovyan:~$ onyx fields mscape
+...
+├────────────────────────────────┼──────────┼───────────────────┼──────────────────────────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
+│ extraction_enrichment_protocol │ optional │ text │ Details of nucleic acid extraction and optional enrichment steps. │ │
+├────────────────────────────────┼──────────┼───────────────────┼──────────────────────────────────────────────────────────────────────────────┼─────────────────────────────────────────────────────────────────────────────┤
+...
+
zymo
(case-insensitive) in this field.
+
+That should return JSON data for a few entries. You may wish to format the
+data as CSV or TSV with --format csv
or --format tsv
, respectively.
+When data is ingested into Onyx, a taxonomic classification is automatically run.
+The last part of the JSON data usually contains some of the results of this classification.
+The complete reports can be found in the S3 buckets given in the
+'taxon_reports'
field. You can find this in the output you've already produced
+or modify the filter
command to only request them using the --include
flag. e.g.
+
(onyx) jovyan:~$ onyx filter mscape --field extraction_enrichment_protocol.icontains=zymo --include=taxon_reports
+[
+ {
+ "taxon_reports": "s3://mscape-published-taxon-reports/C-FDE50853AD/"
+ },
+ {
+ "taxon_reports": "s3://mscape-published-taxon-reports/C-04F4495068/"
+ }
+]
+
--include
flag e.g.
+(onyx) jovyan:~$ onyx filter mscape --field extraction_enrichment_protocol.icontains=zymo --include climb_id,taxon_reports
+[
+ {
+ "climb_id": "C-FDE50853AD",
+ "taxon_reports": "s3://mscape-published-taxon-reports/C-FDE50853AD/"
+ },
+ {
+ "climb_id": "C-04F4495068",
+ "taxon_reports": "s3://mscape-published-taxon-reports/C-04F4495068/"
+ }
+]
+
--exclude
+flag in the same way.
+Either way, you now have the location of the taxonomy reports. Let's have a look
+with s3cmd
.
+
(onyx) jovyan:~$ s3cmd ls s3://mscape-published-taxon-reports/C-FDE50853AD/
+2023-11-10 12:56 146K s3://mscape-published-taxon-reports/C-FDE50853AD/PlusPF.kraken.json
+2023-11-10 12:56 2G s3://mscape-published-taxon-reports/C-FDE50853AD/PlusPF.kraken_assignments.tsv
+2023-11-10 12:56 193K s3://mscape-published-taxon-reports/C-FDE50853AD/PlusPF.kraken_report.txt
+
s3cmd
:
+(onyx) jovyan:~$ s3cmd get s3://mscape-published-taxon-reports/C-FDE50853AD/PlusPF.kraken_report.txt
+download: 's3://mscape-published-taxon-reports/C-FDE50853AD/PlusPF.kraken_report.txt' -> './PlusPF.kraken_report.txt' [1 of 1]
+ 197750 of 197750 100% in 0s 3.79 MB/s done
+
If you've never seen one of these reports before, it's worth having a
+quick look with a tool like less
or by opening it using the
+JupyterLab file browser. For reference, it's worth showing the header
+
(onyx) jovyan:~$ head -n 1 PlusPF.kraken_report.txt
+% of Seqs Clades Taxonomies Rank Taxonomy ID Scientific Name
+
(onyx) jovyan:~$ grep "Bacillus subtilis" PlusPF.kraken_report.txt
+ 20.30 435278 1452 G1 653685 Bacillus subtilis group
+ 0.12 2624 1952 S 1423 Bacillus subtilis
+ 0.03 565 242 S1 135461 Bacillus subtilis subsp. subtilis
+ 0.01 108 108 S2 1404258 Bacillus subtilis subsp. subtilis str. OH 131.1
+ ...
+
An important detail here is that the fraction reported in this output +is not calculated in the same way as the reference values (12% for bacteria; 2% for yeasts). +Let's make a fairer comparison using the JSON taxonomic data.
+To fairly compare the taxonomic data with the reference values in the +Zymo community, we need to know the proportions of gDNA, so we need to +compute the number of base pairs that were assigned to each taxon. +Let's make this comparison in Python using the Onyx client's Python +API.
+Let's first run the same query for the Zymo data. We'll follow the +examples in the Onyx documentation and run the query in a context +manager. +
import os
+from onyx import OnyxConfig, OnyxEnv, OnyxClient
+
+config = OnyxConfig(
+ domain=os.environ[OnyxEnv.DOMAIN],
+ token=os.environ[OnyxEnv.TOKEN],
+)
+
+with OnyxClient(config) as client:
+ records = list(client.filter(
+ "mscape",
+ fields={
+ "extraction_enrichment_protocol__icontains": "zymo",
+ },
+ ))
+
filter
call in a list
because otherwise
+we get a generator.
+If you want to inspect the data, it's a bit easier to read if formatted with
+indentation, which can be done using the standard json.dumps
function:
+
'taxa_files'
key gives us a list of dictionaries,
+each of which has a number of reads and a mean length, the product of
+which is the total number of base pairs that were read for that
+taxon. A simple first step is to convert the taxonomic data (for the first record)
+into a Pandas DataFrame with
+
+We also need to drop a few lower-level taxa that are already
+accounted for in higher ones, e.g. the reads for Bacillus spizizenii TU-B-10 are
+among the reads counted for Bacillus spizizenii. A quick way of doing this
+is by selecting the rows that have only two words in their names.
+
+Now, let's add columns for the total number of base pairs associated with
+each taxon and what proportion that is of the total.
+
+Finally, let's make a rough plot with a black dashed line at 12%.
+import matplotlib.pyplot as plt
+
+plt.plot(df['human_readable'], df['proportion']*100, 'o')
+plt.axhline(12, c='k', ls='--');
+plt.xticks(rotation=22.5, ha='right');
+
There are some clear discrepancies—Pseudomonas aeruginosa is +underreported and Bacillus spizizenii is overreported—but this +matches results by e.g. Nicholls et +al. (2019).
+This short example is intended as a basic demonstration of what's +possible in CLIMB-TRE. We're always interested to hear more examples +of research questions that CLIMB-TRE can answer, so let us know if you +have an example that could be included as a guide for others.
+Last modified 2024-04-25 12:02:14+01:00 (cade16e)
+ + + + + + + + + + + + + +