From 8dfeccbf1369e5257f3ca0cea51c5b44885b7039 Mon Sep 17 00:00:00 2001
From: Michal Stolarczyk <stolarczyk.michal93@gmail.com>
Date: Mon, 31 Jul 2023 09:53:44 -0400
Subject: [PATCH] update api usage docs

---
 docs/API_usage.md | 612 +++++++++-------------------------------------
 1 file changed, 114 insertions(+), 498 deletions(-)

diff --git a/docs/API_usage.md b/docs/API_usage.md
index 1b50e3a..125f7a2 100644
--- a/docs/API_usage.md
+++ b/docs/API_usage.md
@@ -93,7 +93,7 @@ print(response)
 
 ```
 
-    {'MetricDataResults': [{'Id': 'nephele', 'Label': 'mem_used', 'Timestamps': [datetime.datetime(2023, 7, 19, 16, 45, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 44, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 43, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 42, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 41, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 40, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 39, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 38, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 37, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 36, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 35, tzinfo=tzutc()), datetime.datetime(2023, 7, 19, 16, 34, tzinfo=tzutc())], 'Values': [486064128.0, 485814272.0, 4685066240.0, 6207594496.0, 4992217088.0, 4720185344.0, 2435854336.0, 2444738560.0, 2400739328.0, 11007488000.0, 2191474688.0, 576376832.0], 'StatusCode': 'Complete'}], 'Messages': [], 'ResponseMetadata': {'RequestId': 'bbc8d20e-7879-447e-87e5-019b3769220f', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'bbc8d20e-7879-447e-87e5-019b3769220f', 'content-type': 'text/xml', 'content-length': '1596', 'date': 'Wed, 19 Jul 2023 18:35:02 GMT'}, 'RetryAttempts': 0}}
+    {'MetricDataResults': [{'Id': 'nephele', 'Label': 'mem_used', 'Timestamps': [], 'Values': [], 'StatusCode': 'Complete'}], 'Messages': [], 'ResponseMetadata': {'RequestId': '6f543152-1547-4c3f-a1ec-8d904e28dba2', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '6f543152-1547-4c3f-a1ec-8d904e28dba2', 'content-type': 'text/xml', 'content-length': '496', 'date': 'Mon, 31 Jul 2023 13:51:13 GMT'}, 'RetryAttempts': 0}}
 
 
 
@@ -122,12 +122,6 @@ mw.save_metric_json(file_path=f"/tmp/{instance_id}_metric.json", query_kwargs=qu
 mw.save_response_json(file_path=f"/tmp/{instance_id}_response.json", query_kwargs=query_kwargs)
 ```
 
-
-    
-![png](API_usage_files/API_usage_10_0.png)
-    
-
-
 ### Manual EC2 querying
 
 For users that require more control over the EC2 instance query settings, the `query_ec2_metrics` method can be used to manually query the EC2 instance. For instance it allows to fine tune the query period settings.
@@ -135,7 +129,7 @@ For users that require more control over the EC2 instance query settings, the `q
 
 ```python
 FINE_TUNED_SETTINGS = {
-    "days": 5,
+    "days": 7,
     "hours": 0,
     "minutes": 0,
     "stat": "Maximum",
@@ -149,12 +143,12 @@ response["ResponseMetadata"]
 
 
 
-    {'RequestId': '256d262e-488a-42bc-9197-2fcd4df82e98',
+    {'RequestId': 'a59814d4-5445-4cb8-b539-9efb7d65716f',
      'HTTPStatusCode': 200,
-     'HTTPHeaders': {'x-amzn-requestid': '256d262e-488a-42bc-9197-2fcd4df82e98',
+     'HTTPHeaders': {'x-amzn-requestid': 'a59814d4-5445-4cb8-b539-9efb7d65716f',
       'content-type': 'text/xml',
-      'content-length': '1596',
-      'date': 'Wed, 19 Jul 2023 18:35:04 GMT'},
+      'content-length': '4418',
+      'date': 'Mon, 31 Jul 2023 13:51:13 GMT'},
      'RetryAttempts': 0}
 
 
@@ -177,15 +171,15 @@ timed_metric.values[1:10]
 
 
 
-    [485814272.0,
-     4685066240.0,
-     6207594496.0,
-     4992217088.0,
-     4720185344.0,
-     2435854336.0,
-     2444738560.0,
-     2400739328.0,
-     11007488000.0]
+    [1051193344.0,
+     22160080896.0,
+     29538459648.0,
+     29531140096.0,
+     17124524032.0,
+     29451448320.0,
+     17050480640.0,
+     29373624320.0,
+     29358415872.0]
 
 
 
@@ -205,13 +199,22 @@ from dotenv import load_dotenv
 import os
 
 load_dotenv()
+x=os.environ.get("LOG_GROUP_NAME")
+y=os.environ.get("LOG_STREAM_NAME")
+print(f"LOG_GROUP_NAME: {x}")
+print(f"LOG_STREAM_NAME: {y}")
 
 lw = LogWatcher(
     log_group_name=os.environ.get("LOG_GROUP_NAME"),
     log_stream_name=os.environ.get("LOG_STREAM_NAME"),
 )
+
 ```
 
+    LOG_GROUP_NAME: main-NepheleWorker
+    LOG_STREAM_NAME: i-05cec4924aadbd516-job.log
+
+
 Importantly, you can also provide the start token for the log, which will be used to determine the starting point of the log query.
 
 ### Log streaming
@@ -232,7 +235,7 @@ next(streamer)
 
 
 
-    LogEventsList(events=[LogEvent(message='[2023-07-19 12:34:48,735 - INFO] Nephele, developed by BCBB/OCICB/NIAID/NIH version: 2.27.1, tag: Nephele_2023_July_19, commit: dce18e5', timestamp=datetime.datetime(2023, 7, 19, 12, 34, 48, 833000)), LogEvent(message='[2023-07-19 12:34:48,736 - INFO] Python version: 3.9.2', timestamp=datetime.datetime(2023, 7, 19, 12, 34, 48, 833000))], next_forward_token='f/37683453325894048129959718411585392236426944928309968897/s', next_backward_token='b/37683453325894048129959718411585392236426944928309968896/s')
+    LogEventsList(events=[LogEvent(message='[2023-07-25 12:58:13,421 - INFO] Nephele, developed by BCBB/OCICB/NIAID/NIH version: 2.27.1, tag: Nephele_2023_July_19, commit: 0b87cad', timestamp=datetime.datetime(2023, 7, 25, 12, 58, 14, 403000)), LogEvent(message='[2023-07-25 12:58:13,421 - INFO] Python version: 3.7.3', timestamp=datetime.datetime(2023, 7, 25, 12, 58, 14, 403000))], next_forward_token='f/37695045377463395103684887982714400219916480157627908097/s', next_backward_token='b/37695045377463395103684887982714400219916480157627908096/s')
 
 
 
@@ -250,480 +253,93 @@ print(formatted_logs)
 
 ```
 
-    [19-07-2023 12:34:48 UTC] Nephele, developed by BCBB/OCICB/NIAID/NIH version: 2.27.1, tag: Nephele_2023_July_19, commit: dce18e5
-    [19-07-2023 12:34:48 UTC] Python version: 3.9.2
-    [19-07-2023 12:34:48 UTC] Current time: 2023-07-19 12:34
-    [19-07-2023 12:34:49 UTC] Pipeline name: DADA2
-    [19-07-2023 12:34:49 UTC] Job Description:
-    [19-07-2023 12:34:49 UTC] Job parameters
-    [19-07-2023 12:34:49 UTC] job_id: b73de8bfdd22
-    [19-07-2023 12:34:49 UTC] inputs_dir: None
-    [19-07-2023 12:34:49 UTC] outputs_dir: None
-    [19-07-2023 12:34:49 UTC] map_file: <_io.TextIOWrapper name='/nephele_data/inputs/N2_16S_example_mapping_file_3_corrected.txt' mode='r' encoding='UTF-8'>
-    [19-07-2023 12:34:49 UTC] data_type: PE
-    [19-07-2023 12:34:49 UTC] wurlitzer_stdout: file
-    [19-07-2023 12:34:49 UTC] wurlitzer_stderr: file
-    [19-07-2023 12:34:49 UTC] ion_torrent: False
-    [19-07-2023 12:34:49 UTC] trimleft_fwd: 0
-    [19-07-2023 12:34:49 UTC] trimleft_rev: 0
-    [19-07-2023 12:34:49 UTC] maxee: 5
-    [19-07-2023 12:34:49 UTC] trunclen_fwd: 0
-    [19-07-2023 12:34:49 UTC] trunclen_rev: 0
-    [19-07-2023 12:34:49 UTC] truncq: 4
-    [19-07-2023 12:34:49 UTC] just_concatenate: False
-    [19-07-2023 12:34:49 UTC] maxmismatch: 0
-    [19-07-2023 12:34:49 UTC] trim_overhang: False
-    [19-07-2023 12:34:49 UTC] chimera: True
-    [19-07-2023 12:34:49 UTC] ref_db: sv138.1
-    [19-07-2023 12:34:49 UTC] taxmethod: rdp
-    [19-07-2023 12:34:49 UTC] sampling_depth: None
-    [19-07-2023 12:34:49 UTC] pseudopool: False
-    [19-07-2023 12:34:49 UTC] minboot: 80
-    [19-07-2023 12:34:49 UTC] allowmultiplespecies: False
-    [19-07-2023 12:34:49 UTC] Results manager initialized. Results registry path: /mnt/EFS/user_uploads/b73de8bfdd22/outputs/b73de8bfdd22_results_registry.json
-    [19-07-2023 12:34:49 UTC] Checking Mapfile for Gzipped inputs.
-    [19-07-2023 12:34:49 UTC] Gzipped files listed in map file, attempting to rm .gz extension.
-    [19-07-2023 12:34:51 UTC] Done. Attempting file decompression.
-    [19-07-2023 12:34:51 UTC] Finished decompression.
-    [19-07-2023 12:34:55 UTC] Skipping FASTQ file validation
-    [19-07-2023 12:35:04 UTC] Reference DB (sv138.1) checksum: 6b41db7139834c71171f8ce5b5918fc6
-    [19-07-2023 12:35:05 UTC] Taxonomy assignemnt DB checksum: f21c2d97c79ff07c17949a9622371a4c
-    [19-07-2023 12:35:05 UTC] Running dada2nephele.R with command:
-    [19-07-2023 12:35:09 UTC] Rscript /usr/local/src/nephele2/pipelines/DADA2/dada2nephele/R/dada2nephele.R  --datadir /nephele_data/inputs/ --outdir /nephele_data/outputs/ --mapfile /nephele_data/outputs/N2_16S_example_mapping_file_3_corrected.txt.no_gz --logfilename /var/log/job.log --nthread 12 --maxEE 5 --truncQ 4 --maxMismatch 0 --chimera --data_type PE --minBoot 80 --no_MultipleSpecies --trimLeft_R1 0 --trimLeft_R2 0 --truncLen_R1 0 --truncLen_R2 0 --taxmethod rdp --refdb /mnt/EFS/dbs/dada2_silva_v138.1/silva_nr99_v138.1_train_set.fa.gz --refdb_species /mnt/EFS/dbs/dada2_silva_v138.1/silva_species_assignment_v138.1.fa.gz
-    [19-07-2023 12:35:16 UTC] R version 4.3.1 (2023-06-16)
-    [19-07-2023 12:35:16 UTC] Platform: x86_64-pc-linux-gnu (64-bit)
-    [19-07-2023 12:35:16 UTC] Running under: Debian GNU/Linux 11 (bullseye)
-    [19-07-2023 12:35:16 UTC] Matrix products: default
-    [19-07-2023 12:35:16 UTC] BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
-    [19-07-2023 12:35:16 UTC] LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
-    [19-07-2023 12:35:16 UTC] locale:
-     [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
-     [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
-     [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C
-    [19-07-2023 12:35:16 UTC] [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
-    [19-07-2023 12:35:16 UTC] time zone: America/New_York
-    [19-07-2023 12:35:16 UTC] tzcode source: system (glibc)
-    [19-07-2023 12:35:16 UTC] attached base packages:
-    [19-07-2023 12:35:16 UTC] [1] stats     graphics  grDevices utils     datasets  methods   base
-    [19-07-2023 12:35:16 UTC] other attached packages:
-    [19-07-2023 12:35:16 UTC] [1] dada2_1.28.0 Rcpp_1.0.11  docopt_0.7.1
-    [19-07-2023 12:35:16 UTC] loaded via a namespace (and not attached):
-     [1] utf8_1.2.3                  generics_0.1.3             
-     [3] bitops_1.0-7                stringi_1.7.12             
-     [5] jpeg_0.1-10                 lattice_0.20-45            
-     [7] magrittr_2.0.3              grid_4.3.1                 
-     [9] RColorBrewer_1.1-3          iterators_1.0.14
-    [19-07-2023 12:35:16 UTC] [11] foreach_1.5.2               plyr_1.8.8
-    [19-07-2023 12:35:16 UTC] [13] Matrix_1.5-3                GenomeInfoDb_1.36.1
-    [19-07-2023 12:35:16 UTC] [15] fansi_1.0.4                 scales_1.2.1
-    [19-07-2023 12:35:16 UTC] [17] Biostrings_2.68.1           codetools_0.2-19
-    [19-07-2023 12:35:16 UTC] [19] cli_3.6.1                   ShortRead_1.58.0
-    [19-07-2023 12:35:16 UTC] [21] rlang_1.1.1                 crayon_1.5.2
-    [19-07-2023 12:35:16 UTC] [23] XVector_0.40.0              Biobase_2.60.0
-    [19-07-2023 12:35:16 UTC] [25] munsell_0.5.0               DelayedArray_0.26.6
-    [19-07-2023 12:35:16 UTC] [27] S4Arrays_1.0.4              tools_4.3.1
-    [19-07-2023 12:35:16 UTC] [29] parallel_4.3.1              reshape2_1.4.4
-    [19-07-2023 12:35:16 UTC] [31] deldir_1.0-9                BiocParallel_1.34.2
-    [19-07-2023 12:35:16 UTC] [33] dplyr_1.1.2                 interp_1.1-4
-    [19-07-2023 12:35:16 UTC] [35] colorspace_2.1-0            ggplot2_3.4.2
-    [19-07-2023 12:35:16 UTC] [37] GenomeInfoDbData_1.2.10     Rsamtools_2.16.0
-    [19-07-2023 12:35:16 UTC] [39] hwriter_1.3.2.1             SummarizedExperiment_1.30.2
-    [19-07-2023 12:35:16 UTC] [41] BiocGenerics_0.46.0         png_0.1-8
-    [19-07-2023 12:35:16 UTC] [43] vctrs_0.6.3                 R6_2.5.1
-    [19-07-2023 12:35:16 UTC] [45] matrixStats_1.0.0           stats4_4.3.1
-    [19-07-2023 12:35:16 UTC] [47] lifecycle_1.0.3             stringr_1.5.0
-    [19-07-2023 12:35:16 UTC] [49] zlibbioc_1.46.0             S4Vectors_0.38.1
-    [19-07-2023 12:35:16 UTC] [51] IRanges_2.34.1              pkgconfig_2.0.3
-    [19-07-2023 12:35:16 UTC] [53] RcppParallel_5.1.7          pillar_1.9.0
-    [19-07-2023 12:35:16 UTC] [55] gtable_0.3.3                glue_1.6.2
-    [19-07-2023 12:35:16 UTC] [57] tibble_3.2.1                GenomicAlignments_1.36.0
-    [19-07-2023 12:35:16 UTC] [59] GenomicRanges_1.52.0        tidyselect_1.2.0
-    [19-07-2023 12:35:16 UTC] [61] MatrixGenerics_1.12.2       latticeExtra_0.6-30
-    [19-07-2023 12:35:16 UTC] [63] compiler_4.3.1              import_1.3.0
-    [19-07-2023 12:35:16 UTC] [65] RCurl_1.98-1.12
-    [19-07-2023 12:35:16 UTC] Taxonomic Reference Database
-    [19-07-2023 12:35:16 UTC] /mnt/EFS/dbs/dada2_silva_v138.1/silva_nr99_v138.1_train_set.fa.gz
-    [19-07-2023 12:35:16 UTC] /mnt/EFS/dbs/dada2_silva_v138.1/silva_species_assignment_v138.1.fa.gz
-    [19-07-2023 12:35:16 UTC] Reading in map file  /nephele_data/outputs/N2_16S_example_mapping_file_3_corrected.txt.no_gz
-    [19-07-2023 12:35:16 UTC] Printing dada algorithm options.
-     [1]           16        FALSE           -8         TRUE         TRUE
-     [6]                      0.42            5            0           10
-    [19-07-2023 12:35:16 UTC] [11]            1            1            1           -4 0.000000....
-    [19-07-2023 12:35:16 UTC] [16] 0.000000....       0.0001          Inf            2            2
-    [19-07-2023 12:35:16 UTC] [21]         TRUE         TRUE         TRUE
-    [19-07-2023 12:35:16 UTC] Paired End
-    [19-07-2023 12:35:20 UTC] pqp <- lapply(readslist, FUN = function(x) { ppp <- plotQualityProfile(file.path(datadir, x)); ppp$facet$params$ncol <- 4; ppp })
-    [19-07-2023 12:35:53 UTC] Saving quality profile plots to quality_Profile_R*.pdf
-    [19-07-2023 12:35:57 UTC] out <- filterAndTrim(fwd=file.path(datadir,readslist$R1), filt=file.path(filt.dir,trimlist$R1),rev=file.path(datadir,readslist$R2), filt.rev=file.path(filt.dir,trimlist$R2),  maxEE=5, trimLeft=c(0, 0), truncQ=4, truncLen = c(0, 0), rm.phix=TRUE, compress=TRUE, verbose=TRUE, multithread=12, minLen=50)
-    [19-07-2023 12:36:01 UTC] Creating output directory: /nephele_data/outputs/filtered_data
-    [19-07-2023 12:36:03 UTC] reads.in reads.out
-    [19-07-2023 12:36:03 UTC] 22831_S41_R1_subsample.fastq     25000     20511
-    [19-07-2023 12:36:03 UTC] 22833_S45_R1_subsample.fastq     25000     20346
-    [19-07-2023 12:36:03 UTC] 22349_S26_R1_subsample.fastq     25000     20929
-    [19-07-2023 12:36:03 UTC] 22192_S22_R1_subsample.fastq     25000     21446
-    [19-07-2023 12:36:03 UTC] 22187_S19_R1_subsample.fastq     25000     20753
-    [19-07-2023 12:36:03 UTC] 22061_S5_R1_subsample.fastq      25000     20200
-    [19-07-2023 12:36:03 UTC] 22057_S2_R1_subsample.fastq      25000     20969
-    [19-07-2023 12:36:03 UTC] 22145_S14_R1_subsample.fastq     25000     18613
-    [19-07-2023 12:36:03 UTC] 22350_S27_R1_subsample.fastq     25000     19778
-    [19-07-2023 12:36:03 UTC] 23572_S307_R1_subsample.fastq    25000     17656
-    [19-07-2023 12:36:03 UTC] Saved Vega-Lite data to: /nephele_data/outputs/readsInReadsOutVegaJSON.json
-    [19-07-2023 12:36:03 UTC] Checking that trimmed files exist.
-    [19-07-2023 12:36:04 UTC] list2env(checktrimfiles(A, filt.dir, trimlist), envir = environment())
-    [19-07-2023 12:36:08 UTC] err <- lapply(trimlist, function(x) learnErrors(x, multithread=12, nbases=100000000,randomize=FALSE))
-    [19-07-2023 12:36:14 UTC] 52511424 total bases in 201201 reads from 10 samples will be used for learning the error rates.
-    [19-07-2023 12:37:36 UTC] 52366403 total bases in 201201 reads from 10 samples will be used for learning the error rates.
-    [19-07-2023 12:39:31 UTC] pe <- lapply(err, function(x) plotErrors(x, nominalQ=TRUE))
-    [19-07-2023 12:39:31 UTC] Saving 7 x 7 in image
-    [19-07-2023 12:39:32 UTC] Warning: Transformation introduced infinite values in continuous y-axis
-    [19-07-2023 12:39:32 UTC] Saving 7 x 7 in image
-    [19-07-2023 12:39:33 UTC] Warning: Transformation introduced infinite values in continuous y-axis
-    [19-07-2023 12:39:33 UTC] Saving 7 x 7 in image
-    [19-07-2023 12:39:34 UTC] Warning: Transformation introduced infinite values in continuous y-axis
-    [19-07-2023 12:39:34 UTC] Saving 7 x 7 in image
-    [19-07-2023 12:39:35 UTC] Warning: Transformation introduced infinite values in continuous y-axis
-    [19-07-2023 12:39:35 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:39:35 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22831_S41_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:39:35 UTC] Encountered 10011 unique sequences from 20511 total sequences read.
-    [19-07-2023 12:39:36 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22831_S41_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:39:36 UTC] Encountered 16353 unique sequences from 20511 total sequences read.
-    [19-07-2023 12:39:38 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:39:38 UTC] R1: 132 sequence variants were inferred from 10011 input unique sequences. R2: 106 sequence variants were inferred from 16353 input unique sequences.
-    [19-07-2023 12:39:39 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:39:39 UTC] 14245 paired-reads (in 245 unique pairings) successfully merged out of 19342 (in 799 pairings) input.
-    [19-07-2023 12:39:39 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:39:40 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22833_S45_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:39:40 UTC] Encountered 12613 unique sequences from 20346 total sequences read.
-    [19-07-2023 12:39:40 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22833_S45_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:39:40 UTC] Encountered 18675 unique sequences from 20346 total sequences read.
-    [19-07-2023 12:39:44 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:39:44 UTC] R1: 268 sequence variants were inferred from 12613 input unique sequences. R2: 82 sequence variants were inferred from 18675 input unique sequences.
-    [19-07-2023 12:39:44 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:39:44 UTC] 10806 paired-reads (in 98 unique pairings) successfully merged out of 16476 (in 400 pairings) input.
-    [19-07-2023 12:39:44 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:39:44 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22349_S26_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:39:44 UTC] Encountered 11655 unique sequences from 20929 total sequences read.
-    [19-07-2023 12:39:45 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22349_S26_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:39:45 UTC] Encountered 17146 unique sequences from 20929 total sequences read.
-    [19-07-2023 12:39:48 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:39:48 UTC] R1: 175 sequence variants were inferred from 11655 input unique sequences. R2: 95 sequence variants were inferred from 17146 input unique sequences.
-    [19-07-2023 12:39:48 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:39:49 UTC] 12735 paired-reads (in 161 unique pairings) successfully merged out of 18253 (in 738 pairings) input.
-    [19-07-2023 12:39:49 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:39:49 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22192_S22_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:39:49 UTC] Encountered 10687 unique sequences from 21446 total sequences read.
-    [19-07-2023 12:39:49 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22192_S22_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:39:49 UTC] Encountered 16476 unique sequences from 21446 total sequences read.
-    [19-07-2023 12:39:52 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:39:52 UTC] R1: 130 sequence variants were inferred from 10687 input unique sequences. R2: 95 sequence variants were inferred from 16476 input unique sequences.
-    [19-07-2023 12:39:53 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:39:53 UTC] 17599 paired-reads (in 177 unique pairings) successfully merged out of 19561 (in 513 pairings) input.
-    [19-07-2023 12:39:53 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:39:53 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22187_S19_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:39:53 UTC] Encountered 10100 unique sequences from 20753 total sequences read.
-    [19-07-2023 12:39:53 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22187_S19_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:39:53 UTC] Encountered 16901 unique sequences from 20753 total sequences read.
-    [19-07-2023 12:39:56 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:39:56 UTC] R1: 140 sequence variants were inferred from 10100 input unique sequences. R2: 99 sequence variants were inferred from 16901 input unique sequences.
-    [19-07-2023 12:39:56 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:39:56 UTC] 17011 paired-reads (in 183 unique pairings) successfully merged out of 18987 (in 418 pairings) input.
-    [19-07-2023 12:39:56 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:39:56 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22061_S5_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:39:56 UTC] Encountered 11283 unique sequences from 20200 total sequences read.
-    [19-07-2023 12:39:57 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22061_S5_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:39:57 UTC] Encountered 17350 unique sequences from 20200 total sequences read.
-    [19-07-2023 12:39:59 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:39:59 UTC] R1: 186 sequence variants were inferred from 11283 input unique sequences. R2: 83 sequence variants were inferred from 17350 input unique sequences.
-    [19-07-2023 12:39:59 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:39:59 UTC] 14630 paired-reads (in 133 unique pairings) successfully merged out of 17632 (in 343 pairings) input.
-    [19-07-2023 12:39:59 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:40:00 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22057_S2_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:40:00 UTC] Encountered 9268 unique sequences from 20969 total sequences read.
-    [19-07-2023 12:40:00 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22057_S2_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:40:00 UTC] Encountered 15736 unique sequences from 20969 total sequences read.
-    [19-07-2023 12:40:02 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:40:02 UTC] R1: 151 sequence variants were inferred from 9268 input unique sequences. R2: 148 sequence variants were inferred from 15736 input unique sequences.
-    [19-07-2023 12:40:03 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:40:03 UTC] 16865 paired-reads (in 269 unique pairings) successfully merged out of 20052 (in 475 pairings) input.
-    [19-07-2023 12:40:03 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:40:03 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22145_S14_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:40:03 UTC] Encountered 9540 unique sequences from 18613 total sequences read.
-    [19-07-2023 12:40:03 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22145_S14_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:40:03 UTC] Encountered 16815 unique sequences from 18613 total sequences read.
-    [19-07-2023 12:40:05 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:40:05 UTC] R1: 222 sequence variants were inferred from 9540 input unique sequences. R2: 79 sequence variants were inferred from 16815 input unique sequences.
-    [19-07-2023 12:40:06 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:40:06 UTC] 12534 paired-reads (in 144 unique pairings) successfully merged out of 15578 (in 352 pairings) input.
-    [19-07-2023 12:40:06 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:40:07 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22350_S27_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:40:07 UTC] Encountered 12092 unique sequences from 19778 total sequences read.
-    [19-07-2023 12:40:07 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/22350_S27_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:40:07 UTC] Encountered 17357 unique sequences from 19778 total sequences read.
-    [19-07-2023 12:40:10 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:40:10 UTC] R1: 214 sequence variants were inferred from 12092 input unique sequences. R2: 94 sequence variants were inferred from 17357 input unique sequences.
-    [19-07-2023 12:40:10 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:40:10 UTC] 12376 paired-reads (in 128 unique pairings) successfully merged out of 16356 (in 470 pairings) input.
-    [19-07-2023 12:40:10 UTC] derep <- lapply(trimlist, function(x) derepFastq(x[sample], verbose=TRUE))
-    [19-07-2023 12:40:10 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/23572_S307_R1_subsample_trim.fastq.gz
-    [19-07-2023 12:40:10 UTC] Encountered 9005 unique sequences from 17656 total sequences read.
-    [19-07-2023 12:40:11 UTC] Dereplicating sequence entries in Fastq file: /nephele_data/outputs/filtered_data/23572_S307_R2_subsample_trim.fastq.gz
-    [19-07-2023 12:40:11 UTC] Encountered 16088 unique sequences from 17656 total sequences read.
-    [19-07-2023 12:40:12 UTC] dd <- sapply(nameslist, function(x) dada(derep[[x]], err=err[[x]], multithread=12, verbose=F, priors = pseudo_priors[[x]]), USE.NAMES=TRUE, simplify=FALSE)
-    [19-07-2023 12:40:12 UTC] R1: 162 sequence variants were inferred from 9005 input unique sequences. R2: 47 sequence variants were inferred from 16088 input unique sequences.
-    [19-07-2023 12:40:13 UTC] mergePairs(dd$R1, derep$R1, dd$R2, derep$R2, verbose=TRUE, minOverlap=12, trimOverhang=FALSE, maxMismatch=0, justConcatenate=FALSE)
-    [19-07-2023 12:40:13 UTC] 11701 paired-reads (in 85 unique pairings) successfully merged out of 15018 (in 267 pairings) input.
-    [19-07-2023 12:40:13 UTC] seqtab <- makeSequenceTable(sampleVariants$sv)
-    [19-07-2023 12:40:13 UTC] Removing sequences of length less than 75bp
-    [19-07-2023 12:40:13 UTC] seqlengths <- nchar(colnames(seqtab))
-    [19-07-2023 12:40:13 UTC] seqtab <- seqtab[,which(seqlengths >=75), drop=F]
-    [19-07-2023 12:40:13 UTC] saveRDS(seqtab, file.path(interm.dir,"seqtab_min75.rds"))
-    [19-07-2023 12:40:13 UTC] seqtabnochimera <- removeBimeraDenovo(seqtab, verbose=TRUE, multithread=12)
-    [19-07-2023 12:40:13 UTC] Identified 568 bimeras out of 1349 input sequences.
-    [19-07-2023 12:40:13 UTC] % Reads remaining after chimera removal: 71.4808330130532
-    [19-07-2023 12:40:13 UTC] seqtab <- seqtabnochimera
-    [19-07-2023 12:40:13 UTC] Track Reads
-                 denoisedF denoisedR merged filter75 nochim
-    [19-07-2023 12:40:13 UTC] A22831           20093     19494  14245    14245   7921
-    [19-07-2023 12:40:13 UTC] A22833           18688     16924  10806    10806  10054
-    [19-07-2023 12:40:13 UTC] A22349           19980     18528  12735    12735   8165
-    [19-07-2023 12:40:13 UTC] A22192           20688     19840  17599    17599  13051
-    [19-07-2023 12:40:13 UTC] A22187           20173     19243  17011    17011  10509
-    [19-07-2023 12:40:13 UTC] A22061           19115     17917  14630    14630  12260
-    [19-07-2023 12:40:13 UTC] A22057           20673     20147  16865    16865   9998
-    [19-07-2023 12:40:13 UTC] A22145           17757     15789  12534    12534   9239
-    [19-07-2023 12:40:13 UTC] A22350           18350     16709  12376    12376   9838
-    [19-07-2023 12:40:13 UTC] 7pRecSw478.1     16806     15202  11701    11701   9397
-    [19-07-2023 12:40:13 UTC] Saved Vega-Lite data to: /nephele_data/outputs/trackReadsVegaJSON.json
-    [19-07-2023 12:40:13 UTC] rep_seq_names <- dada2fasta(seqtab, filename="/nephele_data/outputs/seq.fasta")
-    [19-07-2023 12:40:13 UTC] rep_seq_names <- make_seq_names(seqtab, nametype)
-    [19-07-2023 12:40:13 UTC] writeFasta(seqs, file="/nephele_data/outputs/seq.fasta")
-    [19-07-2023 12:40:13 UTC] Taxonomic assignment with rdp
-    [19-07-2023 12:40:17 UTC] taxa <- assignTaxonomy(seqtab, refdb, multithread=12, minBoot=80, tryRC=TRUE, verbose=TRUE)
-    [19-07-2023 12:42:09 UTC] Finished processing reference fasta.
-    [19-07-2023 12:42:09 UTC] Species assignment with dada2::addSpecies
-    [19-07-2023 12:42:09 UTC] taxa.genus <- taxa; rm(taxa);
-    [19-07-2023 12:42:13 UTC] taxa <- addSpecies(taxa.genus, refdb_species, verbose=TRUE, tryRC=TRUE, n=4000, allowMultiple =FALSE)
-    [19-07-2023 12:43:28 UTC] 3 out of 781 were assigned to the species level.
-    [19-07-2023 12:43:28 UTC] Of which 2 had genera consistent with the input table.Garbage collection 247 = 164+40+43 (level 2) ...
-    [19-07-2023 12:43:28 UTC] 432.3 Mbytes of cons cells used (56%)
-    [19-07-2023 12:43:28 UTC] 175.0 Mbytes of vectors used (5%)
-    [19-07-2023 12:43:28 UTC] rep_seq_names <- make_seq_names(seqtab, nametype)
-    [19-07-2023 12:43:28 UTC] writeFasta(seqs, file="/nephele_data/outputs/seq.fasta")
-    [19-07-2023 12:43:28 UTC] colnames(seqtab) <- replace_names(colnames(seqtab), rep_seq_names)
-    [19-07-2023 12:43:28 UTC] row.names(taxtab) <- replace_names(row.names(taxtab), rep_seq_names)
-    [19-07-2023 12:43:30 UTC] write_biom(dada2biom(seqtab,taxtab, metadata = metadata), file.path(outdir, "taxa.biom"))
-    [19-07-2023 12:43:30 UTC] dada2text(seqtab, taxtab, file.path(outdir, "OTU_table.txt"))
-    [19-07-2023 12:43:30 UTC] dada2taxonomy(taxtab, file.path(outdir, "taxonomy_table.txt"))
-    [19-07-2023 12:43:30 UTC] Garbage collection 248 = 164+40+44 (level 2) ...
-    [19-07-2023 12:43:30 UTC] 432.1 Mbytes of cons cells used (56%)
-    [19-07-2023 12:43:31 UTC] 111.6 Mbytes of vectors used (4%)
-    [19-07-2023 12:43:31 UTC] Summarizing biom file to /nephele_data/outputs/otu_summary_table.txt.
-    [19-07-2023 12:43:31 UTC] Creating phylogenetic trees
-    [19-07-2023 12:43:33 UTC] Running command: mafft --preservecase --inputorder --thread 12 /nephele_data/outputs/seq.fasta > /nephele_data/outputs/phylo/aligned_seq.fasta
-    [19-07-2023 12:43:37 UTC] Running command: FastTreeMP -quote -nt /nephele_data/outputs/phylo/aligned_seq.fasta > /nephele_data/outputs/phylo/unrooted_tree.nwk
-    [19-07-2023 12:43:39 UTC] Finished creating trees: /nephele_data/outputs/phylo/rooted_tree.nwk, /nephele_data/outputs/phylo/unrooted_tree.nwk
-    [19-07-2023 12:43:39 UTC] Checking output file from dada2 pipeline required by data visualization pipeline.
-    [19-07-2023 12:43:39 UTC] Running data visualization pipeline
-    [19-07-2023 12:43:43 UTC] Running with args: {'datafile': '/nephele_data/outputs/OTU_table.txt', 'outdir': '/nephele_data/outputs/', 'logfilename': '/var/log/job.log', 'sampdepth': 10054, 'mapfile': '/nephele_data/outputs/N2_16S_example_mapping_file_3_corrected.txt.no_gz', 'tsvfile': True}
-    [19-07-2023 12:43:44 UTC] R version 4.3.1 (2023-06-16)
-    [19-07-2023 12:43:44 UTC] Platform: x86_64-pc-linux-gnu (64-bit)
-    [19-07-2023 12:43:44 UTC] Running under: Debian GNU/Linux 11 (bullseye)
-    [19-07-2023 12:43:44 UTC] Matrix products: default
-    [19-07-2023 12:43:44 UTC] BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
-    [19-07-2023 12:43:44 UTC] LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
-    [19-07-2023 12:43:44 UTC] locale:
-     [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
-     [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
-     [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C
-    [19-07-2023 12:43:44 UTC] [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
-    [19-07-2023 12:43:44 UTC] time zone: America/New_York
-    [19-07-2023 12:43:44 UTC] tzcode source: system (glibc)
-    [19-07-2023 12:43:44 UTC] attached base packages:
-    [19-07-2023 12:43:44 UTC] [1] stats     graphics  grDevices utils     datasets  methods   base
-    [19-07-2023 12:43:44 UTC] other attached packages:
-     [1] htmlwidgets_1.6.2 jsonlite_1.8.7    plotly_4.10.2     ampvis2_2.7.4    
-     [5] ggplot2_3.4.2     vegan_2.6-4       lattice_0.20-45   permute_0.9-7    
-     [9] htmltools_0.5.5   morpheus_0.1.1.1
-    [19-07-2023 12:43:44 UTC] loaded via a namespace (and not attached):
-     [1] Matrix_1.5-3       gtable_0.3.3       crayon_1.5.2       dplyr_1.1.2       
-     [5] compiler_4.3.1     tidyselect_1.2.0   Rcpp_1.0.11        stringr_1.5.0     
-     [9] parallel_4.3.1     tidyr_1.3.0        cluster_2.1.3      splines_4.3.1
-    [19-07-2023 12:43:44 UTC] [13] scales_1.2.1       fastmap_1.1.1      plyr_1.8.8         R6_2.5.1
-    [19-07-2023 12:43:44 UTC] [17] generics_0.1.3     MASS_7.3-58.3      ggrepel_0.9.3      tibble_3.2.1
-    [19-07-2023 12:43:44 UTC] [21] munsell_0.5.0      pillar_1.9.0       RColorBrewer_1.1-3 rlang_1.1.1
-    [19-07-2023 12:43:44 UTC] [25] utf8_1.2.3         stringi_1.7.12     lazyeval_0.2.2     viridisLite_0.4.2
-    [19-07-2023 12:43:44 UTC] [29] cli_3.6.1          withr_2.5.0        magrittr_2.0.3     mgcv_1.8-42
-    [19-07-2023 12:43:44 UTC] [33] digest_0.6.33      grid_4.3.1         lifecycle_1.0.3    nlme_3.1-162
-    [19-07-2023 12:43:44 UTC] [37] vctrs_0.6.3        data.table_1.14.8  glue_1.6.2         ape_5.7-1
-    [19-07-2023 12:43:44 UTC] [41] fansi_1.0.4        colorspace_2.1-0   purrr_1.0.1        httr_1.4.6
-    [19-07-2023 12:43:44 UTC] [45] tools_4.3.1        pkgconfig_2.0.3
-    [19-07-2023 12:43:44 UTC] "allgraphs"(datafile="/nephele_data/outputs/OTU_table.txt", outdir="/nephele_data/outputs//graphs", mapfile="/nephele_data/outputs/N2_16S_example_mapping_file_3_corrected.txt.no_gz",tsvfile=TRUE, ...)
-    [19-07-2023 12:43:44 UTC] Reading in map file /nephele_data/outputs/N2_16S_example_mapping_file_3_corrected.txt.no_gz
-    [19-07-2023 12:43:44 UTC] Reading in OTU file /nephele_data/outputs/OTU_table.txt
-    [19-07-2023 12:43:44 UTC] otu <- read.delim(datafile, check.names = FALSE, na.strings = '', row.names = 1)
-    [19-07-2023 12:43:44 UTC] tax <- otu[,!names(otu) %in% map$SampleID]
-    [19-07-2023 12:43:44 UTC] otu <- otu[, names(otu) %in% map$SampleID, drop=F]
-    [19-07-2023 12:43:44 UTC] otu <- cbind(otu, tax)
-    [19-07-2023 12:43:44 UTC] amp <- amp_load(otu, map)
-    [19-07-2023 12:43:45 UTC] Warning: Could not find a column named OTU/ASV in otutable, using rownames as sample ID's
-    [19-07-2023 12:43:45 UTC] ampvis2 object with 3 elements.
-    [19-07-2023 12:43:45 UTC] Summary of OTU table:
-         Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
-              10          781       100432         7921        13051         9918 
-       Avg#Reads 
-         10043.2
-    [19-07-2023 12:43:45 UTC] Assigned taxonomy:
-        Kingdom      Phylum       Class       Order      Family       Genus 
-      781(100%) 771(98.72%) 770(98.59%) 760(97.31%) 702(89.88%) 527(67.48%) 
-        Species 
-       2(0.26%)
-    [19-07-2023 12:43:45 UTC] Metadata variables: 7 
-     SampleID, ForwardFastqFile, ReverseFastqFile, TreatmentGroup, Animal, Day, Description
-    [19-07-2023 12:43:45 UTC] Rarefaction curve
-    [19-07-2023 12:43:45 UTC] rarefactioncurve(outdir = outdir, amp = amp, colors = allcols, pipeline=TRUE)
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 8
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 9
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 4
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 4
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 24
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 9
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 6
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 7
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 2
-    [19-07-2023 12:43:45 UTC] Warning in vegan::rarefy(abund[i, ], n) :
-      most observed count data have counts 1, but smallest count is 30
-    [19-07-2023 12:43:45 UTC] Saving plot to /nephele_data/outputs/graphs/rarecurve.html
-    [19-07-2023 12:43:46 UTC] Warning in plotly::config(pp, cloud = T, edits = list(titleText = T, legendText = T,  :
-      The `cloud` argument is deprecated. Use `showSendToCloud` instead.
-    [19-07-2023 12:43:46 UTC] Saving rarefaction curve table to /nephele_data/outputs//graphs/rarecurve.txt
-    [19-07-2023 12:43:46 UTC] Relative abundance heatmaps
-    [19-07-2023 12:43:46 UTC] morphheatmap(outdir = outdir, amp = amp, colors=allcols, filter_level = 5)
-    [19-07-2023 12:43:46 UTC] Filter taxa below 5 counts/abundance.
-    [19-07-2023 12:43:46 UTC] amp <- filterlowabund(amp, level = 5, abs=T)
-    [19-07-2023 12:43:46 UTC] Calculate relative abundance.
-    [19-07-2023 12:43:46 UTC] amp <- subsetamp(amp, sampdepth = NULL, rarefy=FALSE, normalise = TRUE, printsummary = FALSE)
-    [19-07-2023 12:43:46 UTC] 0 samples have been filtered.
-    [19-07-2023 12:43:46 UTC] makeheatmap("seq", amp)
-    [19-07-2023 12:43:48 UTC] heatmap <- morpheus(mat, columns=columns, columnAnnotations = amptax$metadata, columnColorModel = list(type=as.list(colors)), colorScheme = list(scalingMode="fixed", values=values, colors=hmapcolors, stepped=FALSE), rowAnnotations = amptax$tax, rows = rows, dendrogram="none")
-    [19-07-2023 12:43:48 UTC] Saving plot to /nephele_data/outputs/graphs/seq_heatmap.html
-    [19-07-2023 12:43:48 UTC] Sampling depth: 10054
-    [19-07-2023 12:43:48 UTC] Filter samples below 10054 counts.
-    [19-07-2023 12:43:48 UTC] amp <- amp_subset_samples(amp, minreads = 10054, ...)
-    [19-07-2023 12:43:48 UTC] 6 samples and 448 OTUs have been filtered
-    [19-07-2023 12:43:48 UTC] Before: 10 samples and 781 OTUs
-    [19-07-2023 12:43:48 UTC] After: 4 samples and 333 OTUs
-    [19-07-2023 12:43:48 UTC] Saving excluded sample ids to /nephele_data/outputs//graphs/samples_being_ignored.txt
-    [19-07-2023 12:43:48 UTC] ampvis2 object with 3 elements.
-    [19-07-2023 12:43:48 UTC] Summary of OTU table:
-         Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
-               4          333        45874        10054        13051      11384.5 
-       Avg#Reads 
-         11468.5
-    [19-07-2023 12:43:48 UTC] Assigned taxonomy:
-        Kingdom      Phylum       Class       Order      Family       Genus 
-      333(100%)  330(99.1%)  330(99.1%)  320(96.1%) 296(88.89%) 227(68.17%) 
-        Species 
-        2(0.6%)
-    [19-07-2023 12:43:48 UTC] Metadata variables: 7 
-     SampleID, ForwardFastqFile, ReverseFastqFile, TreatmentGroup, Animal, Day, Description
-    [19-07-2023 12:43:48 UTC] PCoA plot with binomial distance
-    [19-07-2023 12:43:48 UTC] pcoaplot(outdir = outdir, amp = ampsub, distm = "binomial", colors = allcols)
-    [19-07-2023 12:43:49 UTC] pcoa <- amp_ordinate(amp, filter_species =0.1,type="PCOA", distmeasure ="binomial",sample_color_by = "TreatmentGroup", detailed_output = TRUE, transform="none")
-    [19-07-2023 12:43:49 UTC] Saving plot to /nephele_data/outputs/graphs/pcoa_binomial.html
-    [19-07-2023 12:43:49 UTC] Warning in plotly::config(pp, cloud = T, edits = list(titleText = T, legendText = T,  :
-      The `cloud` argument is deprecated. Use `showSendToCloud` instead.
-    [19-07-2023 12:43:49 UTC] Saving binomial PCoA table to /nephele_data/outputs//graphs/pcoa_binomial.txt
-    [19-07-2023 12:43:49 UTC] Making top species table.
-    [19-07-2023 12:43:49 UTC] Saving table to /nephele_data/outputs//graphs/top_85_species_table.txt
-    [19-07-2023 12:43:49 UTC] Rarefying OTU Table to  10054 reads.
-    [19-07-2023 12:43:49 UTC] set.seed(500)
-    [19-07-2023 12:43:49 UTC] otu <- rrarefy(t(amp$abund), sampdepth)
-    [19-07-2023 12:43:49 UTC] Warning in rrarefy(t(amp$abund), sampdepth) :
-      function should be used for observed counts, but smallest count is 4
-    [19-07-2023 12:43:49 UTC] amp <- amp_subset_samples(amp, minreads = 10054, ...)
-    [19-07-2023 12:43:49 UTC] 0 samples have been filtered.
-    [19-07-2023 12:43:49 UTC] ampvis2 object with 3 elements.
-    [19-07-2023 12:43:49 UTC] Summary of OTU table:
-         Samples         OTUs  Total#Reads    Min#Reads    Max#Reads Median#Reads 
-               4          333        40216        10054        10054        10054 
-       Avg#Reads 
-           10054
-    [19-07-2023 12:43:49 UTC] Assigned taxonomy:
-        Kingdom      Phylum       Class       Order      Family       Genus 
-      333(100%)  330(99.1%)  330(99.1%)  320(96.1%) 296(88.89%) 227(68.17%) 
-        Species 
-        2(0.6%)
-    [19-07-2023 12:43:49 UTC] Metadata variables: 7 
-     SampleID, ForwardFastqFile, ReverseFastqFile, TreatmentGroup, Animal, Day, Description
-    [19-07-2023 12:43:49 UTC] Saving rarefied OTU Table to  /nephele_data/outputs//graphs/rarefied_OTU_table_10054.txt
-    [19-07-2023 12:43:49 UTC] Making heatmap from rarefied counts.
-    [19-07-2023 12:43:49 UTC] morphheatmap(outdir = outdir, amp = amprare, colors=allcols, filter_level = 5, filesuffix = "_rarefied")
-    [19-07-2023 12:43:49 UTC] Filter taxa below 5 counts/abundance.
-    [19-07-2023 12:43:49 UTC] amp <- filterlowabund(amp, level = 5, abs=T)
-    [19-07-2023 12:43:49 UTC] Calculate relative abundance.
-    [19-07-2023 12:43:49 UTC] amp <- subsetamp(amp, sampdepth = NULL, rarefy=FALSE, normalise = TRUE, printsummary = FALSE)
-    [19-07-2023 12:43:49 UTC] 0 samples have been filtered.
-    [19-07-2023 12:43:49 UTC] makeheatmap("seq", amp)
-    [19-07-2023 12:43:49 UTC] heatmap <- morpheus(mat, columns=columns, columnAnnotations = amptax$metadata, columnColorModel = list(type=as.list(colors)), colorScheme = list(scalingMode="fixed", values=values, colors=hmapcolors, stepped=FALSE), rowAnnotations = amptax$tax, rows = rows, dendrogram="none")
-    [19-07-2023 12:43:49 UTC] Saving plot to /nephele_data/outputs/graphs/seq_heatmap_rarefied.html
-    [19-07-2023 12:43:49 UTC] Normalizing rarefied OTU table to 100 for Bray-Curtis distance.
-    [19-07-2023 12:43:49 UTC] 0 samples have been filtered.
-    [19-07-2023 12:43:49 UTC] pcoaplot(outdir = outdir, amp = ampbc, distm = "bray", colors = allcols, filesuffix="_rarefied")
-    [19-07-2023 12:43:49 UTC] pcoa <- amp_ordinate(amp, filter_species =0.1,type="PCOA", distmeasure ="bray",sample_color_by = "TreatmentGroup", detailed_output = TRUE, transform="none")
-    [19-07-2023 12:43:49 UTC] Saving plot to /nephele_data/outputs/graphs/pcoa_bray_rarefied.html
-    [19-07-2023 12:43:49 UTC] Warning in plotly::config(pp, cloud = T, edits = list(titleText = T, legendText = T,  :
-      The `cloud` argument is deprecated. Use `showSendToCloud` instead.
-    [19-07-2023 12:43:49 UTC] Saving bray PCoA table to /nephele_data/outputs//graphs/pcoa_bray_rarefied.txt
-    [19-07-2023 12:43:49 UTC] Alpha diversity boxplot
-    [19-07-2023 12:43:49 UTC] adivboxplot(outdir = outdir, amp = amprare, sampdepth = sampdepth, colors = allcols, pipeline=TRUE)
-    [19-07-2023 12:43:49 UTC] alphadiv <- amp_alphadiv(amp, measure="shannon", richness = TRUE, rarefy = 10054)
-    [19-07-2023 12:43:49 UTC] Warning: The data you have provided does not have
-    [19-07-2023 12:43:49 UTC] any singletons. This is highly suspicious. Results of richness
-    [19-07-2023 12:43:49 UTC] estimates (for example) are probably unreliable, or wrong, if you have already
-    [19-07-2023 12:43:49 UTC] trimmed low-abundance taxa from the data.
-    [19-07-2023 12:43:49 UTC] We recommend that you find the un-trimmed data and retry.
-    [19-07-2023 12:43:50 UTC] Saving alpha diversity table to /nephele_data/outputs//graphs/alphadiv.txt
-    [19-07-2023 12:43:50 UTC] Saving plot to /nephele_data/outputs/graphs/alphadiv.html
-    [19-07-2023 12:43:50 UTC] Warning in plotly::config(pp, cloud = T, edits = list(titleText = T, legendText = T,  :
-      The `cloud` argument is deprecated. Use `showSendToCloud` instead.
-    [19-07-2023 12:43:51 UTC] "allgraphs" complete.
-    [19-07-2023 12:43:51 UTC] Result 'ref_db' reported
-    [19-07-2023 12:43:51 UTC] Result 'error_rate_r1' reported
-    [19-07-2023 12:43:51 UTC] Result 'rooted_tree' reported
-    [19-07-2023 12:43:51 UTC] Result 'top_species_table' reported
-    [19-07-2023 12:43:51 UTC] Result 'track_reads' reported
-    [19-07-2023 12:43:51 UTC] Result 'sampling_depth' reported
-    [19-07-2023 12:43:51 UTC] Result 'otu_summary_table' reported
-    [19-07-2023 12:43:51 UTC] Result 'biom' reported
-    [19-07-2023 12:43:51 UTC] Result 'alphadiv' reported
-    [19-07-2023 12:43:51 UTC] Result 'pcoa_binomial' reported
-    [19-07-2023 12:43:51 UTC] Result 'rarecurve' reported
-    [19-07-2023 12:43:51 UTC] Result 'logfile_debug' reported
-    [19-07-2023 12:43:51 UTC] Result 'quality_profile_r2' reported
-    [19-07-2023 12:43:51 UTC] Result 'error_rate_r2' reported
-    [19-07-2023 12:43:51 UTC] Result 'pcoa_bray' reported
-    [19-07-2023 12:43:51 UTC] Result 'otu_table' reported
-    [19-07-2023 12:43:51 UTC] Optional result 'species_heatmap' does not exist: /nephele_data/outputs/graphs/Species_heatmap.html
-    [19-07-2023 12:43:51 UTC] Result 'rarefied_otu_table' reported
-    [19-07-2023 12:43:51 UTC] Result 'seq_fasta' reported
-    [19-07-2023 12:43:51 UTC] Result 'quality_profile_r1' reported
-    [19-07-2023 12:43:51 UTC] Result 'taxonomy_table' reported
-    [19-07-2023 12:43:51 UTC] Result 'reads_in_reads_out' reported
-    [19-07-2023 12:43:51 UTC] Result 'seq_heatmap' reported
-    [19-07-2023 12:43:51 UTC] Result 'taxmethod' reported
-    [19-07-2023 12:43:52 UTC] Results tarball does not exist: /mnt/EFS/user_uploads/b73de8bfdd22_reported_results.tar.gz. Creating.
-    [19-07-2023 12:43:52 UTC] Created results tarball: /mnt/EFS/user_uploads/b73de8bfdd22_reported_results.tar.gz
-    [19-07-2023 12:43:52 UTC] Uploaded to S3: /mnt/EFS/user_uploads/b73de8bfdd22_reported_results.tar.gz
-    [19-07-2023 12:43:52 UTC] Uploaded to S3: /mnt/EFS/user_uploads/b73de8bfdd22/outputs/b73de8bfdd22_results_registry.json
-    [19-07-2023 12:43:52 UTC] DADA2 pipeline complete.
-    [19-07-2023 12:43:56 UTC] None
+    [25-07-2023 12:58:14 UTC] Nephele, developed by BCBB/OCICB/NIAID/NIH version: 2.27.1, tag: Nephele_2023_July_19, commit: 0b87cad
+    [25-07-2023 12:58:14 UTC] Python version: 3.7.3
+    [25-07-2023 12:58:14 UTC] Current time: 2023-07-25 12:58
+    [25-07-2023 12:58:14 UTC] Pipeline name: Biobakery
+    [25-07-2023 12:58:14 UTC] Job Description:
+    [25-07-2023 12:58:14 UTC] Job parameters
+    [25-07-2023 12:58:14 UTC] job_id: 5bfc066feb92
+    [25-07-2023 12:58:14 UTC] map_file: <_io.TextIOWrapper name='/nephele_data/inputs/N2_16S_example_mapping_one_corrected.txt' mode='r' encoding='latin-1'>
+    [25-07-2023 12:58:14 UTC] data_type: WGS_PE
+    [25-07-2023 12:58:14 UTC] threads: 12
+    [25-07-2023 12:58:14 UTC] local_jobs: 4
+    [25-07-2023 12:58:14 UTC] strainphlan: False
+    [25-07-2023 12:58:14 UTC] keep: False
+    [25-07-2023 12:58:14 UTC] project_name: 5bfc066feb92
+    [25-07-2023 12:58:14 UTC] inputs_dir: None
+    [25-07-2023 12:58:14 UTC] outputs_dir: None
+    [25-07-2023 12:58:14 UTC] Results manager initialized. Results registry path: /mnt/EFS/user_uploads/5bfc066feb92/outputs/5bfc066feb92_results_registry.json
+    [25-07-2023 12:58:14 UTC] Skipping FASTQ file validation
+    [25-07-2023 12:58:14 UTC] Renaming paired end files.
+    [25-07-2023 12:58:14 UTC] Inputs directory: /nephele_data/outputs/renamed_inputs/
+    [25-07-2023 12:58:14 UTC] Running Whole Metagenome Shotgun Workflow (wmgx).
+    [25-07-2023 12:58:19 UTC] run --mount type=bind,source=/mnt/EFS/dbs/biobakery_workflows_databases_3.0.0.a.7,target=/opt/biobakery_workflows_databases --mount type=bind,source=/nephele_data/,target=/nephele_data/ --user www-data biobakery/nephele2:3.0.0.a.7 biobakery_workflows wmgx --input-extension fastq --threads 12 --input /nephele_data/outputs/renamed_inputs/ --output /nephele_data/outputs/ --skip-nothing --local-jobs 4 --taxonomic-profiling-options "-x mpa_v30_CHOCOPhlAn_201901" --bypass-strain-profiling
+    [25-07-2023 13:38:00 UTC] Create wmgx_vis output directory: /nephele_data/outputs/wmgx_vis
+    [25-07-2023 13:38:00 UTC] Checking output files from wmgx workflow that are required by wmgx_vis workflow.
+    [25-07-2023 13:38:00 UTC] Running Visualization for Whole Metagenome Shotgun Workflow (wmgx_vis).
+    [25-07-2023 13:38:04 UTC] run --mount type=bind,source=/mnt/EFS/dbs/biobakery_workflows_databases_3.0.0.a.7,target=/opt/biobakery_workflows_databases --mount type=bind,source=/nephele_data/,target=/nephele_data/ --user www-data biobakery/nephele2:3.0.0.a.7 biobakery_workflows wmgx_vis --input /nephele_data/outputs/ --project-name '5bfc066feb92' --format html --output /nephele_data/outputs/wmgx_vis --introduction-text "The data was run through the standard workflow for whole metagenome shotgun sequencing  with the exception of strain profiling (StrainPhlAn).  Details of the pipelines can be found in the <a href=https://github.com/biobakery/biobakery/wiki/biobakery_workflows#2-metagenome-profiling>bioBakery Workflows Tutorial</a>."
+    [25-07-2023 13:38:04 UTC] Checking output files from wmgx_vis pipeline.
+    [25-07-2023 13:38:04 UTC] Pipeline Error:
+    [25-07-2023 13:38:04 UTC] ('/nephele_data/outputs/wmgx_vis/wmgx_report.html does not exist.\n', 'Job ID Unknown')
+    [25-07-2023 13:38:04 UTC] A step in the biobakery workflows may have failed. Check anadama.log files.
+    [25-07-2023 13:38:04 UTC] 
+    [25-07-2023 13:38:04 UTC] Cleaning up intermediate files.
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.trimmed.single.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.trimmed.single.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.trimmed.single.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_hg37dec_v0.1_bowtie2_paired_contam_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_hg37dec_v0.1_bowtie2_unmatched_1_contam.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.repeats.removed.unmatched.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.repeats.removed.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_unmatched_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_hg37dec_v0.1_bowtie2_unmatched_2_contam.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.trimmed.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.trimmed.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.trimmed.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_hg37dec_v0.1_bowtie2_paired_contam_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.repeats.removed.unmatched.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_hg37dec_v0.1_bowtie2_paired_contam_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.trimmed.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.repeats.removed.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_paired_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.repeats.removed.unmatched.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.repeats.removed.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_paired_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_hg37dec_v0.1_bowtie2_paired_contam_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_paired_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_hg37dec_v0.1_bowtie2_paired_contam_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_unmatched_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_hg37dec_v0.1_bowtie2_paired_contam_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_hg37dec_v0.1_bowtie2_unmatched_2_contam.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_hg37dec_v0.1_bowtie2_unmatched_1_contam.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.repeats.removed.unmatched.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.trimmed.single.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.trimmed.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_unmatched_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.repeats.removed.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.repeats.removed.unmatched.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_hg37dec_v0.1_bowtie2_unmatched_1_contam.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_paired_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831.trimmed.single.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.trimmed.single.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_unmatched_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350_unmatched_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_paired_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.repeats.removed.unmatched.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.repeats.removed.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_paired_1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22831_hg37dec_v0.1_bowtie2_unmatched_2_contam.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192.repeats.removed.1.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22192_unmatched_2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/kneaddata/main/A22350.trimmed.2.fastq
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/metaphlan/main/A22192_bowtie2.sam
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/metaphlan/main/A22350_bowtie2.sam
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/metaphlan/main/A22831_bowtie2.sam
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/humann/main/A22192_humann_temp
+    [25-07-2023 13:38:04 UTC] Removing /nephele_data/outputs/humann/main/A22831_humann_temp
+    [25-07-2023 13:38:09 UTC] Removing /nephele_data/outputs/humann/main/A22350_humann_temp