From b5ca02eb3110151e79c0e36d9b55308dba7556a3 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Tue, 7 Feb 2023 13:10:59 -0500 Subject: [PATCH 1/5] [wfpm v0.8.0] started a new version validate-seqtools@0.1.6 from validate-seqtools@0.1.5 which was released --- validate-seqtools/main.nf | 2 +- validate-seqtools/pkg.json | 2 +- validate-seqtools/tests/checker.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf index 53640d0..2ea45bc 100755 --- a/validate-seqtools/main.nf +++ b/validate-seqtools/main.nf @@ -25,7 +25,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.5' +version = '0.1.6' container = [ 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.validate-seqtools' diff --git a/validate-seqtools/pkg.json b/validate-seqtools/pkg.json index 9f8aa91..1b3c666 100644 --- a/validate-seqtools/pkg.json +++ b/validate-seqtools/pkg.json @@ -1,6 +1,6 @@ { "name": "validate-seqtools", - "version": "0.1.5", + "version": "0.1.6", "description": "Using Seq-tools, validates molecular", "main": "main.nf", "deprecated": false, diff --git a/validate-seqtools/tests/checker.nf b/validate-seqtools/tests/checker.nf index 7a24302..e2b4526 100755 --- a/validate-seqtools/tests/checker.nf +++ b/validate-seqtools/tests/checker.nf @@ -29,7 +29,7 @@ /* this block is auto-generated based on info from pkg.json where */ /* changes can be made if needed, do NOT modify this block manually */ nextflow.enable.dsl = 2 -version = '0.1.5' +version = '0.1.6' container = [ 'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.validate-seqtools' From c4566d7f58e72b4dd92a924547ac32982d1883d3 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Tue, 7 Feb 2023 15:28:52 -0500 Subject: [PATCH 2/5] update `main.py` and `main.nf` --- validate-seqtools/Dockerfile | 4 +- validate-seqtools/main.nf | 16 +- validate-seqtools/main.py | 18 +- validate-seqtools/tests/checker.nf | 1 - ....PASS-with-WARNING-and-SKIPPED-check.jsonl | 161 ++++++++++++++++++ validate-seqtools/tests/test-job-bam.json | 4 +- 6 files changed, 188 insertions(+), 16 deletions(-) create mode 100644 validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl diff --git a/validate-seqtools/Dockerfile b/validate-seqtools/Dockerfile index 9b5af5e..5f8a7c9 100644 --- a/validate-seqtools/Dockerfile +++ b/validate-seqtools/Dockerfile @@ -37,8 +37,8 @@ RUN cd /tmp &&\ # Install seq-tools -RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.3 - +#RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.4 +RUN pip install git+https://github.com/icgc-argo/seq-tools.git@add-parallel #RUN git clone https://github.com/icgc-argo/seq-tools.git@1.1.0 &&\ # git clone https://github.com/icgc-argo/seq-tools.git@1.1.0 # cd seq-tools &&\ diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf index 2ea45bc..88871c6 100755 --- a/validate-seqtools/main.nf +++ b/validate-seqtools/main.nf @@ -46,7 +46,7 @@ params.publish_dir = "" // set to empty string will disable publishDir // tool specific parmas go here, add / change as needed params.json_file = "" -params.skip_md5sum_check = false +params.skippable_tests = ["c683","c685"] params.files = "" @@ -67,20 +67,26 @@ process validateSeqtools { script: // add and initialize variables here as needed - args_skip_md5sum_check = params.skip_md5sum_check ? "--skip_md5sum_check " : "" + """ cp ${json_file} local_copy python3 /tools/main.py \ -j local_copy \ - ${args_skip_md5sum_check} \ + -k ${params.skippable_tests.join(" ")} \ + -t ${params.cpus} \ > seq-tools.log 2>&1 if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then echo "Payload is INVALID. Please check out details in validation report under: " - pwd + pwd + exit 1 + elif ls validation_report.UNKNOWN*.jsonl 1> /dev/null 2>&1; + then + echo "Payload is UNKNOWN. Please check out details in validation report under: " + pwd exit 1 else - exit 0 + echo 0 fi """ } diff --git a/validate-seqtools/main.py b/validate-seqtools/main.py index 99517e4..6546341 100755 --- a/validate-seqtools/main.py +++ b/validate-seqtools/main.py @@ -37,15 +37,21 @@ def main(): parser = argparse.ArgumentParser(description='Tool: validate-seqtools') parser.add_argument('-j', '--json-file', dest='json_file', type=str, help='JSON file containing molecular data to be validated', required=True) - parser.add_argument('-k', '--skip_md5sum_check', dest='skip_md5sum', action='store_true', - help='JSON file containing molecular data to be validated') + parser.add_argument('-k', '--skippable_tests', dest='skippable_tests', nargs="+",default=[], + help='Tests to skip') + parser.add_argument('-t', '--threads', dest='threads', default=1, + help='threads to speed up operations') args = parser.parse_args() # Check if successful - if args.skip_md5sum : - cmd="seq-tools validate "+args.json_file+" --skip_md5sum_check" - else: - cmd="seq-tools validate "+args.json_file + cmd="seq-tools validate "+args.json_file + + if args.skippable_tests: + for test in args.skippable_tests: + cmd+=" -k "+test + if args.threads: + cmd+=" -t "+str(args.threads) + result=subprocess.run(cmd,shell=True) diff --git a/validate-seqtools/tests/checker.nf b/validate-seqtools/tests/checker.nf index e2b4526..770bb44 100755 --- a/validate-seqtools/tests/checker.nf +++ b/validate-seqtools/tests/checker.nf @@ -45,7 +45,6 @@ params.container = "" include { validateSeqtools } from '../main' - process file_smart_diff { container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}" diff --git a/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl new file mode 100644 index 0000000..d8c8234 --- /dev/null +++ b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl @@ -0,0 +1,161 @@ +{ + "tool": { + "name": "seq-tools", + "version": "1.2.4" + }, + "metadata_file": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5/local_copy", + "data_dir": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5", + "started_at": "2023-02-07T20:21:57.058Z", + "ended_at": "2023-02-07T20:21:57.278Z", + "validation": { + "status": "PASS-with-WARNING-and-SKIPPED-check", + "message": "Please see individual checks for details", + "checks": [ + { + "checker": "c110_rg_id_uniqueness", + "status": "PASS", + "message": "Read group ID uniqueness check status: PASS" + }, + { + "checker": "c120_permissible_char_in_rg_id", + "status": "PASS", + "message": "Read group ID permissible character check status: PASS" + }, + { + "checker": "c130_one_sample", + "status": "PASS", + "message": "One and only one sample check status: PASS" + }, + { + "checker": "c140_platform_unit_uniqueness", + "status": "PASS", + "message": "Platform unit uniqueness check status: PASS" + }, + { + "checker": "c150_rg_count_match", + "status": "PASS", + "message": "Read groups count check status: PASS" + }, + { + "checker": "c160_file_r1_r2_check", + "status": "PASS", + "message": "Fields file_r1 and file_r2 check status: PASS" + }, + { + "checker": "c170_fq_uniqueness_in_rgs", + "status": "PASS", + "message": "FASTQ uniqueness in read groups check status: PASS" + }, + { + "checker": "c180_file_uniqueness", + "status": "PASS", + "message": "Files uniqueness check in files section status: PASS" + }, + { + "checker": "c190_no_extra_files", + "status": "PASS", + "message": "No extra files check status: PASS" + }, + { + "checker": "c200_rg_id_in_bam_uniqueness", + "status": "PASS", + "message": "'read_group_id_in_bam' uniqueness check status: PASS" + }, + { + "checker": "c210_no_path_in_filename", + "status": "PASS", + "message": "No path in fileName check in 'files' section status: PASS" + }, + { + "checker": "c220_no_rg_id_in_bam_for_fq", + "status": "PASS", + "message": "'read_group_id_in_bam' not populated for FASTQ check: PASS" + }, + { + "checker": "c230_files_info_data_category", + "status": "PASS", + "message": "Field 'info.data_category' is found populated with 'Sequencing Reads'. Validation status: PASS" + }, + { + "checker": "c240_submitter_rg_id_collide_with_rg_id_in_bam", + "status": "PASS", + "message": "For any read group, when 'read_group_id_in_bam' is not populated, 'submitter_read_group_id' must NOT be the same as 'read_group_id_in_bam' of another read group from the same BAM file. Validation result: PASS" + }, + { + "checker": "c250_file_data_type", + "status": "PASS", + "message": "Field 'dataType' is found populated with 'Submitted Reads'. Validation status: PASS" + }, + { + "checker": "c260_filename_pattern", + "status": "PASS", + "message": "'fileName' matches expected pattern '^[A-Za-z0-9]{1}[A-Za-z0-9_\\.\\-]*\\.(bam|fq\\.gz|fastq\\.gz|fq\\.bz2|fastq\\.bz2)$' in 'files' section. Validation status: PASS" + }, + { + "checker": "c605_all_files_accessible", + "status": "PASS", + "message": "All data files accessible check: PASS" + }, + { + "checker": "c608_bam_sanity", + "status": "PASS", + "message": "BAM sanity check by samtools quickcheck. Validation result: PASS" + }, + { + "checker": "c609_fastq_sanity", + "status": "PASS", + "message": "No FASTQ Files to check" + }, + { + "checker": "c610_rg_id_in_bam", + "status": "PASS", + "message": "Read group ID in BAM header check: PASS" + }, + { + "checker": "c620_submitter_read_group_id_match", + "status": "PASS", + "message": "For each read group, when 'read_group_id_in_bam' is not provided, 'submitter_read_group_id' in the metadata JSON must match RG ID in BAM. Validation result: PASS" + }, + { + "checker": "c630_rg_id_in_bam_match", + "status": "PASS", + "message": "'read_group_id_in_bam' in metadata matches RG ID in BAM check: PASS" + }, + { + "checker": "c640_one_sm_in_bam_header", + "status": "PASS", + "message": "One and only one SM in @RG BAM header check: PASS" + }, + { + "checker": "c650_sm_in_bam_matches_metadata", + "status": "WARNING", + "message": "SM in BAM header is empty. Validation status: WARNING. NOTE that submitterSampleId in metadata JSON will be used in the header of ARGO uniformly aligned sequences." + }, + { + "checker": "c660_metadata_in_bam_rg_header", + "status": "WARNING", + "message": "Information (excluding ID and SM which are validated elsewhere) in BAM @RG header does NOT match experiment/read group info in the metadata JSON. NOTE that information in the metadata JSON document will be kept and used in ICGC ARGO uniform analysis while unmatched info in BAM header will be discarded. Details of the difference: [BAM anon_chr1_complete.bam @RG QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 vs QCMG_22f321c6-ff3f-11e4-8e8b-f8a0800c69f0_130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:866d65b8-ff3f-11e4-b413-bdbd66be296d:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1 vs QCMG_866d65b8-ff3f-11e4-b413-bdbd66be296d_130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:91ce15f2-ff3e-11e4-9d73-85b485b025f8:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1 vs QCMG_91ce15f2-ff3e-11e4-9d73-85b485b025f8_130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:dd3f83b8-ff3e-11e4-81af-910d0943bdb6:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1 vs QCMG_dd3f83b8-ff3e-11e4-81af-910d0943bdb6_130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1_8043985)]" + }, + { + "checker": "c670_rg_is_paired_in_bam", + "status": "PASS", + "message": "Read group pair status in BAM check: PASS" + }, + { + "checker": "c680_repeated_read_names_per_group_in_bam", + "status": "PASS", + "message": "Repeated Read names within Read groups in BAM not found: PASS" + }, + { + "checker": "c681_fileSize_match", + "status": "PASS", + "message": "The fileSize calculated from the sequencing files matches the info provided in metadata JSON: PASS" + }, + { + "checker": "c683_fileMd5sum_match", + "status": "SKIPPED", + "message": "This check was not performed as instructed by the command line option. Status: SKIPPED" + } + ] + } +} diff --git a/validate-seqtools/tests/test-job-bam.json b/validate-seqtools/tests/test-job-bam.json index 17bddb6..696a4a8 100644 --- a/validate-seqtools/tests/test-job-bam.json +++ b/validate-seqtools/tests/test-job-bam.json @@ -1,5 +1,5 @@ { "json_file": "input/anon_chr1_complete.json", "files": ["input/anon_chr1_complete.bam"], - "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl" -} \ No newline at end of file + "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl" +} From 88b2c4763d4dcfb55940c4752563e26470086e42 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:12:25 -0500 Subject: [PATCH 3/5] update build-test-release.yml --- .github/workflows/build-test-release.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-test-release.yml b/.github/workflows/build-test-release.yml index 0ba985b..e5a5458 100644 --- a/.github/workflows/build-test-release.yml +++ b/.github/workflows/build-test-release.yml @@ -19,10 +19,13 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.6 - uses: actions/setup-python@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 with: - python-version: 3.6 + python-version: "3.10" + architecture: "x64" + env: + AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache - name: Extract package name and version from branch name id: get_pkg_info @@ -80,10 +83,13 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.6 - uses: actions/setup-python@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 with: - python-version: 3.6 + python-version: "3.10" + architecture: "x64" + env: + AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache - name: Install dependencies run: | From 8aae37100931b866e28704cac2e4d66753584c6c Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Thu, 9 Feb 2023 15:58:01 -0500 Subject: [PATCH 4/5] bump `seq-tools` ver and add test scenario `test-job-bam-w-skips` --- validate-seqtools/Dockerfile | 3 +- validate-seqtools/main.nf | 8 +- validate-seqtools/main.py | 2 +- validate-seqtools/tests/checker.nf | 8 +- ....PASS-with-WARNING-and-SKIPPED-check.jsonl | 162 +----------------- .../tests/test-job-bam-w-skips.json | 6 + validate-seqtools/tests/test-job-bam.json | 2 +- 7 files changed, 21 insertions(+), 170 deletions(-) create mode 100644 validate-seqtools/tests/test-job-bam-w-skips.json diff --git a/validate-seqtools/Dockerfile b/validate-seqtools/Dockerfile index 5f8a7c9..b8a0361 100644 --- a/validate-seqtools/Dockerfile +++ b/validate-seqtools/Dockerfile @@ -37,8 +37,7 @@ RUN cd /tmp &&\ # Install seq-tools -#RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.4 -RUN pip install git+https://github.com/icgc-argo/seq-tools.git@add-parallel +RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.4 #RUN git clone https://github.com/icgc-argo/seq-tools.git@1.1.0 &&\ # git clone https://github.com/icgc-argo/seq-tools.git@1.1.0 # cd seq-tools &&\ diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf index 88871c6..7583315 100755 --- a/validate-seqtools/main.nf +++ b/validate-seqtools/main.nf @@ -46,7 +46,7 @@ params.publish_dir = "" // set to empty string will disable publishDir // tool specific parmas go here, add / change as needed params.json_file = "" -params.skippable_tests = ["c683","c685"] +params.skippable_tests = [] params.files = "" @@ -60,6 +60,7 @@ process validateSeqtools { input: // input, make update as needed path json_file path files + val skippable_tests output: // output, make update as needed path "validation_report.*.jsonl", emit: validation_log @@ -72,7 +73,7 @@ process validateSeqtools { cp ${json_file} local_copy python3 /tools/main.py \ -j local_copy \ - -k ${params.skippable_tests.join(" ")} \ + -k ${skippable_tests.join(" ")} \ -t ${params.cpus} \ > seq-tools.log 2>&1 @@ -97,6 +98,7 @@ process validateSeqtools { workflow { validateSeqtools( file(params.json_file), - Channel.fromPath(params.files).collect() + Channel.fromPath(params.files).collect(), + params.skippable_tests ) } diff --git a/validate-seqtools/main.py b/validate-seqtools/main.py index 6546341..6c431db 100755 --- a/validate-seqtools/main.py +++ b/validate-seqtools/main.py @@ -37,7 +37,7 @@ def main(): parser = argparse.ArgumentParser(description='Tool: validate-seqtools') parser.add_argument('-j', '--json-file', dest='json_file', type=str, help='JSON file containing molecular data to be validated', required=True) - parser.add_argument('-k', '--skippable_tests', dest='skippable_tests', nargs="+",default=[], + parser.add_argument('-k', '--skippable_tests', dest='skippable_tests', nargs="*",default=[], help='Tests to skip') parser.add_argument('-t', '--threads', dest='threads', default=1, help='threads to speed up operations') diff --git a/validate-seqtools/tests/checker.nf b/validate-seqtools/tests/checker.nf index 770bb44..67bea51 100755 --- a/validate-seqtools/tests/checker.nf +++ b/validate-seqtools/tests/checker.nf @@ -42,6 +42,7 @@ params.container_registry = "" params.container_version = "" params.container = "" +params.skippable_tests=[] include { validateSeqtools } from '../main' @@ -70,11 +71,13 @@ workflow checker { input_json input_files expected_output + skippable_tests main: validateSeqtools( input_json, - input_files + input_files, + skippable_tests ) file_smart_diff( @@ -88,6 +91,7 @@ workflow { checker( file(params.json_file), Channel.fromPath(params.files).collect(), - file(params.expected_output) + file(params.expected_output), + params.skippable_tests ) } diff --git a/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl index d8c8234..ea6be96 100644 --- a/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl +++ b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl @@ -1,161 +1 @@ -{ - "tool": { - "name": "seq-tools", - "version": "1.2.4" - }, - "metadata_file": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5/local_copy", - "data_dir": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5", - "started_at": "2023-02-07T20:21:57.058Z", - "ended_at": "2023-02-07T20:21:57.278Z", - "validation": { - "status": "PASS-with-WARNING-and-SKIPPED-check", - "message": "Please see individual checks for details", - "checks": [ - { - "checker": "c110_rg_id_uniqueness", - "status": "PASS", - "message": "Read group ID uniqueness check status: PASS" - }, - { - "checker": "c120_permissible_char_in_rg_id", - "status": "PASS", - "message": "Read group ID permissible character check status: PASS" - }, - { - "checker": "c130_one_sample", - "status": "PASS", - "message": "One and only one sample check status: PASS" - }, - { - "checker": "c140_platform_unit_uniqueness", - "status": "PASS", - "message": "Platform unit uniqueness check status: PASS" - }, - { - "checker": "c150_rg_count_match", - "status": "PASS", - "message": "Read groups count check status: PASS" - }, - { - "checker": "c160_file_r1_r2_check", - "status": "PASS", - "message": "Fields file_r1 and file_r2 check status: PASS" - }, - { - "checker": "c170_fq_uniqueness_in_rgs", - "status": "PASS", - "message": "FASTQ uniqueness in read groups check status: PASS" - }, - { - "checker": "c180_file_uniqueness", - "status": "PASS", - "message": "Files uniqueness check in files section status: PASS" - }, - { - "checker": "c190_no_extra_files", - "status": "PASS", - "message": "No extra files check status: PASS" - }, - { - "checker": "c200_rg_id_in_bam_uniqueness", - "status": "PASS", - "message": "'read_group_id_in_bam' uniqueness check status: PASS" - }, - { - "checker": "c210_no_path_in_filename", - "status": "PASS", - "message": "No path in fileName check in 'files' section status: PASS" - }, - { - "checker": "c220_no_rg_id_in_bam_for_fq", - "status": "PASS", - "message": "'read_group_id_in_bam' not populated for FASTQ check: PASS" - }, - { - "checker": "c230_files_info_data_category", - "status": "PASS", - "message": "Field 'info.data_category' is found populated with 'Sequencing Reads'. Validation status: PASS" - }, - { - "checker": "c240_submitter_rg_id_collide_with_rg_id_in_bam", - "status": "PASS", - "message": "For any read group, when 'read_group_id_in_bam' is not populated, 'submitter_read_group_id' must NOT be the same as 'read_group_id_in_bam' of another read group from the same BAM file. Validation result: PASS" - }, - { - "checker": "c250_file_data_type", - "status": "PASS", - "message": "Field 'dataType' is found populated with 'Submitted Reads'. Validation status: PASS" - }, - { - "checker": "c260_filename_pattern", - "status": "PASS", - "message": "'fileName' matches expected pattern '^[A-Za-z0-9]{1}[A-Za-z0-9_\\.\\-]*\\.(bam|fq\\.gz|fastq\\.gz|fq\\.bz2|fastq\\.bz2)$' in 'files' section. Validation status: PASS" - }, - { - "checker": "c605_all_files_accessible", - "status": "PASS", - "message": "All data files accessible check: PASS" - }, - { - "checker": "c608_bam_sanity", - "status": "PASS", - "message": "BAM sanity check by samtools quickcheck. Validation result: PASS" - }, - { - "checker": "c609_fastq_sanity", - "status": "PASS", - "message": "No FASTQ Files to check" - }, - { - "checker": "c610_rg_id_in_bam", - "status": "PASS", - "message": "Read group ID in BAM header check: PASS" - }, - { - "checker": "c620_submitter_read_group_id_match", - "status": "PASS", - "message": "For each read group, when 'read_group_id_in_bam' is not provided, 'submitter_read_group_id' in the metadata JSON must match RG ID in BAM. Validation result: PASS" - }, - { - "checker": "c630_rg_id_in_bam_match", - "status": "PASS", - "message": "'read_group_id_in_bam' in metadata matches RG ID in BAM check: PASS" - }, - { - "checker": "c640_one_sm_in_bam_header", - "status": "PASS", - "message": "One and only one SM in @RG BAM header check: PASS" - }, - { - "checker": "c650_sm_in_bam_matches_metadata", - "status": "WARNING", - "message": "SM in BAM header is empty. Validation status: WARNING. NOTE that submitterSampleId in metadata JSON will be used in the header of ARGO uniformly aligned sequences." - }, - { - "checker": "c660_metadata_in_bam_rg_header", - "status": "WARNING", - "message": "Information (excluding ID and SM which are validated elsewhere) in BAM @RG header does NOT match experiment/read group info in the metadata JSON. NOTE that information in the metadata JSON document will be kept and used in ICGC ARGO uniform analysis while unmatched info in BAM header will be discarded. Details of the difference: [BAM anon_chr1_complete.bam @RG QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 vs QCMG_22f321c6-ff3f-11e4-8e8b-f8a0800c69f0_130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:866d65b8-ff3f-11e4-b413-bdbd66be296d:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1 vs QCMG_866d65b8-ff3f-11e4-b413-bdbd66be296d_130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:91ce15f2-ff3e-11e4-9d73-85b485b025f8:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1 vs QCMG_91ce15f2-ff3e-11e4-9d73-85b485b025f8_130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:dd3f83b8-ff3e-11e4-81af-910d0943bdb6:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1 vs QCMG_dd3f83b8-ff3e-11e4-81af-910d0943bdb6_130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1_8043985)]" - }, - { - "checker": "c670_rg_is_paired_in_bam", - "status": "PASS", - "message": "Read group pair status in BAM check: PASS" - }, - { - "checker": "c680_repeated_read_names_per_group_in_bam", - "status": "PASS", - "message": "Repeated Read names within Read groups in BAM not found: PASS" - }, - { - "checker": "c681_fileSize_match", - "status": "PASS", - "message": "The fileSize calculated from the sequencing files matches the info provided in metadata JSON: PASS" - }, - { - "checker": "c683_fileMd5sum_match", - "status": "SKIPPED", - "message": "This check was not performed as instructed by the command line option. Status: SKIPPED" - } - ] - } -} +{"tool": {"name": "seq-tools", "version": "1.2.4"}, "metadata_file": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/7c/89eab7957d2922c8b8379b03dba2db/local_copy", "data_dir": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/7c/89eab7957d2922c8b8379b03dba2db", "started_at": "2023-02-09T20:43:17.966Z", "ended_at": "2023-02-09T20:43:18.170Z", "validation": {"status": "PASS-with-WARNING-and-SKIPPED-check", "message": "Please see individual checks for details", "checks": [{"checker": "c110_rg_id_uniqueness", "status": "PASS", "message": "Read group ID uniqueness check status: PASS"}, {"checker": "c120_permissible_char_in_rg_id", "status": "PASS", "message": "Read group ID permissible character check status: PASS"}, {"checker": "c130_one_sample", "status": "PASS", "message": "One and only one sample check status: PASS"}, {"checker": "c140_platform_unit_uniqueness", "status": "PASS", "message": "Platform unit uniqueness check status: PASS"}, {"checker": "c150_rg_count_match", "status": "PASS", "message": "Read groups count check status: PASS"}, {"checker": "c160_file_r1_r2_check", "status": "PASS", "message": "Fields file_r1 and file_r2 check status: PASS"}, {"checker": "c170_fq_uniqueness_in_rgs", "status": "PASS", "message": "FASTQ uniqueness in read groups check status: PASS"}, {"checker": "c180_file_uniqueness", "status": "PASS", "message": "Files uniqueness check in files section status: PASS"}, {"checker": "c190_no_extra_files", "status": "PASS", "message": "No extra files check status: PASS"}, {"checker": "c200_rg_id_in_bam_uniqueness", "status": "PASS", "message": "'read_group_id_in_bam' uniqueness check status: PASS"}, {"checker": "c210_no_path_in_filename", "status": "PASS", "message": "No path in fileName check in 'files' section status: PASS"}, {"checker": "c220_no_rg_id_in_bam_for_fq", "status": "PASS", "message": "'read_group_id_in_bam' not populated for FASTQ check: PASS"}, {"checker": "c230_files_info_data_category", "status": "PASS", "message": "Field 'info.data_category' is found populated with 'Sequencing Reads'. Validation status: PASS"}, {"checker": "c240_submitter_rg_id_collide_with_rg_id_in_bam", "status": "PASS", "message": "For any read group, when 'read_group_id_in_bam' is not populated, 'submitter_read_group_id' must NOT be the same as 'read_group_id_in_bam' of another read group from the same BAM file. Validation result: PASS"}, {"checker": "c250_file_data_type", "status": "PASS", "message": "Field 'dataType' is found populated with 'Submitted Reads'. Validation status: PASS"}, {"checker": "c260_filename_pattern", "status": "PASS", "message": "'fileName' matches expected pattern '^[A-Za-z0-9]{1}[A-Za-z0-9_\\.\\-]*\\.(bam|fq\\.gz|fastq\\.gz|fq\\.bz2|fastq\\.bz2)$' in 'files' section. Validation status: PASS"}, {"checker": "c605_all_files_accessible", "status": "PASS", "message": "All data files accessible check: PASS"}, {"checker": "c608_bam_sanity", "status": "PASS", "message": "BAM sanity check by samtools quickcheck. Validation result: PASS"}, {"checker": "c609_fastq_sanity", "status": "SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}, {"checker": "c610_rg_id_in_bam", "status": "PASS", "message": "Read group ID in BAM header check: PASS"}, {"checker": "c620_submitter_read_group_id_match", "status": "PASS", "message": "For each read group, when 'read_group_id_in_bam' is not provided, 'submitter_read_group_id' in the metadata JSON must match RG ID in BAM. Validation result: PASS"}, {"checker": "c630_rg_id_in_bam_match", "status": "PASS", "message": "'read_group_id_in_bam' in metadata matches RG ID in BAM check: PASS"}, {"checker": "c640_one_sm_in_bam_header", "status": "PASS", "message": "One and only one SM in @RG BAM header check: PASS"}, {"checker": "c650_sm_in_bam_matches_metadata", "status": "WARNING", "message": "SM in BAM header is empty. Validation status: WARNING. NOTE that submitterSampleId in metadata JSON will be used in the header of ARGO uniformly aligned sequences."}, {"checker": "c660_metadata_in_bam_rg_header", "status": "WARNING", "message": "Information (excluding ID and SM which are validated elsewhere) in BAM @RG header does NOT match experiment/read group info in the metadata JSON. NOTE that information in the metadata JSON document will be kept and used in ICGC ARGO uniform analysis while unmatched info in BAM header will be discarded. Details of the difference: [BAM anon_chr1_complete.bam @RG QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 vs QCMG_22f321c6-ff3f-11e4-8e8b-f8a0800c69f0_130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:866d65b8-ff3f-11e4-b413-bdbd66be296d:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1 vs QCMG_866d65b8-ff3f-11e4-b413-bdbd66be296d_130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:91ce15f2-ff3e-11e4-9d73-85b485b025f8:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1 vs QCMG_91ce15f2-ff3e-11e4-9d73-85b485b025f8_130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:dd3f83b8-ff3e-11e4-81af-910d0943bdb6:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1 vs QCMG_dd3f83b8-ff3e-11e4-81af-910d0943bdb6_130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1_8043985)]"}, {"checker": "c670_rg_is_paired_in_bam", "status": "PASS", "message": "Read group pair status in BAM check: PASS"}, {"checker": "c680_repeated_read_names_per_group_in_bam", "status": "PASS", "message": "Repeated Read names within Read groups in BAM not found: PASS"}, {"checker": "c681_fileSize_match", "status": "SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}, {"checker": "c683_fileMd5sum_match", "status": "SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}]}} diff --git a/validate-seqtools/tests/test-job-bam-w-skips.json b/validate-seqtools/tests/test-job-bam-w-skips.json new file mode 100644 index 0000000..2593150 --- /dev/null +++ b/validate-seqtools/tests/test-job-bam-w-skips.json @@ -0,0 +1,6 @@ +{ + "json_file": "input/anon_chr1_complete.json", + "files": ["input/anon_chr1_complete.bam"], + "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl", + "skippable_tests" : ["c681","c683","c609"] +} diff --git a/validate-seqtools/tests/test-job-bam.json b/validate-seqtools/tests/test-job-bam.json index 696a4a8..0a7700f 100644 --- a/validate-seqtools/tests/test-job-bam.json +++ b/validate-seqtools/tests/test-job-bam.json @@ -1,5 +1,5 @@ { "json_file": "input/anon_chr1_complete.json", "files": ["input/anon_chr1_complete.bam"], - "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl" + "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl", } From ef2d8a0dd3458098bfd2bbaff57836a10e1553ee Mon Sep 17 00:00:00 2001 From: Linda Xiang Date: Fri, 10 Feb 2023 10:56:54 -0500 Subject: [PATCH 5/5] print error message when providing unskippable checks --- validate-seqtools/main.nf | 24 +++++++++++++---------- validate-seqtools/tests/test-job-bam.json | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf index 7583315..c06d013 100755 --- a/validate-seqtools/main.nf +++ b/validate-seqtools/main.nf @@ -77,17 +77,21 @@ process validateSeqtools { -t ${params.cpus} \ > seq-tools.log 2>&1 - if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then - echo "Payload is INVALID. Please check out details in validation report under: " - pwd - exit 1 - elif ls validation_report.UNKNOWN*.jsonl 1> /dev/null 2>&1; - then - echo "Payload is UNKNOWN. Please check out details in validation report under: " - pwd - exit 1 + if ls validation_report.*.jsonl 1> /dev/null 2>&1; then + if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then + echo "Payload is INVALID. Please check out details in validation report under: " + pwd + exit 1 + elif ls validation_report.UNKNOWN*.jsonl 1> /dev/null 2>&1; + then + echo "Payload is UNKNOWN. Please check out details in validation report under: " + pwd + exit 1 + else + echo 0 + fi else - echo 0 + cat seq-tools.log && exit 1 fi """ } diff --git a/validate-seqtools/tests/test-job-bam.json b/validate-seqtools/tests/test-job-bam.json index 0a7700f..ce7d419 100644 --- a/validate-seqtools/tests/test-job-bam.json +++ b/validate-seqtools/tests/test-job-bam.json @@ -1,5 +1,5 @@ { "json_file": "input/anon_chr1_complete.json", "files": ["input/anon_chr1_complete.bam"], - "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl", + "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl" }