From b5ca02eb3110151e79c0e36d9b55308dba7556a3 Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Tue, 7 Feb 2023 13:10:59 -0500
Subject: [PATCH 1/5] [wfpm v0.8.0] started a new version
 validate-seqtools@0.1.6 from validate-seqtools@0.1.5 which was released

---
 validate-seqtools/main.nf          | 2 +-
 validate-seqtools/pkg.json         | 2 +-
 validate-seqtools/tests/checker.nf | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf
index 53640d0..2ea45bc 100755
--- a/validate-seqtools/main.nf
+++ b/validate-seqtools/main.nf
@@ -25,7 +25,7 @@
 /* this block is auto-generated based on info from pkg.json where   */
 /* changes can be made if needed, do NOT modify this block manually */
 nextflow.enable.dsl = 2
-version = '0.1.5'
+version = '0.1.6'
 
 container = [
     'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.validate-seqtools'
diff --git a/validate-seqtools/pkg.json b/validate-seqtools/pkg.json
index 9f8aa91..1b3c666 100644
--- a/validate-seqtools/pkg.json
+++ b/validate-seqtools/pkg.json
@@ -1,6 +1,6 @@
 {
     "name": "validate-seqtools",
-    "version": "0.1.5",
+    "version": "0.1.6",
     "description": "Using Seq-tools, validates molecular",
     "main": "main.nf",
     "deprecated": false,
diff --git a/validate-seqtools/tests/checker.nf b/validate-seqtools/tests/checker.nf
index 7a24302..e2b4526 100755
--- a/validate-seqtools/tests/checker.nf
+++ b/validate-seqtools/tests/checker.nf
@@ -29,7 +29,7 @@
 /* this block is auto-generated based on info from pkg.json where   */
 /* changes can be made if needed, do NOT modify this block manually */
 nextflow.enable.dsl = 2
-version = '0.1.5'
+version = '0.1.6'
 
 container = [
     'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.validate-seqtools'

From c4566d7f58e72b4dd92a924547ac32982d1883d3 Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Tue, 7 Feb 2023 15:28:52 -0500
Subject: [PATCH 2/5] replace `skip_md5sum_check` with `skippable_tests` and add `--threads`

---
 validate-seqtools/Dockerfile                  |   4 +-
 validate-seqtools/main.nf                     |  16 +-
 validate-seqtools/main.py                     |  18 +-
 validate-seqtools/tests/checker.nf            |   1 -
 ....PASS-with-WARNING-and-SKIPPED-check.jsonl | 161 ++++++++++++++++++
 validate-seqtools/tests/test-job-bam.json     |   4 +-
 6 files changed, 188 insertions(+), 16 deletions(-)
 create mode 100644 validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl

diff --git a/validate-seqtools/Dockerfile b/validate-seqtools/Dockerfile
index 9b5af5e..5f8a7c9 100644
--- a/validate-seqtools/Dockerfile
+++ b/validate-seqtools/Dockerfile
@@ -37,8 +37,8 @@ RUN cd /tmp &&\
 
 # Install seq-tools
 
-RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.3
-
+#RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.4
+RUN pip install git+https://github.com/icgc-argo/seq-tools.git@add-parallel
 #RUN git clone https://github.com/icgc-argo/seq-tools.git@1.1.0 &&\
 #    git clone https://github.com/icgc-argo/seq-tools.git@1.1.0
 #    cd seq-tools &&\
diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf
index 2ea45bc..88871c6 100755
--- a/validate-seqtools/main.nf
+++ b/validate-seqtools/main.nf
@@ -46,7 +46,7 @@ params.publish_dir = ""  // set to empty string will disable publishDir
 
 // tool specific parmas go here, add / change as needed
 params.json_file = ""
-params.skip_md5sum_check = false
+params.skippable_tests = ["c683","c685"]
 params.files = ""
 
 
@@ -67,20 +67,26 @@ process validateSeqtools {
 
   script:
     // add and initialize variables here as needed
-    args_skip_md5sum_check = params.skip_md5sum_check  ? "--skip_md5sum_check " : ""
+
     """
     cp ${json_file} local_copy
     python3 /tools/main.py \
       -j local_copy \
-      ${args_skip_md5sum_check} \
+      -k ${params.skippable_tests.join(" ")} \
+      -t ${params.cpus} \
       > seq-tools.log 2>&1
 
     if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then     
       echo "Payload is INVALID. Please check out details in validation report under: "
-      pwd 
+      pwd
+      exit 1
+    elif ls validation_report.UNKNOWN*.jsonl 1> /dev/null 2>&1;
+    then
+      echo "Payload is UNKNOWN. Please check out details in validation report under: "
+      pwd
       exit 1
     else
-      exit 0
+      echo 0
     fi
     """
 }
diff --git a/validate-seqtools/main.py b/validate-seqtools/main.py
index 99517e4..6546341 100755
--- a/validate-seqtools/main.py
+++ b/validate-seqtools/main.py
@@ -37,15 +37,21 @@ def main():
     parser = argparse.ArgumentParser(description='Tool: validate-seqtools')
     parser.add_argument('-j', '--json-file', dest='json_file', type=str,
                         help='JSON file containing molecular data to be validated', required=True)
-    parser.add_argument('-k', '--skip_md5sum_check', dest='skip_md5sum', action='store_true',
-                        help='JSON file containing molecular data to be validated')
+    parser.add_argument('-k', '--skippable_tests', dest='skippable_tests', nargs="+",default=[],
+                        help='Tests to skip')
+    parser.add_argument('-t', '--threads', dest='threads', default=1,
+                        help='threads to speed up operations')    
     args = parser.parse_args()
 
     # Check if successful
-    if args.skip_md5sum :
-        cmd="seq-tools validate "+args.json_file+" --skip_md5sum_check"
-    else:
-        cmd="seq-tools validate "+args.json_file
+    cmd="seq-tools validate "+args.json_file
+
+    if args.skippable_tests:
+        for test in args.skippable_tests:
+            cmd+=" -k "+test
+    if args.threads:
+        cmd+=" -t "+str(args.threads)
+
     result=subprocess.run(cmd,shell=True)
 
 
diff --git a/validate-seqtools/tests/checker.nf b/validate-seqtools/tests/checker.nf
index e2b4526..770bb44 100755
--- a/validate-seqtools/tests/checker.nf
+++ b/validate-seqtools/tests/checker.nf
@@ -45,7 +45,6 @@ params.container = ""
 
 include { validateSeqtools } from '../main'
 
-
 process file_smart_diff {
   container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
 
diff --git a/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl
new file mode 100644
index 0000000..d8c8234
--- /dev/null
+++ b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl
@@ -0,0 +1,161 @@
+{
+  "tool": {
+    "name": "seq-tools",
+    "version": "1.2.4"
+  },
+  "metadata_file": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5/local_copy",
+  "data_dir": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5",
+  "started_at": "2023-02-07T20:21:57.058Z",
+  "ended_at": "2023-02-07T20:21:57.278Z",
+  "validation": {
+    "status": "PASS-with-WARNING-and-SKIPPED-check",
+    "message": "Please see individual checks for details",
+    "checks": [
+      {
+        "checker": "c110_rg_id_uniqueness",
+        "status": "PASS",
+        "message": "Read group ID uniqueness check status: PASS"
+      },
+      {
+        "checker": "c120_permissible_char_in_rg_id",
+        "status": "PASS",
+        "message": "Read group ID permissible character check status: PASS"
+      },
+      {
+        "checker": "c130_one_sample",
+        "status": "PASS",
+        "message": "One and only one sample check status: PASS"
+      },
+      {
+        "checker": "c140_platform_unit_uniqueness",
+        "status": "PASS",
+        "message": "Platform unit uniqueness check status: PASS"
+      },
+      {
+        "checker": "c150_rg_count_match",
+        "status": "PASS",
+        "message": "Read groups count check status: PASS"
+      },
+      {
+        "checker": "c160_file_r1_r2_check",
+        "status": "PASS",
+        "message": "Fields file_r1 and file_r2 check status: PASS"
+      },
+      {
+        "checker": "c170_fq_uniqueness_in_rgs",
+        "status": "PASS",
+        "message": "FASTQ uniqueness in read groups check status: PASS"
+      },
+      {
+        "checker": "c180_file_uniqueness",
+        "status": "PASS",
+        "message": "Files uniqueness check in files section status: PASS"
+      },
+      {
+        "checker": "c190_no_extra_files",
+        "status": "PASS",
+        "message": "No extra files check status: PASS"
+      },
+      {
+        "checker": "c200_rg_id_in_bam_uniqueness",
+        "status": "PASS",
+        "message": "'read_group_id_in_bam' uniqueness check status: PASS"
+      },
+      {
+        "checker": "c210_no_path_in_filename",
+        "status": "PASS",
+        "message": "No path in fileName check in 'files' section status: PASS"
+      },
+      {
+        "checker": "c220_no_rg_id_in_bam_for_fq",
+        "status": "PASS",
+        "message": "'read_group_id_in_bam' not populated for FASTQ check: PASS"
+      },
+      {
+        "checker": "c230_files_info_data_category",
+        "status": "PASS",
+        "message": "Field 'info.data_category' is found populated with 'Sequencing Reads'. Validation status: PASS"
+      },
+      {
+        "checker": "c240_submitter_rg_id_collide_with_rg_id_in_bam",
+        "status": "PASS",
+        "message": "For any read group, when 'read_group_id_in_bam' is not populated, 'submitter_read_group_id' must NOT be the same as 'read_group_id_in_bam' of another read group from the same BAM file. Validation result: PASS"
+      },
+      {
+        "checker": "c250_file_data_type",
+        "status": "PASS",
+        "message": "Field 'dataType' is found populated with 'Submitted Reads'. Validation status: PASS"
+      },
+      {
+        "checker": "c260_filename_pattern",
+        "status": "PASS",
+        "message": "'fileName' matches expected pattern '^[A-Za-z0-9]{1}[A-Za-z0-9_\\.\\-]*\\.(bam|fq\\.gz|fastq\\.gz|fq\\.bz2|fastq\\.bz2)$' in 'files' section. Validation status: PASS"
+      },
+      {
+        "checker": "c605_all_files_accessible",
+        "status": "PASS",
+        "message": "All data files accessible check: PASS"
+      },
+      {
+        "checker": "c608_bam_sanity",
+        "status": "PASS",
+        "message": "BAM sanity check by samtools quickcheck. Validation result: PASS"
+      },
+      {
+        "checker": "c609_fastq_sanity",
+        "status": "PASS",
+        "message": "No FASTQ Files to check"
+      },
+      {
+        "checker": "c610_rg_id_in_bam",
+        "status": "PASS",
+        "message": "Read group ID in BAM header check: PASS"
+      },
+      {
+        "checker": "c620_submitter_read_group_id_match",
+        "status": "PASS",
+        "message": "For each read group, when 'read_group_id_in_bam' is not provided, 'submitter_read_group_id' in the metadata JSON must match RG ID in BAM. Validation result: PASS"
+      },
+      {
+        "checker": "c630_rg_id_in_bam_match",
+        "status": "PASS",
+        "message": "'read_group_id_in_bam' in metadata matches RG ID in BAM check: PASS"
+      },
+      {
+        "checker": "c640_one_sm_in_bam_header",
+        "status": "PASS",
+        "message": "One and only one SM in @RG BAM header check: PASS"
+      },
+      {
+        "checker": "c650_sm_in_bam_matches_metadata",
+        "status": "WARNING",
+        "message": "SM in BAM header is empty. Validation status: WARNING. NOTE that submitterSampleId in metadata JSON will be used in the header of ARGO uniformly aligned sequences."
+      },
+      {
+        "checker": "c660_metadata_in_bam_rg_header",
+        "status": "WARNING",
+        "message": "Information (excluding ID and SM which are validated elsewhere) in BAM @RG header does NOT match experiment/read group info in the metadata JSON. NOTE that information in the metadata JSON document will be kept and used in ICGC ARGO uniform analysis while unmatched info in BAM header will be discarded. Details of the difference: [BAM anon_chr1_complete.bam @RG QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 vs QCMG_22f321c6-ff3f-11e4-8e8b-f8a0800c69f0_130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:866d65b8-ff3f-11e4-b413-bdbd66be296d:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1 vs QCMG_866d65b8-ff3f-11e4-b413-bdbd66be296d_130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:91ce15f2-ff3e-11e4-9d73-85b485b025f8:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1 vs QCMG_91ce15f2-ff3e-11e4-9d73-85b485b025f8_130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:dd3f83b8-ff3e-11e4-81af-910d0943bdb6:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1 vs QCMG_dd3f83b8-ff3e-11e4-81af-910d0943bdb6_130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1_8043985)]"
+      },
+      {
+        "checker": "c670_rg_is_paired_in_bam",
+        "status": "PASS",
+        "message": "Read group pair status in BAM check: PASS"
+      },
+      {
+        "checker": "c680_repeated_read_names_per_group_in_bam",
+        "status": "PASS",
+        "message": "Repeated Read names within Read groups in BAM not found: PASS"
+      },
+      {
+        "checker": "c681_fileSize_match",
+        "status": "PASS",
+        "message": "The fileSize calculated from the sequencing files matches the info provided in metadata JSON: PASS"
+      },
+      {
+        "checker": "c683_fileMd5sum_match",
+        "status": "SKIPPED",
+        "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"
+      }
+    ]
+  }
+}
diff --git a/validate-seqtools/tests/test-job-bam.json b/validate-seqtools/tests/test-job-bam.json
index 17bddb6..696a4a8 100644
--- a/validate-seqtools/tests/test-job-bam.json
+++ b/validate-seqtools/tests/test-job-bam.json
@@ -1,5 +1,5 @@
 {
     "json_file": "input/anon_chr1_complete.json",
     "files": ["input/anon_chr1_complete.bam"],
-    "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl"
-}
\ No newline at end of file
+    "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl"
+}

From 88b2c4763d4dcfb55940c4752563e26470086e42 Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Wed, 8 Feb 2023 10:12:25 -0500
Subject: [PATCH 3/5] update build-test-release.yml to use Python 3.10 and setup-python@v3

---
 .github/workflows/build-test-release.yml | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/build-test-release.yml b/.github/workflows/build-test-release.yml
index 0ba985b..e5a5458 100644
--- a/.github/workflows/build-test-release.yml
+++ b/.github/workflows/build-test-release.yml
@@ -19,10 +19,13 @@ jobs:
     steps:
     - uses: actions/checkout@v2
 
-    - name: Set up Python 3.6
-      uses: actions/setup-python@v2
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v3
       with:
-        python-version: 3.6
+        python-version: "3.10"
+        architecture: "x64"
+      env:
+        AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache
 
     - name: Extract package name and version from branch name
       id: get_pkg_info
@@ -80,10 +83,13 @@ jobs:
     steps:
     - uses: actions/checkout@v2
 
-    - name: Set up Python 3.6
-      uses: actions/setup-python@v2
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v3
       with:
-        python-version: 3.6
+        python-version: "3.10"
+        architecture: "x64"
+      env:
+        AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache
 
     - name: Install dependencies
       run: |

From 8aae37100931b866e28704cac2e4d66753584c6c Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Thu, 9 Feb 2023 15:58:01 -0500
Subject: [PATCH 4/5] bump `seq-tools` ver and add test scenario
 `test-job-bam-w-skips`

---
 validate-seqtools/Dockerfile                  |   3 +-
 validate-seqtools/main.nf                     |   8 +-
 validate-seqtools/main.py                     |   2 +-
 validate-seqtools/tests/checker.nf            |   8 +-
 ....PASS-with-WARNING-and-SKIPPED-check.jsonl | 162 +-----------------
 .../tests/test-job-bam-w-skips.json           |   6 +
 validate-seqtools/tests/test-job-bam.json     |   2 +-
 7 files changed, 21 insertions(+), 170 deletions(-)
 create mode 100644 validate-seqtools/tests/test-job-bam-w-skips.json

diff --git a/validate-seqtools/Dockerfile b/validate-seqtools/Dockerfile
index 5f8a7c9..b8a0361 100644
--- a/validate-seqtools/Dockerfile
+++ b/validate-seqtools/Dockerfile
@@ -37,8 +37,7 @@ RUN cd /tmp &&\
 
 # Install seq-tools
 
-#RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.4
-RUN pip install git+https://github.com/icgc-argo/seq-tools.git@add-parallel
+RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.4
 #RUN git clone https://github.com/icgc-argo/seq-tools.git@1.1.0 &&\
 #    git clone https://github.com/icgc-argo/seq-tools.git@1.1.0
 #    cd seq-tools &&\
diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf
index 88871c6..7583315 100755
--- a/validate-seqtools/main.nf
+++ b/validate-seqtools/main.nf
@@ -46,7 +46,7 @@ params.publish_dir = ""  // set to empty string will disable publishDir
 
 // tool specific parmas go here, add / change as needed
 params.json_file = ""
-params.skippable_tests = ["c683","c685"]
+params.skippable_tests = []
 params.files = ""
 
 
@@ -60,6 +60,7 @@ process validateSeqtools {
   input:  // input, make update as needed
     path json_file
     path files
+    val skippable_tests
 
   output:  // output, make update as needed
     path "validation_report.*.jsonl", emit: validation_log
@@ -72,7 +73,7 @@ process validateSeqtools {
     cp ${json_file} local_copy
     python3 /tools/main.py \
       -j local_copy \
-      -k ${params.skippable_tests.join(" ")} \
+      -k ${skippable_tests.join(" ")} \
       -t ${params.cpus} \
       > seq-tools.log 2>&1
 
@@ -97,6 +98,7 @@ process validateSeqtools {
 workflow {
   validateSeqtools(
     file(params.json_file),
-    Channel.fromPath(params.files).collect()
+    Channel.fromPath(params.files).collect(),
+    params.skippable_tests
   )
 }
diff --git a/validate-seqtools/main.py b/validate-seqtools/main.py
index 6546341..6c431db 100755
--- a/validate-seqtools/main.py
+++ b/validate-seqtools/main.py
@@ -37,7 +37,7 @@ def main():
     parser = argparse.ArgumentParser(description='Tool: validate-seqtools')
     parser.add_argument('-j', '--json-file', dest='json_file', type=str,
                         help='JSON file containing molecular data to be validated', required=True)
-    parser.add_argument('-k', '--skippable_tests', dest='skippable_tests', nargs="+",default=[],
+    parser.add_argument('-k', '--skippable_tests', dest='skippable_tests', nargs="*",default=[],
                         help='Tests to skip')
     parser.add_argument('-t', '--threads', dest='threads', default=1,
                         help='threads to speed up operations')    
diff --git a/validate-seqtools/tests/checker.nf b/validate-seqtools/tests/checker.nf
index 770bb44..67bea51 100755
--- a/validate-seqtools/tests/checker.nf
+++ b/validate-seqtools/tests/checker.nf
@@ -42,6 +42,7 @@ params.container_registry = ""
 params.container_version = ""
 params.container = ""
 
+params.skippable_tests=[]
 
 include { validateSeqtools } from '../main'
 
@@ -70,11 +71,13 @@ workflow checker {
     input_json
     input_files
     expected_output
+    skippable_tests
 
   main:
     validateSeqtools(
       input_json,
-      input_files
+      input_files,
+      skippable_tests
     )
 
     file_smart_diff(
@@ -88,6 +91,7 @@ workflow {
   checker(
     file(params.json_file),
     Channel.fromPath(params.files).collect(),
-    file(params.expected_output)
+    file(params.expected_output),
+    params.skippable_tests
   )
 }
diff --git a/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl
index d8c8234..ea6be96 100644
--- a/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl
+++ b/validate-seqtools/tests/expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl
@@ -1,161 +1 @@
-{
-  "tool": {
-    "name": "seq-tools",
-    "version": "1.2.4"
-  },
-  "metadata_file": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5/local_copy",
-  "data_dir": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/6e/81222b6d743166c0774cbed4414ae5",
-  "started_at": "2023-02-07T20:21:57.058Z",
-  "ended_at": "2023-02-07T20:21:57.278Z",
-  "validation": {
-    "status": "PASS-with-WARNING-and-SKIPPED-check",
-    "message": "Please see individual checks for details",
-    "checks": [
-      {
-        "checker": "c110_rg_id_uniqueness",
-        "status": "PASS",
-        "message": "Read group ID uniqueness check status: PASS"
-      },
-      {
-        "checker": "c120_permissible_char_in_rg_id",
-        "status": "PASS",
-        "message": "Read group ID permissible character check status: PASS"
-      },
-      {
-        "checker": "c130_one_sample",
-        "status": "PASS",
-        "message": "One and only one sample check status: PASS"
-      },
-      {
-        "checker": "c140_platform_unit_uniqueness",
-        "status": "PASS",
-        "message": "Platform unit uniqueness check status: PASS"
-      },
-      {
-        "checker": "c150_rg_count_match",
-        "status": "PASS",
-        "message": "Read groups count check status: PASS"
-      },
-      {
-        "checker": "c160_file_r1_r2_check",
-        "status": "PASS",
-        "message": "Fields file_r1 and file_r2 check status: PASS"
-      },
-      {
-        "checker": "c170_fq_uniqueness_in_rgs",
-        "status": "PASS",
-        "message": "FASTQ uniqueness in read groups check status: PASS"
-      },
-      {
-        "checker": "c180_file_uniqueness",
-        "status": "PASS",
-        "message": "Files uniqueness check in files section status: PASS"
-      },
-      {
-        "checker": "c190_no_extra_files",
-        "status": "PASS",
-        "message": "No extra files check status: PASS"
-      },
-      {
-        "checker": "c200_rg_id_in_bam_uniqueness",
-        "status": "PASS",
-        "message": "'read_group_id_in_bam' uniqueness check status: PASS"
-      },
-      {
-        "checker": "c210_no_path_in_filename",
-        "status": "PASS",
-        "message": "No path in fileName check in 'files' section status: PASS"
-      },
-      {
-        "checker": "c220_no_rg_id_in_bam_for_fq",
-        "status": "PASS",
-        "message": "'read_group_id_in_bam' not populated for FASTQ check: PASS"
-      },
-      {
-        "checker": "c230_files_info_data_category",
-        "status": "PASS",
-        "message": "Field 'info.data_category' is found populated with 'Sequencing Reads'. Validation status: PASS"
-      },
-      {
-        "checker": "c240_submitter_rg_id_collide_with_rg_id_in_bam",
-        "status": "PASS",
-        "message": "For any read group, when 'read_group_id_in_bam' is not populated, 'submitter_read_group_id' must NOT be the same as 'read_group_id_in_bam' of another read group from the same BAM file. Validation result: PASS"
-      },
-      {
-        "checker": "c250_file_data_type",
-        "status": "PASS",
-        "message": "Field 'dataType' is found populated with 'Submitted Reads'. Validation status: PASS"
-      },
-      {
-        "checker": "c260_filename_pattern",
-        "status": "PASS",
-        "message": "'fileName' matches expected pattern '^[A-Za-z0-9]{1}[A-Za-z0-9_\\.\\-]*\\.(bam|fq\\.gz|fastq\\.gz|fq\\.bz2|fastq\\.bz2)$' in 'files' section. Validation status: PASS"
-      },
-      {
-        "checker": "c605_all_files_accessible",
-        "status": "PASS",
-        "message": "All data files accessible check: PASS"
-      },
-      {
-        "checker": "c608_bam_sanity",
-        "status": "PASS",
-        "message": "BAM sanity check by samtools quickcheck. Validation result: PASS"
-      },
-      {
-        "checker": "c609_fastq_sanity",
-        "status": "PASS",
-        "message": "No FASTQ Files to check"
-      },
-      {
-        "checker": "c610_rg_id_in_bam",
-        "status": "PASS",
-        "message": "Read group ID in BAM header check: PASS"
-      },
-      {
-        "checker": "c620_submitter_read_group_id_match",
-        "status": "PASS",
-        "message": "For each read group, when 'read_group_id_in_bam' is not provided, 'submitter_read_group_id' in the metadata JSON must match RG ID in BAM. Validation result: PASS"
-      },
-      {
-        "checker": "c630_rg_id_in_bam_match",
-        "status": "PASS",
-        "message": "'read_group_id_in_bam' in metadata matches RG ID in BAM check: PASS"
-      },
-      {
-        "checker": "c640_one_sm_in_bam_header",
-        "status": "PASS",
-        "message": "One and only one SM in @RG BAM header check: PASS"
-      },
-      {
-        "checker": "c650_sm_in_bam_matches_metadata",
-        "status": "WARNING",
-        "message": "SM in BAM header is empty. Validation status: WARNING. NOTE that submitterSampleId in metadata JSON will be used in the header of ARGO uniformly aligned sequences."
-      },
-      {
-        "checker": "c660_metadata_in_bam_rg_header",
-        "status": "WARNING",
-        "message": "Information (excluding ID and SM which are validated elsewhere) in BAM @RG header does NOT match experiment/read group info in the metadata JSON. NOTE that information in the metadata JSON document will be kept and used in ICGC ARGO uniform analysis while unmatched info in BAM header will be discarded. Details of the difference: [BAM anon_chr1_complete.bam @RG QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 vs QCMG_22f321c6-ff3f-11e4-8e8b-f8a0800c69f0_130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:866d65b8-ff3f-11e4-b413-bdbd66be296d:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1 vs QCMG_866d65b8-ff3f-11e4-b413-bdbd66be296d_130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:91ce15f2-ff3e-11e4-9d73-85b485b025f8:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1 vs QCMG_91ce15f2-ff3e-11e4-9d73-85b485b025f8_130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:dd3f83b8-ff3e-11e4-81af-910d0943bdb6:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1 vs QCMG_dd3f83b8-ff3e-11e4-81af-910d0943bdb6_130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1_8043985)]"
-      },
-      {
-        "checker": "c670_rg_is_paired_in_bam",
-        "status": "PASS",
-        "message": "Read group pair status in BAM check: PASS"
-      },
-      {
-        "checker": "c680_repeated_read_names_per_group_in_bam",
-        "status": "PASS",
-        "message": "Repeated Read names within Read groups in BAM not found: PASS"
-      },
-      {
-        "checker": "c681_fileSize_match",
-        "status": "PASS",
-        "message": "The fileSize calculated from the sequencing files matches the info provided in metadata JSON: PASS"
-      },
-      {
-        "checker": "c683_fileMd5sum_match",
-        "status": "SKIPPED",
-        "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"
-      }
-    ]
-  }
-}
+{"tool": {"name": "seq-tools", "version": "1.2.4"}, "metadata_file": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/7c/89eab7957d2922c8b8379b03dba2db/local_copy", "data_dir": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/7c/89eab7957d2922c8b8379b03dba2db", "started_at": "2023-02-09T20:43:17.966Z", "ended_at": "2023-02-09T20:43:18.170Z", "validation": {"status": "PASS-with-WARNING-and-SKIPPED-check", "message": "Please see individual checks for details", "checks": [{"checker": "c110_rg_id_uniqueness", "status": "PASS", "message": "Read group ID uniqueness check status: PASS"}, {"checker": "c120_permissible_char_in_rg_id", "status": "PASS", "message": "Read group ID permissible character check status: PASS"}, {"checker": "c130_one_sample", "status": "PASS", "message": "One and only one sample check status: PASS"}, {"checker": "c140_platform_unit_uniqueness", "status": "PASS", "message": "Platform unit uniqueness check status: PASS"}, {"checker": "c150_rg_count_match", "status": "PASS", "message": "Read groups count check status: PASS"}, {"checker": "c160_file_r1_r2_check", "status": "PASS", "message": "Fields file_r1 and file_r2 check status: PASS"}, {"checker": "c170_fq_uniqueness_in_rgs", "status": "PASS", "message": "FASTQ uniqueness in read groups check status: PASS"}, {"checker": "c180_file_uniqueness", "status": "PASS", "message": "Files uniqueness check in files section status: PASS"}, {"checker": "c190_no_extra_files", "status": "PASS", "message": "No extra files check status: PASS"}, {"checker": "c200_rg_id_in_bam_uniqueness", "status": "PASS", "message": "'read_group_id_in_bam' uniqueness check status: PASS"}, {"checker": "c210_no_path_in_filename", "status": "PASS", "message": "No path in fileName check in 'files' section status: PASS"}, {"checker": "c220_no_rg_id_in_bam_for_fq", "status": "PASS", "message": "'read_group_id_in_bam' not populated for FASTQ check: PASS"}, 
{"checker": "c230_files_info_data_category", "status": "PASS", "message": "Field 'info.data_category' is found populated with 'Sequencing Reads'. Validation status: PASS"}, {"checker": "c240_submitter_rg_id_collide_with_rg_id_in_bam", "status": "PASS", "message": "For any read group, when 'read_group_id_in_bam' is not populated, 'submitter_read_group_id' must NOT be the same as 'read_group_id_in_bam' of another read group from the same BAM file. Validation result: PASS"}, {"checker": "c250_file_data_type", "status": "PASS", "message": "Field 'dataType' is found populated with 'Submitted Reads'. Validation status: PASS"}, {"checker": "c260_filename_pattern", "status": "PASS", "message": "'fileName' matches expected pattern '^[A-Za-z0-9]{1}[A-Za-z0-9_\\.\\-]*\\.(bam|fq\\.gz|fastq\\.gz|fq\\.bz2|fastq\\.bz2)$' in 'files' section. Validation status: PASS"}, {"checker": "c605_all_files_accessible", "status": "PASS", "message": "All data files accessible check: PASS"}, {"checker": "c608_bam_sanity", "status": "PASS", "message": "BAM sanity check by samtools quickcheck. Validation result: PASS"}, {"checker": "c609_fastq_sanity", "status": "SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}, {"checker": "c610_rg_id_in_bam", "status": "PASS", "message": "Read group ID in BAM header check: PASS"}, {"checker": "c620_submitter_read_group_id_match", "status": "PASS", "message": "For each read group, when 'read_group_id_in_bam' is not provided, 'submitter_read_group_id' in the metadata JSON must match RG ID in BAM. Validation result: PASS"}, {"checker": "c630_rg_id_in_bam_match", "status": "PASS", "message": "'read_group_id_in_bam' in metadata matches RG ID in BAM check: PASS"}, {"checker": "c640_one_sm_in_bam_header", "status": "PASS", "message": "One and only one SM in @RG BAM header check: PASS"}, {"checker": "c650_sm_in_bam_matches_metadata", "status": "WARNING", "message": "SM in BAM header is empty. 
Validation status: WARNING. NOTE that submitterSampleId in metadata JSON will be used in the header of ARGO uniformly aligned sequences."}, {"checker": "c660_metadata_in_bam_rg_header", "status": "WARNING", "message": "Information (excluding ID and SM which are validated elsewhere) in BAM @RG header does NOT match experiment/read group info in the metadata JSON. NOTE that information in the metadata JSON document will be kept and used in ICGC ARGO uniform analysis while unmatched info in BAM header will be discarded. Details of the difference: [BAM anon_chr1_complete.bam @RG QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 vs QCMG_22f321c6-ff3f-11e4-8e8b-f8a0800c69f0_130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:866d65b8-ff3f-11e4-b413-bdbd66be296d:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1 vs QCMG_866d65b8-ff3f-11e4-b413-bdbd66be296d_130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:91ce15f2-ff3e-11e4-9d73-85b485b025f8:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1 vs QCMG_91ce15f2-ff3e-11e4-9d73-85b485b025f8_130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:dd3f83b8-ff3e-11e4-81af-910d0943bdb6:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1 vs QCMG_dd3f83b8-ff3e-11e4-81af-910d0943bdb6_130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1_8043985)]"}, {"checker": "c670_rg_is_paired_in_bam", "status": "PASS", "message": "Read group pair status in BAM check: PASS"}, {"checker": "c680_repeated_read_names_per_group_in_bam", "status": "PASS", "message": "Repeated Read names within Read groups in BAM not found: PASS"}, {"checker": "c681_fileSize_match", "status": 
"SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}, {"checker": "c683_fileMd5sum_match", "status": "SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}]}}
diff --git a/validate-seqtools/tests/test-job-bam-w-skips.json b/validate-seqtools/tests/test-job-bam-w-skips.json
new file mode 100644
index 0000000..2593150
--- /dev/null
+++ b/validate-seqtools/tests/test-job-bam-w-skips.json
@@ -0,0 +1,6 @@
+{
+    "json_file": "input/anon_chr1_complete.json",
+    "files": ["input/anon_chr1_complete.bam"],
+    "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl",
+    "skippable_tests" : ["c681","c683","c609"]
+}
diff --git a/validate-seqtools/tests/test-job-bam.json b/validate-seqtools/tests/test-job-bam.json
index 696a4a8..0a7700f 100644
--- a/validate-seqtools/tests/test-job-bam.json
+++ b/validate-seqtools/tests/test-job-bam.json
@@ -1,5 +1,5 @@
 {
     "json_file": "input/anon_chr1_complete.json",
     "files": ["input/anon_chr1_complete.bam"],
-    "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl"
+    "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl",
 }

From ef2d8a0dd3458098bfd2bbaff57836a10e1553ee Mon Sep 17 00:00:00 2001
From: Linda Xiang <linda.xiang@oicr.on.ca>
Date: Fri, 10 Feb 2023 10:56:54 -0500
Subject: [PATCH 5/5] print error message when providing unskippable checks

---
 validate-seqtools/main.nf                 | 24 +++++++++++++----------
 validate-seqtools/tests/test-job-bam.json |  2 +-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/validate-seqtools/main.nf b/validate-seqtools/main.nf
index 7583315..c06d013 100755
--- a/validate-seqtools/main.nf
+++ b/validate-seqtools/main.nf
@@ -77,17 +77,21 @@ process validateSeqtools {
       -t ${params.cpus} \
       > seq-tools.log 2>&1
 
-    if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then     
-      echo "Payload is INVALID. Please check out details in validation report under: "
-      pwd
-      exit 1
-    elif ls validation_report.UNKNOWN*.jsonl 1> /dev/null 2>&1;
-    then
-      echo "Payload is UNKNOWN. Please check out details in validation report under: "
-      pwd
-      exit 1
+    if ls validation_report.*.jsonl 1> /dev/null 2>&1; then
+      if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then     
+        echo "Payload is INVALID. Please check out details in validation report under: "
+        pwd
+        exit 1
+      elif ls validation_report.UNKNOWN*.jsonl 1> /dev/null 2>&1;
+      then
+        echo "Payload is UNKNOWN. Please check out details in validation report under: "
+        pwd
+        exit 1
+      else
+        echo 0
+      fi
     else
-      echo 0
+      cat seq-tools.log && exit 1
     fi
     """
 }
diff --git a/validate-seqtools/tests/test-job-bam.json b/validate-seqtools/tests/test-job-bam.json
index 0a7700f..ce7d419 100644
--- a/validate-seqtools/tests/test-job-bam.json
+++ b/validate-seqtools/tests/test-job-bam.json
@@ -1,5 +1,5 @@
 {
     "json_file": "input/anon_chr1_complete.json",
     "files": ["input/anon_chr1_complete.bam"],
-    "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl",
+    "expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl"
 }