Skip to content

Commit

Permalink
Merge pull request #67 from icgc-argo/validate-seqtools@0.1.6
Browse files Browse the repository at this point in the history
[release]
  • Loading branch information
edsu7 authored Feb 10, 2023
2 parents b36b549 + ef2d8a0 commit 77b3cff
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 24 deletions.
3 changes: 1 addition & 2 deletions validate-seqtools/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ RUN cd /tmp &&\

# Install seq-tools

RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.3

RUN pip install git+https://github.com/icgc-argo/seq-tools.git@1.2.4
#RUN git clone https://github.com/icgc-argo/seq-tools.git@1.1.0 &&\
# git clone https://github.com/icgc-argo/seq-tools.git@1.1.0
# cd seq-tools &&\
Expand Down
32 changes: 22 additions & 10 deletions validate-seqtools/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
/* this block is auto-generated based on info from pkg.json where */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.1.5'
version = '0.1.6'

container = [
'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.validate-seqtools'
Expand All @@ -46,7 +46,7 @@ params.publish_dir = "" // set to empty string will disable publishDir

// tool specific params go here, add / change as needed
params.json_file = ""
params.skip_md5sum_check = false
params.skippable_tests = []
params.files = ""


Expand All @@ -60,27 +60,38 @@ process validateSeqtools {
input: // input, make update as needed
path json_file
path files
val skippable_tests

output: // output, make update as needed
path "validation_report.*.jsonl", emit: validation_log
path "local_copy", emit: validated_payload

script:
// add and initialize variables here as needed
args_skip_md5sum_check = params.skip_md5sum_check ? "--skip_md5sum_check " : ""

"""
cp ${json_file} local_copy
python3 /tools/main.py \
-j local_copy \
${args_skip_md5sum_check} \
-k ${skippable_tests.join(" ")} \
-t ${params.cpus} \
> seq-tools.log 2>&1
if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then
echo "Payload is INVALID. Please check out details in validation report under: "
pwd
exit 1
if ls validation_report.*.jsonl 1> /dev/null 2>&1; then
if ls validation_report.INVALID*.jsonl 1> /dev/null 2>&1; then
echo "Payload is INVALID. Please check out details in validation report under: "
pwd
exit 1
elif ls validation_report.UNKNOWN*.jsonl 1> /dev/null 2>&1;
then
echo "Payload is UNKNOWN. Please check out details in validation report under: "
pwd
exit 1
else
echo 0
fi
else
exit 0
cat seq-tools.log && exit 1
fi
"""
}
Expand All @@ -91,6 +102,7 @@ process validateSeqtools {
workflow {
validateSeqtools(
file(params.json_file),
Channel.fromPath(params.files).collect()
Channel.fromPath(params.files).collect(),
params.skippable_tests
)
}
18 changes: 12 additions & 6 deletions validate-seqtools/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,21 @@ def main():
parser = argparse.ArgumentParser(description='Tool: validate-seqtools')
parser.add_argument('-j', '--json-file', dest='json_file', type=str,
help='JSON file containing molecular data to be validated', required=True)
parser.add_argument('-k', '--skip_md5sum_check', dest='skip_md5sum', action='store_true',
help='JSON file containing molecular data to be validated')
parser.add_argument('-k', '--skippable_tests', dest='skippable_tests', nargs="*",default=[],
help='Tests to skip')
parser.add_argument('-t', '--threads', dest='threads', default=1,
help='threads to speed up operations')
args = parser.parse_args()

# Check if successful
if args.skip_md5sum :
cmd="seq-tools validate "+args.json_file+" --skip_md5sum_check"
else:
cmd="seq-tools validate "+args.json_file
cmd="seq-tools validate "+args.json_file

if args.skippable_tests:
for test in args.skippable_tests:
cmd+=" -k "+test
if args.threads:
cmd+=" -t "+str(args.threads)

result=subprocess.run(cmd,shell=True)


Expand Down
2 changes: 1 addition & 1 deletion validate-seqtools/pkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "validate-seqtools",
"version": "0.1.5",
"version": "0.1.6",
"description": "Using Seq-tools, validates molecular",
"main": "main.nf",
"deprecated": false,
Expand Down
11 changes: 7 additions & 4 deletions validate-seqtools/tests/checker.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
/* this block is auto-generated based on info from pkg.json where */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.1.5'
version = '0.1.6'

container = [
'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.validate-seqtools'
Expand All @@ -42,10 +42,10 @@ params.container_registry = ""
params.container_version = ""
params.container = ""

params.skippable_tests=[]

include { validateSeqtools } from '../main'


process file_smart_diff {
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"

Expand All @@ -71,11 +71,13 @@ workflow checker {
input_json
input_files
expected_output
skippable_tests

main:
validateSeqtools(
input_json,
input_files
input_files,
skippable_tests
)

file_smart_diff(
Expand All @@ -89,6 +91,7 @@ workflow {
checker(
file(params.json_file),
Channel.fromPath(params.files).collect(),
file(params.expected_output)
file(params.expected_output),
params.skippable_tests
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"tool": {"name": "seq-tools", "version": "1.2.4"}, "metadata_file": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/7c/89eab7957d2922c8b8379b03dba2db/local_copy", "data_dir": "/Users/esu/Desktop/GitHub/icgc-argo/argo-data-submission/validate-seqtools/tests/work/7c/89eab7957d2922c8b8379b03dba2db", "started_at": "2023-02-09T20:43:17.966Z", "ended_at": "2023-02-09T20:43:18.170Z", "validation": {"status": "PASS-with-WARNING-and-SKIPPED-check", "message": "Please see individual checks for details", "checks": [{"checker": "c110_rg_id_uniqueness", "status": "PASS", "message": "Read group ID uniqueness check status: PASS"}, {"checker": "c120_permissible_char_in_rg_id", "status": "PASS", "message": "Read group ID permissible character check status: PASS"}, {"checker": "c130_one_sample", "status": "PASS", "message": "One and only one sample check status: PASS"}, {"checker": "c140_platform_unit_uniqueness", "status": "PASS", "message": "Platform unit uniqueness check status: PASS"}, {"checker": "c150_rg_count_match", "status": "PASS", "message": "Read groups count check status: PASS"}, {"checker": "c160_file_r1_r2_check", "status": "PASS", "message": "Fields file_r1 and file_r2 check status: PASS"}, {"checker": "c170_fq_uniqueness_in_rgs", "status": "PASS", "message": "FASTQ uniqueness in read groups check status: PASS"}, {"checker": "c180_file_uniqueness", "status": "PASS", "message": "Files uniqueness check in files section status: PASS"}, {"checker": "c190_no_extra_files", "status": "PASS", "message": "No extra files check status: PASS"}, {"checker": "c200_rg_id_in_bam_uniqueness", "status": "PASS", "message": "'read_group_id_in_bam' uniqueness check status: PASS"}, {"checker": "c210_no_path_in_filename", "status": "PASS", "message": "No path in fileName check in 'files' section status: PASS"}, {"checker": "c220_no_rg_id_in_bam_for_fq", "status": "PASS", "message": "'read_group_id_in_bam' not populated for FASTQ check: PASS"}, 
{"checker": "c230_files_info_data_category", "status": "PASS", "message": "Field 'info.data_category' is found populated with 'Sequencing Reads'. Validation status: PASS"}, {"checker": "c240_submitter_rg_id_collide_with_rg_id_in_bam", "status": "PASS", "message": "For any read group, when 'read_group_id_in_bam' is not populated, 'submitter_read_group_id' must NOT be the same as 'read_group_id_in_bam' of another read group from the same BAM file. Validation result: PASS"}, {"checker": "c250_file_data_type", "status": "PASS", "message": "Field 'dataType' is found populated with 'Submitted Reads'. Validation status: PASS"}, {"checker": "c260_filename_pattern", "status": "PASS", "message": "'fileName' matches expected pattern '^[A-Za-z0-9]{1}[A-Za-z0-9_\\.\\-]*\\.(bam|fq\\.gz|fastq\\.gz|fq\\.bz2|fastq\\.bz2)$' in 'files' section. Validation status: PASS"}, {"checker": "c605_all_files_accessible", "status": "PASS", "message": "All data files accessible check: PASS"}, {"checker": "c608_bam_sanity", "status": "PASS", "message": "BAM sanity check by samtools quickcheck. Validation result: PASS"}, {"checker": "c609_fastq_sanity", "status": "SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}, {"checker": "c610_rg_id_in_bam", "status": "PASS", "message": "Read group ID in BAM header check: PASS"}, {"checker": "c620_submitter_read_group_id_match", "status": "PASS", "message": "For each read group, when 'read_group_id_in_bam' is not provided, 'submitter_read_group_id' in the metadata JSON must match RG ID in BAM. Validation result: PASS"}, {"checker": "c630_rg_id_in_bam_match", "status": "PASS", "message": "'read_group_id_in_bam' in metadata matches RG ID in BAM check: PASS"}, {"checker": "c640_one_sm_in_bam_header", "status": "PASS", "message": "One and only one SM in @RG BAM header check: PASS"}, {"checker": "c650_sm_in_bam_matches_metadata", "status": "WARNING", "message": "SM in BAM header is empty. 
Validation status: WARNING. NOTE that submitterSampleId in metadata JSON will be used in the header of ARGO uniformly aligned sequences."}, {"checker": "c660_metadata_in_bam_rg_header", "status": "WARNING", "message": "Information (excluding ID and SM which are validated elsewhere) in BAM @RG header does NOT match experiment/read group info in the metadata JSON. NOTE that information in the metadata JSON document will be kept and used in ICGC ARGO uniform analysis while unmatched info in BAM header will be discarded. Details of the difference: [BAM anon_chr1_complete.bam @RG QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 vs QCMG_22f321c6-ff3f-11e4-8e8b-f8a0800c69f0_130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:866d65b8-ff3f-11e4-b413-bdbd66be296d:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1 vs QCMG_866d65b8-ff3f-11e4-b413-bdbd66be296d_130711_7001243_0176_BD2B86ACXX.lane_8.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:91ce15f2-ff3e-11e4-9d73-85b485b025f8:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1 vs QCMG_91ce15f2-ff3e-11e4-9d73-85b485b025f8_130711_7001243_0176_BD2B86ACXX.lane_5.CTTGTA.1_8043985)]; [BAM anon_chr1_complete.bam @RG QCMG:dd3f83b8-ff3e-11e4-81af-910d0943bdb6:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1: (PU: QCMG:130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1 vs QCMG_dd3f83b8-ff3e-11e4-81af-910d0943bdb6_130711_7001243_0176_BD2B86ACXX.lane_6.CTTGTA.1_8043985)]"}, {"checker": "c670_rg_is_paired_in_bam", "status": "PASS", "message": "Read group pair status in BAM check: PASS"}, {"checker": "c680_repeated_read_names_per_group_in_bam", "status": "PASS", "message": "Repeated Read names within Read groups in BAM not found: PASS"}, {"checker": "c681_fileSize_match", "status": 
"SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}, {"checker": "c683_fileMd5sum_match", "status": "SKIPPED", "message": "This check was not performed as instructed by the command line option. Status: SKIPPED"}]}}
6 changes: 6 additions & 0 deletions validate-seqtools/tests/test-job-bam-w-skips.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"json_file": "input/anon_chr1_complete.json",
"files": ["input/anon_chr1_complete.bam"],
"expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING-and-SKIPPED-check.jsonl",
"skippable_tests" : ["c681","c683","c609"]
}
2 changes: 1 addition & 1 deletion validate-seqtools/tests/test-job-bam.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"json_file": "input/anon_chr1_complete.json",
"files": ["input/anon_chr1_complete.bam"],
"expected_output": "expected/anon_chr1_rnaseq.validation_report.PASS-with-WARNING.jsonl"
}
}

0 comments on commit 77b3cff

Please sign in to comment.