-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from icgc-argo/differentiate-json@0.1.0
[release]
- Loading branch information
Showing
17 changed files
with
745 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.gitignore | ||
.nextflow* | ||
tests | ||
work | ||
outdir |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
FROM ubuntu:18.04

LABEL org.opencontainers.image.source="https://github.com/icgc-argo/argo-data-submission" \
      org.opencontainers.image.authors="Edmund Su (edmund.su@oicr.on.ca)" \
      org.opencontainers.image.title="ICGC ARGO Data download image"

# System packages: `update` and `install` share a single layer so a stale cached
# package index is never reused, and the index is deleted in that same layer so
# it does not end up in the image. DEBIAN_FRONTEND is set inline only, so it
# does not leak into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        automake \
        autotools-dev \
        curl \
        libbz2-dev \
        liblzma-dev \
        libncurses5-dev \
        libtool \
        libz-dev \
        m4 \
        pkg-config \
        python3-dev \
        python3-pip \
        software-properties-common && \
    rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir numpy

# Unprivileged runtime user. Created before the scripts are copied so that this
# layer stays cached when only the Python sources change.
RUN groupadd -g 1000 ubuntu && \
    useradd -l -u 1000 -g ubuntu ubuntu && \
    install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu

ENV PATH="/tools:${PATH}"

# COPY creates /tools itself (the destination ends with a slash), so no separate
# `mkdir` layer is needed.
COPY *.py /tools/

USER ubuntu

ENTRYPOINT ["/usr/bin/env"]

CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Package differentiate-json | ||
|
||
|
||
Compares the user-provided JSON against the pipeline's auto-generated JSON and reports the differences. The fields present in `auto_generated.json` define which fields are checked in the user-provided JSON. | |
|
||
## Inputs | ||
|
||
Two JSON files. | ||
|
||
|
||
## Outputs | ||
|
||
#### Successful instance | ||
Step succeeds and no `ERRORS.log` is generated | ||
|
||
#### Example of ERRORS.log | ||
``` | ||
Differing values found when comparing 'samples/donor/submitterDonorId' : user - EVIL_TEST_DONOR vs auto_gen - TEST_DONOR | ||
Differing values found when comparing 'read_groups/read_group_id_in_bam' : user - QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.GCACAG.1 vs auto_gen - QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1 | ||
'read_groups/read_length_r1' not found in user generated JSON | ||
'read_groups/read_length_r2' not found in user generated JSON | ||
``` | ||
|
||
#### Test run | ||
`nextflow run checker.nf -params-file test-job-decrypt.json` | ||
|
||
#### IRL run | ||
``` | ||
nextflow run main.nf -params-file tests/test-job-decrypt.json | ||
``` | ||
|
||
|
||
## Usage | ||
|
||
### Run the package directly | ||
|
||
With inputs prepared, you should be able to run the package directly using the following command. | ||
Please replace the params file with a real one (with all required parameters and input files). Example | ||
params file(s) can be found in the `tests` folder. | ||
|
||
``` | ||
nextflow run icgc-argo/argo-data-submission/differentiate-json/main.nf -r differentiate-json.v0.1.0 -params-file <your-params-json-file> | ||
``` | ||
|
||
### Import the package as a dependency | ||
|
||
To import this package into another package as a dependency, please follow these steps at the | ||
importing package side: | ||
|
||
1. add this package's URI `github.com/icgc-argo/argo-data-submission/differentiate-json@0.1.0` in the `dependencies` list of the `pkg.json` file | ||
2. run `wfpm install` to install the dependency | ||
3. add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo/argo-data-submission/differentiate-json@0.1.0/main.nf` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
/* | ||
Copyright (C) 2022, icgc-argo | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU Affero General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU Affero General Public License for more details. | ||
You should have received a copy of the GNU Affero General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
Authors: | ||
Edmund Su | ||
*/ | ||
|
||
/********************************************************************/ | ||
/* this block is auto-generated based on info from pkg.json where */ | ||
/* changes can be made if needed, do NOT modify this block manually */ | ||
nextflow.enable.dsl = 2 | ||
version = '0.1.0' // package version | ||
|
||
container = [ | ||
'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.differentiate-json' | ||
] | ||
default_container_registry = 'ghcr.io' | ||
/********************************************************************/ | ||
|
||
|
||
// ---- universal params ----

// resources requested by the process
params.cpus = 1
params.mem = 1  // GB

// an empty string disables publishDir
params.publish_dir = ""

// container overrides; an empty string falls back to the package defaults
params.container = ""
params.container_registry = ""
params.container_version = ""

// ---- tool specific params, add / change as needed ----
params.auto_generated_json = "NO_FILE"
params.user_generated_json = "NO_FILE"
|
||
// Runs /tools/main.py on the two staged JSON files: the user generated JSON is
// passed as -a and the auto generated JSON as -b.
process differentiateJson {
  // resources come straight from the params
  cpus params.cpus
  memory "${params.mem} GB"

  // explicit params.container wins; otherwise the image is looked up by registry
  // (default registry when none is given) and tagged with the container version
  // (package version when none is given)
  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"

  // publishing is only enabled when params.publish_dir is non-empty
  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false

  input:  // input, make update as needed
    path user_generated_json
    path auto_generated_json

  script:
    """
    python3.6 /tools/main.py \\
      -a ${user_generated_json} \\
      -b ${auto_generated_json}
    """
}
|
||
|
||
|
||
|
||
// Entry point of this main script, so the package can be run directly without cloning the repo:
//   nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> -params-file xxx
workflow {
  def user_json = file(params.user_generated_json)
  def auto_json = file(params.auto_generated_json)

  differentiateJson(user_json, auto_json)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
""" | ||
Copyright (C) 2022, icgc-argo | ||
This program is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU Affero General Public License as published by | ||
the Free Software Foundation, either version 3 of the License, or | ||
(at your option) any later version. | ||
This program is distributed in the hope that it will be useful, | ||
but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
GNU Affero General Public License for more details. | ||
You should have received a copy of the GNU Affero General Public License | ||
along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
Authors: | ||
Edmund Su | ||
""" | ||
|
||
import os | ||
import sys | ||
import argparse | ||
import json | ||
import numpy | ||
|
||
|
||
def main():
    """
    Command-line entry point of the differentiate-json tool.

    Loads the auto generated JSON (-b) and the user provided JSON (-a), compares
    them with check_values and writes the collected messages to WARNINGS.log /
    ERRORS.log in the current working directory. A log file is only created
    when there is at least one message of that kind.

    Raises:
        ValueError: when at least one error was collected (after ERRORS.log
            has been written).
    """
    arg_parser = argparse.ArgumentParser(description='differentiate JSON metadata payload for SONG upload')
    arg_parser.add_argument('-b', '--auto_generated', dest="auto_generated", help="auto generated json", required=True)
    arg_parser.add_argument('-a', '--user_provided',dest="user_provided", help="user generated json", required=True)
    args = arg_parser.parse_args()

    # Load order matches the original: auto generated first, then user provided.
    loaded = []
    for json_path in (args.auto_generated, args.user_provided):
        with open(json_path) as handle:
            loaded.append(json.load(handle))
    ag_dict, up_dict = loaded

    # Keys that are never compared between the two documents.
    exceptions = [
        "sample_barcode",
        "insert_size",
        "submitter_read_group_id",
        "legacyAnalysisId"
    ]
    warnings = []
    errors = []
    check_values(up_dict, ag_dict, warnings, errors, exceptions, [])

    if warnings:
        with open('WARNINGS.log', 'w') as log:
            log.writelines(line + "\n" for line in warnings)

    if errors:
        with open('ERRORS.log', 'w') as log:
            log.writelines(line + "\n" for line in errors)
        raise ValueError(str(len(errors))+" errors detected. Please refer to ERRORS.log" )
|
||
|
||
|
||
def check_values(json_a, json_b, warnings: list, errors: list, exceptions: list, nested_key: list = None):
    """
    Recursively compare the user provided JSON (json_a) against the auto
    generated JSON (json_b). The comparison is driven by the keys of json_b:
    keys that only exist in json_a are ignored.

    Args:
        json_a: user provided mapping.
        json_b: auto generated mapping; its keys define what is checked.
        warnings: list collecting warning messages (passed through unchanged
            by this function and returned).
        errors: list that error messages are appended to.
        exceptions: key names that are skipped at every nesting level.
        nested_key: keys leading to the current nesting level, used to build
            the '/'-joined path shown in messages. It is not modified.

    Returns:
        The tuple (warnings, errors), i.e. the same list objects passed in.
    """
    if nested_key is None:
        nested_key = []

    for key in json_b:
        if key in exceptions:
            continue

        # A fresh list per key: the caller's path is never mutated, so there is
        # no push/pop bookkeeping that could get out of balance on early exits.
        key_path = nested_key + [key]
        label = "/".join(key_path)

        if key not in json_a:
            errors.append("'" + label + "' not found in user generated JSON")
            continue

        user_value = json_a[key]
        auto_value = json_b[key]

        # A None on the auto generated side means "nothing to compare against",
        # whatever the type of the user value is.
        if auto_value is None:
            continue

        if isinstance(user_value, dict) and isinstance(auto_value, dict):
            check_values(user_value, auto_value, warnings, errors, exceptions, key_path)
        elif isinstance(user_value, list) and isinstance(auto_value, list):
            _check_list_values(user_value, auto_value, warnings, errors, exceptions, key_path)
        elif user_value != auto_value:
            # Also covers type mismatches (e.g. dict vs string), which are
            # reported as differing values instead of being recursed into.
            errors.append(_differing_values_msg(label, user_value, auto_value))

    return (warnings, errors)


def _check_list_values(user_list, auto_list, warnings, errors, exceptions, key_path):
    """Compare two lists element by element; dict elements are compared recursively."""
    label = "/".join(key_path)

    if len(user_list) != len(auto_list):
        errors.append(
            "Differing list length found in '" + label + "' : user - "
            + str(len(user_list)) + " vs auto_gen - " + str(len(auto_list))
        )
        return

    for user_item, auto_item in zip(user_list, auto_list):
        if auto_item is None:
            continue
        if isinstance(user_item, dict) and isinstance(auto_item, dict):
            # List indices are not part of the reported path.
            check_values(user_item, auto_item, warnings, errors, exceptions, key_path)
        elif user_item != auto_item:
            errors.append(_differing_values_msg(label, user_item, auto_item))


def _differing_values_msg(label, user_value, auto_value):
    """Build the 'Differing values' error message for one compared path."""
    return (
        "Differing values found when comparing '" + label + "' : user - "
        + str(user_value) + " vs auto_gen - " + str(auto_value)
    )
|
||
# Run the comparison only when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
// Run processes in Docker, with the container user set to the invoking
// user's uid:gid.
docker.enabled = true
docker.runOptions = '-u \$(id -u):\$(id -g)'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{ | ||
"name": "differentiate-json", | ||
"version": "0.1.0", | ||
"description": "Compares user provided JSON against pipeline autogenerated JSON", | ||
"main": "main.nf", | ||
"deprecated": false, | ||
"keywords": [ | ||
"bioinformatics" | ||
], | ||
"repository": { | ||
"type": "git", | ||
"url": "https://github.com/icgc-argo/argo-data-submission.git" | ||
}, | ||
"container": { | ||
"registries": [ | ||
{ | ||
"registry": "ghcr.io", | ||
"type": "docker", | ||
"org": "icgc-argo", | ||
"default": true | ||
} | ||
] | ||
}, | ||
"dependencies": [], | ||
"devDependencies": [], | ||
"contributors": [ | ||
{ | ||
"name": "Edmund Su", | ||
"email": "edmund.su@oicr.on.ca" | ||
} | ||
], | ||
"license": "GNU Affero General Public License v3", | ||
"bugReport": "https://github.com/icgc-argo/argo-data-submission/issues", | ||
"homepage": "https://github.com/icgc-argo/argo-data-submission#readme" | ||
} |
Oops, something went wrong.