Skip to content

Commit

Permalink
Merge pull request #10 from icgc-argo/differentiate-json@0.1.0
Browse files Browse the repository at this point in the history
[release]
  • Loading branch information
edsu7 authored Jun 23, 2022
2 parents caeafa0 + b4308ce commit a5829a4
Show file tree
Hide file tree
Showing 17 changed files with 745 additions and 0 deletions.
5 changes: 5 additions & 0 deletions differentiate-json/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.gitignore
.nextflow*
tests
work
outdir
27 changes: 27 additions & 0 deletions differentiate-json/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Runtime image for the differentiate-json package: Ubuntu 18.04 with Python 3,
# numpy and the package's Python scripts placed on PATH under /tools.
FROM ubuntu:18.04

# key="value" form; the space-separated LABEL form is legacy.
LABEL org.opencontainers.image.source="https://github.com/icgc-argo/argo-data-submission" \
      org.opencontainers.image.authors="Edmund Su (edmund.su@oicr.on.ca)" \
      org.opencontainers.image.title="ICGC ARGO Data download image"

# update + install + cleanup happen in one layer so the apt index is never stale
# and never shipped in the image. DEBIAN_FRONTEND is set inline so it does not
# leak into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        automake \
        autotools-dev \
        curl \
        libbz2-dev \
        liblzma-dev \
        libncurses5-dev \
        libtool \
        libz-dev \
        m4 \
        pkg-config \
        python3-dev \
        python3-pip \
        software-properties-common && \
    rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir numpy

# LANG=C.UTF-8 makes Python's default text encoding UTF-8 instead of ASCII.
ENV PATH="/tools:${PATH}" \
    LANG=C.UTF-8

# COPY creates /tools when it does not exist, so no separate mkdir is needed.
COPY *.py /tools/

# Unprivileged user with a fixed uid/gid and a home directory it owns.
RUN groupadd -g 1000 ubuntu && \
    useradd -l -u 1000 -g ubuntu ubuntu && \
    install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu

USER ubuntu

ENTRYPOINT ["/usr/bin/env"]

CMD ["/bin/bash"]
52 changes: 52 additions & 0 deletions differentiate-json/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Package differentiate-json


Compares a user-provided JSON against the pipeline's auto-generated JSON and reports the differences. The fields present in the auto-generated JSON (`auto_generated.json`) determine which fields are checked in the user-provided JSON.

## Inputs

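Two JSON files: the user-provided JSON (`user_generated_json`) and the pipeline's auto-generated JSON (`auto_generated_json`).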


## Outputs

#### Successful instance
Step succeeds and no `ERRORS.log` is generated

#### Example of ERRORS.log
```
Differing values found when comparing 'samples/donor/submitterDonorId' : user - EVIL_TEST_DONOR vs auto_gen - TEST_DONOR
Differing values found when comparing 'read_groups/read_group_id_in_bam' : user - QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.GCACAG.1 vs auto_gen - QCMG:22f321c6-ff3f-11e4-8e8b-f8a0800c69f0:130711_7001243_0176_BD2B86ACXX.lane_7.CTTGTA.1
'read_groups/read_length_r1' not found in user generated JSON
'read_groups/read_length_r2' not found in user generated JSON
```

#### Test run
`nextflow run checker.nf -params-file test-job-decrypt.json`

#### IRL run
```
nextflow run main.nf -params-file tests/test-job-decrypt.json
```


## Usage

### Run the package directly

With inputs prepared, you should be able to run the package directly using the following command.
Please replace the params file with a real one (with all required parameters and input files). Example
params file(s) can be found in the `tests` folder.

```
nextflow run icgc-argo/argo-data-submission/differentiate-json/main.nf -r differentiate-json.v0.1.0 -params-file <your-params-json-file>
```

### Import the package as a dependency

To import this package into another package as a dependency, please follow these steps at the
importing package side:

1. add this package's URI `github.com/icgc-argo/argo-data-submission/differentiate-json@0.1.0` in the `dependencies` list of the `pkg.json` file
2. run `wfpm install` to install the dependency
3. add the `include` statement in the main Nextflow script to import the dependent package from this path: `./wfpr_modules/github.com/icgc-argo/argo-data-submission/differentiate-json@0.1.0/main.nf`
79 changes: 79 additions & 0 deletions differentiate-json/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env nextflow

/*
Copyright (C) 2022, icgc-argo
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Authors:
Edmund Su
*/

/********************************************************************/
/* this block is auto-generated based on info from pkg.json where */
/* changes can be made if needed, do NOT modify this block manually */
nextflow.enable.dsl = 2
version = '0.1.0' // package version

container = [
'ghcr.io': 'ghcr.io/icgc-argo/argo-data-submission.differentiate-json'
]
default_container_registry = 'ghcr.io'
/********************************************************************/


// universal params go here
params.container_registry = ""
params.container_version = ""
params.container = ""

params.cpus = 1
params.mem = 1 // GB
params.publish_dir = "" // set to empty string will disable publishDir


// tool specific params go here, add / change as needed
params.user_generated_json="NO_FILE"
params.auto_generated_json="NO_FILE"

// Runs /tools/main.py on a user-provided JSON and an auto-generated JSON.
// No `output:` block is declared here, so the process emits no output channels.
process differentiateJson {
// Image: params.container when set, otherwise the entry of the `container` map for
// params.container_registry (or the default registry). Tag: params.container_version
// when set, otherwise the package version.
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
// Publish (copy mode) under <publish_dir>/<process name with ':' replaced by '_'>;
// publishing is disabled when params.publish_dir is empty.
publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false

// Resource requests come straight from params; memory is expressed in GB.
cpus params.cpus
memory "${params.mem} GB"

input: // input, make update as needed
path user_generated_json
path auto_generated_json

script:
// -a receives the user-provided JSON, -b the auto-generated JSON.
"""
python3.6 /tools/main.py \\
-a ${user_generated_json} \\
-b ${auto_generated_json}
"""
}




// This provides an entry point for this main script, so it can be run directly without cloning the repo,
// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> -params-file xxx
// Both params are wrapped with file() before being passed to the process as its two path inputs.
workflow {
differentiateJson(
file(params.user_generated_json),
file(params.auto_generated_json)
)
}
128 changes: 128 additions & 0 deletions differentiate-json/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Copyright (C) 2022, icgc-argo
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Authors:
Edmund Su
"""

import os
import sys
import argparse
import json
import numpy


def main():
    """
    Command-line entry point of the differentiate-json tool.

    Loads the user-provided JSON (-a/--user_provided) and the auto-generated
    JSON (-b/--auto_generated), compares them with check_values() and writes
    the findings to the current working directory:

    * WARNINGS.log - one warning per line, only written when warnings exist
    * ERRORS.log   - one error per line, only written when errors exist

    Raises:
        ValueError: when at least one error was recorded (raised after
            ERRORS.log has been written).
    """

    parser = argparse.ArgumentParser(description='differentiate JSON metadata payload for SONG upload')
    parser.add_argument('-b', '--auto_generated', dest="auto_generated", help="auto generated json", required=True)
    parser.add_argument('-a', '--user_provided', dest="user_provided", help="user generated json", required=True)

    results = parser.parse_args()

    # Read as UTF-8 explicitly: open() otherwise uses the locale's preferred
    # encoding, which may not be UTF-8 (e.g. when no locale is configured).
    with open(results.auto_generated, encoding="utf-8") as json_file:
        ag_dict = json.load(json_file)
    with open(results.user_provided, encoding="utf-8") as json_file:
        up_dict = json.load(json_file)

    warnings = []
    errors = []
    # Keys that are never compared between the two payloads.
    exceptions = [
        "sample_barcode",
        "insert_size",
        "submitter_read_group_id",
        "legacyAnalysisId"
    ]
    # The user JSON is checked against the keys of the auto-generated JSON.
    check_values(up_dict, ag_dict, warnings, errors, exceptions, [])

    if warnings:
        with open('WARNINGS.log', 'w', encoding="utf-8") as f:
            f.writelines(warning + "\n" for warning in warnings)

    if errors:
        with open('ERRORS.log', 'w', encoding="utf-8") as f:
            f.writelines(error + "\n" for error in errors)
        # Fail the step only after the log file is complete and closed.
        raise ValueError(str(len(errors))+" errors detected. Please refer to ERRORS.log" )



def check_values(json_a, json_b, warnings: list, errors: list, exceptions: list, nested_key: list = None):
    """
    Recursively check the user JSON object against the auto-generated one.

    Only keys present in ``json_b`` are inspected; extra keys in ``json_a``
    are ignored. Every finding is appended to ``errors`` as a message that
    names the '/'-joined key path (list positions are not part of the path).

    Args:
        json_a: user-provided JSON object (dict).
        json_b: auto-generated JSON object (dict) defining the expected keys.
        warnings: list returned unchanged; nothing is appended to it here.
        errors: list that receives one message per difference found.
        exceptions: key names that are skipped at every nesting level.
        nested_key: key path leading to ``json_a``/``json_b``; it is extended
            while descending and restored before returning. Defaults to an
            empty path.

    Returns:
        tuple: ``(warnings, errors)`` - the same list objects that were passed in.
    """
    if nested_key is None:
        nested_key = []

    for key in json_b:
        # Keys on the exception list are never compared.
        if key in exceptions:
            continue

        nested_key.append(key)
        try:
            if key not in json_a:
                errors.append("'" + "/".join(nested_key) + "' not found in user generated JSON")
            else:
                _compare_values(json_a[key], json_b[key], warnings, errors, exceptions, nested_key)
        finally:
            # Single pop per push keeps the shared path balanced on every exit.
            nested_key.pop()

    return (warnings, errors)


def _compare_values(user_value, auto_value, warnings, errors, exceptions, nested_key):
    """Compare one user value with its auto-generated counterpart at the path held in nested_key."""
    # A null on the auto-generated side carries no expectation to check.
    if auto_value is None:
        return

    path = "/".join(nested_key)

    # Recurse only when BOTH sides are objects; a type mismatch falls through
    # to the plain inequality check below instead of crashing.
    if isinstance(user_value, dict) and isinstance(auto_value, dict):
        check_values(user_value, auto_value, warnings, errors, exceptions, nested_key)
        return

    if isinstance(user_value, list) and isinstance(auto_value, list):
        if len(user_value) != len(auto_value):
            errors.append(
                "Differing list length found in '" + path + "' : user - "
                + str(len(user_value)) + " vs auto_gen - " + str(len(auto_value))
            )
            return
        # Same length: entries are compared position by position.
        for user_item, auto_item in zip(user_value, auto_value):
            _compare_values(user_item, auto_item, warnings, errors, exceptions, nested_key)
        return

    if user_value != auto_value:
        errors.append(
            "Differing values found when comparing '" + path + "' : user - "
            + str(user_value) + " vs auto_gen - " + str(auto_value)
        )

if __name__ == "__main__":
main()
4 changes: 4 additions & 0 deletions differentiate-json/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Docker execution settings for this package.
docker {
// Run every process inside its Docker container.
enabled = true
// Extra `docker run` options: start the container with the uid:gid reported by `id` on the host.
runOptions = '-u \$(id -u):\$(id -g)'
}
35 changes: 35 additions & 0 deletions differentiate-json/pkg.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"name": "differentiate-json",
"version": "0.1.0",
"description": "Compares user provided JSON against pipeline autogenerated JSON",
"main": "main.nf",
"deprecated": false,
"keywords": [
"bioinformatics"
],
"repository": {
"type": "git",
"url": "https://github.com/icgc-argo/argo-data-submission.git"
},
"container": {
"registries": [
{
"registry": "ghcr.io",
"type": "docker",
"org": "icgc-argo",
"default": true
}
]
},
"dependencies": [],
"devDependencies": [],
"contributors": [
{
"name": "Edmund Su",
"email": "edmund.su@oicr.on.ca"
}
],
"license": "GNU Affero General Public License v3",
"bugReport": "https://github.com/icgc-argo/argo-data-submission/issues",
"homepage": "https://github.com/icgc-argo/argo-data-submission#readme"
}
Loading

0 comments on commit a5829a4

Please sign in to comment.