Skip to content
This repository has been archived by the owner on Dec 18, 2023. It is now read-only.

Commit

Permalink
Merge pull request #85 from thehyve/test-data
Browse files Browse the repository at this point in the history
Fix pipeline for acceptance environment
  • Loading branch information
gijskant authored Oct 17, 2019
2 parents 1ff02be + f365446 commit 1e6687c
Show file tree
Hide file tree
Showing 62 changed files with 753 additions and 23 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ As sha1 hashes 40 characters long the rest of the file gets ignored:
`1625be750dab24057c4c82d62d27298236ebb04c diagnosis.txt`

For more information, see the [CSR data model](https://github.com/thehyve/python_csr2transmart#data-model) description
and an example of [input data files](./test_data/dropzone/E2E_TEST_DATA).
and an example of [input data files](test_data/E2E_TEST_DATA/dropzone).

## Usage

Expand Down
4 changes: 3 additions & 1 deletion luigi-pipeline/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class GitCommit(BaseTask):
def run(self):
with git_lock:
repo.index.add([self.directory_to_add])
if not repo.index.diff('HEAD'):
if repo.is_dirty():
repo.index.commit(self.commit_message)
logger.info('Commit changes in {} directory.'.format(self.directory_to_add))
else:
Expand Down Expand Up @@ -147,6 +147,8 @@ class CbioportalDataTransformation(BaseTask):
def run(self):
clinical_input_file = os.path.join(config.working_dir)
ngs_dir = os.path.join(config.input_data_dir, 'NGS')
if not os.path.isdir(ngs_dir):
ngs_dir = None
csr2cbioportal.csr2cbioportal(input_dir=clinical_input_file,
ngs_dir=ngs_dir,
output_dir=config.cbioportal_staging_dir)
Expand Down
8 changes: 4 additions & 4 deletions luigi.cfg-sample
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ transformation_config_dir= <path_to>/config
load_logs_dir_name=load_logs

# TranSMART study settings
transmart_copy_jar = <path_to>/transmart-core/transmart-copy/build/libs/transmart-copy-17.2.jar
transmart_copy_jar = <path_to>/transmart-core/transmart-copy/build/libs/transmart-copy-17.2.5.jar
study_id = CSR_STUDY
top_node = \Central Subject Registry\

Expand Down Expand Up @@ -63,15 +63,15 @@ offline_token = <transmart-user-offline-token>


[CbioportalDataValidation]
docker_image = cbioportal-image
docker_image = cbioportal-hg38:1.10.2


[CbioportalDataLoading]
docker_image = cbioportal-image
docker_image = cbioportal-hg38:1.10.2
server_name = pmc-cbioportal-test


[E2eTest]
PGADMINUSER = postgres
PGADMINPASSWORD = password
parent_drop_dir = <path_to>/pmc-conversion/test_data/dropzone/E2E_TEST_DATA
parent_drop_dir = <path_to>/pmc-conversion/test_data/E2E_TEST_DATA/dropzone
3 changes: 2 additions & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
csr2transmart==0.0.19
csr2transmart==0.0.23
docutils==0.14
lockfile==0.12.2
luigi==2.8.0
Expand All @@ -11,3 +11,4 @@ numpy==1.17.1
chardet==3.0.4
psycopg2-binary==2.7.4
click>=7.0,<8.0
requests>= 2.22.0,<2.23.0
38 changes: 22 additions & 16 deletions scripts/git_commons.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import os
import logging
import git
from git import Repo, InvalidGitRepositoryError

logger = logging.getLogger(__name__)


def get_git_repo(repo_dir):
def get_git_repo(repo_dir: str) -> Repo:
"""
Returns the git repository used for VCS of source and transformed data files. As well as load logs.
If it does not exist, it will create one.
Expand All @@ -14,26 +14,32 @@ def get_git_repo(repo_dir):
"""
if not os.path.exists(repo_dir):
return init_git_repo(repo_dir)

try:
return git.Repo(repo_dir)
except git.InvalidGitRepositoryError:
repo = Repo(repo_dir)
init_gitignore(repo)
return repo
except InvalidGitRepositoryError:
return init_git_repo(repo_dir)


def init_git_repo(repo_dir):
os.makedirs(repo_dir, exist_ok=True)
logger.info('Initializing git repository: {}'.format(repo_dir))
r = git.Repo.init(os.path.realpath(repo_dir))
ignore_list = ['.done-*', '.DS_Store']
logger.debug('Git ignore list: {}'.format(ignore_list))

gitignore = os.path.realpath(os.path.join(repo_dir, '.gitignore'))
def init_gitignore(repo: Repo):
    """
    Make sure the working tree of the given repository has a .gitignore file.

    When a .gitignore file is already present nothing is touched. Otherwise the file
    is written with the default ignore patterns, added to the index and committed
    with the message 'Initial commit.'.

    :param repo: repository whose working tree should contain the .gitignore file
    """
    ignore_file = os.path.realpath(os.path.join(repo.working_tree_dir, '.gitignore'))
    if not os.path.isfile(ignore_file):
        patterns = ['.done-*', '.DS_Store']
        logger.debug('Writing git ignore file')
        # One pattern per line, terminated by a trailing newline.
        with open(ignore_file, 'w') as handle:
            handle.write('\n'.join(patterns) + '\n')

        repo.index.add([ignore_file])
        repo.index.commit('Initial commit.')


r.index.add([gitignore])
r.index.commit('Initial commit.')
return r
def init_git_repo(repo_dir) -> Repo:
    """
    Initialise a git repository in the given directory and return it.

    The directory is created first when it does not exist yet. After the repository
    has been initialised on the resolved (real) path, init_gitignore is called on it.

    :param repo_dir: directory that should hold the repository
    :return: the initialised repository
    """
    os.makedirs(repo_dir, exist_ok=True)
    logger.info('Initializing git repository: {}'.format(repo_dir))
    resolved_dir = os.path.realpath(repo_dir)
    new_repo = Repo.init(resolved_dir)
    init_gitignore(new_repo)
    return new_repo
File renamed without changes.

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test_data/test_logic/config/cbioportal_db_info/genes.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

162 changes: 162 additions & 0 deletions test_data/test_logic/config/ontology_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
{
"nodes": [
{
"name": "01. Patient information",
"children": [
{
"name": "03. Gender",
"concept_code": "Individual.gender"
},
{
"name": "01. Date of birth",
"concept_code": "Individual.birth_date"
},
{
"name": "02. Taxonomy",
"concept_code": "Individual.taxonomy"
},
{
"name": "04. Date of death",
"concept_code": "Individual.death_date"
},
{
"name": "Informed_consent",
"children": [
{
"name": "01. Informed consent type",
"concept_code": "Individual.ic_type"
},
{
"name": "Informed consent version",
"concept_code": "Individual.ic_version"
},
{
"name": "02. Date informed Consent given",
"concept_code": "Individual.ic_given_date"
},
{
"name": "03. Date informed consent withdrawn",
"concept_code": "Individual.ic_withdrawn_date"
},
{
"name": "04. Informed consent material",
"concept_code": "Individual.ic_material"
},
{
"name": "05. Informed consent data",
"concept_code": "Individual.ic_data"
},
{
"name": "06. Informed consent linking external database",
"concept_code": "Individual.ic_linking_ext"
},
{
"name": "07. Report hereditary susceptibility",
"concept_code": "Individual.report_her_susc"
}
]
}
]
},
{
"name": "02. Diagnosis information",
"children": [
{
"name": "02. Tumor type",
"concept_code": "Diagnosis.tumor_type"
},
{
"name": "03. Topography",
"concept_code": "Diagnosis.topography"
},
{
"name": "Treatment",
"concept_code": "Diagnosis.treatment_protocol"
},
{
"name": "04. Tumor stage",
"concept_code": "Diagnosis.tumor_stage"
},
{
"name": "01. Date of diagnosis",
"concept_code": "Diagnosis.diagnosis_date"
},
{
"name": "05. Center of treatment",
"concept_code": "Diagnosis.center_treatment"
}
]
},
{
"name": "03. Biosource information",
"children": [
{
"name": "06. Biosource dedicated for specific study",
"concept_code": "Biosource.biosource_dedicated"
},
{
"name": "01. Biosource parent",
"concept_code": "Biosource.src_biosource_id"
},
{
"name": "03. Tissue",
"concept_code": "Biosource.tissue"
},
{
"name": "02. Date of biosource",
"concept_code": "Biosource.biosource_date"
},
{
"name": "04. Disease status",
"concept_code": "Biosource.disease_status"
},
{
"name": "05. Tumor percentage",
"concept_code": "Biosource.tumor_percentage"
}
]
},
{
"name": "04. Biomaterial information",
"children": [
{
"name": "01. Biomaterial parent",
"concept_code": "Biomaterial.src_biomaterial_id"
},
{
"name": "02. Date of biomaterial",
"concept_code": "Biomaterial.biomaterial_date"
},
{
"name": "03. Biomaterial type",
"concept_code": "Biomaterial.type"
}
]
},
{
"name": "05. Study information",
"children": [
{
"name": "01. Study ID",
"concept_code": "Study.study_id"
},
{
"name": "02. Study acronym",
"concept_code": "Study.acronym"
},
{
"name": "03. Study title",
"concept_code": "Study.title"
},
{
"name": "Study datadictionary",
"concept_code": "Study.datadictionary"
},
{
"name": "04. Individual Study ID",
"concept_code": "IndividualStudy.individual_study_id"
}
]
}
]
}
Loading

0 comments on commit 1e6687c

Please sign in to comment.