Skip to content

Commit

Permalink
unit test lint fix
Browse files Browse the repository at this point in the history
  • Loading branch information
krishnaswamypradeep committed Oct 3, 2024
1 parent 0cc4864 commit a33a8de
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 87 deletions.
87 changes: 51 additions & 36 deletions scripts/biomedical/NCBI_Gene/scripts/format_ncbi_gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,37 +321,54 @@
}

GENE_QUALIFIER_DICT = {
'NOT acts_upstream_of': 'dcs:GOTermQualifierNotActsUpstreamOf',
'NOT acts_upstream_of':
'dcs:GOTermQualifierNotActsUpstreamOf',
'NOT acts_upstream_of_or_within':
'dcs:GOTermQualifierNotActsUpstreamOfOrWithin',
'dcs:GOTermQualifierNotActsUpstreamOfOrWithin',
'NOT acts_upstream_of_or_within_negative_effect':
'dcs:GOTermQualifierNotActsUpstreamOfOrWithinNegativeEffect',
'dcs:GOTermQualifierNotActsUpstreamOfOrWithinNegativeEffect',
'NOT acts_upstream_of_or_within_positive_effect':
'dcs:GOTermQualifierNotActsUpstreamOfOrWithinPositiveEffect',
'NOT colocalizes_with': 'dcs:GOTermQualifierNotColocalizesWith',
'NOT contributes_to': 'dcs:GOTermQualifierNotContributesTo',
'NOT enables': 'dcs:GOTermQualifierNotEnables',
'NOT involved_in': 'dcs:GOTermQualifierNotInvolvedIn',
'NOT is_active_in': 'dcs:GOTermQualifierNotIsActiveIn',
'NOT located_in': 'dcs:GOTermQualifierNotLocatedIn',
'NOT part_of': 'dcs:GOTermQualifierNotPartOf',
'acts_upstream_of': 'dcs:GOTermQualifierActsUpstreamOf',
'dcs:GOTermQualifierNotActsUpstreamOfOrWithinPositiveEffect',
'NOT colocalizes_with':
'dcs:GOTermQualifierNotColocalizesWith',
'NOT contributes_to':
'dcs:GOTermQualifierNotContributesTo',
'NOT enables':
'dcs:GOTermQualifierNotEnables',
'NOT involved_in':
'dcs:GOTermQualifierNotInvolvedIn',
'NOT is_active_in':
'dcs:GOTermQualifierNotIsActiveIn',
'NOT located_in':
'dcs:GOTermQualifierNotLocatedIn',
'NOT part_of':
'dcs:GOTermQualifierNotPartOf',
'acts_upstream_of':
'dcs:GOTermQualifierActsUpstreamOf',
'acts_upstream_of_negative_effect':
'dcs:GOTermQualifierActsUpstreamOfNegativeEffect',
'acts_upstream_of_or_within': 'dcs:GOTermQualifierActsUpstreamOfOrWithin',
'dcs:GOTermQualifierActsUpstreamOfNegativeEffect',
'acts_upstream_of_or_within':
'dcs:GOTermQualifierActsUpstreamOfOrWithin',
'acts_upstream_of_or_within_negative_effect':
'dcs:GOTermQualifierActsUpstreamOfOrWithinNegativeEffect',
'dcs:GOTermQualifierActsUpstreamOfOrWithinNegativeEffect',
'acts_upstream_of_or_within_positive_effect':
'dcs:GOTermQualifierActsUpstreamOfOrWithinPositiveEffect',
'dcs:GOTermQualifierActsUpstreamOfOrWithinPositiveEffect',
'acts_upstream_of_positive_effect':
'dcs:GOTermQualifierActsUpstreamOfPositiveEffect',
'colocalizes_with': 'dcs:GOTermQualifierColocalizesWith',
'contributes_to': 'dcs:GOTermQualifierContributesTo',
'enables': 'dcs:GOTermQualifierEnables',
'involved_in': 'dcs:GOTermQualifierInvolvedIn',
'is_active_in': 'dcs:GOTermQualifierIsActiveIn',
'located_in': 'dcs:GOTermQualifierLocatedIn',
'part_of': 'dcs:GOTermQualifierPartOf'
'dcs:GOTermQualifierActsUpstreamOfPositiveEffect',
'colocalizes_with':
'dcs:GOTermQualifierColocalizesWith',
'contributes_to':
'dcs:GOTermQualifierContributesTo',
'enables':
'dcs:GOTermQualifierEnables',
'involved_in':
'dcs:GOTermQualifierInvolvedIn',
'is_active_in':
'dcs:GOTermQualifierIsActiveIn',
'located_in':
'dcs:GOTermQualifierLocatedIn',
'part_of':
'dcs:GOTermQualifierPartOf'
}

GENE_CATEGORY_DICT = {
Expand Down Expand Up @@ -522,8 +539,7 @@ def process_gene_info(self, file_to_process: str, return_dict) -> None:
feature_type_entries = set()
unique_dbXrefs_list = []
unique_dbXrefs = set()
input_file = path_join(SOURCE_FILE_PATH + '/gene_info',
file_to_process)
input_file = path_join(SOURCE_FILE_PATH + '/gene_info', file_to_process)
with open(
path_join(OUTPUT_FILE_PATH + '/gene_info',
file_to_process.replace('txt', 'csv')),
Expand Down Expand Up @@ -696,9 +712,7 @@ def save_gene_info_files(self, mcf_file_path: str):
for feature_type in FEATURE_TYPE_ENTRIES:
ftype = json.loads(feature_type)
mfc_entry = NCBI_GENE_SCHEMA_EMUN_MCF.format(
type=ftype['type'],
item=ftype['entry'],
name=ftype['name'])
type=ftype['type'], item=ftype['entry'], name=ftype['name'])
file.write(mfc_entry)

def _get_feature_type_list(self, value):
Expand Down Expand Up @@ -787,7 +801,7 @@ def process_csv_file(self) -> None:
else:
Gene_PubMedID[input_row[1]] = {
'dcid':
f"dcid:{GENE_ID_DCID_MAPPING[input_row[1]]}",
f"dcid:{GENE_ID_DCID_MAPPING[input_row[1]]}",
'PubMedID': [input_row[2]]
}
#Gene_PubMedID[input_row[1]].append()
Expand Down Expand Up @@ -937,7 +951,7 @@ def process_csv_file(self) -> None:
if input_row[4] in GENE_ID_DCID_MAPPING:
Gene_orthologs[input_row[1]] = {
'dcid':
f"dcid:{GENE_ID_DCID_MAPPING[input_row[1]]}",
f"dcid:{GENE_ID_DCID_MAPPING[input_row[1]]}",
'ortholog': [
f"dcid:{GENE_ID_DCID_MAPPING[input_row[4]]}"
]
Expand Down Expand Up @@ -1019,7 +1033,7 @@ def process_csv_file(self) -> None:
try:
Gene_group[input_row[1]] = {
'GeneID':
f'dcid:{GENE_ID_DCID_MAPPING[input_row[1]]}',
f'dcid:{GENE_ID_DCID_MAPPING[input_row[1]]}',
column_name: [
f'dcid:{GENE_ID_DCID_MAPPING[input_row[4]]}'
]
Expand Down Expand Up @@ -1119,7 +1133,8 @@ def parse_gene_mim2gene_row(self, dcid, input_row):
if len(input_row[3]) > 1:
Source_lst = [
f'{GENE_OMIM_SOURCE_DICT.get(x.strip(), x.strip())}'
for x in input_row[3].strip().split(';') if len(x) > 1
for x in input_row[3].strip().split(';')
if len(x) > 1
]
row['Source'] = ",".join(Source_lst)

Expand Down Expand Up @@ -1562,7 +1577,7 @@ def main(_):
]
for neighbors_file in gene_Neighbors_shard_files:
neighbors_proc = Process(target=GeneNeighbors().process_csv_file,
args=(neighbors_file, ))
args=(neighbors_file,))
procs_Neighbors.append(neighbors_proc)
neighbors_proc.start()

Expand All @@ -1575,7 +1590,7 @@ def main(_):
f for f in listdir(join(SOURCE_FILE_PATH, 'gene2go'))
]
for go_file in gene_Go_shard_files:
go_proc = Process(target=Gene2Go().process_csv_file, args=(go_file, ))
go_proc = Process(target=Gene2Go().process_csv_file, args=(go_file,))
procs_Go.append(go_proc)
go_proc.start()

Expand All @@ -1589,7 +1604,7 @@ def main(_):
]
for accession_file in gene_Accession_shard_files:
accession_proc = Process(target=Gene2Accession().process_csv_file,
args=(accession_file, ))
args=(accession_file,))
procs_Accession.append(accession_proc)
accession_proc.start()

Expand Down
131 changes: 80 additions & 51 deletions scripts/biomedical/NCBI_Gene/scripts/format_ncbi_gene_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,19 @@
Author: Pradeep Kumar Krishnaswamy
Date: 20-Sep-2024
"""

import os
import sys
import unittest
from .format_ncbi_gene import *

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPT_DIR)

#import format_ncbi_gene as gene

from .format_ncbi_gene import GeneInfo, Gene2Accession, Gene2Go, GeneNeighbors, GeneMim2gene, GeneRifs_Basic, Gene2Ensembl


class NCBIGeneTest(unittest.TestCase):
class NcbiGeneTest(unittest.TestCase):

def test_check_gene_info_parser(self):
""" Unit test to parse gene_info row
Expand All @@ -30,36 +37,51 @@ def test_check_gene_info_parser(self):
unique_dbXrefs_list = []
unique_dbXrefs = set()
input_row = [
'2010893', '33370007', 'rbcL', 'CGW41_pgp092', 'Lo_mil1Pt0025',
'-', '-', '-',
'2010893', '33370007', 'rbcL', 'CGW41_pgp092', 'Lo_mil1Pt0025', '-',
'-', '-',
'ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit',
'protein-coding', '-', '-', '-', '-', '20230531', '-'
]
expected_result = {
'taxID': 'dcid:bio/LobeliaMildbraedii',
'GeneID': '33370007',
'dcid': 'bio/33370007',
'Symbol': '"rbcL"',
'synonym': 'Lo_mil1Pt0025',
'chromosome': '',
'map_location': '',
'taxID':
'dcid:bio/LobeliaMildbraedii',
'GeneID':
'33370007',
'dcid':
'bio/33370007',
'Symbol':
'"rbcL"',
'synonym':
'Lo_mil1Pt0025',
'chromosome':
'',
'map_location':
'',
'description':
'"ribulose-15-bisphosphate carboxylaseoxygenase large subunit"',
'type_of_gene': '',
'Full_name_from_nomenclature_authority': '',
'Nomenclature_status': '',
'Other_designations': '',
'Modification_date': '2023-05-31',
'regulatory': '',
'misc_feature': '',
'misc_recomb': ''
'"ribulose-15-bisphosphate carboxylaseoxygenase large subunit"',
'type_of_gene':
'',
'Full_name_from_nomenclature_authority':
'',
'Nomenclature_status':
'',
'Other_designations':
'',
'Modification_date':
'2023-05-31',
'regulatory':
'',
'misc_feature':
'',
'misc_recomb':
''
}

row = GeneInfo().parse_gene_info_row('dcid:bio/LobeliaMildbraedii',
gene_id_dcid_mapping,
feature_type_entries,
unique_dbXrefs_list,
unique_dbXrefs, input_row)
row = gene.GeneInfo().parse_gene_info_row('dcid:bio/LobeliaMildbraedii',
gene_id_dcid_mapping,
feature_type_entries,
unique_dbXrefs_list,
unique_dbXrefs, input_row)
self.assertDictEqual(row, expected_result)

def test_check_gene_neighbors_parser(self):
Expand All @@ -83,8 +105,8 @@ def test_check_gene_neighbors_parser(self):
'chromosome': 'chr9.part0',
'assembly': '"Reference PAN1.0 Primary Assembly"'
}
row = GeneNeighbors().parse_gene_neighbors_row('bio/122811710',
input_row)
row = gene.GeneNeighbors().parse_gene_neighbors_row(
'bio/122811710', input_row)
self.assertDictEqual(row, expected_result)

def test_check_gene_mim2gene_parser(self):
Expand All @@ -102,30 +124,37 @@ def test_check_gene_mim2gene_parser(self):
'dcid': 'bio/ncbi_100652748_omim_620758',
'MedGenCUI_dcid': ''
}
row = GeneMim2gene().parse_gene_mim2gene_row('bio/ncbi_100652748',
input_row)
row = gene.GeneMim2gene().parse_gene_mim2gene_row(
'bio/ncbi_100652748', input_row)
self.assertDictEqual(row, expected_result)

def test_check_gene_gene2go_parser(self):
""" Unit test to parse gene_gene2go row
"""
input_row = [
'75485', '128904962', 'GO:0048025', 'IEA', 'involved_in',
'negative regulation of mRNA splicing, via spliceosome',
'30032202', 'Process'
'negative regulation of mRNA splicing, via spliceosome', '30032202',
'Process'
]
expected_result = {
'GeneID': 'dcid:bio/128904962',
'dcid': 'bio/GO_0048025',
'GO_ID': 'GO:0048025',
'Evidence': 'dcs:GOTermEvidenceCodeElectronicAnnotation',
'Qualifier': 'dcs:GOTermQualifierInvolvedIn',
'GeneID':
'dcid:bio/128904962',
'dcid':
'bio/GO_0048025',
'GO_ID':
'GO:0048025',
'Evidence':
'dcs:GOTermEvidenceCodeElectronicAnnotation',
'Qualifier':
'dcs:GOTermQualifierInvolvedIn',
'GO_term':
'"negative regulation of mRNA splicing, via spliceosome"',
'PubMed': '"30032202"',
'Category': 'dcs:GeneOntologyCategoryBiologicalProcess'
'"negative regulation of mRNA splicing, via spliceosome"',
'PubMed':
'"30032202"',
'Category':
'dcs:GeneOntologyCategoryBiologicalProcess'
}
row = Gene2Go().parse_gene_gene2go_row('bio/128904962', input_row)
row = gene.Gene2Go().parse_gene_gene2go_row('bio/128904962', input_row)
self.assertDictEqual(row, expected_result)

def test_check_gene_gene2ensembl_parser(self):
Expand All @@ -145,7 +174,7 @@ def test_check_gene_gene2ensembl_parser(self):
'Ensembl_protein_identifier': ''
}

row = Gene2Ensembl().parse_gene_gene2ensembl_row(
row = gene.Gene2Ensembl().parse_gene_gene2ensembl_row(
'bio/ncbi_113218477', input_row)
self.assertDictEqual(row, expected_result)

Expand All @@ -158,21 +187,21 @@ def test_check_gene_rifs_basic_parser(self):
]
expected_result = {
'GeneID':
'dcid:bio/ncbi_3188',
'dcid:bio/ncbi_3188',
'dcid':
'bio/ncbi_3188_16171461',
'bio/ncbi_3188_16171461',
'name':
'"ncbi 3188 PubMed 16171461 Reference Into Function"',
'"ncbi 3188 PubMed 16171461 Reference Into Function"',
'dateModified':
'2010-01-21',
'2010-01-21',
'pubMedId':
'16171461',
'16171461',
'GeneRifText':
'"the relative levels of hnRNP F and H2 in cells, as well as the target sequences in the downstream GRS on pre-mRNA, influence gene expression"'
'"the relative levels of hnRNP F and H2 in cells, as well as the target sequences in the downstream GRS on pre-mRNA, influence gene expression"'
}

row = GeneRifs_Basic().parse_gene_generifs_row('bio/ncbi_3188',
input_row)
row = gene.GeneRifs_Basic().parse_gene_generifs_row(
'bio/ncbi_3188', input_row)
self.assertDictEqual(row, expected_result)

def test_check_gene_gene2accession_parser(self):
Expand Down Expand Up @@ -203,7 +232,7 @@ def test_check_gene_gene2accession_parser(self):
'mature_peptide_gi': ''
}

row = Gene2Accession().parse_gene_gene2accession_row(
row = gene.Gene2Accession().parse_gene_gene2accession_row(
'bio/ncbi_113218477', input_row)
self.assertDictEqual(row, expected_result)

Expand Down

0 comments on commit a33a8de

Please sign in to comment.