Skip to content

Commit

Permalink
Update tests & Format code
Browse files Browse the repository at this point in the history
  • Loading branch information
zhx828 committed Nov 14, 2023
1 parent 7bff352 commit 5e2ef88
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 26 deletions.
46 changes: 30 additions & 16 deletions AnnotatorCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from urllib3 import Retry
from datetime import date


logging.basicConfig(level=logging.INFO)
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
Expand All @@ -27,7 +26,8 @@

API_REQUEST_RETRY_STATUS_FORCELIST = [429, 500, 502, 503, 504]

csv.field_size_limit(int(ct.c_ulong(-1).value // 2)) # Deal with overflow problem on Windows, https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
csv.field_size_limit(int(ct.c_ulong(
-1).value // 2)) # Deal with overflow problem on Windows, https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072
sizeLimit = csv.field_size_limit()
csv.field_size_limit(sizeLimit) # for reading large files

Expand Down Expand Up @@ -185,7 +185,8 @@ def setsampleidsfileterfile(f):
HGVSG_HEADER = 'HGVSG'
# columns for copy number alteration
CNA_HEADERS = [ALTERATION_HEADER, 'COPY_NUMBER_ALTERATION', 'CNA', 'GISTIC']
HGVS_HEADERS = [ALTERATION_HEADER, HGVSP_SHORT_HEADER, HGVSP_HEADER, HGVSG_HEADER, 'AMINO_ACID_CHANGE', 'FUSION'] + CNA_HEADERS
HGVS_HEADERS = [ALTERATION_HEADER, HGVSP_SHORT_HEADER, HGVSP_HEADER, HGVSG_HEADER, 'AMINO_ACID_CHANGE',
'FUSION'] + CNA_HEADERS
SAMPLE_HEADERS = ['SAMPLE_ID', 'TUMOR_SAMPLE_BARCODE']
PROTEIN_START_HEADERS = ['PROTEIN_START']
PROTEIN_END_HEADERS = ['PROTEIN_END']
Expand Down Expand Up @@ -254,7 +255,8 @@ def getOncokbInfo():

def validate_oncokb_token():
if not oncokb_annotation_api_url.startswith(DEFAULT_ONCOKB_URL):
log.warning("OncoKB base url has been specified by the user that is different from the default www.oncokb.org. The token validation is skipped.")
log.warning(
"OncoKB base url has been specified by the user that is different from the default www.oncokb.org. The token validation is skipped.")
return None

if oncokb_api_bearer_token is None or not oncokb_api_bearer_token:
Expand Down Expand Up @@ -495,7 +497,8 @@ def append_headers(outf, newncols, include_descriptions, genomic_change_annotati

outf.write("\n")
return newncols



def processalterationevents(eventfile, outfile, previousoutfile, defaultCancerType, cancerTypeMap,
annotatehotspots, user_input_query_type, default_reference_genome, include_descriptions):
if annotatehotspots:
Expand Down Expand Up @@ -641,7 +644,8 @@ def process_alteration(maffilereader, outf, maf_headers, alteration_column_names

end = get_cell_content(row, iend)

if start is None and iproteinpos >= 0 and row[iproteinpos] != "" and row[iproteinpos] != "." and row[iproteinpos] != "-":
if start is None and iproteinpos >= 0 and row[iproteinpos] != "" and row[iproteinpos] != "." and \
row[iproteinpos] != "-":
poss = row[iproteinpos].split('/')[0].split('-')
try:
if len(poss) > 0:
Expand Down Expand Up @@ -973,7 +977,8 @@ def get_cna(cell_value, annotate_gain_loss=False):
return cna


def process_gistic_data(outf, gistic_data_file, defaultCancerType, cancerTypeMap, annotate_gain_loss, include_descriptions):
def process_gistic_data(outf, gistic_data_file, defaultCancerType, cancerTypeMap, annotate_gain_loss,
include_descriptions):
with open(gistic_data_file, DEFAULT_READ_FILE_MODE) as infile:
reader = csv.reader(infile, delimiter='\t')
headers = readheaders(reader)
Expand Down Expand Up @@ -1022,13 +1027,15 @@ def process_gistic_data(outf, gistic_data_file, defaultCancerType, cancerTypeMap
rows.append([sample, cancer_type, hugo, cna_type])
queries.append(CNAQuery(hugo, cna_type, cancer_type))

headers = ['SAMPLE_ID', 'CANCER_TYPE', 'HUGO_SYMBOL', 'ALTERATION'] + get_oncokb_annotation_column_headers(include_descriptions, False)
headers = ['SAMPLE_ID', 'CANCER_TYPE', 'HUGO_SYMBOL', 'ALTERATION'] + get_oncokb_annotation_column_headers(
include_descriptions, False)
outf.write('\t'.join(headers))
outf.write('\n')
return headers, rows, queries


def process_individual_cna_file(outf, cna_data_file, defaultCancerType, cancerTypeMap, annotate_gain_loss, include_descriptions):
def process_individual_cna_file(outf, cna_data_file, defaultCancerType, cancerTypeMap, annotate_gain_loss,
include_descriptions):
with open(cna_data_file, DEFAULT_READ_FILE_MODE) as infile:
reader = csv.reader(infile, delimiter='\t')
headers = readheaders(reader)
Expand Down Expand Up @@ -1069,7 +1076,8 @@ def process_individual_cna_file(outf, cna_data_file, defaultCancerType, cancerTy
return row_headers, rows, queries


def process_cna_data(cnafile, outfile, previousoutfile, defaultCancerType, cancerTypeMap, include_descriptions, annotate_gain_loss=False,
def process_cna_data(cnafile, outfile, previousoutfile, defaultCancerType, cancerTypeMap, include_descriptions,
annotate_gain_loss=False,
cna_format=CNA_FILE_FORMAT_GISTIC):
if os.path.isfile(previousoutfile):
cacheannotated(previousoutfile, defaultCancerType, cancerTypeMap)
Expand Down Expand Up @@ -1484,7 +1492,8 @@ def __init__(self, hugo, hgvs, cancertype, reference_genome=None, consequence=No
self.referenceGenome = reference_genome.value

def __repr__(self):
return ",".join([self.gene.hugoSymbol, self.alteration, self.tumorType, self.consequence, self.proteinStart, self.proteinEnd, self.referenceGenome])
return ",".join([self.gene.hugoSymbol, self.alteration, self.tumorType, self.consequence, self.proteinStart,
self.proteinEnd, self.referenceGenome])


class HGVSgQuery:
Expand Down Expand Up @@ -1601,7 +1610,8 @@ def pull_protein_change_info(queries, include_descriptions, annotate_hotspot):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation, include_descriptions, False, annotate_hotspot))
processed_annotation.append(
process_oncokb_annotation(query_annotation, include_descriptions, False, annotate_hotspot))
return processed_annotation


Expand Down Expand Up @@ -1629,7 +1639,8 @@ def pull_hgvsg_info(queries, include_descriptions, annotate_hotspot):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation, include_descriptions, True, annotate_hotspot))
processed_annotation.append(
process_oncokb_annotation(query_annotation, include_descriptions, True, annotate_hotspot))
return processed_annotation


Expand Down Expand Up @@ -1657,7 +1668,8 @@ def pull_genomic_change_info(queries, include_descriptions, annotate_hotspot):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation, include_descriptions, True, annotate_hotspot))
processed_annotation.append(
process_oncokb_annotation(query_annotation, include_descriptions, True, annotate_hotspot))
return processed_annotation


Expand Down Expand Up @@ -1687,7 +1699,8 @@ def pull_cna_info(queries, include_descriptions):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation, include_descriptions, False, annotate_hotspot=False))
processed_annotation.append(
process_oncokb_annotation(query_annotation, include_descriptions, False, annotate_hotspot=False))
return processed_annotation


Expand Down Expand Up @@ -1721,7 +1734,8 @@ def pull_structural_variant_info(queries, include_descriptions):

processed_annotation = []
for query_annotation in annotation:
processed_annotation.append(process_oncokb_annotation(query_annotation, include_descriptions, False, annotate_hotspot=False))
processed_annotation.append(
process_oncokb_annotation(query_annotation, include_descriptions, False, annotate_hotspot=False))
return processed_annotation


Expand Down
2 changes: 1 addition & 1 deletion flake8.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[flake8]
ignore = E501,W503
ignore = E501,W503,E126
24 changes: 15 additions & 9 deletions test_Annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,14 +314,20 @@ def test_fake_cna():
fake_gene_one_query_suite(annotations, True)


def check_brca2_s1882_without_cancertype(annotation):
assert len(annotation) == NUMBER_OF_ANNOTATION_COLUMNS
assert annotation[MUTATION_EFFECT_INDEX] == 'Likely Loss-of-function'
assert annotation[ONCOGENIC_INDEX] == 'Likely Oncogenic'
assert annotation[HIGHEST_LEVEL_INDEX] == 'LEVEL_1'
assert annotation[LEVEL_1_INDEX] == 'Olaparib,Olaparib+Bevacizumab,Rucaparib,Olaparib+Abiraterone+Prednisone,Niraparib,Olaparib+Abiraterone+Prednisolone,Talazoparib+Enzalutamide'
assert annotation[LEVEL_2_INDEX] == 'Olaparib,Rucaparib,Niraparib'
assert annotation[LEVEL_3A_INDEX] == 'Olaparib,Talazoparib'
def check_brca2_s1882_without_cancertype(annotation, genomic_query=False):
    """Assert that ``annotation`` holds the expected values for the BRCA2 S1882 check.

    Args:
        annotation: Indexable sequence of annotation column values for one query.
        genomic_query: When true, the row is expected to have
            ``NUMBER_OF_GC_ANNOTATION_COLUMNS`` columns and every OncoKB column
            index is shifted by ``NUMBER_OF_ONCOKB_ANNOTATION_GC_COLUMNS``.
            When false, the row is expected to have
            ``NUMBER_OF_ANNOTATION_COLUMNS`` columns and indices are unshifted.

    Raises:
        AssertionError: If the column count or any checked value differs.
    """
    # Resolve the conditional parts up front. Writing
    # ``assert len(x) == A if flag else B`` parses as
    # ``assert ((len(x) == A) if flag else B)``, which silently skips the
    # length check whenever ``flag`` is false and ``B`` is truthy.
    expected_columns = NUMBER_OF_GC_ANNOTATION_COLUMNS if genomic_query else NUMBER_OF_ANNOTATION_COLUMNS
    offset = NUMBER_OF_ONCOKB_ANNOTATION_GC_COLUMNS if genomic_query else 0

    assert len(annotation) == expected_columns
    assert annotation[offset + MUTATION_EFFECT_INDEX] == 'Likely Loss-of-function'
    assert annotation[offset + ONCOGENIC_INDEX] == 'Likely Oncogenic'
    assert annotation[offset + HIGHEST_LEVEL_INDEX] == 'LEVEL_1'
    assert annotation[offset + LEVEL_1_INDEX] == 'Olaparib,Olaparib+Bevacizumab,Rucaparib,Olaparib+Abiraterone+Prednisone,Niraparib,Olaparib+Abiraterone+Prednisolone,Talazoparib+Enzalutamide,Niraparib+Abiraterone Acetate+Prednisone'
    assert annotation[offset + LEVEL_2_INDEX] == 'Olaparib,Rucaparib,Niraparib'
    assert annotation[offset + LEVEL_3A_INDEX] == 'Olaparib,Talazoparib'


@pytest.mark.skipif(ONCOKB_API_TOKEN in (None, ''), reason="oncokb api token required")
Expand All @@ -344,4 +350,4 @@ def test_duplicated_treatments():
annotations = pull_genomic_change_info(queries, False, False)
assert len(annotations) == 1

check_brca2_s1882_without_cancertype(annotations[0])
check_brca2_s1882_without_cancertype(annotations[0], True)

0 comments on commit 5e2ef88

Please sign in to comment.