Skip to content

Commit

Permalink
Merge branch 'SQANTI-reads-devel' of https://github.com/ConesaLab/SQA…
Browse files Browse the repository at this point in the history
…NTI3 into SQANTI-reads-devel

Merging
  • Loading branch information
carolinamonzo committed Dec 4, 2024
2 parents 116da80 + 8a317c6 commit 5892222
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 10 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ For detailed documentation, please visit [the SQANTI3 wiki](https://github.com/C

* [Running SQANTI3 rescue](https://github.com/ConesaLab/SQANTI3/wiki/Running-SQANTI3-rescue)

* [Running SQANTI-reads](https://github.com/ConesaLab/SQANTI3/wiki/Running-SQANTI%E2%80%90reads-(under-development))

* [Tutorial: running SQANTI3 on an example dataset](https://github.com/ConesaLab/SQANTI3/wiki/Tutorial:-running-SQANTI3-on-an-example-dataset)

Please, note that we are currently updating and expanding the wiki to provide as much information as possible and
Expand Down
33 changes: 27 additions & 6 deletions sqanti3_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,12 +884,33 @@ def calc_exon_overlap(query_exons, ref_exons):
return sum(q_bases.values())

def get_diff_tss_tts(trec, ref):
    """
    Compute the signed differences between the transcript start site (TSS)
    and the transcript termination site (TTS) of a transcript and a reference.

    Sign convention (identical for both strands): a negative value means the
    transcript is shortened with respect to the reference at that end, and a
    positive value means it is elongated.

    :param trec: query transcript; only `strand`, `txStart` and `txEnd` are read
    :param ref: reference transcript; only `txStart` and `txEnd` are read
    :return: tuple `(diff_tss, diff_tts)`
    """
    if trec.strand == '+':
        # Positive (+) strand:
        #   TSS difference = reference start - transcript start
        #   TTS difference = transcript end  - reference end
        diff_tss = ref.txStart - trec.txStart
        diff_tts = trec.txEnd - ref.txEnd
    else:
        # Negative (-) strand (any strand value other than '+' lands here):
        # records are loaded so that txEnd > txStart regardless of strand,
        # which means txStart holds the termination site and txEnd holds the
        # start site. The same formulas are applied with the roles swapped:
        #   TTS difference = reference start - transcript start
        #   TSS difference = transcript end  - reference end
        diff_tts = ref.txStart - trec.txStart
        diff_tss = trec.txEnd - ref.txEnd
    return diff_tss, diff_tts


Expand All @@ -899,7 +920,7 @@ def get_gene_diff_tss_tts(isoform_hit):
nearest_start_diff, nearest_end_diff = float('inf'), float('inf')
for ref_gene in isoform_hit.genes:
for x in start_ends_by_gene[ref_gene]['begin']:
d = trec.txStart - x
d = x - trec.txStart
if abs(d) < abs(nearest_start_diff):
nearest_start_diff = d
for x in start_ends_by_gene[ref_gene]['end']:
Expand All @@ -911,8 +932,8 @@ def get_gene_diff_tss_tts(isoform_hit):
isoform_hit.tss_gene_diff = nearest_start_diff if nearest_start_diff!=float('inf') else 'NA'
isoform_hit.tts_gene_diff = nearest_end_diff if nearest_end_diff!=float('inf') else 'NA'
else:
isoform_hit.tss_gene_diff = -nearest_end_diff if nearest_start_diff!=float('inf') else 'NA'
isoform_hit.tts_gene_diff = -nearest_start_diff if nearest_end_diff!=float('inf') else 'NA'
isoform_hit.tss_gene_diff = nearest_end_diff if nearest_start_diff!=float('inf') else 'NA'
isoform_hit.tts_gene_diff = nearest_start_diff if nearest_end_diff!=float('inf') else 'NA'

def categorize_incomplete_matches(trec, ref):
"""
Expand Down
2 changes: 1 addition & 1 deletion sqanti3_wrapper.conf
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@ rescue_rules_json_file="${json_for_rules}"

rescue_ml_reference_genome=${reference_fasta}
rescue_ml_reference_gtf=${reference_gtf}
rescue_ml_threshold=${threshold}
rescue_ml_threshold=${threshold}
6 changes: 3 additions & 3 deletions sqanti3_wrapper.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /bin/bash -xe
filter_corrected_gtf#! /bin/bash -xe

# Author: Fabián Robledo
# Email: fabian.robledo@csic.es
Expand Down Expand Up @@ -155,7 +155,7 @@ function main () {
if [ -z ${filter_mode} ] || [ ${filter_mode} == "ml" ] || [ $filter_mode == "both" ];
then
${sqanti3_filter} ml ${filter_ml_ouput_folder} \
${filter_ml_prefix} ${filter_isoforms} ${filter_isoannotgff3} \
${filter_ml_prefix} ${filter_corrected_gtf} ${filter_isoforms} ${filter_isoannotgff3} \
${filter_sam} ${filter_faa} ${monoexonic} ${filter_monoexonic} ${filter_ml_percent_training} ${filter_ml_TP} \
${filter_ml_TN} ${filter_ml_threshold} \
${filter_ml_max_class_size} ${filter_ml_intermediate_files} ${filter_ml_intrapriming} \
Expand Down Expand Up @@ -207,4 +207,4 @@ function main () {
source "$1"

# Executing the main function
main
main

0 comments on commit 5892222

Please sign in to comment.