Skip to content

Commit

Permalink
Merge pull request #358 from ConesaLab/SQANTI-reads-devel
Browse files Browse the repository at this point in the history
Fixing stuff for paper revision
  • Loading branch information
carolinamonzo authored Nov 26, 2024
2 parents 705ed94 + 2b77897 commit 5c6544a
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 12 deletions.
3 changes: 2 additions & 1 deletion sqanti_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def main():
parser.add_argument('-fl','--factor_level', type=str, dest="FACTORLVL", required=False, help='Factor level to evaluate for underannotation', default = None)
parser.add_argument('--all_tables', dest="ALLTABLES", action='store_true', help='Export all output tables. Default tables are gene counts, ujc counts, length_summary, cv and and underannotated gene tables')
parser.add_argument('--pca_tables', dest="PCATABLES", action='store_true', help='Export table for making PCA plots')
parser.add_argument('--report', type=str, choices = ["pdf", "html", "both"], default = 'pdf', help = "\t\tDefault: pdf")
parser.add_argument('--verbose', help = 'If verbose is run, it will print all steps, by default it is FALSE', action="store_true")
parser.add_argument('-v', '--version', help="Display program version number.", action='version', version='sqanti-reads '+str(__version__))

Expand All @@ -200,7 +201,7 @@ def main():
# Run plotting script
plotting_script_path = os.path.join(os.path.dirname(__file__), 'utilities', 'sqanti_reads_tables_and_plots_02ndk.py')

cmd_plotting = f"python {plotting_script_path} --ref {args.annotation} --design {args.inDESIGN} -o {args.dir} --gene-expression {args.ANNOTEXP} --jxn-expression {args.JXNEXP} --perc-coverage {args.PERCCOV} --perc-junctions {args.PERCMAXJXN}"
cmd_plotting = f"python {plotting_script_path} --ref {args.annotation} --design {args.inDESIGN} -o {args.dir} --gene-expression {args.ANNOTEXP} --jxn-expression {args.JXNEXP} --perc-coverage {args.PERCCOV} --perc-junctions {args.PERCMAXJXN} --report {args.report}"
if args.inFACTOR:
cmd_plotting = cmd_plotting + f" --factor {args.inFACTOR}"
if args.FACTORLVL != None:
Expand Down
83 changes: 72 additions & 11 deletions utilities/sqanti_reads_tables_and_plots_02ndk.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import linkage, leaves_list
from matplotlib.ticker import FixedLocator
from pdf2image import convert_from_path
import base64
from jinja2 import Template
import io

## Update options
def getOptions():
Expand All @@ -37,6 +41,7 @@ def getOptions():
parser.add_argument('-fl','--factor-level', type=str, dest="FACTORLVL", required=False, help='Factor level to evaluate for underannotation', default = None)
parser.add_argument('--all-tables', dest="ALLTABLES", action='store_true', help='Export all output tables. Default tables are gene counts, ujc counts, length_summary, cv and cand underannotated gene tables')
parser.add_argument('--pca-tables', dest="PCATABLES", action='store_true', help='Export table for making PCA plots')
parser.add_argument('--report', type=str, choices = ["pdf", "html", "both"], default = 'pdf', help = "\t\tDefault: pdf")

args = parser.parse_args()
return args
Expand Down Expand Up @@ -754,7 +759,7 @@ def categorize_gene(group):
#plt.title('Number of Genes in each annotation category')
plt.xlabel('Gene Category')
plt.ylabel('Number of Genes')
plt.xticks(rotation=45, ha="right")
plt.xticks(rotation=90, ha="right")
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
# Add the counts on top of each bar
Expand Down Expand Up @@ -1281,6 +1286,8 @@ def plot_side_by_side_bars(*args, **kwargs):
"mono_in_multi": '#aec6cf'
}

cat_order = ["FSM", "ISM", "NIC", "NNC", "AS", "FUS", "GENIC", "GI", "INTER"]

#Define sample color palette

unique_sampleIDs = all_gene_percs_long_DF['sampleID'].unique()
Expand Down Expand Up @@ -1313,7 +1320,8 @@ def plot_side_by_side_bars(*args, **kwargs):
alpha=0.6,
legend=True,
height=8,
aspect=0.7
aspect=0.7,
order = cat_order
)
g.set_xticklabels(rotation=0)
g.set_axis_labels("Structural Category", "Percentages")
Expand Down Expand Up @@ -1346,7 +1354,8 @@ def plot_side_by_side_bars(*args, **kwargs):
alpha=0.6,
legend=True,
height=8,
aspect=0.7
aspect=0.7,
order = cat_order
)
g.set_xticklabels(rotation=0)
g.set_axis_labels("Structural Category", "Percentages")
Expand Down Expand Up @@ -1964,6 +1973,9 @@ def plot_pdf(out_path, all_gene_percs_long_DF, annot_gene_percs_long_DF, all_gen
"mono_in_multi": '#aec6cf'
}

cat_order = ["FSM", "ISM", "NIC", "NNC", "AS", "FUS", "GENIC", "GI", "INTER"]
cat_order_stacked = ["INTER", "GI", "GENIC", "FUS", "AS", "NNC", "NIC", "ISM", "FSM"]

#Define sample color palette

unique_sampleIDs = all_gene_percs_long_DF['sampleID'].unique()
Expand All @@ -1984,8 +1996,8 @@ def plot_pdf(out_path, all_gene_percs_long_DF, annot_gene_percs_long_DF, all_gen
palette = sample_color_palette
plt.figure(figsize=(10, 7.5))
sns.stripplot(x='Category', y='Percentage', data=all_gene_percs_long_DF, jitter=0,
alpha=0.6, hue='sampleID', dodge=False, **{'linewidth': 0, 's': 20}, palette = palette)
plt.xticks(rotation=45)
alpha=0.6, hue='sampleID', dodge=False, **{'linewidth': 0, 's': 20}, palette = palette, order = cat_order)
plt.xticks(rotation=90)
plt.ylabel('Percentage')
plt.title(' Percent reads in each structural category - All Genes')
plt.legend(title='SampleID', bbox_to_anchor=(1.05, 1), loc='upper left')
Expand All @@ -1996,17 +2008,18 @@ def plot_pdf(out_path, all_gene_percs_long_DF, annot_gene_percs_long_DF, all_gen

plt.figure(figsize=(10, 7.5))
sns.stripplot(x='Category', y='Percentage', data=annot_gene_percs_long_DF, jitter=0,
alpha=0.6, hue='sampleID', dodge=False, **{'linewidth': 0, 's': 20}, palette = palette)
plt.xticks(rotation=45)
alpha=0.6, hue='sampleID', dodge=False, **{'linewidth': 0, 's': 20}, palette = palette, order = cat_order)
plt.xticks(rotation=90)
plt.ylabel('Percentage')
plt.title(' Percent reads in each structural category - ANnotated Gnes')
plt.title(' Percent reads in each structural category - Annotated Genes')
plt.legend(title='SampleID', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
matplotlib.rcParams['pdf.fonttype'] = 42
pdf.savefig()
plt.close()

categories = [col for col in all_gene_percs_pivot_DF.columns if col not in ['sampleID', exp_factor]]
categories = [cat for cat in cat_order_stacked if cat in categories]

cols = ['sampleID'] + categories
all_gene_percs_pivot_DF = all_gene_percs_pivot_DF[cols]
Expand All @@ -2032,7 +2045,8 @@ def plot_pdf(out_path, all_gene_percs_long_DF, annot_gene_percs_long_DF, all_gen


categories = [cat for cat in ['FSM', 'ISM', 'NIC', 'NNC', 'GI', 'GENIC'] if cat in annot_gene_percs_pivot_DF.columns]
palette = [category_color_palette[cat] for cat in categories]
categories = [cat for cat in cat_order_stacked if cat in categories]
palette = [category_color_palette[cat] for cat in categories]

annot_gene_percs_pivot_DF = annot_gene_percs_pivot_DF.sort_values(by= 'sampleID')
plt.figure(figsize=(10, 7.5))
Expand Down Expand Up @@ -2189,7 +2203,7 @@ def plot_pdf(out_path, all_gene_percs_long_DF, annot_gene_percs_long_DF, all_gen
sns.boxplot(x='sampleID', y='percentage', hue='sampleID' ,data=melted_annotated_gene_DF[melted_annotated_gene_DF['category'] == category],
palette=sample_color_palette)
plt.title(f'Gene distribution - {category}')
plt.xticks(rotation=45)
plt.xticks(rotation=90)
plt.tight_layout()
matplotlib.rcParams['pdf.fonttype'] = 42
pdf.savefig()
Expand Down Expand Up @@ -2307,7 +2321,7 @@ def plot_pdf(out_path, all_gene_percs_long_DF, annot_gene_percs_long_DF, all_gen
plt.close()

#Violin plots
sns.violinplot(x='sampleID', y='length', data=length_DF2, palette = sample_color_palette)
sns.violinplot(x='sampleID', y='length', data=length_DF2, palette = sample_color_palette, legend = False, hue = "sampleID")
plt.xlabel('Sample ID')
plt.ylabel('Length')
plt.title('Read Length Distribution')
Expand Down Expand Up @@ -2508,6 +2522,44 @@ def plot_pdf(out_path, all_gene_percs_long_DF, annot_gene_percs_long_DF, all_gen
matplotlib.rcParams['pdf.fonttype'] = 42
pdf.savefig()
plt.close()

def makeHTML(drty, prefx, sufx):
pages = convert_from_path(drty + prefx + sufx)
# Encode each page as a base64 string
encoded_images = []
for page in pages:
buffer = io.BytesIO()
page.save(buffer, format="PNG")
buffer.seek(0)
encoded_images.append(base64.b64encode(buffer.read()).decode("utf-8"))

# HTML template for embedding images
html_template = """
<!DOCTYPE html>
<html>
<head>
<title>PDF to HTML</title>
</head>
<body>
<h1>PDF Report</h1>
{% for img in images %}
<div>
<img src="data:image/png;base64,{{ img }}" style="width: 100%; height: auto;">
</div>
{% endfor %}
</body>
</html>
"""

# Render HTML with images
template = Template(html_template)
html_content = template.render(images=encoded_images)

# Save to HTML file
with open(drty + prefx + sufx[:-4] + ".html", "w") as f:
f.write(html_content)

print(f"HTML report saved as {drty + prefx + sufx[:-4]}.html")


def main():
Expand All @@ -2525,6 +2577,15 @@ def main():
else:
plot_pdf_by_factor(os.path.join(args.OUT, args.PREFIX + '_plots.pdf'), *dfs_for_plotting)

if args.report in ("both", "html"):
makeHTML(args.OUT, args.PREFIX, '_plots.pdf')
makeHTML(args.OUT, args.PREFIX, '_annotation_plots.pdf')

if args.report == "html":
os.remove(f"{args.OUT}/{args.PREFIX}_plots.pdf")
os.remove(f"{args.OUT}/{args.PREFIX}_annotation_plots.pdf")


if __name__ == '__main__':
#Parse command line arguments
global args
Expand Down

0 comments on commit 5c6544a

Please sign in to comment.