From 6a9f84289911bc0896bd3b0359a4d866c2db78be Mon Sep 17 00:00:00 2001
From: acesnik <anthony.cesnik@gmail.com>
Date: Mon, 26 Jul 2021 14:25:30 +0200
Subject: [PATCH 1/2] add variables to skip some plotting

---
 2_ProteinFucciPsuedotime.py                   | 193 +++++++++---------
 3_RNAFucciPseudotime.py                       | 177 ++++++++--------
 SingleCellProteogenomics/ProteinBimodality.py |  67 +++---
 .../ProteinCellCycleDependence.py             | 118 +++++------
 .../RNACellCycleDependence.py                 |   6 +-
 .../RNADataPreparation.py                     |  12 +-
 SingleCellProteogenomics/RNAVelocity.py       |   7 +-
 7 files changed, 299 insertions(+), 281 deletions(-)

diff --git a/2_ProteinFucciPsuedotime.py b/2_ProteinFucciPsuedotime.py
index ca225fe..7ab644f 100644
--- a/2_ProteinFucciPsuedotime.py
+++ b/2_ProteinFucciPsuedotime.py
@@ -17,6 +17,8 @@
 import numpy as np
 import scipy
 
+ALL_PLOTS_AND_ANALYSES = False
+
 # Make PDF text readable
 plt.rcParams["pdf.fonttype"] = 42
 plt.rcParams["ps.fonttype"] = 42
@@ -112,46 +114,47 @@
 )
 
 # Compare variances for protein and microtubules, the internal control for each image
-removeThese = pd.read_csv("input/ProteinData/ReplicatesToRemove.txt", header=None)[0] # make these independent samples for one-sided Kruskal-Wallis tests
-utils.general_boxplot(
-    (var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)]),
-    ("Protein", "Microtubules"),
-    "",
-    "Variance",
-    "",
-    False,
-    f"figures/ProteinMicrotubuleVariances.pdf",
-)
-utils.general_boxplot(
-    (cv_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
-    ("Protein", "Microtubules"),
-    "",
-    "CV",
-    "",
-    False,
-    f"figures/ProteinMicrotubuleCVs.pdf",
-)
-utils.general_boxplot(
-    (gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
-    ("Protein", "Microtubules"),
-    "",
-    "Gini",
-    "",
-    False,
-    f"figures/ProteinMicrotubuleGinis.pdf",
-)
-p_varProt_varMt = 2*scipy.stats.kruskal(var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)])[1]
-p_cvProt_cvMt = 2*scipy.stats.kruskal(cv_comp[~np.isin(u_well_plates, removeThese)], cv_mt[~np.isin(u_well_plates, removeThese)])[1]
-p_giniProt_giniMt = 2*scipy.stats.kruskal(gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)])[1]
-print(
-    f"{p_varProt_varMt}: p-value for difference between protein and microtubule variances"
-)
-print(
-    f"{p_cvProt_cvMt}: p-value for difference between protein and microtubule CVs"
-)
-print(
-    f"{p_giniProt_giniMt}: p-value for difference between protein and microtubule Gini indices"
-)
+if ALL_PLOTS_AND_ANALYSES:
+    removeThese = pd.read_csv("input/ProteinData/ReplicatesToRemove.txt", header=None)[0] # make these independent samples for one-sided Kruskal-Wallis tests
+    utils.general_boxplot(
+        (var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)]),
+        ("Protein", "Microtubules"),
+        "",
+        "Variance",
+        "",
+        False,
+        f"figures/ProteinMicrotubuleVariances.pdf",
+    )
+    utils.general_boxplot(
+        (cv_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
+        ("Protein", "Microtubules"),
+        "",
+        "CV",
+        "",
+        False,
+        f"figures/ProteinMicrotubuleCVs.pdf",
+    )
+    utils.general_boxplot(
+        (gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
+        ("Protein", "Microtubules"),
+        "",
+        "Gini",
+        "",
+        False,
+        f"figures/ProteinMicrotubuleGinis.pdf",
+    )
+    p_varProt_varMt = 2*scipy.stats.kruskal(var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)])[1]
+    p_cvProt_cvMt = 2*scipy.stats.kruskal(cv_comp[~np.isin(u_well_plates, removeThese)], cv_mt[~np.isin(u_well_plates, removeThese)])[1]
+    p_giniProt_giniMt = 2*scipy.stats.kruskal(gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)])[1]
+    print(
+        f"{p_varProt_varMt}: p-value for difference between protein and microtubule variances"
+    )
+    print(
+        f"{p_cvProt_cvMt}: p-value for difference between protein and microtubule CVs"
+    )
+    print(
+        f"{p_giniProt_giniMt}: p-value for difference between protein and microtubule Gini indices"
+    )
 
 #%% Gaussian clustering to identify biomodal intensity distributions
 bimodal_results = ProteinBimodality.identify_bimodal_intensity_distributions(
@@ -166,6 +169,7 @@
     wp_iscell,
     wp_isnuc,
     wp_iscyto,
+    ALL_PLOTS_AND_ANALYSES
 )
 wp_isbimodal_fcpadj_pass, wp_bimodal_cluster_idxs, wp_isbimodal_generally, wp_bimodal_fcmaxmin = (
     bimodal_results
@@ -201,34 +205,36 @@
     wp_isbimodal_fcpadj_pass,
     wp_bimodal_cluster_idxs,
     wp_comp_kruskal_gaussccd_adj,
+    ALL_PLOTS_AND_ANALYSES
 )
 wp_comp_ccd_difffromrng, mean_diff_from_rng_mt, wp_comp_ccd_clust1, wp_comp_ccd_clust2, wp_ccd_unibimodal, wp_comp_ccd_gauss, perc_var_comp, mean_diff_from_rng, wp_comp_eq_percvar_adj, mean_diff_from_rng_clust1, wp_comp_eq_percvar_adj_clust1, mean_diff_from_rng_clust2, wp_comp_eq_percvar_adj_clust2, mvavgs_x, mvavgs_comp, curr_pols, curr_ab_norms, mvperc_comps, curr_freds, curr_fgreens, curr_mockbulk_phases, curr_area_cell, curr_ab_nuc, curr_well_plate_imgnb, folder = (
     ccd_results
 )
 
 # Move the temporal average plots to more informative places
-ProteinCellCycleDependence.copy_mvavg_plots_protein(
-    folder,
-    wp_ensg,
-    wp_comp_ccd_difffromrng,
-    wp_isbimodal_fcpadj_pass,
-    wp_comp_ccd_clust1,
-    wp_comp_ccd_clust2,
-    wp_ccd_unibimodal,
-    wp_comp_ccd_gauss,
-)
-ProteinCellCycleDependence.global_plots_protein(
-    alphaa,
-    u_well_plates,
-    wp_ccd_unibimodal,
-    perc_var_comp,
-    mean_mean_comp,
-    gini_comp,
-    cv_comp,
-    mean_diff_from_rng,
-    wp_comp_eq_percvar_adj,
-    wp_comp_kruskal_gaussccd_adj,
-)
+if ALL_PLOTS_AND_ANALYSES:
+    ProteinCellCycleDependence.copy_mvavg_plots_protein(
+        folder,
+        wp_ensg,
+        wp_comp_ccd_difffromrng,
+        wp_isbimodal_fcpadj_pass,
+        wp_comp_ccd_clust1,
+        wp_comp_ccd_clust2,
+        wp_ccd_unibimodal,
+        wp_comp_ccd_gauss,
+    )
+    ProteinCellCycleDependence.global_plots_protein(
+        alphaa,
+        u_well_plates,
+        wp_ccd_unibimodal,
+        perc_var_comp,
+        mean_mean_comp,
+        gini_comp,
+        cv_comp,
+        mean_diff_from_rng,
+        wp_comp_eq_percvar_adj,
+        wp_comp_kruskal_gaussccd_adj,
+    )
 
 # Analyze the CCD results and save them
 ccd_comp, nonccd_comp, bioccd = ProteinCellCycleDependence.analyze_ccd_variation_protein(
@@ -282,34 +288,35 @@
 )
 
 # Perform comparison to LASSO for finding CCD proteins
-ProteinCellCycleDependence.compare_to_lasso_analysis(
-    u_well_plates,
-    pol_sort_norm_rev,
-    pol_sort_well_plate,
-    pol_sort_ab_cell,
-    pol_sort_ab_nuc,
-    pol_sort_ab_cyto,
-    pol_sort_mt_cell,
-    pol_sort_fred,
-    pol_sort_fgreen,
-    wp_iscell,
-    wp_isnuc,
-    wp_iscyto,
-    wp_ensg,
-    ccd_comp,
-)
-
-# Generate UMAPs to illustrate cutoffs and stability
-ProteinCellCycleDependence.generate_protein_umaps(
-    u_well_plates,
-    pol_sort_norm_rev,
-    pol_sort_well_plate,
-    pol_sort_ab_cell,
-    pol_sort_ab_nuc,
-    pol_sort_ab_cyto,
-    pol_sort_mt_cell,
-    wp_iscell,
-    wp_isnuc,
-    wp_iscyto,
-    mean_diff_from_rng,
-)
+if ALL_PLOTS_AND_ANALYSES:
+    ProteinCellCycleDependence.compare_to_lasso_analysis(
+        u_well_plates,
+        pol_sort_norm_rev,
+        pol_sort_well_plate,
+        pol_sort_ab_cell,
+        pol_sort_ab_nuc,
+        pol_sort_ab_cyto,
+        pol_sort_mt_cell,
+        pol_sort_fred,
+        pol_sort_fgreen,
+        wp_iscell,
+        wp_isnuc,
+        wp_iscyto,
+        wp_ensg,
+        ccd_comp,
+    )
+    
+    # Generate UMAPs to illustrate cutoffs and stability
+    ProteinCellCycleDependence.generate_protein_umaps(
+        u_well_plates,
+        pol_sort_norm_rev,
+        pol_sort_well_plate,
+        pol_sort_ab_cell,
+        pol_sort_ab_nuc,
+        pol_sort_ab_cyto,
+        pol_sort_mt_cell,
+        wp_iscell,
+        wp_isnuc,
+        wp_iscyto,
+        mean_diff_from_rng,
+    )
diff --git a/3_RNAFucciPseudotime.py b/3_RNAFucciPseudotime.py
index 9b48e0c..c91d3b9 100644
--- a/3_RNAFucciPseudotime.py
+++ b/3_RNAFucciPseudotime.py
@@ -19,6 +19,8 @@
 import shutil
 import os
 
+ALL_PLOTS_AND_ANALYSES = False
+
 # Make PDF text readable
 plt.rcParams["pdf.fonttype"] = 42
 plt.rcParams["ps.fonttype"] = 42
@@ -32,17 +34,6 @@
 nonccd_comp = np.load("output/pickles/nonccd_comp.npy", allow_pickle=True)
 u_plates = ["355","356","357"]
 
-#%% Check for new RNA inputs
-newinputsfolder = "newinputs/RNAData/"
-if os.path.exists(newinputsfolder):
-    for file in os.listdir(newinputsfolder):
-        filepath=f"{newinputsfolder}{file}"
-        targetfile=f"input/RNAData/{os.path.basename(file)}"
-        targetfilepc=f"{targetfile}.protein_coding.csv"
-        if os.path.exists(targetfile): os.remove(targetfile)
-        if os.path.exists(targetfilepc): os.remove(targetfilepc)
-        shutil.copyfile(filepath, targetfile)
-
 #%% Convert FACS intensities for FUCCI markers to pseudotime using the same polar coordinate methods as for protein
 # Idea: Use the polar coordinate pseudotime calculations to calculate the pseudotime for each cell
 # Execution: Adapt Devin's code for the cells sorted for RNA-Seq
@@ -54,13 +45,14 @@
 FucciPseudotime.pseudotime_rna(adata)
 
 #%% Single cell RNA-Seq data preparation and general analysis
-RNADataPreparation.general_plots(u_plates)
-RNADataPreparation.analyze_noncycling_cells(u_plates)
-adata, phasesfilt = RNADataPreparation.qc_filtering(
-    adata, do_log_normalize=True, do_remove_blob=False
-)
-adata = RNADataPreparation.zero_center_fucci(adata)
-RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "BeforeRemovingNoncycling")
+if ALL_PLOTS_AND_ANALYSES:
+    RNADataPreparation.general_plots(u_plates)
+    RNADataPreparation.analyze_noncycling_cells(u_plates)
+    adata, phasesfilt = RNADataPreparation.qc_filtering(
+        adata, do_log_normalize=True, do_remove_blob=False
+    )
+    adata = RNADataPreparation.zero_center_fucci(adata)
+    RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "BeforeRemovingNoncycling")
 
 #%% Idea: Similar to mock-bulk analysis for proteins, we can evaluate each gene bundled by phase across cells
 # Execution: Make boxplots of RNA expression by phase
@@ -73,31 +65,34 @@
     adata, do_log_normalize=True, do_remove_blob=True
 )
 adata = RNADataPreparation.zero_center_fucci(adata)
-RNADataPreparation.plot_markers_vs_reads(adata)
-RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "AfterRemovingNoncycling")
-g1 = adata.obs["phase"] == "G1"
-s = adata.obs["phase"] == "S-ph"
-g2 = adata.obs["phase"] == "G2M"
-do_make_boxplots = False
-if do_make_boxplots:
-    for iii, ensg in enumerate(adata.var_names):
-        maxtpm = np.max(
-            np.concatenate((adata.X[g1, iii], adata.X[s, iii], adata.X[g2, iii]))
-        )
-        RNACellCycleDependence.boxplot_result(
-            adata.X[g1, iii] / maxtpm,
-            adata.X[s, iii] / maxtpm,
-            adata.X[g2, iii] / maxtpm,
-            "figures/RNABoxplotByPhase",
-            ensg,
-        )
+
+if ALL_PLOTS_AND_ANALYSES:
+    RNADataPreparation.plot_markers_vs_reads(adata)
+    RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "AfterRemovingNoncycling")
+    g1 = adata.obs["phase"] == "G1"
+    s = adata.obs["phase"] == "S-ph"
+    g2 = adata.obs["phase"] == "G2M"
+    do_make_boxplots = False
+    if do_make_boxplots:
+        for iii, ensg in enumerate(adata.var_names):
+            maxtpm = np.max(
+                np.concatenate((adata.X[g1, iii], adata.X[s, iii], adata.X[g2, iii]))
+            )
+            RNACellCycleDependence.boxplot_result(
+                adata.X[g1, iii] / maxtpm,
+                adata.X[s, iii] / maxtpm,
+                adata.X[g2, iii] / maxtpm,
+                "figures/RNABoxplotByPhase",
+                ensg,
+            )
 
 #%% Idea: Display general RNA expression patterns in single cells using UMAP dimensionality reduction, and display with FUCCI pseudotime overlayed
-FucciPseudotime.pseudotime_umap(adata)  # Generate a UMAP with the pseudotime overlayed
+if ALL_PLOTS_AND_ANALYSES:
+    FucciPseudotime.pseudotime_umap(adata)  # Generate a UMAP with the pseudotime overlayed
 
-# We can also show that the cycle pattern remains when the curated CCD genes or CCD proteins are removed,
-# demonstrating that there's still valuable information about cell cycling beyond what was called CCD
-RNADataPreparation.demonstrate_umap_cycle_without_ccd(adata)
+    # We can also show that the cycle pattern remains when the curated CCD genes or CCD proteins are removed,
+    # demonstrating that there's still valuable information about cell cycling beyond what was called CCD
+    RNADataPreparation.demonstrate_umap_cycle_without_ccd(adata)
 
 # Read in the curated CCD genes / CCD proteins from the present work / Non-CCD genes from the present work; filter for genes that weren't filtered in QC of RNA-Seq
 ccd_regev_filtered, ccd_filtered, nonccd_filtered = utils.ccd_gene_lists(adata)
@@ -110,12 +105,13 @@
 normalized_exp_data = (expression_data.T / np.max(expression_data, axis=0)[:, None]).T
 
 # Log-log FUCCI plot with RNA expression overlayed
-RNACellCycleDependence.plot_expression_facs(
-    adata,
-    wp_ensg[np.isin(wp_ensg, adata.var_names)],
-    normalized_exp_data,
-    "figures/GeneExpressionFucci",
-)
+if ALL_PLOTS_AND_ANALYSES:
+    RNACellCycleDependence.plot_expression_facs(
+        adata,
+        wp_ensg[np.isin(wp_ensg, adata.var_names)],
+        normalized_exp_data,
+        "figures/GeneExpressionFucci",
+    )
 
 # Cluster the expression into phases and analyze it that way
 bulk_phase_tests = RNACellCycleDependence.analyze_ccd_variation_by_phase_rna(
@@ -133,32 +129,12 @@
     adata_nonccdprotein,
     adata_regevccdgenes,
     biotype_to_use,
+    make_mvavg_plots_isoforms=ALL_PLOTS_AND_ANALYSES,
 )
 percent_ccd_variance, total_gini, mean_diff_from_rng, pass_meandiff, eq_percvar_adj, fucci_time_inds, norm_exp_sort, moving_averages, mvavg_xvals, perms, ccdtranscript, ccdprotein, mvpercs = (
     rna_ccd_analysis_results
 )
 
-RNACellCycleDependence.plot_umap_ccd_cutoffs(adata, mean_diff_from_rng)
-RNACellCycleDependence.figures_ccd_analysis_rna(
-    adata,
-    percent_ccd_variance,
-    mean_diff_from_rng,
-    pass_meandiff,
-    eq_percvar_adj,
-    wp_ensg,
-    ccd_comp,
-    ccd_regev_filtered,
-)
-RNACellCycleDependence.plot_overall_and_ccd_variances(
-    adata,
-    biotype_to_use,
-    total_gini,
-    percent_ccd_variance,
-    pass_meandiff,
-    adata_ccdprotein,
-    adata_nonccdprotein,
-    adata_regevccdgenes,
-)
 RNACellCycleDependence.make_plotting_dataframe(
     adata,
     ccdtranscript,
@@ -169,17 +145,41 @@
     moving_averages,
     mvpercs,
 )
-RNACellCycleDependence.compare_to_lasso_analysis(adata, ccdtranscript)
-RNACellCycleDependence.analyze_cnv_calls(adata, ccdtranscript)
+    
+if ALL_PLOTS_AND_ANALYSES:
+    RNACellCycleDependence.plot_umap_ccd_cutoffs(adata, mean_diff_from_rng)
+    RNACellCycleDependence.figures_ccd_analysis_rna(
+        adata,
+        percent_ccd_variance,
+        mean_diff_from_rng,
+        pass_meandiff,
+        eq_percvar_adj,
+        wp_ensg,
+        ccd_comp,
+        ccd_regev_filtered,
+    )
+    RNACellCycleDependence.plot_overall_and_ccd_variances(
+        adata,
+        biotype_to_use,
+        total_gini,
+        percent_ccd_variance,
+        pass_meandiff,
+        adata_ccdprotein,
+        adata_nonccdprotein,
+        adata_regevccdgenes,
+    )
+    RNACellCycleDependence.compare_to_lasso_analysis(adata, ccdtranscript)
+    RNACellCycleDependence.analyze_cnv_calls(adata, ccdtranscript)
 
 #%% Moving average calculations and randomization analysis for the spike-in internal controls
-adata_spikeins, phases_spikeins = RNADataPreparation.read_counts_and_phases(
-    valuetype, use_spike_ins=True, biotype_to_use="", u_plates=u_plates
-)
-sc.pp.filter_genes(adata_spikeins, min_cells=100)
-print(f"data shape after filtering: {adata_spikeins.X.shape}")
-
-RNACellCycleDependence.ccd_analysis_of_spikeins(adata_spikeins, perms)
+if ALL_PLOTS_AND_ANALYSES:
+    adata_spikeins, phases_spikeins = RNADataPreparation.read_counts_and_phases(
+        valuetype, use_spike_ins=True, biotype_to_use="", u_plates=u_plates
+    )
+    sc.pp.filter_genes(adata_spikeins, min_cells=100)
+    print(f"data shape after filtering: {adata_spikeins.X.shape}")
+    
+    RNACellCycleDependence.ccd_analysis_of_spikeins(adata_spikeins, perms)
 
 #%% Analyze RNA velocity
 valuetype, use_spikeins, biotype_to_use = "Tpms", False, "protein_coding"
@@ -189,17 +189,18 @@
 adata_blobless, phasesfilt = RNADataPreparation.qc_filtering(
     adata, do_log_normalize=True, do_remove_blob=False
 )
-RNAVelocity.analyze_rna_velocity(adata_blobless, mean_diff_from_rng)
+RNAVelocity.analyze_rna_velocity(adata_blobless, mean_diff_from_rng, ALL_PLOTS_AND_ANALYSES)
 
 #%% Analyze isoforms
-adata_isoform, ccdtranscript_isoform = RNACellCycleDependence.analyze_isoforms(
-    adata, ccdtranscript, wp_ensg, ccd_comp, nonccd_comp, u_plates
-)
-RNACellCycleDependence.compare_genes_to_isoforms(
-    adata,
-    ccdprotein,
-    ccdtranscript,
-    adata_nonccdprotein,
-    adata_isoform,
-    ccdtranscript_isoform,
-)
+if ALL_PLOTS_AND_ANALYSES:
+    adata_isoform, ccdtranscript_isoform = RNACellCycleDependence.analyze_isoforms(
+        adata, ccdtranscript, wp_ensg, ccd_comp, nonccd_comp, u_plates, make_mvavg_plots_isoforms=ALL_PLOTS_AND_ANALYSES
+    )
+    RNACellCycleDependence.compare_genes_to_isoforms(
+        adata,
+        ccdprotein,
+        ccdtranscript,
+        adata_nonccdprotein,
+        adata_isoform,
+        ccdtranscript_isoform,
+    )
diff --git a/SingleCellProteogenomics/ProteinBimodality.py b/SingleCellProteogenomics/ProteinBimodality.py
index a36076a..ffe9136 100644
--- a/SingleCellProteogenomics/ProteinBimodality.py
+++ b/SingleCellProteogenomics/ProteinBimodality.py
@@ -17,7 +17,7 @@
 
 def identify_bimodal_intensity_distributions(u_well_plates, wp_ensg,
              pol_sort_well_plate, pol_sort_norm_rev, pol_sort_ab_cell, pol_sort_ab_nuc, pol_sort_ab_cyto, pol_sort_mt_cell,
-             wp_iscell, wp_isnuc, wp_iscyto):
+             wp_iscell, wp_isnuc, wp_iscyto, do_plotting):
     '''
     Some proteins display bimodal intensity distributions. 
     This method seeks to identify distributions with high- and low-expressing cells, 
@@ -81,38 +81,39 @@ def identify_bimodal_intensity_distributions(u_well_plates, wp_ensg,
     print(f"{sum(~wp_isbimodal_generally[~wp_removeReplicate])}: number of proteins displaying unimodal distributions ({sum(~wp_isbimodal_generally)/len(wp_isbimodal_generally)}%)")
     print(f"{sum(wp_isbimodal_generally[~wp_removeReplicate])}: number of proteins displaying bimodal distributions ({sum(wp_isbimodal_generally)/len(wp_isbimodal_generally)}%)")
     
-    # Show that the intensity measurements are reasonable for these bimodal samples
-    plt.hist(np.concatenate(np.array(wp_intensities, dtype=object)[wp_isbimodal_generally]))
-    plt.xlabel("Mean intensity")
-    plt.ylabel("Count")
-    plt.title("Intensities of Cells within Bimodal Distributions\nAre Similar to those Overall")
-    # plt.show()
-    plt.close()
-
-    print("Illustrate the significantly distinct high- and low-expressing cell populations")
-    plt.scatter(np.log(wp_bimodal_fcmeans) / np.log(2), -np.log10(wp_isbimodal_padj), c=wp_isbimodal_generally, alpha=0.5, cmap="bwr_r")
-    plt.xlabel("Log2 Fold Change Between Gaussian Clusters")
-    plt.ylabel("-Log10 Adj. p-Value for Difference Between Clusters")
-    plt.savefig("figures/BimodalSignificance_GeneralBimodality.png")
-    # plt.show()
-    plt.close()
-
-    print("Illustrate the significantly distinct high- and low-expressing cell populations")
-    print("with no difference in pseudotime. These are evaluated separately for CCD.")
-    plt.scatter(np.log(wp_bimodal_fcmeans) / np.log(2), -np.log10(wp_isbimodal_padj), c=wp_isbimodal_fcpadj_pass, alpha=0.5, cmap="bwr_r")
-    plt.xlabel("Log2 Fold Change Between Gaussian Clusters")
-    plt.ylabel("-Log10 Adj. p-Value for Difference Between Clusters")
-    plt.savefig("figures/BimodalSignificance.png")
-    plt.savefig("figures/BimodalSignificance.pdf")
-    # plt.show()
-    plt.close()
+    if do_plotting:
+        # Show that the intensity measurements are reasonable for these bimodal samples
+        plt.hist(np.concatenate(np.array(wp_intensities, dtype=object)[wp_isbimodal_generally]))
+        plt.xlabel("Mean intensity")
+        plt.ylabel("Count")
+        plt.title("Intensities of Cells within Bimodal Distributions\nAre Similar to those Overall")
+        # plt.show()
+        plt.close()
     
-    print("Illustrate the samples with sufficient cell count for CCD evaluation of high- and low-expressing cell populations.")
-    plt.scatter([sum(c1[0]) for c1 in wp_bimodal_cluster_idxs], [sum(c1[1]) for c1 in wp_bimodal_cluster_idxs], c=wp_enoughcellsinbothclusters, alpha=0.5, cmap="bwr_r")
-    plt.xlabel("Cell Count, Cluster 1")
-    plt.ylabel("Cell Count, Cluster 2")
-    plt.savefig("figures/BimodalCellCount.png")
-    # plt.show()
-    plt.close()
+        print("Illustrate the significantly distinct high- and low-expressing cell populations")
+        plt.scatter(np.log(wp_bimodal_fcmeans) / np.log(2), -np.log10(wp_isbimodal_padj), c=wp_isbimodal_generally, alpha=0.5, cmap="bwr_r")
+        plt.xlabel("Log2 Fold Change Between Gaussian Clusters")
+        plt.ylabel("-Log10 Adj. p-Value for Difference Between Clusters")
+        plt.savefig("figures/BimodalSignificance_GeneralBimodality.png")
+        # plt.show()
+        plt.close()
+    
+        print("Illustrate the significantly distinct high- and low-expressing cell populations")
+        print("with no difference in pseudotime. These are evaluated separately for CCD.")
+        plt.scatter(np.log(wp_bimodal_fcmeans) / np.log(2), -np.log10(wp_isbimodal_padj), c=wp_isbimodal_fcpadj_pass, alpha=0.5, cmap="bwr_r")
+        plt.xlabel("Log2 Fold Change Between Gaussian Clusters")
+        plt.ylabel("-Log10 Adj. p-Value for Difference Between Clusters")
+        plt.savefig("figures/BimodalSignificance.png")
+        plt.savefig("figures/BimodalSignificance.pdf")
+        # plt.show()
+        plt.close()
+        
+        print("Illustrate the samples with sufficient cell count for CCD evaluation of high- and low-expressing cell populations.")
+        plt.scatter([sum(c1[0]) for c1 in wp_bimodal_cluster_idxs], [sum(c1[1]) for c1 in wp_bimodal_cluster_idxs], c=wp_enoughcellsinbothclusters, alpha=0.5, cmap="bwr_r")
+        plt.xlabel("Cell Count, Cluster 1")
+        plt.ylabel("Cell Count, Cluster 2")
+        plt.savefig("figures/BimodalCellCount.png")
+        # plt.show()
+        plt.close()
     
     return wp_isbimodal_fcpadj_pass, wp_bimodal_cluster_idxs, wp_isbimodal_generally, wp_bimodal_fcmaxmin
\ No newline at end of file
diff --git a/SingleCellProteogenomics/ProteinCellCycleDependence.py b/SingleCellProteogenomics/ProteinCellCycleDependence.py
index bf6d266..78b1940 100644
--- a/SingleCellProteogenomics/ProteinCellCycleDependence.py
+++ b/SingleCellProteogenomics/ProteinCellCycleDependence.py
@@ -89,7 +89,8 @@ def cell_cycle_dependence_protein(u_well_plates, wp_ensg, wp_ab, use_log_ccd, do
         pol_sort_fred, pol_sort_fgreen, pol_sort_mockbulk_phases,
         pol_sort_area_cell, pol_sort_area_nuc, pol_sort_well_plate_imgnb,
         wp_iscell, wp_isnuc, wp_iscyto,
-        wp_isbimodal_fcpadj_pass, wp_bimodal_cluster_idxs, wp_comp_kruskal_gaussccd_adj):
+        wp_isbimodal_fcpadj_pass, wp_bimodal_cluster_idxs, wp_comp_kruskal_gaussccd_adj,
+        do_plotting):
     '''
     Use a moving average model of protein expression over the cell cycle to determine cell cycle dependence.
     Generates plots for each gene
@@ -192,13 +193,14 @@ def cell_cycle_dependence_protein(u_well_plates, wp_ensg, wp_ab, use_log_ccd, do
             windows2 = np.asarray([np.arange(start, start + WINDOW) for start in np.arange(sum(clust2_idx) - WINDOW + 1)])
             mvperc1 = MovingAverages.mvpercentiles(curr_comp_norm[clust1_idx][windows1])
             mvperc2 = MovingAverages.mvpercentiles(curr_comp_norm[clust2_idx][windows2])
-            MovingAverages.temporal_mov_avg_protein(curr_pol[clust1_idx], curr_comp_norm[clust1_idx], 
-                mvavgs_x_clust1[-1], mvavgs_comp_clust1[-1], mvperc1, None, folder, fileprefixes[i] + "_clust1")
-            MovingAverages.temporal_mov_avg_protein(curr_pol[clust2_idx], curr_comp_norm[clust2_idx], 
-                mvavgs_x_clust2[-1], mvavgs_comp_clust2[-1], mvperc2, None, folder, fileprefixes[i] + "_clust2")
+            if do_plotting:
+                MovingAverages.temporal_mov_avg_protein(curr_pol[clust1_idx], curr_comp_norm[clust1_idx], 
+                    mvavgs_x_clust1[-1], mvavgs_comp_clust1[-1], mvperc1, None, folder, fileprefixes[i] + "_clust1")
+                MovingAverages.temporal_mov_avg_protein(curr_pol[clust2_idx], curr_comp_norm[clust2_idx], 
+                    mvavgs_x_clust2[-1], mvavgs_comp_clust2[-1], mvperc2, None, folder, fileprefixes[i] + "_clust2")
         
         # Make example plots for the randomization trials for the NFAT5 example in manuscript
-        if wp_ensg[i] == "ENSG00000102908":
+        if wp_ensg[i] == "ENSG00000102908" and do_plotting:
             median_rng_idx = np.argsort(curr_percvar_rng_comp)
             for iii, idx in enumerate([0,len(curr_percvar_rng_comp)//4,len(curr_percvar_rng_comp)//2,3*len(curr_percvar_rng_comp)//4,len(curr_percvar_rng_comp)-1]):
                 MovingAverages.temporal_mov_avg_randomization_example_protein(curr_pol, curr_comp_norm, curr_comp_perm[median_rng_idx[idx]], 
@@ -229,36 +231,37 @@ def cell_cycle_dependence_protein(u_well_plates, wp_ensg, wp_ab, use_log_ccd, do
         windows = np.asarray([np.arange(start, start + WINDOW) for start in np.arange(len(curr_pol) - WINDOW + 1)])
         mvperc_comp = MovingAverages.mvpercentiles(curr_ab_norm[windows])
         mvperc_comps.append(mvperc_comp)
-        MovingAverages.temporal_mov_avg_protein(curr_pol, curr_ab_norm, 
-            mvavg_xvals, mvavg_comp, mvperc_comp, None, folder, fileprefixes[i])
-        
-        # Make the plots for microtubules (takes 10 mins for all, so just do the arbitrary one in the Fig 1)
-        if well == "C07_55405991":
-            mvperc_mt =  MovingAverages.mvpercentiles(curr_mt_cell_norm[windows])
-            MovingAverages.temporal_mov_avg_protein(curr_pol, curr_mt_cell_norm, 
-                    mvavg_xvals, mvavg_mt, mvperc_mt, None, folder_mt, f"{fileprefixes[i]}_mt")
-            if well == "C07_55405991": # keep here in case making all pseudotime plots
-                pd.DataFrame({
-                    "ENSG" : wp_ensg[i],
-                    "Antibody" : wp_ab[i],
-                    "Compartment" : "Cell",
-                    "CCD" : "No",
-                    "cell_pseudotime" : [",".join([str(ppp) for ppp in pp]) for pp in [curr_pol]],
-                    "cell_intensity" : [",".join([str(yyy) for yyy in yy]) for yy in [curr_mt_cell_norm]],
-                    "mvavg_x" : [",".join([str(xxx) for xxx in xx]) for xx in [mvavg_xvals]],
-                    "mvavg_y" : [",".join([str(yyy) for yyy in yy]) for yy in [mvavg_mt]],
-                    "mvavgs_10p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[0]]],
-                    "mvavgs_90p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[-1]]],
-                    "mvavgs_25p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[1]]],
-                    "mvavgs_75p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[-2]]],
-                    "phase" : [",".join(pp) for pp in [curr_mockbulk_phase]],
-                    "WellPlate" : u_well_plates[i]}).to_csv(
-                        "output/mtplottingline.tsv", index=False, sep="\t")
-        
-        # Make the plots for each bimodal protein
-        if clusters is not None:
+        if do_plotting:
             MovingAverages.temporal_mov_avg_protein(curr_pol, curr_ab_norm, 
-                mvavg_xvals, mvavg_comp, mvperc_comp, clusters, folder, fileprefixes[i] + "_clust1&2")
+                mvavg_xvals, mvavg_comp, mvperc_comp, None, folder, fileprefixes[i])
+            
+            # Make the plots for microtubules (takes 10 mins for all, so just do the arbitrary one in the Fig 1)
+            if well == "C07_55405991":
+                mvperc_mt =  MovingAverages.mvpercentiles(curr_mt_cell_norm[windows])
+                MovingAverages.temporal_mov_avg_protein(curr_pol, curr_mt_cell_norm, 
+                        mvavg_xvals, mvavg_mt, mvperc_mt, None, folder_mt, f"{fileprefixes[i]}_mt")
+                if well == "C07_55405991": # keep here in case making all pseudotime plots
+                    pd.DataFrame({
+                        "ENSG" : wp_ensg[i],
+                        "Antibody" : wp_ab[i],
+                        "Compartment" : "Cell",
+                        "CCD" : "No",
+                        "cell_pseudotime" : [",".join([str(ppp) for ppp in pp]) for pp in [curr_pol]],
+                        "cell_intensity" : [",".join([str(yyy) for yyy in yy]) for yy in [curr_mt_cell_norm]],
+                        "mvavg_x" : [",".join([str(xxx) for xxx in xx]) for xx in [mvavg_xvals]],
+                        "mvavg_y" : [",".join([str(yyy) for yyy in yy]) for yy in [mvavg_mt]],
+                        "mvavgs_10p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[0]]],
+                        "mvavgs_90p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[-1]]],
+                        "mvavgs_25p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[1]]],
+                        "mvavgs_75p" : [",".join([str(yyy) for yyy in yy]) for yy in [mvperc_mt[-2]]],
+                        "phase" : [",".join(pp) for pp in [curr_mockbulk_phase]],
+                        "WellPlate" : u_well_plates[i]}).to_csv(
+                            "output/mtplottingline.tsv", index=False, sep="\t")
+        
+            # Make the plots for each bimodal protein
+            if clusters is not None:
+                MovingAverages.temporal_mov_avg_protein(curr_pol, curr_ab_norm, 
+                    mvavg_xvals, mvavg_comp, mvperc_comp, clusters, folder, fileprefixes[i] + "_clust1&2")
         
     alpha_ccd = 0.01
     perc_var_cell, perc_var_nuc, perc_var_cyto, perc_var_mt = np.array(perc_var_cell),np.array(perc_var_nuc),np.array(perc_var_cyto),np.array(perc_var_mt) # percent variance attributed to cell cycle (mean POI intensities)
@@ -323,28 +326,29 @@ def cell_cycle_dependence_protein(u_well_plates, wp_ensg, wp_ab, use_log_ccd, do
     
     wp_ccd_unibimodal = wp_comp_ccd_difffromrng | wp_comp_ccd_clust1 | wp_comp_ccd_clust2
 
-    plt.figure(figsize=(10,10))
-    plt.scatter(perc_var_comp_withbimodal, mean_diff_from_rng_withbimodal, c=wp_comp_ccd_difffromrng_withbimodal, cmap="bwr_r")
-    plt.hlines(MIN_MEAN_PERCVAR_DIFF_FROM_RANDOM, np.min(perc_var_comp), np.max(perc_var_comp), color="gray")
-    plt.xlabel("Percent Variance Explained by Cell Cycle")
-    plt.ylabel("Mean Difference from Random")
-    plt.savefig("figures/MedianDiffFromRandom.png")
-    plt.savefig("figures/MedianDiffFromRandom.pdf")
-    # plt.show()
-    plt.close()
-    
-    pervar_adj_withbimodal_nextafter = np.nextafter(wp_comp_eq_percvar_adj_withbimodal, wp_comp_eq_percvar_adj_withbimodal + 1)
-    plt.figure(figsize=(10,10))
-    plt.scatter(mean_diff_from_rng_withbimodal, -np.log10(pervar_adj_withbimodal_nextafter), c=wp_comp_ccd_difffromrng_withbimodal, cmap="bwr_r")
-    plt.vlines(MIN_MEAN_PERCVAR_DIFF_FROM_RANDOM, 
-               np.min(-np.log10(pervar_adj_withbimodal_nextafter)), 
-               np.max(-np.log10(pervar_adj_withbimodal_nextafter)), color="gray")
-    plt.xlabel("Mean Difference from Random")
-    plt.ylabel("-log10 adj p-value from randomization")
-    plt.savefig("figures/MedianDiffFromRandomVolcano.png")
-    plt.savefig("figures/MedianDiffFromRandomVolcano.pdf")
-    # plt.show()
-    plt.close()    
+    if do_plotting:
+        plt.figure(figsize=(10,10))
+        plt.scatter(perc_var_comp_withbimodal, mean_diff_from_rng_withbimodal, c=wp_comp_ccd_difffromrng_withbimodal, cmap="bwr_r")
+        plt.hlines(MIN_MEAN_PERCVAR_DIFF_FROM_RANDOM, np.min(perc_var_comp), np.max(perc_var_comp), color="gray")
+        plt.xlabel("Percent Variance Explained by Cell Cycle")
+        plt.ylabel("Mean Difference from Random")
+        plt.savefig("figures/MedianDiffFromRandom.png")
+        plt.savefig("figures/MedianDiffFromRandom.pdf")
+        # plt.show()
+        plt.close()
+        
+        pervar_adj_withbimodal_nextafter = np.nextafter(wp_comp_eq_percvar_adj_withbimodal, wp_comp_eq_percvar_adj_withbimodal + 1)
+        plt.figure(figsize=(10,10))
+        plt.scatter(mean_diff_from_rng_withbimodal, -np.log10(pervar_adj_withbimodal_nextafter), c=wp_comp_ccd_difffromrng_withbimodal, cmap="bwr_r")
+        plt.vlines(MIN_MEAN_PERCVAR_DIFF_FROM_RANDOM, 
+                   np.min(-np.log10(pervar_adj_withbimodal_nextafter)), 
+                   np.max(-np.log10(pervar_adj_withbimodal_nextafter)), color="gray")
+        plt.xlabel("Mean Difference from Random")
+        plt.ylabel("-log10 adj p-value from randomization")
+        plt.savefig("figures/MedianDiffFromRandomVolcano.png")
+        plt.savefig("figures/MedianDiffFromRandomVolcano.pdf")
+        # plt.show()
+        plt.close()    
     
     return (wp_comp_ccd_difffromrng, mean_diff_from_rng_mt, wp_comp_ccd_clust1, wp_comp_ccd_clust2, wp_ccd_unibimodal, 
         wp_comp_ccd_gauss, perc_var_comp, mean_diff_from_rng, wp_comp_eq_percvar_adj, 
diff --git a/SingleCellProteogenomics/RNACellCycleDependence.py b/SingleCellProteogenomics/RNACellCycleDependence.py
index 26ab88b..df9e02b 100644
--- a/SingleCellProteogenomics/RNACellCycleDependence.py
+++ b/SingleCellProteogenomics/RNACellCycleDependence.py
@@ -23,7 +23,7 @@
 
 np.random.seed(0) # Get the same results each time
 WINDOW = 100 # Number of points for moving average window for protein analysis
-PERMUTATIONS = 100#10000 # Number of permutations used for randomization analysis
+PERMUTATIONS = 10000 # Number of permutations used for randomization analysis
 PERMUTATIONS_ISOFORMS = 100
 MIN_MEAN_PERCVAR_DIFF_FROM_RANDOM = 0.08 # Cutoff used for percent additional variance explained by the cell cycle than random
 
@@ -292,7 +292,7 @@ def compare_genes_to_isoforms(adata, ccdprotein, ccdtranscript, adata_nonccdprot
     print(f"{sum(ccdIsoformsPerCcdProtein > 0) / sum(ccdprotein)}% of CCD proteins had at least one CCD transcript isoform")
     print(f"{sum(ccdIsoformsPerNonCcdProtein > 0) / sum(adata_nonccdprotein)}% of non-CCD proteins had at least one CCD transcript isoform")
     
-def analyze_isoforms(adata, ccdtranscript, wp_ensg, ccd_comp, nonccd_comp, u_plates):
+def analyze_isoforms(adata, ccdtranscript, wp_ensg, ccd_comp, nonccd_comp, u_plates, make_mvavg_plots_isoforms=False):
     '''Analyze the isoform-level results over the cell cycle'''
     # Read in the data & QC analysis
     valuetype, use_spikeins, biotype_to_use = "Tpms", False, "protein_coding"
@@ -307,7 +307,7 @@ def analyze_isoforms(adata, ccdtranscript, wp_ensg, ccd_comp, nonccd_comp, u_pla
     bioccd = np.genfromtxt("input/ProteinData/BiologicallyDefinedCCD.txt", dtype='str') # from mitotic structures in the protein work
     ccd_regev_filtered_isoform, ccd_filtered_isoform, nonccd_filtered_isoform = utils.ccd_gene_lists(adata_isoform)
     adata_ccdprotein_isoform, adata_nonccdprotein_isoform, adata_regevccdgenes_isoform = RNADataPreparation.is_ccd(adata_isoform, wp_ensg, ccd_comp, nonccd_comp, bioccd, ccd_regev_filtered_isoform)
-    rna_ccd_analysis_results = analyze_ccd_variation_by_mvavg_rna(adata_isoform, wp_ensg, ccd_comp, bioccd, adata_nonccdprotein_isoform, adata_regevccdgenes_isoform, biotype_to_use, True)
+    rna_ccd_analysis_results = analyze_ccd_variation_by_mvavg_rna(adata_isoform, wp_ensg, ccd_comp, bioccd, adata_nonccdprotein_isoform, adata_regevccdgenes_isoform, biotype_to_use, use_isoforms=True, make_mvavg_plots_isoforms=make_mvavg_plots_isoforms)
     percent_ccd_variance_isoform, total_gini_isoform, mean_diff_from_rng_isoform, pass_meandiff_isoform, eq_percvar_adj_isoform, fucci_time_inds_isoform, norm_exp_sort_isoform, moving_averages_isoform, mvavg_xvals_isoform, perms_isoform, ccdtranscript_isoform, ccdprotein_isoform, mvpercs_isoform = rna_ccd_analysis_results    
     return adata_isoform, ccdtranscript_isoform
 
diff --git a/SingleCellProteogenomics/RNADataPreparation.py b/SingleCellProteogenomics/RNADataPreparation.py
index 31f3aea..48741aa 100644
--- a/SingleCellProteogenomics/RNADataPreparation.py
+++ b/SingleCellProteogenomics/RNADataPreparation.py
@@ -117,8 +117,10 @@ def qc_filtering(adata, do_log_normalize, do_remove_blob):
     if do_log_normalize: sc.pp.log1p(adata)
     louvain_file="input/RNAData/louvain_original.npy"
     louvain = np.load(louvain_file, allow_pickle=True)
-    adata.obs["original_louvain_wp"] = louvain[:,0]
-    adata.obs["original_louvain"] = louvain[:,1]
+    original_louvain_wp = louvain[:,0]
+    keepThese = np.isin(original_louvain_wp, adata.obs["Well_Plate"]) # can have slightly different number of cells
+    adata.obs["original_louvain_wp"] = louvain[:,0][keepThese]
+    adata.obs["original_louvain"] = louvain[:,1][keepThese]
     if do_remove_blob: 
         removeThese = np.isin(adata.obs["Well_Plate"], adata.obs["original_louvain_wp"][adata.obs["original_louvain"] == "5"])
         adata = adata[~removeThese,:]
@@ -201,8 +203,8 @@ def analyze_noncycling_cells(u_plates):
         for md in mindists:
             sc.pp.neighbors(adata, n_neighbors=nn, n_pcs=40)
             sc.tl.umap(adata, min_dist=md)
-            sc.pl.umap(adata, color="louvain", show=False, save=f"louvain_clusters_before_nn{nn}_md{md}.pdf")
-    sc.tl.rank_genes_groups(adata, groupby="louvain")
+            sc.pl.umap(adata, color="original_louvain", show=False, save=f"louvain_clusters_before_nn{nn}_md{md}.pdf")
+    sc.tl.rank_genes_groups(adata, groupby="original_louvain")
     p_blob=[a[5] for a in adata.uns["rank_genes_groups"]["pvals_adj"]]
     p_blob_sig = np.array(p_blob) < 0.01
     ensg_blob_sig=np.array([a[5] for a in adata.uns["rank_genes_groups"]["names"]])[p_blob_sig]
@@ -217,7 +219,7 @@ def analyze_noncycling_cells(u_plates):
         for md in mindists:
             sc.pp.neighbors(adata, n_neighbors=nn, n_pcs=40)
             sc.tl.umap(adata, min_dist=md)
-            sc.pl.umap(adata, color="louvain", show=False, save=f"louvain_clusters_after_nn{nn}_md{md}.pdf")
+            sc.pl.umap(adata, color="original_louvain", show=False, save=f"louvain_clusters_after_nn{nn}_md{md}.pdf")
 
 def demonstrate_umap_cycle_without_ccd(adata):
     '''
diff --git a/SingleCellProteogenomics/RNAVelocity.py b/SingleCellProteogenomics/RNAVelocity.py
index dbbbd94..dc07ee7 100644
--- a/SingleCellProteogenomics/RNAVelocity.py
+++ b/SingleCellProteogenomics/RNAVelocity.py
@@ -15,8 +15,8 @@
 plt.rcParams['pdf.fonttype'], plt.rcParams['ps.fonttype'], plt.rcParams['savefig.dpi'] = 42, 42, 300 #Make PDF text readable
 plt.rcParams['figure.figsize'] = (10, 10)
 
-def analyze_rna_velocity(adata_withblob, mean_diff_from_rng):
-    adata_blobless = adata_withblob[adata_withblob.obs["louvain"] != "5",:]
+def analyze_rna_velocity(adata_withblob, mean_diff_from_rng, do_all_plotting):
+    adata_blobless = adata_withblob[adata_withblob.obs["original_louvain"] != "5",:]
     
     # Make the chosen cutoff UMAP
     cutoff=0.08
@@ -35,6 +35,9 @@ def analyze_rna_velocity(adata_withblob, mean_diff_from_rng):
         "y_umap" : [x[1] for x in adata_withCutoff.obsm['velocity_umap']]}).to_csv(
             f"figures/velocityStreamUmap{cutoff}CCD.tsv", index=False, sep="\t")
     
+    if not do_all_plotting:
+        return
+            
     # Profile different cutoffs for CCD and non-CCD, UMAP and tSNE to evaluate params and consistency
     for cutoff in (np.arange(20) + 1) / 100:
         adata_withCutoff = adata_blobless[:,mean_diff_from_rng <= cutoff]

From 446eb386bb55e12c99fe6f8692e58c5b6ffd3b9a Mon Sep 17 00:00:00 2001
From: acesnik <anthony.cesnik@gmail.com>
Date: Mon, 26 Jul 2021 16:30:46 +0200
Subject: [PATCH 2/2] make the quicker option an option

---
 2_ProteinFucciPsuedotime.py | 552 ++++++++++++++++++------------------
 3_RNAFucciPseudotime.py     | 348 ++++++++++++-----------
 2 files changed, 458 insertions(+), 442 deletions(-)

diff --git a/2_ProteinFucciPsuedotime.py b/2_ProteinFucciPsuedotime.py
index 7ab644f..dc3e99c 100644
--- a/2_ProteinFucciPsuedotime.py
+++ b/2_ProteinFucciPsuedotime.py
@@ -16,307 +16,315 @@
 import pandas as pd
 import numpy as np
 import scipy
+import argparse
 
-ALL_PLOTS_AND_ANALYSES = False
-
-# Make PDF text readable
-plt.rcParams["pdf.fonttype"] = 42
-plt.rcParams["ps.fonttype"] = 42
-plt.rcParams["savefig.dpi"] = 300
-
-#%% Read in the protein data
-import_dict = Loaders.load_protein_fucci_pseudotime()
-u_plate, well_plate, well_plate_imgnb, well_plate_imgnb_objnb, u_well_plates = (
-    import_dict["u_plate"],
-    import_dict["well_plate"],
-    import_dict["well_plate_imgnb"],
-    import_dict["well_plate_imgnb_objnb"],
-    import_dict["u_well_plates"],
-)
-ab_nuc, ab_cyto, ab_cell, mt_cell = (
-    import_dict["ab_nuc"],
-    import_dict["ab_cyto"],
-    import_dict["ab_cell"],
-    import_dict["mt_cell"],
-)
-area_cell, area_nuc = import_dict["area_cell"], import_dict["area_nuc"]
-wp_ensg, wp_ab = import_dict["wp_ensg"], import_dict["wp_ab"]
-green_fucci, red_fucci = import_dict["green_fucci"], import_dict["red_fucci"]
-log_green_fucci_zeroc, log_red_fucci_zeroc = (
-    import_dict["log_green_fucci_zeroc"],
-    import_dict["log_red_fucci_zeroc"],
-)
-log_green_fucci_zeroc_rescale, log_red_fucci_zeroc_rescale = (
-    import_dict["log_green_fucci_zeroc_rescale"],
-    import_dict["log_red_fucci_zeroc_rescale"],
-)
-wp_comp_kruskal_gaussccd_adj, wp_pass_kruskal_gaussccd_bh_comp = (
-    import_dict["wp_comp_kruskal_gaussccd_adj"],
-    import_dict["wp_pass_kruskal_gaussccd_bh_comp"],
-)
-fucci_data = import_dict["fucci_data"]
-wp_iscell, wp_isnuc, wp_iscyto = (
-    import_dict["wp_iscell"],
-    import_dict["wp_isnuc"],
-    import_dict["wp_iscyto"],
-)
-curr_wp_phases, mockbulk_phases = (
-    import_dict["curr_wp_phases"],
-    import_dict["mockbulk_phases"],
-)
-
-#%%
-# Idea: Calculate the polar coordinates and other stuff
-# Exec: Devin's calculations
-# Output: fucci plot with polar coordinates
-pseudotime_result = FucciPseudotime.pseudotime_protein(
-    fucci_data,
-    ab_nuc,
-    ab_cyto,
-    ab_cell,
-    mt_cell,
-    area_cell,
-    area_nuc,
-    well_plate,
-    well_plate_imgnb,
-    well_plate_imgnb_objnb,
-    log_red_fucci_zeroc_rescale,
-    log_green_fucci_zeroc_rescale,
-    mockbulk_phases,
-)
-pol_sort_well_plate, pol_sort_norm_rev, pol_sort_well_plate_imgnb, pol_sort_well_plate_imgnb_objnb, pol_sort_ab_nuc, pol_sort_ab_cyto, pol_sort_ab_cell, pol_sort_mt_cell, pol_sort_area_cell, pol_sort_area_nuc, pol_sort_fred, pol_sort_fgreen, pol_sort_mockbulk_phases = (
-    pseudotime_result
-)
-
-#%% Calculate measures of variance of protein abundance in single cells
-# Idea: Calculate measures of variance, and show them in plots
-# Execution: Now that we already have the data filtered for variability, this is just descriptive.
-# Output: scatters of antibody vs microtubule variances by different measures of variaibility
-
-# toggle for using log-transformed intensities
-# we decided to use natural intensities
-use_log = False
-calculate_variaton_result = ProteinVariability.calculate_variation(
-    use_log,
-    u_well_plates,
-    wp_iscell,
-    wp_isnuc,
-    wp_iscyto,
-    pol_sort_well_plate,
-    pol_sort_ab_cell,
-    pol_sort_ab_nuc,
-    pol_sort_ab_cyto,
-    pol_sort_mt_cell,
-    pol_sort_well_plate_imgnb,
-)
-mean_mean_comp, var_comp, gini_comp, cv_comp, var_cell, gini_cell, cv_cell, var_mt, gini_mt, cv_mt = (
-    calculate_variaton_result
-)
-
-# Compare variances for protein and microtubules, the internal control for each image
-if ALL_PLOTS_AND_ANALYSES:
-    removeThese = pd.read_csv("input/ProteinData/ReplicatesToRemove.txt", header=None)[0] # make these independent samples for one-sided Kruskal-Wallis tests
-    utils.general_boxplot(
-        (var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)]),
-        ("Protein", "Microtubules"),
-        "",
-        "Variance",
-        "",
-        False,
-        f"figures/ProteinMicrotubuleVariances.pdf",
+if __name__ == "__main__":
+    description = "Single cell proteogenomic analysis script -- protein cell cycle dependence"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument(
+        "--quicker", action="store_true", help="Skip some plotting and analyses, namely for HPA releases."
     )
-    utils.general_boxplot(
-        (cv_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
-        ("Protein", "Microtubules"),
-        "",
-        "CV",
-        "",
-        False,
-        f"figures/ProteinMicrotubuleCVs.pdf",
+    args = parser.parse_args()
+    doAllPlotsAndAnalyses = not args.quicker
+    
+    # Make PDF text readable
+    plt.rcParams["pdf.fonttype"] = 42
+    plt.rcParams["ps.fonttype"] = 42
+    plt.rcParams["savefig.dpi"] = 300
+    
+    #%% Read in the protein data
+    import_dict = Loaders.load_protein_fucci_pseudotime()
+    u_plate, well_plate, well_plate_imgnb, well_plate_imgnb_objnb, u_well_plates = (
+        import_dict["u_plate"],
+        import_dict["well_plate"],
+        import_dict["well_plate_imgnb"],
+        import_dict["well_plate_imgnb_objnb"],
+        import_dict["u_well_plates"],
     )
-    utils.general_boxplot(
-        (gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
-        ("Protein", "Microtubules"),
-        "",
-        "Gini",
-        "",
-        False,
-        f"figures/ProteinMicrotubuleGinis.pdf",
+    ab_nuc, ab_cyto, ab_cell, mt_cell = (
+        import_dict["ab_nuc"],
+        import_dict["ab_cyto"],
+        import_dict["ab_cell"],
+        import_dict["mt_cell"],
     )
-    p_varProt_varMt = 2*scipy.stats.kruskal(var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)])[1]
-    p_cvProt_cvMt = 2*scipy.stats.kruskal(cv_comp[~np.isin(u_well_plates, removeThese)], cv_mt[~np.isin(u_well_plates, removeThese)])[1]
-    p_giniProt_giniMt = 2*scipy.stats.kruskal(gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)])[1]
-    print(
-        f"{p_varProt_varMt}: p-value for difference between protein and microtubule variances"
+    area_cell, area_nuc = import_dict["area_cell"], import_dict["area_nuc"]
+    wp_ensg, wp_ab = import_dict["wp_ensg"], import_dict["wp_ab"]
+    green_fucci, red_fucci = import_dict["green_fucci"], import_dict["red_fucci"]
+    log_green_fucci_zeroc, log_red_fucci_zeroc = (
+        import_dict["log_green_fucci_zeroc"],
+        import_dict["log_red_fucci_zeroc"],
     )
-    print(
-        f"{p_cvProt_cvMt}: p-value for difference between protein and microtubule CVs"
+    log_green_fucci_zeroc_rescale, log_red_fucci_zeroc_rescale = (
+        import_dict["log_green_fucci_zeroc_rescale"],
+        import_dict["log_red_fucci_zeroc_rescale"],
     )
-    print(
-        f"{p_giniProt_giniMt}: p-value for difference between protein and microtubule Gini indices"
+    wp_comp_kruskal_gaussccd_adj, wp_pass_kruskal_gaussccd_bh_comp = (
+        import_dict["wp_comp_kruskal_gaussccd_adj"],
+        import_dict["wp_pass_kruskal_gaussccd_bh_comp"],
     )
-
-#%% Gaussian clustering to identify biomodal intensity distributions
-bimodal_results = ProteinBimodality.identify_bimodal_intensity_distributions(
-    u_well_plates,
-    wp_ensg,
-    pol_sort_well_plate,
-    pol_sort_norm_rev,
-    pol_sort_ab_cell,
-    pol_sort_ab_nuc,
-    pol_sort_ab_cyto,
-    pol_sort_mt_cell,
-    wp_iscell,
-    wp_isnuc,
-    wp_iscyto,
-    ALL_PLOTS_AND_ANALYSES
-)
-wp_isbimodal_fcpadj_pass, wp_bimodal_cluster_idxs, wp_isbimodal_generally, wp_bimodal_fcmaxmin = (
-    bimodal_results
-)
-
-#%% Determine cell cycle dependence for each protein
-use_log_ccd = False
-do_remove_outliers = True
-alphaa = 0.05
-
-# Determine cell cycle dependence for proteins
-ccd_results = ProteinCellCycleDependence.cell_cycle_dependence_protein(
-    u_well_plates,
-    wp_ensg,
-    wp_ab,
-    use_log_ccd,
-    do_remove_outliers,
-    pol_sort_well_plate,
-    pol_sort_norm_rev,
-    pol_sort_ab_cell,
-    pol_sort_ab_nuc,
-    pol_sort_ab_cyto,
-    pol_sort_mt_cell,
-    pol_sort_fred,
-    pol_sort_fgreen,
-    pol_sort_mockbulk_phases,
-    pol_sort_area_cell,
-    pol_sort_area_nuc,
-    pol_sort_well_plate_imgnb,
-    wp_iscell,
-    wp_isnuc,
-    wp_iscyto,
-    wp_isbimodal_fcpadj_pass,
-    wp_bimodal_cluster_idxs,
-    wp_comp_kruskal_gaussccd_adj,
-    ALL_PLOTS_AND_ANALYSES
-)
-wp_comp_ccd_difffromrng, mean_diff_from_rng_mt, wp_comp_ccd_clust1, wp_comp_ccd_clust2, wp_ccd_unibimodal, wp_comp_ccd_gauss, perc_var_comp, mean_diff_from_rng, wp_comp_eq_percvar_adj, mean_diff_from_rng_clust1, wp_comp_eq_percvar_adj_clust1, mean_diff_from_rng_clust2, wp_comp_eq_percvar_adj_clust2, mvavgs_x, mvavgs_comp, curr_pols, curr_ab_norms, mvperc_comps, curr_freds, curr_fgreens, curr_mockbulk_phases, curr_area_cell, curr_ab_nuc, curr_well_plate_imgnb, folder = (
-    ccd_results
-)
-
-# Move the temporal average plots to more informative places
-if ALL_PLOTS_AND_ANALYSES:
-    ProteinCellCycleDependence.copy_mvavg_plots_protein(
-        folder,
-        wp_ensg,
-        wp_comp_ccd_difffromrng,
-        wp_isbimodal_fcpadj_pass,
-        wp_comp_ccd_clust1,
-        wp_comp_ccd_clust2,
-        wp_ccd_unibimodal,
-        wp_comp_ccd_gauss,
+    fucci_data = import_dict["fucci_data"]
+    wp_iscell, wp_isnuc, wp_iscyto = (
+        import_dict["wp_iscell"],
+        import_dict["wp_isnuc"],
+        import_dict["wp_iscyto"],
+    )
+    curr_wp_phases, mockbulk_phases = (
+        import_dict["curr_wp_phases"],
+        import_dict["mockbulk_phases"],
+    )
+    
+    #%%
+    # Idea: Calculate the polar coordinates and other stuff
+    # Exec: Devin's calculations
+    # Output: fucci plot with polar coordinates
+    pseudotime_result = FucciPseudotime.pseudotime_protein(
+        fucci_data,
+        ab_nuc,
+        ab_cyto,
+        ab_cell,
+        mt_cell,
+        area_cell,
+        area_nuc,
+        well_plate,
+        well_plate_imgnb,
+        well_plate_imgnb_objnb,
+        log_red_fucci_zeroc_rescale,
+        log_green_fucci_zeroc_rescale,
+        mockbulk_phases,
+    )
+    pol_sort_well_plate, pol_sort_norm_rev, pol_sort_well_plate_imgnb, pol_sort_well_plate_imgnb_objnb, pol_sort_ab_nuc, pol_sort_ab_cyto, pol_sort_ab_cell, pol_sort_mt_cell, pol_sort_area_cell, pol_sort_area_nuc, pol_sort_fred, pol_sort_fgreen, pol_sort_mockbulk_phases = (
+        pseudotime_result
     )
-    ProteinCellCycleDependence.global_plots_protein(
-        alphaa,
+    
+    #%% Calculate measures of variance of protein abundance in single cells
+    # Idea: Calculate measures of variance, and show them in plots
+    # Execution: Now that we already have the data filtered for variability, this is just descriptive.
+    # Output: scatters of antibody vs microtubule variances by different measures of variaibility
+    
+    # toggle for using log-transformed intensities
+    # we decided to use natural intensities
+    use_log = False
+    calculate_variaton_result = ProteinVariability.calculate_variation(
+        use_log,
         u_well_plates,
-        wp_ccd_unibimodal,
-        perc_var_comp,
-        mean_mean_comp,
-        gini_comp,
-        cv_comp,
-        mean_diff_from_rng,
-        wp_comp_eq_percvar_adj,
-        wp_comp_kruskal_gaussccd_adj,
+        wp_iscell,
+        wp_isnuc,
+        wp_iscyto,
+        pol_sort_well_plate,
+        pol_sort_ab_cell,
+        pol_sort_ab_nuc,
+        pol_sort_ab_cyto,
+        pol_sort_mt_cell,
+        pol_sort_well_plate_imgnb,
     )
-
-# Analyze the CCD results and save them
-ccd_comp, nonccd_comp, bioccd = ProteinCellCycleDependence.analyze_ccd_variation_protein(
-    folder,
-    u_well_plates,
-    wp_ensg,
-    wp_ab,
-    wp_iscell,
-    wp_isnuc,
-    wp_iscyto,
-    wp_comp_ccd_difffromrng,
-    wp_comp_ccd_clust1,
-    wp_comp_ccd_clust2,
-    var_comp,
-    gini_comp,
-    perc_var_comp,
-    mean_diff_from_rng,
-    wp_comp_kruskal_gaussccd_adj,
-    wp_comp_eq_percvar_adj,
-    mean_diff_from_rng_clust1,
-    wp_comp_eq_percvar_adj_clust1,
-    mean_diff_from_rng_clust2,
-    wp_comp_eq_percvar_adj_clust2,
-    wp_isbimodal_fcpadj_pass,
-    wp_isbimodal_generally,
-    wp_ccd_unibimodal,
-    wp_bimodal_fcmaxmin,
-    wp_comp_ccd_gauss,
-)
-
-# Make a dataframe for plotting on the HPA website
-ProteinCellCycleDependence.make_plotting_dataframe(
-    wp_ensg,
-    wp_ab,
-    u_well_plates,
-    wp_iscell,
-    wp_iscyto,
-    wp_isnuc,
-    ccd_comp,
-    bioccd,
-    curr_pols,
-    curr_ab_norms,
-    curr_freds,
-    curr_fgreens,
-    curr_mockbulk_phases,
-    mvavgs_x,
-    mvavgs_comp,
-    mvperc_comps,
-    gini_comp,
-    perc_var_comp,
-)
-
-# Perform comparison to LASSO for finding CCD proteins
-if ALL_PLOTS_AND_ANALYSES:
-    ProteinCellCycleDependence.compare_to_lasso_analysis(
+    mean_mean_comp, var_comp, gini_comp, cv_comp, var_cell, gini_cell, cv_cell, var_mt, gini_mt, cv_mt = (
+        calculate_variaton_result
+    )
+    
+    # Compare variances for protein and microtubules, the internal control for each image
+    if doAllPlotsAndAnalyses:
+        removeThese = pd.read_csv("input/ProteinData/ReplicatesToRemove.txt", header=None)[0] # make these independent samples for one-sided Kruskal-Wallis tests
+        utils.general_boxplot(
+            (var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)]),
+            ("Protein", "Microtubules"),
+            "",
+            "Variance",
+            "",
+            False,
+            f"figures/ProteinMicrotubuleVariances.pdf",
+        )
+        utils.general_boxplot(
+            (cv_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
+            ("Protein", "Microtubules"),
+            "",
+            "CV",
+            "",
+            False,
+            f"figures/ProteinMicrotubuleCVs.pdf",
+        )
+        utils.general_boxplot(
+            (gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)]),
+            ("Protein", "Microtubules"),
+            "",
+            "Gini",
+            "",
+            False,
+            f"figures/ProteinMicrotubuleGinis.pdf",
+        )
+        p_varProt_varMt = 2*scipy.stats.kruskal(var_comp[~np.isin(u_well_plates, removeThese)], var_mt[~np.isin(u_well_plates, removeThese)])[1]
+        p_cvProt_cvMt = 2*scipy.stats.kruskal(cv_comp[~np.isin(u_well_plates, removeThese)], cv_mt[~np.isin(u_well_plates, removeThese)])[1]
+        p_giniProt_giniMt = 2*scipy.stats.kruskal(gini_comp[~np.isin(u_well_plates, removeThese)], gini_mt[~np.isin(u_well_plates, removeThese)])[1]
+        print(
+            f"{p_varProt_varMt}: p-value for difference between protein and microtubule variances"
+        )
+        print(
+            f"{p_cvProt_cvMt}: p-value for difference between protein and microtubule CVs"
+        )
+        print(
+            f"{p_giniProt_giniMt}: p-value for difference between protein and microtubule Gini indices"
+        )
+    
+    #%% Gaussian clustering to identify biomodal intensity distributions
+    bimodal_results = ProteinBimodality.identify_bimodal_intensity_distributions(
         u_well_plates,
-        pol_sort_norm_rev,
+        wp_ensg,
         pol_sort_well_plate,
+        pol_sort_norm_rev,
         pol_sort_ab_cell,
         pol_sort_ab_nuc,
         pol_sort_ab_cyto,
         pol_sort_mt_cell,
-        pol_sort_fred,
-        pol_sort_fgreen,
         wp_iscell,
         wp_isnuc,
         wp_iscyto,
-        wp_ensg,
-        ccd_comp,
+        doAllPlotsAndAnalyses
     )
+    wp_isbimodal_fcpadj_pass, wp_bimodal_cluster_idxs, wp_isbimodal_generally, wp_bimodal_fcmaxmin = (
+        bimodal_results
+    )
+    
+    #%% Determine cell cycle dependence for each protein
+    use_log_ccd = False
+    do_remove_outliers = True
+    alphaa = 0.05
     
-    # Generate UMAPs to illustrate cutoffs and stability
-    ProteinCellCycleDependence.generate_protein_umaps(
+    # Determine cell cycle dependence for proteins
+    ccd_results = ProteinCellCycleDependence.cell_cycle_dependence_protein(
         u_well_plates,
-        pol_sort_norm_rev,
+        wp_ensg,
+        wp_ab,
+        use_log_ccd,
+        do_remove_outliers,
         pol_sort_well_plate,
+        pol_sort_norm_rev,
         pol_sort_ab_cell,
         pol_sort_ab_nuc,
         pol_sort_ab_cyto,
         pol_sort_mt_cell,
+        pol_sort_fred,
+        pol_sort_fgreen,
+        pol_sort_mockbulk_phases,
+        pol_sort_area_cell,
+        pol_sort_area_nuc,
+        pol_sort_well_plate_imgnb,
+        wp_iscell,
+        wp_isnuc,
+        wp_iscyto,
+        wp_isbimodal_fcpadj_pass,
+        wp_bimodal_cluster_idxs,
+        wp_comp_kruskal_gaussccd_adj,
+        doAllPlotsAndAnalyses
+    )
+    wp_comp_ccd_difffromrng, mean_diff_from_rng_mt, wp_comp_ccd_clust1, wp_comp_ccd_clust2, wp_ccd_unibimodal, wp_comp_ccd_gauss, perc_var_comp, mean_diff_from_rng, wp_comp_eq_percvar_adj, mean_diff_from_rng_clust1, wp_comp_eq_percvar_adj_clust1, mean_diff_from_rng_clust2, wp_comp_eq_percvar_adj_clust2, mvavgs_x, mvavgs_comp, curr_pols, curr_ab_norms, mvperc_comps, curr_freds, curr_fgreens, curr_mockbulk_phases, curr_area_cell, curr_ab_nuc, curr_well_plate_imgnb, folder = (
+        ccd_results
+    )
+    
+    # Move the temporal average plots to more informative places
+    if doAllPlotsAndAnalyses:
+        ProteinCellCycleDependence.copy_mvavg_plots_protein(
+            folder,
+            wp_ensg,
+            wp_comp_ccd_difffromrng,
+            wp_isbimodal_fcpadj_pass,
+            wp_comp_ccd_clust1,
+            wp_comp_ccd_clust2,
+            wp_ccd_unibimodal,
+            wp_comp_ccd_gauss,
+        )
+        ProteinCellCycleDependence.global_plots_protein(
+            alphaa,
+            u_well_plates,
+            wp_ccd_unibimodal,
+            perc_var_comp,
+            mean_mean_comp,
+            gini_comp,
+            cv_comp,
+            mean_diff_from_rng,
+            wp_comp_eq_percvar_adj,
+            wp_comp_kruskal_gaussccd_adj,
+        )
+    
+    # Analyze the CCD results and save them
+    ccd_comp, nonccd_comp, bioccd = ProteinCellCycleDependence.analyze_ccd_variation_protein(
+        folder,
+        u_well_plates,
+        wp_ensg,
+        wp_ab,
         wp_iscell,
         wp_isnuc,
         wp_iscyto,
+        wp_comp_ccd_difffromrng,
+        wp_comp_ccd_clust1,
+        wp_comp_ccd_clust2,
+        var_comp,
+        gini_comp,
+        perc_var_comp,
         mean_diff_from_rng,
+        wp_comp_kruskal_gaussccd_adj,
+        wp_comp_eq_percvar_adj,
+        mean_diff_from_rng_clust1,
+        wp_comp_eq_percvar_adj_clust1,
+        mean_diff_from_rng_clust2,
+        wp_comp_eq_percvar_adj_clust2,
+        wp_isbimodal_fcpadj_pass,
+        wp_isbimodal_generally,
+        wp_ccd_unibimodal,
+        wp_bimodal_fcmaxmin,
+        wp_comp_ccd_gauss,
     )
+    
+    # Make a dataframe for plotting on the HPA website
+    ProteinCellCycleDependence.make_plotting_dataframe(
+        wp_ensg,
+        wp_ab,
+        u_well_plates,
+        wp_iscell,
+        wp_iscyto,
+        wp_isnuc,
+        ccd_comp,
+        bioccd,
+        curr_pols,
+        curr_ab_norms,
+        curr_freds,
+        curr_fgreens,
+        curr_mockbulk_phases,
+        mvavgs_x,
+        mvavgs_comp,
+        mvperc_comps,
+        gini_comp,
+        perc_var_comp,
+    )
+    
+    # Perform comparison to LASSO for finding CCD proteins
+    if doAllPlotsAndAnalyses:
+        ProteinCellCycleDependence.compare_to_lasso_analysis(
+            u_well_plates,
+            pol_sort_norm_rev,
+            pol_sort_well_plate,
+            pol_sort_ab_cell,
+            pol_sort_ab_nuc,
+            pol_sort_ab_cyto,
+            pol_sort_mt_cell,
+            pol_sort_fred,
+            pol_sort_fgreen,
+            wp_iscell,
+            wp_isnuc,
+            wp_iscyto,
+            wp_ensg,
+            ccd_comp,
+        )
+        
+        # Generate UMAPs to illustrate cutoffs and stability
+        ProteinCellCycleDependence.generate_protein_umaps(
+            u_well_plates,
+            pol_sort_norm_rev,
+            pol_sort_well_plate,
+            pol_sort_ab_cell,
+            pol_sort_ab_nuc,
+            pol_sort_ab_cyto,
+            pol_sort_mt_cell,
+            wp_iscell,
+            wp_isnuc,
+            wp_iscyto,
+            mean_diff_from_rng,
+        )
diff --git a/3_RNAFucciPseudotime.py b/3_RNAFucciPseudotime.py
index c91d3b9..c1e1cfe 100644
--- a/3_RNAFucciPseudotime.py
+++ b/3_RNAFucciPseudotime.py
@@ -18,189 +18,197 @@
 import matplotlib.pyplot as plt
 import shutil
 import os
+import argparse
 
-ALL_PLOTS_AND_ANALYSES = False
-
-# Make PDF text readable
-plt.rcParams["pdf.fonttype"] = 42
-plt.rcParams["ps.fonttype"] = 42
-plt.rcParams["savefig.dpi"] = 300
-
-bioccd = np.genfromtxt(
-    "input/ProteinData/BiologicallyDefinedCCD.txt", dtype="str"
-)  # from mitotic structures
-wp_ensg = np.load("output/pickles/wp_ensg.npy", allow_pickle=True)
-ccd_comp = np.load("output/pickles/ccd_comp.npy", allow_pickle=True)
-nonccd_comp = np.load("output/pickles/nonccd_comp.npy", allow_pickle=True)
-u_plates = ["355","356","357"]
-
-#%% Convert FACS intensities for FUCCI markers to pseudotime using the same polar coordinate methods as for protein
-# Idea: Use the polar coordinate pseudotime calculations to calculate the pseudotime for each cell
-# Execution: Adapt Devin's code for the cells sorted for RNA-Seq
-# Output: Make log-log fucci intensity plots for the cells analyzed by RNA-Seq; Plot of all fucci pseudotimes; table of pseudotimes for each cell
-adata, phases_filt = RNADataPreparation.read_counts_and_phases(
-    "Counts", False, "protein_coding", u_plates
-)
-adata = RNADataPreparation.zero_center_fucci(adata)
-FucciPseudotime.pseudotime_rna(adata)
-
-#%% Single cell RNA-Seq data preparation and general analysis
-if ALL_PLOTS_AND_ANALYSES:
-    RNADataPreparation.general_plots(u_plates)
-    RNADataPreparation.analyze_noncycling_cells(u_plates)
+if __name__ == "__main__":
+    description = "Single cell proteogenomic analysis script -- protein cell cycle dependence"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument(
+        "--quicker", action="store_true", help="Skip some plotting and analyses, namely for HPA releases."
+    )
+    args = parser.parse_args()
+    doAllPlotsAndAnalyses = not args.quicker
+    
+    # Make PDF text readable
+    plt.rcParams["pdf.fonttype"] = 42
+    plt.rcParams["ps.fonttype"] = 42
+    plt.rcParams["savefig.dpi"] = 300
+    
+    bioccd = np.genfromtxt(
+        "input/ProteinData/BiologicallyDefinedCCD.txt", dtype="str"
+    )  # from mitotic structures
+    wp_ensg = np.load("output/pickles/wp_ensg.npy", allow_pickle=True)
+    ccd_comp = np.load("output/pickles/ccd_comp.npy", allow_pickle=True)
+    nonccd_comp = np.load("output/pickles/nonccd_comp.npy", allow_pickle=True)
+    u_plates = ["355","356","357"]
+    
+    #%% Convert FACS intensities for FUCCI markers to pseudotime using the same polar coordinate methods as for protein
+    # Idea: Use the polar coordinate pseudotime calculations to calculate the pseudotime for each cell
+    # Execution: Adapt Devin's code for the cells sorted for RNA-Seq
+    # Output: Make log-log fucci intensity plots for the cells analyzed by RNA-Seq; Plot of all fucci pseudotimes; table of pseudotimes for each cell
+    adata, phases_filt = RNADataPreparation.read_counts_and_phases(
+        "Counts", False, "protein_coding", u_plates
+    )
+    adata = RNADataPreparation.zero_center_fucci(adata)
+    FucciPseudotime.pseudotime_rna(adata)
+    
+    #%% Single cell RNA-Seq data preparation and general analysis
+    if doAllPlotsAndAnalyses:
+        RNADataPreparation.general_plots(u_plates)
+        RNADataPreparation.analyze_noncycling_cells(u_plates)
+        adata, phasesfilt = RNADataPreparation.qc_filtering(
+            adata, do_log_normalize=True, do_remove_blob=False
+        )
+        adata = RNADataPreparation.zero_center_fucci(adata)
+        RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "BeforeRemovingNoncycling")
+    
+    #%% Idea: Similar to mock-bulk analysis for proteins, we can evaluate each gene bundled by phase across cells
+    # Execution: Make boxplots of RNA expression by phase
+    # Output: boxplots for each gene
+    valuetype, use_spikeins, biotype_to_use = "Tpms", False, "protein_coding"
+    adata, phases = RNADataPreparation.read_counts_and_phases(
+        valuetype, use_spikeins, biotype_to_use, u_plates
+    )
     adata, phasesfilt = RNADataPreparation.qc_filtering(
-        adata, do_log_normalize=True, do_remove_blob=False
+        adata, do_log_normalize=True, do_remove_blob=True
     )
     adata = RNADataPreparation.zero_center_fucci(adata)
-    RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "BeforeRemovingNoncycling")
-
-#%% Idea: Similar to mock-bulk analysis for proteins, we can evaluate each gene bundled by phase across cells
-# Execution: Make boxplots of RNA expression by phase
-# Output: boxplots for each gene
-valuetype, use_spikeins, biotype_to_use = "Tpms", False, "protein_coding"
-adata, phases = RNADataPreparation.read_counts_and_phases(
-    valuetype, use_spikeins, biotype_to_use, u_plates
-)
-adata, phasesfilt = RNADataPreparation.qc_filtering(
-    adata, do_log_normalize=True, do_remove_blob=True
-)
-adata = RNADataPreparation.zero_center_fucci(adata)
-
-if ALL_PLOTS_AND_ANALYSES:
-    RNADataPreparation.plot_markers_vs_reads(adata)
-    RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "AfterRemovingNoncycling")
-    g1 = adata.obs["phase"] == "G1"
-    s = adata.obs["phase"] == "S-ph"
-    g2 = adata.obs["phase"] == "G2M"
-    do_make_boxplots = False
-    if do_make_boxplots:
-        for iii, ensg in enumerate(adata.var_names):
-            maxtpm = np.max(
-                np.concatenate((adata.X[g1, iii], adata.X[s, iii], adata.X[g2, iii]))
-            )
-            RNACellCycleDependence.boxplot_result(
-                adata.X[g1, iii] / maxtpm,
-                adata.X[s, iii] / maxtpm,
-                adata.X[g2, iii] / maxtpm,
-                "figures/RNABoxplotByPhase",
-                ensg,
-            )
-
-#%% Idea: Display general RNA expression patterns in single cells using UMAP dimensionality reduction, and display with FUCCI pseudotime overlayed
-if ALL_PLOTS_AND_ANALYSES:
-    FucciPseudotime.pseudotime_umap(adata)  # Generate a UMAP with the pseudotime overlayed
-
-    # We can also show that the cycle pattern remains when the curated CCD genes or CCD proteins are removed,
-    # demonstrating that there's still valuable information about cell cycling beyond what was called CCD
-    RNADataPreparation.demonstrate_umap_cycle_without_ccd(adata)
-
-# Read in the curated CCD genes / CCD proteins from the present work / Non-CCD genes from the present work; filter for genes that weren't filtered in QC of RNA-Seq
-ccd_regev_filtered, ccd_filtered, nonccd_filtered = utils.ccd_gene_lists(adata)
-adata_ccdprotein, adata_nonccdprotein, adata_regevccdgenes = RNADataPreparation.is_ccd(
-    adata, wp_ensg, ccd_comp, nonccd_comp, bioccd, ccd_regev_filtered
-)
-
-# Generate plots with expression of genes overlayed
-expression_data = adata.X
-normalized_exp_data = (expression_data.T / np.max(expression_data, axis=0)[:, None]).T
-
-# Log-log FUCCI plot with RNA expression overlayed
-if ALL_PLOTS_AND_ANALYSES:
-    RNACellCycleDependence.plot_expression_facs(
-        adata,
-        wp_ensg[np.isin(wp_ensg, adata.var_names)],
-        normalized_exp_data,
-        "figures/GeneExpressionFucci",
+    
+    if doAllPlotsAndAnalyses:
+        RNADataPreparation.plot_markers_vs_reads(adata)
+        RNADataPreparation.plot_pca_for_batch_effect_analysis(adata, "AfterRemovingNoncycling")
+        g1 = adata.obs["phase"] == "G1"
+        s = adata.obs["phase"] == "S-ph"
+        g2 = adata.obs["phase"] == "G2M"
+        do_make_boxplots = False
+        if do_make_boxplots:
+            for iii, ensg in enumerate(adata.var_names):
+                maxtpm = np.max(
+                    np.concatenate((adata.X[g1, iii], adata.X[s, iii], adata.X[g2, iii]))
+                )
+                RNACellCycleDependence.boxplot_result(
+                    adata.X[g1, iii] / maxtpm,
+                    adata.X[s, iii] / maxtpm,
+                    adata.X[g2, iii] / maxtpm,
+                    "figures/RNABoxplotByPhase",
+                    ensg,
+                )
+    
+    #%% Idea: Display general RNA expression patterns in single cells using UMAP dimensionality reduction, and display with FUCCI pseudotime overlayed
+    if doAllPlotsAndAnalyses:
+        FucciPseudotime.pseudotime_umap(adata)  # Generate a UMAP with the pseudotime overlayed
+    
+        # We can also show that the cycle pattern remains when the curated CCD genes or CCD proteins are removed,
+        # demonstrating that there's still valuable information about cell cycling beyond what was called CCD
+        RNADataPreparation.demonstrate_umap_cycle_without_ccd(adata)
+    
+    # Read in the curated CCD genes / CCD proteins from the present work / Non-CCD genes from the present work; filter for genes that weren't filtered in QC of RNA-Seq
+    ccd_regev_filtered, ccd_filtered, nonccd_filtered = utils.ccd_gene_lists(adata)
+    adata_ccdprotein, adata_nonccdprotein, adata_regevccdgenes = RNADataPreparation.is_ccd(
+        adata, wp_ensg, ccd_comp, nonccd_comp, bioccd, ccd_regev_filtered
     )
-
-# Cluster the expression into phases and analyze it that way
-bulk_phase_tests = RNACellCycleDependence.analyze_ccd_variation_by_phase_rna(
-    adata, normalized_exp_data, biotype_to_use
-)
-# RNACellCycleDependence.plot_expression_boxplots(adata, wp_ensg[np.isin(wp_ensg, adata.var_names)], bulk_phase_tests, "figures/GeneExpressionBoxplots")
-# RNACellCycleDependence.plot_expression_umap(adata, wp_ensg[np.isin(wp_ensg, adata.var_names)], "figures/GeneExpressionUmap")
-
-#%% Moving average calculations and randomization analysis for RNA
-rna_ccd_analysis_results = RNACellCycleDependence.analyze_ccd_variation_by_mvavg_rna(
-    adata,
-    wp_ensg,
-    ccd_comp,
-    bioccd,
-    adata_nonccdprotein,
-    adata_regevccdgenes,
-    biotype_to_use,
-    make_mvavg_plots_isoforms=ALL_PLOTS_AND_ANALYSES,
-)
-percent_ccd_variance, total_gini, mean_diff_from_rng, pass_meandiff, eq_percvar_adj, fucci_time_inds, norm_exp_sort, moving_averages, mvavg_xvals, perms, ccdtranscript, ccdprotein, mvpercs = (
-    rna_ccd_analysis_results
-)
-
-RNACellCycleDependence.make_plotting_dataframe(
-    adata,
-    ccdtranscript,
-    wp_ensg,
-    bioccd,
-    norm_exp_sort[np.argsort(fucci_time_inds), :],
-    mvavg_xvals,
-    moving_averages,
-    mvpercs,
-)
-    
-if ALL_PLOTS_AND_ANALYSES:
-    RNACellCycleDependence.plot_umap_ccd_cutoffs(adata, mean_diff_from_rng)
-    RNACellCycleDependence.figures_ccd_analysis_rna(
+    
+    # Generate plots with expression of genes overlayed
+    expression_data = adata.X
+    normalized_exp_data = (expression_data.T / np.max(expression_data, axis=0)[:, None]).T
+    
+    # Log-log FUCCI plot with RNA expression overlayed
+    if doAllPlotsAndAnalyses:
+        RNACellCycleDependence.plot_expression_facs(
+            adata,
+            wp_ensg[np.isin(wp_ensg, adata.var_names)],
+            normalized_exp_data,
+            "figures/GeneExpressionFucci",
+        )
+    
+    # Cluster the expression into phases and analyze it that way
+    bulk_phase_tests = RNACellCycleDependence.analyze_ccd_variation_by_phase_rna(
+        adata, normalized_exp_data, biotype_to_use
+    )
+    # RNACellCycleDependence.plot_expression_boxplots(adata, wp_ensg[np.isin(wp_ensg, adata.var_names)], bulk_phase_tests, "figures/GeneExpressionBoxplots")
+    # RNACellCycleDependence.plot_expression_umap(adata, wp_ensg[np.isin(wp_ensg, adata.var_names)], "figures/GeneExpressionUmap")
+    
+    #%% Moving average calculations and randomization analysis for RNA
+    rna_ccd_analysis_results = RNACellCycleDependence.analyze_ccd_variation_by_mvavg_rna(
         adata,
-        percent_ccd_variance,
-        mean_diff_from_rng,
-        pass_meandiff,
-        eq_percvar_adj,
         wp_ensg,
         ccd_comp,
-        ccd_regev_filtered,
-    )
-    RNACellCycleDependence.plot_overall_and_ccd_variances(
-        adata,
-        biotype_to_use,
-        total_gini,
-        percent_ccd_variance,
-        pass_meandiff,
-        adata_ccdprotein,
+        bioccd,
         adata_nonccdprotein,
         adata_regevccdgenes,
+        biotype_to_use,
+        make_mvavg_plots_isoforms=doAllPlotsAndAnalyses,
     )
-    RNACellCycleDependence.compare_to_lasso_analysis(adata, ccdtranscript)
-    RNACellCycleDependence.analyze_cnv_calls(adata, ccdtranscript)
-
-#%% Moving average calculations and randomization analysis for the spike-in internal controls
-if ALL_PLOTS_AND_ANALYSES:
-    adata_spikeins, phases_spikeins = RNADataPreparation.read_counts_and_phases(
-        valuetype, use_spike_ins=True, biotype_to_use="", u_plates=u_plates
+    percent_ccd_variance, total_gini, mean_diff_from_rng, pass_meandiff, eq_percvar_adj, fucci_time_inds, norm_exp_sort, moving_averages, mvavg_xvals, perms, ccdtranscript, ccdprotein, mvpercs = (
+        rna_ccd_analysis_results
     )
-    sc.pp.filter_genes(adata_spikeins, min_cells=100)
-    print(f"data shape after filtering: {adata_spikeins.X.shape}")
     
-    RNACellCycleDependence.ccd_analysis_of_spikeins(adata_spikeins, perms)
-
-#%% Analyze RNA velocity
-valuetype, use_spikeins, biotype_to_use = "Tpms", False, "protein_coding"
-adata, phases = RNADataPreparation.read_counts_and_phases(
-    valuetype, use_spikeins, biotype_to_use, u_plates, load_velocities=True
-)
-adata_blobless, phasesfilt = RNADataPreparation.qc_filtering(
-    adata, do_log_normalize=True, do_remove_blob=False
-)
-RNAVelocity.analyze_rna_velocity(adata_blobless, mean_diff_from_rng, ALL_PLOTS_AND_ANALYSES)
-
-#%% Analyze isoforms
-if ALL_PLOTS_AND_ANALYSES:
-    adata_isoform, ccdtranscript_isoform = RNACellCycleDependence.analyze_isoforms(
-        adata, ccdtranscript, wp_ensg, ccd_comp, nonccd_comp, u_plates, make_mvavg_plots_isoforms=ALL_PLOTS_AND_ANALYSES
-    )
-    RNACellCycleDependence.compare_genes_to_isoforms(
+    RNACellCycleDependence.make_plotting_dataframe(
         adata,
-        ccdprotein,
         ccdtranscript,
-        adata_nonccdprotein,
-        adata_isoform,
-        ccdtranscript_isoform,
+        wp_ensg,
+        bioccd,
+        norm_exp_sort[np.argsort(fucci_time_inds), :],
+        mvavg_xvals,
+        moving_averages,
+        mvpercs,
     )
+        
+    if doAllPlotsAndAnalyses:
+        RNACellCycleDependence.plot_umap_ccd_cutoffs(adata, mean_diff_from_rng)
+        RNACellCycleDependence.figures_ccd_analysis_rna(
+            adata,
+            percent_ccd_variance,
+            mean_diff_from_rng,
+            pass_meandiff,
+            eq_percvar_adj,
+            wp_ensg,
+            ccd_comp,
+            ccd_regev_filtered,
+        )
+        RNACellCycleDependence.plot_overall_and_ccd_variances(
+            adata,
+            biotype_to_use,
+            total_gini,
+            percent_ccd_variance,
+            pass_meandiff,
+            adata_ccdprotein,
+            adata_nonccdprotein,
+            adata_regevccdgenes,
+        )
+        RNACellCycleDependence.compare_to_lasso_analysis(adata, ccdtranscript)
+        RNACellCycleDependence.analyze_cnv_calls(adata, ccdtranscript)
+    
+    #%% Moving average calculations and randomization analysis for the spike-in internal controls
+    if doAllPlotsAndAnalyses:
+        adata_spikeins, phases_spikeins = RNADataPreparation.read_counts_and_phases(
+            valuetype, use_spike_ins=True, biotype_to_use="", u_plates=u_plates
+        )
+        sc.pp.filter_genes(adata_spikeins, min_cells=100)
+        print(f"data shape after filtering: {adata_spikeins.X.shape}")
+        
+        RNACellCycleDependence.ccd_analysis_of_spikeins(adata_spikeins, perms)
+    
+    #%% Analyze RNA velocity
+    valuetype, use_spikeins, biotype_to_use = "Tpms", False, "protein_coding"
+    adata, phases = RNADataPreparation.read_counts_and_phases(
+        valuetype, use_spikeins, biotype_to_use, u_plates, load_velocities=True
+    )
+    adata_blobless, phasesfilt = RNADataPreparation.qc_filtering(
+        adata, do_log_normalize=True, do_remove_blob=False
+    )
+    RNAVelocity.analyze_rna_velocity(adata_blobless, mean_diff_from_rng, doAllPlotsAndAnalyses)
+    
+    #%% Analyze isoforms
+    if doAllPlotsAndAnalyses:
+        adata_isoform, ccdtranscript_isoform = RNACellCycleDependence.analyze_isoforms(
+            adata, ccdtranscript, wp_ensg, ccd_comp, nonccd_comp, u_plates, make_mvavg_plots_isoforms=doAllPlotsAndAnalyses
+        )
+        RNACellCycleDependence.compare_genes_to_isoforms(
+            adata,
+            ccdprotein,
+            ccdtranscript,
+            adata_nonccdprotein,
+            adata_isoform,
+            ccdtranscript_isoform,
+        )