From 234c224f1b40e06fe398843d55598cfaa5cd8721 Mon Sep 17 00:00:00 2001 From: Avanti Shrikumar Date: Tue, 2 Feb 2021 20:50:20 -0800 Subject: [PATCH 1/2] fix pointed out by BeyondTheProof, other print statement, reverting default sliding window size to be 21 like before --- modisco.egg-info/PKG-INFO | 2 +- modisco/coordproducers.py | 2 ++ modisco/tfmodisco_workflow/workflow.py | 2 +- modisco/util.py | 2 +- setup.py | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modisco.egg-info/PKG-INFO b/modisco.egg-info/PKG-INFO index ed2092c..bac0df7 100644 --- a/modisco.egg-info/PKG-INFO +++ b/modisco.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: modisco -Version: 0.5.10.0 +Version: 0.5.10.2 Summary: TF MOtif Discovery from Importance SCOres Home-page: https://github.com/kundajelab/tfmodisco License: UNKNOWN diff --git a/modisco/coordproducers.py b/modisco/coordproducers.py index 3d2ab0b..28ad234 100644 --- a/modisco/coordproducers.py +++ b/modisco/coordproducers.py @@ -797,6 +797,8 @@ def refine_thresholds_based_on_frac_passing( a=np.abs(vals), q=100*(1-max_passing_windows_frac)) neg_threshold = -pos_threshold + if (verbose): + print("New thresholds are",pos_threshold,"and",neg_threshold) return pos_threshold, neg_threshold diff --git a/modisco/tfmodisco_workflow/workflow.py b/modisco/tfmodisco_workflow/workflow.py index 5af68d4..98f2ce7 100644 --- a/modisco/tfmodisco_workflow/workflow.py +++ b/modisco/tfmodisco_workflow/workflow.py @@ -166,7 +166,7 @@ class TfModiscoWorkflow(object): def __init__(self, seqlets_to_patterns_factory= seqlets_to_patterns.TfModiscoSeqletsToPatternsFactory(), - sliding_window_size=[5,9,13,17,21], flank_size=10, + sliding_window_size=21, flank_size=10, overlap_portion=0.5, min_metacluster_size=100, min_metacluster_size_frac=0.01, diff --git a/modisco/util.py b/modisco/util.py index 722aad8..f0e5b18 100644 --- a/modisco/util.py +++ b/modisco/util.py @@ -474,7 +474,7 @@ def compute_per_position_ic(ppm, background, 
pseudocount): assert len(ppm.shape)==2 assert ppm.shape[1]==len(background),\ "Make sure the letter axis is the second axis" - if (np.max(np.abs(np.sum(ppm, axis=1)-1.0)) < 1e-5): + if (not np.allclose(np.sum(ppm, axis=1), 1.0, atol=1.0e-5)): print("WARNING: Probabilities don't sum to 1 in all the rows; this can" +" be caused by zero-padding. Will renormalize. PPM:\n" +str(ppm) diff --git a/setup.py b/setup.py index 2012fe4..bf7ee8e 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ description='TF MOtif Discovery from Importance SCOres', long_description="""Algorithm for discovering consolidated patterns from base-pair-level importance scores""", url='https://github.com/kundajelab/tfmodisco', - version='0.5.10.1', + version='0.5.10.2', packages=find_packages(), package_data={ '': ['cluster/phenograph/louvain/*convert*', 'cluster/phenograph/louvain/*community*', 'cluster/phenograph/louvain/*hierarchy*'] From 11c84dc0d219a547f30a8ef0eb0a8581baa7f2df Mon Sep 17 00:00:00 2001 From: AvantiShri Date: Tue, 2 Feb 2021 21:28:16 -0800 Subject: [PATCH 2/2] bugfix for cases where there are lots of ties at the high end (e.g. with ir transformation) --- modisco/coordproducers.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modisco/coordproducers.py b/modisco/coordproducers.py index 28ad234..5e14ad0 100644 --- a/modisco/coordproducers.py +++ b/modisco/coordproducers.py @@ -678,9 +678,12 @@ def identify_coords(score_track, pos_threshold, neg_threshold, # coordinates are identified cp_score_track = [np.array(x) for x in score_track] #if a position is less than the threshold, set it to -np.inf + #Note that the threshold comparisons need to be >= and not just > for + # cases where there are lots of ties at the high end (e.g. 
with an IR + # transformation that gives a lot of values that have a precision of 1.0) cp_score_track = [ - np.array([np.abs(y) if (y > pos_threshold - or y < neg_threshold) + np.array([np.abs(y) if (y >= pos_threshold + or y <= neg_threshold) else -np.inf for y in x]) for x in cp_score_track] @@ -712,8 +715,8 @@ def identify_coords(score_track, pos_threshold, neg_threshold, other_info = dict([ (track_name, track[example_idx][argmax]) for (track_name, track) in other_info_tracks.items()])) - assert (coord.score > pos_threshold - or coord.score < neg_threshold) + assert (coord.score >= pos_threshold + or coord.score <= neg_threshold) coords.append(coord) else: assert False,\