Skip to content

Commit

Permalink
fixed test, added -p argument
Browse files Browse the repository at this point in the history
  • Loading branch information
AvantiShri committed Apr 22, 2020
1 parent 98c44bb commit e1db274
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 174 deletions.
310 changes: 155 additions & 155 deletions examples/H1ESC_Nanog_gkmsvm/TF MoDISco Nanog.ipynb

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions examples/H1ESC_Nanog_gkmsvm/meme_out/metacluster0/meme.xml
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@
</letter_frequencies>
</training_set>
<model>
<command_line>meme meme_out/metacluster0/inp_seqlets.fa -dna -mod anr -nmotifs 10 -minw 6 -maxw 50 -oc meme_out/metacluster0 </command_line>
<command_line>meme meme_out/metacluster0/inp_seqlets.fa -dna -mod anr -nmotifs 10 -p 4 -minw 6 -maxw 50 -oc meme_out/metacluster0 </command_line>
<host>Avantis-MacBook-Pro.local</host>
<type>anr</type>
<nmotifs>10</nmotifs>
Expand Down Expand Up @@ -246,7 +246,7 @@
</background_frequencies>
</model>
<motifs>
<motif id="motif_1" name="CCCWGCWGGG" alt="MEME-1" width="10" sites="324" ic="10.5" re="10.8" llr="2421" p_value="6.2e-1015" e_value="5.4e-219" bayes_threshold="8.8036" elapsed_time="183.087642">
<motif id="motif_1" name="CCCWGCWGGG" alt="MEME-1" width="10" sites="324" ic="10.5" re="10.8" llr="2421" p_value="6.2e-1015" e_value="5.4e-219" bayes_threshold="8.8036" elapsed_time="181.913222">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -381,7 +381,7 @@
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_2" name="BHMATTTGCATDWCAAAGR" alt="MEME-2" width="19" sites="135" ic="15.2" re="14.8" llr="1387" p_value="3.6e-619" e_value="7.0e-181" bayes_threshold="7.58518" elapsed_time="282.325589">
<motif id="motif_2" name="BHMATTTGCATDWCAAAGR" alt="MEME-2" width="19" sites="135" ic="15.2" re="14.8" llr="1387" p_value="3.6e-619" e_value="7.0e-181" bayes_threshold="7.58518" elapsed_time="280.701191">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -624,7 +624,7 @@
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_3" name="BYCWTTGTTATGYAAATN" alt="MEME-3" width="18" sites="195" ic="12.3" re="12.0" llr="1620" p_value="4.8e-643" e_value="8.2e-146" bayes_threshold="7.51828" elapsed_time="373.251581">
<motif id="motif_3" name="BYCWTTGTTATGYAAATN" alt="MEME-3" width="18" sites="195" ic="12.3" re="12.0" llr="1620" p_value="4.8e-643" e_value="8.2e-146" bayes_threshold="7.51828" elapsed_time="369.536925">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -855,7 +855,7 @@
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_4" name="KVRAAACAAAGGM" alt="MEME-4" width="13" sites="92" ic="12.6" re="12.5" llr="795" p_value="5.2e-311" e_value="2.3e-039" bayes_threshold="8.02077" elapsed_time="450.035937">
<motif id="motif_4" name="KVRAAACAAAGGM" alt="MEME-4" width="13" sites="92" ic="12.6" re="12.5" llr="795" p_value="5.2e-311" e_value="2.3e-039" bayes_threshold="8.02077" elapsed_time="446.727879">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -1026,7 +1026,7 @@
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_5" name="KGGGGGAGGGG" alt="MEME-5" width="11" sites="18" ic="18.5" re="19.0" llr="237" p_value="5.6e-083" e_value="2.7e-017" bayes_threshold="11.7166" elapsed_time="524.888619">
<motif id="motif_5" name="KGGGGGAGGGG" alt="MEME-5" width="11" sites="18" ic="18.5" re="19.0" llr="237" p_value="5.6e-083" e_value="2.7e-017" bayes_threshold="11.7166" elapsed_time="519.405419">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -1173,7 +1173,7 @@
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_6" name="CYCCCWGKGRG" alt="MEME-6" width="11" sites="40" ic="13.2" re="13.6" llr="376" p_value="8.1e-139" e_value="1.2e-005" bayes_threshold="10.6596" elapsed_time="600.539929">
<motif id="motif_6" name="CYCCCWGKGRG" alt="MEME-6" width="11" sites="40" ic="13.2" re="13.6" llr="376" p_value="8.1e-139" e_value="1.2e-005" bayes_threshold="10.6596" elapsed_time="591.494427">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -1320,7 +1320,7 @@
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_7" name="ARWTGYTAATGRRDS" alt="MEME-7" width="15" sites="29" ic="15.7" re="15.5" llr="312" p_value="2.6e-104" e_value="1.0e-005" bayes_threshold="9.86546" elapsed_time="674.474107">
<motif id="motif_7" name="ARWTGYTAATGRRDS" alt="MEME-7" width="15" sites="29" ic="15.7" re="15.5" llr="312" p_value="2.6e-104" e_value="1.0e-005" bayes_threshold="9.86546" elapsed_time="662.118705">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -1515,7 +1515,7 @@
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_8" name="CCTGKGGAGA" alt="MEME-8" width="10" sites="22" ic="15.1" re="15.3" llr="234" p_value="3.2e-082" e_value="2.3e-003" bayes_threshold="10.9798" elapsed_time="745.791682">
<motif id="motif_8" name="CCTGKGGAGA" alt="MEME-8" width="10" sites="22" ic="15.1" re="15.3" llr="234" p_value="3.2e-082" e_value="2.3e-003" bayes_threshold="10.9798" elapsed_time="730.435599">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -1650,7 +1650,7 @@ CCTG[GT]GG[AT]G[ACG]
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_9" name="GGVVTGCACATTCCWGGCMTTCYTT" alt="MEME-9" width="25" sites="5" ic="36.2" re="36.4" llr="126" p_value="4.4e-021" e_value="7.8e-002" bayes_threshold="12.1016" elapsed_time="816.190480">
<motif id="motif_9" name="GGVVTGCACATTCCWGGCMTTCYTT" alt="MEME-9" width="25" sites="5" ic="36.2" re="36.4" llr="126" p_value="4.4e-021" e_value="7.8e-002" bayes_threshold="12.1016" elapsed_time="797.875238">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down Expand Up @@ -1965,7 +1965,7 @@ GG[AGC][ACG]T[GCT][CAG][AC]CATTCC[TA][GT][GC]C[AC][TA][TG]C[TC][TA][TC]
<contributing_sites>
</contributing_sites>
</motif>
<motif id="motif_10" name="CYCCCCYCCSCCCCC" alt="MEME-10" width="15" sites="10" ic="19.4" re="20.3" llr="141" p_value="1.4e-039" e_value="1.8e-001" bayes_threshold="11.7689" elapsed_time="885.542592">
<motif id="motif_10" name="CYCCCCYCCSCCCCC" alt="MEME-10" width="15" sites="10" ic="19.4" re="20.3" llr="141" p_value="1.4e-039" e_value="1.8e-001" bayes_threshold="11.7689" elapsed_time="864.906464">
<scores>
<alphabet_matrix>
<alphabet_array>
Expand Down
16 changes: 10 additions & 6 deletions modisco/clusterinit/memeinit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import time


def run_meme(meme_command, input_file, outdir, nmotifs):
def run_meme(meme_command, n_jobs, input_file, outdir, nmotifs):

start = time.time()
#p = Popen([meme_command,input_file,"-dna","-mod","anr",
Expand All @@ -25,7 +25,9 @@ def run_meme(meme_command, input_file, outdir, nmotifs):
# sys.stdout.write(output)
print("Running MEME")
command = (meme_command+" "+input_file+" -dna -mod anr -nmotifs "
+str(nmotifs)+" -minw 6 -maxw 50 -oc "+outdir)
+str(nmotifs)
+("" if n_jobs==1 else " -p "+str(n_jobs))
+" -minw 6 -maxw 50 -oc "+outdir)
print("Command:",command)
os.system(command)
print("Duration of MEME:",time.time()-start,"seconds")
Expand All @@ -43,14 +45,15 @@ class MemeInitClustererFactory(InitClustererFactory):

def __init__(self, meme_command, base_outdir, max_num_seqlets_to_use,
nmotifs, e_value_threshold=0.05,
**pwm_clusterer_kwargs):
n_jobs=1, verbose=True):
self.meme_command = meme_command
self.base_outdir = base_outdir
self.max_num_seqlets_to_use = max_num_seqlets_to_use
self.nmotifs = nmotifs
self.call_count = 0 #to avoid overwriting for each metacluster
self.e_value_threshold = e_value_threshold
self.pwm_clusterer_kwargs = pwm_clusterer_kwargs
self.n_jobs = n_jobs
self.verbose = verbose

def __call__(self, seqlets):

Expand Down Expand Up @@ -86,13 +89,14 @@ def __call__(self, seqlets):

run_meme(meme_command=self.meme_command,
input_file=seqlet_fa_to_write,
outdir=outdir, nmotifs=self.nmotifs)
outdir=outdir, nmotifs=self.nmotifs,
n_jobs=self.n_jobs)

motifs = parse_meme(meme_xml=outdir+"/meme.xml",
e_value_threshold=self.e_value_threshold)
return PwmClusterer(
pwms=motifs, onehot_track_name=self.onehot_track_name,
**self.pwm_clusterer_kwargs)
n_jobs=self.n_jobs, verbose=self.verbose)


class Pwm(object):
Expand Down
40 changes: 38 additions & 2 deletions test/test_tfmodisco_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ def test_memeinit_workflow(self):
initclusterer_factory=
modisco.clusterinit.memeinit.MemeInitClustererFactory(
meme_command="meme", base_outdir="meme_out",
num_seqlets_to_use=10000, nmotifs=3,
min_logodds=2, n_jobs=1),
max_num_seqlets_to_use=10000, nmotifs=3,
n_jobs=1),
trim_to_window_size=15,
initial_flank_to_add=5,
kmer_len=5, num_gaps=1,
Expand All @@ -166,3 +166,39 @@ def test_memeinit_workflow(self):
null_per_pos_scores = null_per_pos_scores,
plot_save_dir="plot_save_directory"))

#@skip
def test_parallel_memeinit_workflow(self):
    """Smoke-test the MEME-initialised workflow with ``n_jobs=4``.

    Builds a ``MemeInitClustererFactory`` with ``n_jobs=4``, plugs it into
    a ``TfModiscoSeqletsToPatternsFactory`` and runs ``TfModiscoWorkflow``
    on the fixtures stored on the test case. Nothing is asserted about the
    returned results; the test passes when the run completes without
    raising.
    """
    # Pull the fixtures off the test case up front (same order as before).
    one_hot, contrib_scores, hyp_contribs = (
        self.onehot_data,
        self.task_to_scores,
        self.task_to_hyp_scores,
    )

    import modisco

    laplace_null = modisco.coordproducers.LaplaceNullDist(num_to_samp=5000)

    # Innermost object first: the MEME-based initial clusterer factory,
    # here configured with a job count other than 1.
    meme_init_factory = modisco.clusterinit.memeinit.MemeInitClustererFactory(
        meme_command="meme",
        base_outdir="meme_out",
        max_num_seqlets_to_use=10000,
        nmotifs=3,
        n_jobs=4,
    )

    patterns_factory_cls = (
        modisco.tfmodisco_workflow
        .seqlets_to_patterns.TfModiscoSeqletsToPatternsFactory)
    patterns_factory = patterns_factory_cls(
        initclusterer_factory=meme_init_factory,
        trim_to_window_size=15,
        initial_flank_to_add=5,
        kmer_len=5,
        num_gaps=1,
        num_mismatches=0,
        final_min_cluster_size=60,
    )

    # Slight modifications from the default settings
    workflow = modisco.tfmodisco_workflow.workflow.TfModiscoWorkflow(
        sliding_window_size=15,
        flank_size=5,
        target_seqlet_fdr=0.15,
        seqlets_to_patterns_factory=patterns_factory,
    )

    tfmodisco_results = workflow(
        task_names=["task0", "task1", "task2"],
        contrib_scores=contrib_scores,
        hypothetical_contribs=hyp_contribs,
        one_hot=one_hot,
        null_per_pos_scores=laplace_null,
        plot_save_dir="plot_save_directory",
    )

0 comments on commit e1db274

Please sign in to comment.