Skip to content

Commit

Permalink
infernal scoring added
Browse files Browse the repository at this point in the history
  • Loading branch information
smautner committed Sep 1, 2015
1 parent 8530739 commit ef5509b
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 2 deletions.
2 changes: 2 additions & 0 deletions graphlearn/abstract_graphs/rnaabstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ def rna_refold(self, digraph=None, seq=None,vectorizer=None):
graph = rnafold_to_eden([('emptyheader',seq)], shape_type=5, energy_range=30, max_num=3).next()
expanded_graph = self.vectorizer._edge_to_vertex_transform(graph)
ex_di_graph = graphlearn.abstract_graphs.rnaabstract.expanded_rna_graph_to_digraph(expanded_graph)
ex_di_graph.graph['sequence']= seq
#abstract_graph = directedgraphtools.direct_abstraction_wrapper(graph,0)
return ex_di_graph

Expand Down Expand Up @@ -354,6 +355,7 @@ def postprocess(self, seq):
# get graph
graph=grmgr.get_base_graph()
graph.graphmanager=grmgr
graph.graph['sequence'] = seq
return graph


Expand Down
60 changes: 58 additions & 2 deletions graphlearn/abstract_graphs/rnasampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ def _score(self,graph):
graph._score=estimateable._score
return graph._score

# Overrides the parent sampler's _sample_path_append so that the sequence
# stored on the graph is also recorded in self._sample_notes.
#
# graph: object exposing a dict-like `graph.graph`; its 'sequence' entry is
#        read, with the string "0" used when the key is missing.
# force: forwarded unchanged to the parent implementation; when it is true
#        nothing is added to the notes.
def _sample_path_append(self, graph, force=False):
if not force:
# Entries are terminated with a literal "n" character (not a newline).
# presumably consumers split the notes on "n" -- verify against callers.
self._sample_notes+=graph.graph.get('sequence',"0")+"n"
# NOTE(review): indentation is not preserved here; since `force` is forwarded,
# this call presumably runs regardless of the `if` above -- confirm.
super(RNASampler,self)._sample_path_append(graph,force=force)

'''
this is also used sometimes, so we had better make sure it doesn't fail
Expand Down Expand Up @@ -99,7 +103,59 @@ def get_mod_dict(graph):
return {s:696969 , e:123123123}
#ubergraphlearn.get_mod_dict=get_mod_dict
import rnaabstract


#ubergraphlearn.make_abstract = rnaabstract.direct_abstractor
#ubergraphlearn.make_abstract = rnaabstract.direct_abstraction_wrapper








import subprocess as sp

def infernal_checker(sequence_list):
    '''
    Score a batch of RNA sequences with cmsearch.

    The sequences are written as FASTA to a uniquely named scratch file in the
    current working directory, that file is handed to call_cm_search, and the
    scratch file is removed again afterwards (also when the search fails).

    :param sequence_list: sized collection of RNA sequence strings
    :return: the value of call_cm_search for the scratch file and
             len(sequence_list); an empty list when sequence_list is empty
    '''
    import os
    import uuid

    count = len(sequence_list)
    if count == 0:
        # Nothing to score; do not run cmsearch on an empty FASTA file.
        return []

    # A fixed name such as 'temp.fa' is overwritten when several samplers run
    # in the same directory at once, so every call gets its own file. The name
    # stays relative and free of spaces/shell metacharacters.
    fasta_name = 'temp_%s.fa' % uuid.uuid4().hex
    try:
        write_fasta(sequence_list, filename=fasta_name)
        return call_cm_search(fasta_name, count)
    finally:
        if os.path.exists(fasta_name):
            os.remove(fasta_name)



def write_fasta(sequences, filename='asdasd'):
    '''
    Dump sequences into a FASTA file.

    Each kept sequence becomes a record whose header is ">HACK<i>", where <i>
    is the position of the sequence in the input. Sequences with a length of
    5 or less are left out, but they still occupy their position, so the
    numbers in the headers always refer to the original input order.

    :param sequences: iterable of sequence strings
    :param filename: path of the file to (over)write
    '''
    records = [
        '>HACK%d\n%s\n' % (position, sequence)
        for position, sequence in enumerate(sequences)
        if len(sequence) > 5
    ]

    with open(filename, 'w') as handle:
        handle.write(''.join(records))


def call_cm_search(filename, count):
    '''
    Run ./cmsearch with the rf00005.cm model on a FASTA file and collect scores.

    Only output rows whose sixth whitespace-separated column is a "HACK<i>"
    sequence name are used; the fourth column of such a row is read as the
    score and divided by 100. If several rows name the same sequence, the
    last one in the output wins.

    :param filename: path of the FASTA file to search
    :param count: number of sequences that were numbered HACK0..HACK<count-1>
    :return: list of length count; entry i is the scaled score of HACK<i>,
             or 0 when cmsearch printed no row for it
    :raises subprocess.CalledProcessError: when cmsearch exits with an error
    '''
    # -g        global
    # --noali   we don't want to see the alignment, the score is enough
    # --incT 0  intended to show everything with score > 0
    # NOTE(review): in Infernal `--incT` appears to set the inclusion
    # threshold while `-T` sets the reporting threshold -- confirm that this
    # is the intended option.
    #
    # The arguments are passed as a list (no shell), so a filename with
    # spaces or shell metacharacters reaches cmsearch verbatim.
    # universal_newlines=True makes the output text on Python 2 and 3 alike.
    command = ['./cmsearch', '-g', '--noali', '--incT', '0', 'rf00005.cm', filename]
    out = sp.check_output(command, universal_newlines=True)

    result = {}
    for line in out.strip().split('\n'):
        fields = line.split()
        # Skip anything that is not shaped like a hit row (e.g. header lines
        # that merely mention a path containing "HACK").
        if len(fields) < 6 or not fields[5].startswith('HACK'):
            continue
        score = float(fields[3]) / 100
        sequence_index = int(fields[5][4:])
        result[sequence_index] = score

    return [result.get(k, 0) for k in range(count)]






0 comments on commit ef5509b

Please sign in to comment.