Skip to content

Commit

Permalink
Merge branch 'smole2.0' into 'dev'
Browse files Browse the repository at this point in the history
Smolecule 2.0

See merge request research/medaka!596
  • Loading branch information
cjw85 committed Oct 9, 2024
2 parents 9df4dee + b000cb5 commit 60f96d7
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [unreleased]
### Fixed
- `medaka smolecule` was broken by the change from `medaka consensus` to `medaka inference`.

## [v2.0.0]
Switched from tensorflow to pytorch.

Expand Down
20 changes: 19 additions & 1 deletion medaka/smolecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def multi_from_fastx(
above this value.
"""
logger = medaka.common.get_named_logger("FastReader")
depth_filter = max(1, depth_filter)
if take_all and read_id is None:
read_id = os.path.splitext(os.path.basename(fastx))[0]
Expand All @@ -123,21 +124,37 @@ def multi_from_fastx(
for entry in fh:
if not take_all:
cur_read_id = entry.name.split("_")[0]
if read_id is None:
read_id = cur_read_id
if cur_read_id != read_id:
if len(subreads) >= depth_filter:
med_length = np.median(
[len(x.seq) for x in subreads])
if med_length > length_filter:
yield cls(read_id, subreads)
else:
logger.debug(
"Read {} has too short subreads.".format(
read_id))
else:
logger.debug(
"Read {} has too few subreads.".format(
read_id))
read_id = cur_read_id
subreads = []

if len(entry.sequence) > 0:
subreads.append(Subread(entry.name, entry.sequence))

if len(subreads) >= depth_filter:
med_length = np.median([len(x.seq) for x in subreads])
if med_length > length_filter:
yield cls(read_id, subreads)
else:
logger.debug(
"Read {} has too short subreads.".format(read_id))
else:
logger.debug("Read {} has too few subreads.".format(read_id))

@property
def seqs(self):
Expand Down Expand Up @@ -414,9 +431,10 @@ def __getattr__(self, attr):

def main(args):
"""Entry point for repeat read consensus creation."""
print(args)
parser = medaka.medaka.medaka_parser()
defaults = parser.parse_args([
"consensus", medaka.medaka.CheckBam.fake_sentinel,
"inference", medaka.medaka.CheckBam.fake_sentinel,
"fake_out"])

args = MyArgs(args, defaults)
Expand Down

0 comments on commit 60f96d7

Please sign in to comment.