Skip to content

Commit

Permalink
Merge pull request #67 from kwcantrell/master
Browse files Browse the repository at this point in the history
Updates t2t param
  • Loading branch information
cameronmartino authored May 25, 2023
2 parents 28d4184 + 334942f commit 9ac99ac
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
v0.0.9 (2023-05-24)

### Bug fixes

* Fixes in `preprocessing.py` functions
* fixes bug that can drastically change taxonomic assignment
* see issue #66

v0.0.8 (2022-03-28)

### Bug fixes
Expand Down
2 changes: 1 addition & 1 deletion gemelli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------

__version__ = "0.0.8"
__version__ = "0.0.9"
12 changes: 7 additions & 5 deletions gemelli/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def retrieve_t2t_taxonomy(phylogeny, taxonomy=None):
counts = nl.collect_names_at_ranks_counts(tree_)

nl.decorate_ntips(tree_)
nl.decorate_name_relative_freqs(tree_, counts, 2)
nl.decorate_name_relative_freqs(tree_, counts, 1)
nl.set_ranksafe(tree_)
nl.pick_names(tree_)

Expand Down Expand Up @@ -205,7 +205,6 @@ def _get_taxonomy_io_stream(taxonomy):
"following (case insensitive): {}."
).format(VALID_TAXONOMY_COLUMN_NAMES)
)
stream = io.StringIO()

# Split the single column of taxonomy strings into n columns, where n
# is the highest number of taxonomic levels in any string. This is to
Expand All @@ -217,16 +216,19 @@ def _get_taxonomy_io_stream(taxonomy):
highest_rank_indx = taxonomy.dropna(axis=0, how='any').first_valid_index()
rank_order = [rank[0]
for rank in taxonomy.loc[highest_rank_indx].values.tolist()]
nl.set_rank_order(rank_order)

# collapse taxonomy back into a single string
taxonomy.fillna('', inplace=True)
fill_missing_dict = {i: f'{r}__' for i, r in enumerate(rank_order)}
# collapse taxonomy_ back into a single string
taxonomy.fillna(value=fill_missing_dict, inplace=True)
taxonomy = taxonomy.apply(
lambda levels: '; '.join(levels.values.tolist()),
axis=1)

# convert taxonomy dataframe to a StringIO for use in t2t
stream = io.StringIO()
taxonomy.to_csv(stream, sep='\t', index=True, header=False)
stream.seek(0)
nl.determine_rank_order(stream.readline().strip().split('\t')[1])

# set stream to point to first line
stream.seek(0)
Expand Down

0 comments on commit 9ac99ac

Please sign in to comment.