Skip to content

Commit

Permalink
No more non-unique column names, or non-queryable columns in list_columns
Browse files Browse the repository at this point in the history
  • Loading branch information
ivirshup committed Apr 8, 2024
1 parent bae82d2 commit 5d8b6f7
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 6 deletions.
17 changes: 11 additions & 6 deletions src/genomic_features/ensembl/ensembldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@

# Name of this package's cache directory.
# NOTE(review): usage is not visible in this excerpt -- confirm.
PKG_CACHE_DIR = "genomic-features"

# Base URL of the Bioconductor AnnotationHub blob storage; `annotation`
# appends "/AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite" to it.
BIOC_ANNOTATION_HUB_URL = "https://bioconductorhubs.blob.core.windows.net/annotationhub"
# URL of the AnnotationHub metadata SQLite database.
ANNOTATION_HUB_URL = (
    "https://annotationhub.bioconductor.org/metadata/annotationhub.sqlite3"
)
Expand Down Expand Up @@ -53,7 +51,7 @@ def annotation(
"""
try:
sqlite_file_path = retrieve_annotation(
f'{BIOC_ANNOTATION_HUB_URL}/AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite'
f"{BIOC_ANNOTATION_HUB_URL}/AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite"
)

if backend == "sqlite":
Expand Down Expand Up @@ -440,12 +438,19 @@ def _get_required_tables(self, tab) -> list:
return self._tables_by_degree(tab)

def list_columns(self, tables: str | list[str] | None = None) -> list[str]:
"""List all columns available in the genomic features table."""
"""List queryable columns available in these tables."""
if tables is None:
tables = self.db.list_tables() # list of table names
if "metadata" in tables:
tables.remove("metadata")
elif isinstance(tables, str):
tables = [tables] # list of tables names (only one)
columns = [c for t in tables for c in self.db.table(t).columns]

columns = []
for t in tables:
for c in self.db.table(t).columns:
if c not in columns:
columns.append(c)
return columns

def _clean_columns(self, columns: list[str]) -> list[str]:
Expand Down
17 changes: 17 additions & 0 deletions tests/test_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,20 @@ def test_chromosome_columns(hsapiens108):
.reset_index(drop=True)
)
pd.testing.assert_series_equal(result["seq_length"], expected_lengths)


def test_list_columns_uniqueness(hsapiens108):
# https://github.com/scverse/genomic-features/issues/42
cols = hsapiens108.list_columns()
assert len(cols) == len(set(cols))

cols = hsapiens108.list_columns(["gene", "tx"])
assert len(cols) == len(set(cols))


def test_list_columns_include_unqueryable_cols(hsapiens108):
# https://github.com/scverse/genomic-features/issues/42
cols = hsapiens108.list_columns()
# From metadata
assert "value" not in cols
assert "name" not in cols

0 comments on commit 5d8b6f7

Please sign in to comment.