Skip to content

Commit

Permalink
Some cleanup on the ensembldb code (#67)
Browse files (browse the repository at this point in the history)
* Some cleanup on the ensembldb code

* fix __repr__

* fix comments

* Update src/genomic_features/ensembl/ensembldb.py

---------

Co-authored-by: Felix Raimundo <felix.raimundo@3umassmed.edu>
Co-authored-by: Isaac Virshup <ivirshup@gmail.com>
  • Loading branch information
3 people authored Apr 4, 2024
1 parent 1c2a953 commit bae82d2
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
19 changes: 11 additions & 8 deletions src/genomic_features/ensembl/ensembldb.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,10 +20,7 @@
PKG_CACHE_DIR = "genomic-features"

BIOC_ANNOTATION_HUB_URL = (
"https://bioconductorhubs.blob.core.windows.net/annotationhub/"
)
ENSEMBL_URL_TEMPLATE = (
BIOC_ANNOTATION_HUB_URL + "AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite"
"https://bioconductorhubs.blob.core.windows.net/annotationhub"
)
ANNOTATION_HUB_URL = (
"https://annotationhub.bioconductor.org/metadata/annotationhub.sqlite3"
Expand Down Expand Up @@ -56,7 +53,7 @@ def annotation(
"""
try:
sqlite_file_path = retrieve_annotation(
ENSEMBL_URL_TEMPLATE.format(species=species, version=version)
f'{BIOC_ANNOTATION_HUB_URL}/AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite'
)

if backend == "sqlite":
Expand All @@ -74,7 +71,8 @@ def annotation(
except HTTPError as err:
if err.response.status_code == 404:
raise ValueError(
f"No Ensembl database found for {species} v{version}. Check available versions with `genomic_features.ensembl.list_versions`."
f"No Ensembl database found for {species} v{version}. Check "
f"available versions with `genomic_features.ensembl.list_ensdb_annotations `."
) from err
else:
raise HTTPError from err
Expand Down Expand Up @@ -125,7 +123,8 @@ def list_ensdb_annotations(species: None | str | list[str] = None) -> DataFrame:
# check that species exist
if version_table.shape[0] == 0:
raise ValueError(
f"No Ensembl database found for {species}. Check species name."
f"No Ensembl database found for {species}. Available species can "
f"be found via: `list_ensdb_annotations()['Species'].unique()`."
)

version_table["Ensembl_version"] = version_table["rdatapath"].str.split(
Expand Down Expand Up @@ -153,7 +152,11 @@ def metadata(self) -> dict:

def __repr__(self) -> str:
d = self.metadata
return f"EnsemblDB(organism='{d['Organism']}', ensembl_release='{d['ensembl_version']}')"
return (
f"EnsemblDB(organism='{d['Organism']}', "
f"ensembl_release='{d['ensembl_version']}', "
f"genome_build='{d['genome_build']}')"
)

def genes(
self,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_basic.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_missing_version():

def test_repr():
result = repr(gf.ensembl.annotation("Hsapiens", 108))
expected = "EnsemblDB(organism='Homo sapiens', ensembl_release='108')"
expected = "EnsemblDB(organism='Homo sapiens', ensembl_release='108', genome_build='GRCh38')"

assert result == expected

Expand Down

0 comments on commit bae82d2

Please sign in to comment.