Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sqlite3-based object store, with tests #250

Merged
merged 2 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ extras =
bioutils>=0.5.2
hgvs>=1.4
requests
dill~=0.3.7
notebooks =
ipython
jupyter
Expand Down
130 changes: 130 additions & 0 deletions src/ga4gh/vrs/extras/object_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from collections.abc import MutableMapping
from typing import Any, Union
from threading import Lock

import sqlite3
import dill


class Sqlite3MutableMapping(MutableMapping):
    """
    Class that can be used like a Python dictionary but that uses a sqlite3 database
    as the storage. Can also be opened as a contextmanager.

    Entries live in a single ``mapping`` table with a ``key text`` column
    (covered by a unique index) and a ``value blob`` column. Values are
    serialized with ``dill`` on write and deserialized on read. Keys are bound
    directly as SQL parameters, so they must be of a type that sqlite3 can
    bind (typically ``str``).

    Used as a contextmanager, the pending transaction is committed on a clean
    exit and rolled back if the block raises; the connection itself is not
    closed by the ``with`` block, only by ``close()`` (which the finalizer
    also calls).

    If not used as a contextmanager, user must call commit and/or close.
    """

    def __init__(
        self,
        sqlite3_db: Union[str, sqlite3.Connection],
        autocommit: bool = True
    ):
        """
        Connect to the sqlite3 database specified by an existing sqlite3.Connection
        or a connection string.

        - sqlite3_db: a database path/connection string, or an already open
            sqlite3.Connection which is then used as-is.
        - autocommit: if False, disables commit after every setitem/delitem.
            Significant performance implication (>10X speedup)
        """
        # Set up the close-tracking state first so that close()/__del__ stay
        # safe even when connecting or creating the schema raises below.
        self._closed_lock = Lock()
        self._closed = True
        self.autocommit = autocommit
        if isinstance(sqlite3_db, str):
            sqlite3_db = sqlite3.connect(
                sqlite3_db,
                check_same_thread=True)
        self.db = sqlite3_db
        self._closed = False
        self._create_schema()

    def _create_schema(self):
        """Create the ``mapping`` table and its unique key index if missing."""
        cur = self.db.cursor()
        try:
            cur.execute(
                "create table if not exists mapping "
                "(key text, value blob)")
            cur.execute(
                "create unique index if not exists mapping_key_idx "
                "on mapping (key)")
            self.commit()
        finally:
            cur.close()

    def __del__(self):
        # A partially constructed instance (e.g. created via __new__, or whose
        # __init__ never ran) has no lock/connection; there is nothing to close.
        if getattr(self, "_closed_lock", None) is not None:
            self.close()

    def __delitem__(self, key: Any) -> None:
        """
        Delete the entry stored under `key`.

        Raises KeyError if no such entry exists. Existence is determined from
        the row count of the DELETE itself, so the stored value is never
        deserialized just to be thrown away.
        """
        cur = self.db.cursor()
        try:
            cur.execute(
                "delete from mapping where key = ?",
                (key,))
            deleted = cur.rowcount
            # Commit even when nothing matched so that, in autocommit mode,
            # the transaction opened for the DELETE does not stay open.
            if self.autocommit:
                self.commit()
        finally:
            cur.close()
        if deleted == 0:
            raise KeyError("Key not found: " + str(key))

    def __setitem__(self, key: Any, value: Any) -> None:
        """Serialize `value` with dill and insert or replace it under `key`."""
        cur = self.db.cursor()
        try:
            ser = dill.dumps(value)
            cur.execute(
                "insert or replace into mapping(key, value) "
                "values (?, ?)",
                (key, sqlite3.Binary(ser)))
            if self.autocommit:
                self.commit()
        finally:
            cur.close()

    def __getitem__(self, key: Any) -> Any:
        """
        Return the deserialized value stored under `key`.

        Raises KeyError if no such entry exists.
        """
        cur = self.db.cursor()
        try:
            cur.execute(
                "select value from mapping where key = ?",
                (key,))
            row = cur.fetchone()
        finally:
            cur.close()
        if row is None:
            raise KeyError("Key not found: " + str(key))
        # NOTE(security): dill.loads, like pickle.loads, can execute arbitrary
        # code embedded in the blob. Only open database files that come from
        # a trusted source.
        return dill.loads(row[0])

    def __iter__(self):
        """Yield every key currently stored in the mapping table."""
        cur = self.db.cursor()
        try:
            for (key,) in cur.execute("select key from mapping"):
                yield key
        finally:
            cur.close()

    def __len__(self):
        """Return the number of rows in the mapping table."""
        cur = self.db.cursor()
        try:
            cur.execute("select count(*) from mapping")
            return cur.fetchone()[0]
        finally:
            cur.close()

    def commit(self):
        """Commit the pending transaction on the underlying connection."""
        self.db.commit()

    def close(self):
        """
        Commit pending changes and close the underlying connection.

        Calling close() again after a successful close is a no-op.
        """
        with self._closed_lock:
            if self._closed:
                return
            try:
                self.commit()
            finally:
                # Release the connection even if the final commit failed.
                self.db.close()
                self._closed = True

    def __enter__(self):
        self.db.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Delegates to the sqlite3 connection, which commits on success and
        # rolls back on error; it does not close the connection.
        self.db.__exit__(exc_type, exc_value, traceback)
152 changes: 152 additions & 0 deletions tests/extras/test_object_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import tempfile
import ast
import os
import pytest
import shutil
import sys

from ga4gh.vrs.extras.object_store import Sqlite3MutableMapping


def test_simple(tmp_path):
    """Round-trip ten single-letter keys with int values, then delete them all."""
    db_path = str(tmp_path / "test_simple.sqlite3")
    object_store = Sqlite3MutableMapping(db_path)

    kvp = {
        chr(ord("A") + i): i
        for i in range(10)
    }

    assert len(object_store) == 0
    for k, v in kvp.items():
        object_store[k] = v

    assert len(kvp) == len(object_store)
    assert set(kvp) == set(object_store.keys())

    for k_act, v_act in object_store.items():
        assert kvp[k_act] == v_act

    # Test deletes
    del object_store["A"]
    assert len(object_store) == len(kvp) - 1
    with pytest.raises(KeyError):
        del object_store["A"]
    while len(object_store) > 0:
        del object_store[list(object_store.keys())[0]]
    assert len(object_store) == 0

    # Release the connection explicitly, like the other tests do.
    object_store.close()


def test_complex(tmp_path):
    """Store nested dict/list values and check they come back equal."""
    db_file = f"{tmp_path}/test_complex.sqlite3"

    nested_list = ["B-2-1-1", "B-2-1-2", "B-2-1-3"]
    expected = {
        "A": "A-value",
        "B": {
            "B-1": "B-1-value",
            "B-2": {"B-2-1": nested_list, "B-3": 12345},
        },
    }

    store = Sqlite3MutableMapping(db_file)
    assert len(store) == 0

    for key in expected:
        store[key] = expected[key]

    assert len(expected) == len(store)
    assert set(expected) == set(store.keys())

    # Every stored entry must match what was written.
    for stored_key, stored_value in store.items():
        assert expected[stored_key] == stored_value

    store.close()


def test_classes(tmp_path):
    """Store instances of a locally defined class and read/delete them back."""
    # Use this test's own file name (was copy-pasted as test_complex.sqlite3).
    db_path = str(tmp_path / "test_classes.sqlite3")

    class TestClass(object):
        def __init__(self, obj_id):
            # Parameter is not called `id` to avoid shadowing the builtin;
            # the attribute name is unchanged.
            self.id = obj_id
            self.A = "A"
            self.B = ["B1", "B2", 3]
            self.C = {"C1": "C1-value"}

        def somefunction(self, arg):
            return f"{self.id}-{arg}"

    object_store = Sqlite3MutableMapping(db_path)

    val1 = TestClass("val1")
    val2 = TestClass("val2")
    object_store["val1"] = val1
    object_store["val2"] = val2

    # Test retrieval of custom object contents
    assert len(object_store) == 2
    assert object_store["val1"].id == "val1"
    assert object_store["val2"].id == "val2"
    assert object_store["val1"].somefunction("X") == "val1-X"

    # Test deletes of custom objects
    del object_store["val1"]
    assert len(object_store) == 1
    with pytest.raises(KeyError):
        del object_store["val1"]

    del object_store["val2"]
    assert len(object_store) == 0

    object_store.close()


# This version verifies that setting autocommit=False may
# lose data if .close or .commit is not explicitly called
# def test_no_commit():
# tmpdir = tempfile.mkdtemp()
# db_path = tmpdir + "/test_commit.sqlite3"
# object_store = Sqlite3MutableMapping(db_path, autocommit=False)
# value_count = int(1e5)
# for i in range(value_count):
# object_store[f"key{i}"] = f"value{i}"
# object_store.db.close()
# # See if the stuff is still there
# object_store = Sqlite3MutableMapping(db_path)
# for i in range(value_count):
# assert object_store[f"key{i}"] == f"value{i}"

def test_commit(tmp_path):
    """With autocommit=False, close() must persist everything written."""
    db_path = str(tmp_path / "test_commit.sqlite3")
    object_store = Sqlite3MutableMapping(db_path, autocommit=False)

    value_count = int(1e4)
    for i in range(value_count):
        object_store[f"key{i}"] = f"value{i}"

    object_store.close()

    # See if the stuff is still there
    object_store = Sqlite3MutableMapping(db_path)
    for i in range(value_count):
        assert object_store[f"key{i}"] == f"value{i}"

    # Close the reopened store too, rather than relying on the finalizer.
    object_store.close()


def test_contextmanager(tmp_path):
    """Data written inside one `with` block must be readable from a second one."""
    db_file = f"{tmp_path}/test_contextmanager.sqlite3"
    expected = {f"key{i}": f"value{i}" for i in range(int(1e4))}

    with Sqlite3MutableMapping(db_file, autocommit=False) as store:
        for key, value in expected.items():
            store[key] = value

    with Sqlite3MutableMapping(db_file, autocommit=False) as store:
        # See if the stuff is still there
        for key, value in expected.items():
            assert store[key] == value
Loading