From b4f9ac59742300b00e77571156d432ce97df3c05 Mon Sep 17 00:00:00 2001 From: Pat Nadolny Date: Tue, 3 Oct 2023 14:35:55 -0400 Subject: [PATCH] feat: Cache SQL columns and schemas (#1779) * cache sql columns and schemas * lint fix * fix mypy typing * use lru_cache instead of custom cache * Revert "use lru_cache instead of custom cache" This reverts commit 668832b7125f0cec971c35be744841577f9530cb. * use set for cache instead of list * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix set typing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Ken Payne Co-authored-by: Edgar R. M Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- singer_sdk/connectors/sql.py | 42 ++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/singer_sdk/connectors/sql.py b/singer_sdk/connectors/sql.py index c74726057..c6f957589 100644 --- a/singer_sdk/connectors/sql.py +++ b/singer_sdk/connectors/sql.py @@ -58,6 +58,8 @@ def __init__( """ self._config: dict[str, t.Any] = config or {} self._sqlalchemy_url: str | None = sqlalchemy_url or None + self._table_cols_cache: dict[str, dict[str, sqlalchemy.Column]] = {} + self._schema_cache: set[str] = set() @property def config(self) -> dict: @@ -584,8 +586,12 @@ def schema_exists(self, schema_name: str) -> bool: Returns: True if the database schema exists, False if not. """ - schema_names = sqlalchemy.inspect(self._engine).get_schema_names() - return schema_name in schema_names + if schema_name not in self._schema_cache: + self._schema_cache = set( + sqlalchemy.inspect(self._engine).get_schema_names(), + ) + + return schema_name in self._schema_cache def get_table_columns( self, @@ -601,20 +607,24 @@ def get_table_columns( Returns: An ordered list of column objects. """ - _, schema_name, table_name = self.parse_full_table_name(full_table_name) - inspector = sqlalchemy.inspect(self._engine) - columns = inspector.get_columns(table_name, schema_name) - - return { - col_meta["name"]: sqlalchemy.Column( - col_meta["name"], - col_meta["type"], - nullable=col_meta.get("nullable", False), - ) - for col_meta in columns - if not column_names - or col_meta["name"].casefold() in {col.casefold() for col in column_names} - } + if full_table_name not in self._table_cols_cache: + _, schema_name, table_name = self.parse_full_table_name(full_table_name) + inspector = sqlalchemy.inspect(self._engine) + columns = inspector.get_columns(table_name, schema_name) + + self._table_cols_cache[full_table_name] = { + col_meta["name"]: sqlalchemy.Column( + col_meta["name"], + col_meta["type"], + nullable=col_meta.get("nullable", False), + ) + for col_meta in columns + if not column_names + or col_meta["name"].casefold() + in {col.casefold() for col in column_names} + } + + return self._table_cols_cache[full_table_name] def get_table( self,