Skip to content

Commit

Permalink
Merge pull request #77 from ClickHouse/fix_regex_in_column
Browse files Browse the repository at this point in the history
Fix brackets regex in column.py
  • Loading branch information
guykoh authored Jul 11, 2022
2 parents f24c209 + 8c5ca0c commit 6368cfd
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 15 deletions.
2 changes: 1 addition & 1 deletion dbt/adapters/clickhouse/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = '1.1.6'
version = '1.1.7'
31 changes: 17 additions & 14 deletions dbt/adapters/clickhouse/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,25 @@ class ClickhouseColumn(Column):
'INTEGER': 'Int64',
}
is_nullable: bool = False
_brackets_regex = re.compile(r'(Nullable|LowCardinality)\((.*?\))\)')
_brackets_regex = re.compile(r'^(Nullable|LowCardinality)\((.*)\)$')
_fix_size_regex = re.compile(r'FixedString\((.*?)\)')
_decimal_regex = re.compile(r'Decimal\((\d+), (\d+)\)')

def __init__(
self,
column: str,
dtype: str,
is_nullable: bool = False,
) -> None:
def __init__(self, column: str, dtype: str) -> None:
char_size = None
numeric_precision = None
numeric_scale = None

match_brackets = self._brackets_regex.search(dtype)
if match_brackets:
self.is_nullable = True
dtype = match_brackets.group(2)
inner_dtype = self.match_brackets(dtype)
if inner_dtype:
dtype = inner_dtype
if not self.is_nullable:
# Support LowCardinality(Nullable(dtype))
inner_dtype = self.match_brackets(dtype)
dtype = inner_dtype if inner_dtype else dtype

if dtype.lower().startswith('fixedstring'):
match_sized = self._fix_size_regex.search(dtype)
char_size = 0
if match_sized:
char_size = int(match_sized.group(1))

Expand Down Expand Up @@ -86,7 +83,7 @@ def is_string(self) -> bool:
'tinyblob',
'char',
'mediumtext',
]
] or self.dtype.lower().startswith('fixedstring')

def is_integer(self) -> bool:
    """Return True when the column dtype is a signed or unsigned integer type.

    The check is a case-insensitive prefix match on ``self.dtype``
    (``Int*`` / ``UInt*``).  ClickHouse ``Interval*`` types also begin with
    the letters "int", so they are excluded explicitly; otherwise they would
    be misreported as integers.
    """
    lowered = self.dtype.lower()
    if lowered.startswith('interval'):
        return False
    return lowered.startswith(('int', 'uint'))
Expand All @@ -101,7 +98,7 @@ def string_size(self) -> int:
if not self.is_string():
raise RuntimeException('Called string_size() on non-string field!')

if self.dtype.lower() != 'fixedstring' or self.char_size is None:
if not self.dtype.lower().startswith('fixedstring') or self.char_size is None:
return 256
else:
return int(self.char_size)
Expand All @@ -122,3 +119,9 @@ def can_expand_to(self, other_column: 'Column') -> bool:
return False

return other_column.string_size() > self.string_size()

def match_brackets(self, dtype):
    """Strip one outer ``Nullable(...)`` / ``LowCardinality(...)`` wrapper.

    *dtype* is trimmed of surrounding whitespace and matched against
    ``self._brackets_regex``.  On a match, ``self.is_nullable`` is set to
    whether the wrapper was ``Nullable`` and the inner type text is returned.
    Without a match, ``self.is_nullable`` is left untouched and ``None`` is
    returned.
    """
    found = self._brackets_regex.search(dtype.strip())
    if found is None:
        return None

    wrapper, inner = found.group(1), found.group(2)
    # A LowCardinality wrapper resets the flag to False; only Nullable sets it.
    self.is_nullable = wrapper == 'Nullable'
    return inner
92 changes: 92 additions & 0 deletions tests/integration/adapter/test_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from dbt.adapters.clickhouse import ClickhouseColumn


class TestColumn:
    """Checks how ``ClickhouseColumn`` classifies and renders ClickHouse dtypes."""

    def test_base_types(self):
        """Verify scalar dtypes, each also wrapped as Nullable/LowCardinality."""
        integer_types = [
            f'{prefix}{bits}'
            for prefix in ('UInt', 'Int')
            for bits in (8, 16, 32, 64, 128, 256)
        ]
        for dtype in integer_types:
            verify_column('name', dtype, False, False, False, True)

        # string_size() is expected to be 256 unless FixedString carries an
        # explicit size.
        string_cases = (
            ('String', 256),
            ('FixedString', 256),
            ('FixedString(16)', 16),
        )
        for dtype, expected_size in string_cases:
            str_col = verify_column('name', dtype, True, False, False, False)
            assert str_col.string_size() == expected_size

        verify_column('name', 'Decimal(6, 6)', False, True, False, False)

        for dtype in ('Float32', 'Float64'):
            verify_column('name', dtype, False, False, True, False)

        # Types that are neither string, numeric, float nor integer.
        for dtype in ('Date', 'Date32', "DateTime('Asia/Istanbul')", 'UUID'):
            verify_column('name', dtype, False, False, False, False)

    def test_array_type(self):
        """Array dtypes keep their full text in repr and are not nullable."""
        array_types = (
            'Array(String)',
            'Array(Nullable(String))',
            'Array(Nullable(FixedString(16)))',
        )
        for dtype in array_types:
            col = ClickhouseColumn(column='name', dtype=dtype)
            verify_column_types(col, False, False, False, False)
            assert repr(col) == f'<ClickhouseColumn name ({dtype}, is nullable: False)>'

    def test_low_cardinality_nullable_type(self):
        """LowCardinality(Nullable(<string type>)) is a nullable string column."""
        wrapped_types = (
            'LowCardinality(Nullable(String))',
            'LowCardinality(Nullable(FixedString(16)))',
        )
        for dtype in wrapped_types:
            col = ClickhouseColumn(column='name', dtype=dtype)
            verify_column_types(col, True, False, False, False)
            assert repr(col) == '<ClickhouseColumn name (Nullable(String), is nullable: True)>'


def verify_column(
    name: str, dtype: str, is_string: bool, is_numeric: bool, is_float: bool, is_int: bool
) -> ClickhouseColumn:
    """Build a column for *dtype* and check it bare, Nullable and LowCardinality.

    For each of the three variants the type predicates are compared with the
    expected flags and ``repr`` is compared with the expected rendering.
    Returns the column built from the bare *dtype*.
    """
    flags = (is_string, is_numeric, is_float, is_int)
    # String columns are expected to render as 'String' whatever dtype was given.
    shown_type = dtype if not is_string else 'String'

    # Bare dtype.
    plain = ClickhouseColumn(column=name, dtype=dtype)
    verify_column_types(plain, *flags)
    assert repr(plain) == f'<ClickhouseColumn {name} ({shown_type}, is nullable: False)>'

    # Nullable wrapper: same predicates, nullable flag set.
    wrapped_nullable = ClickhouseColumn(column=name, dtype=f'Nullable({dtype})')
    verify_column_types(wrapped_nullable, *flags)
    expected_nullable = f'<ClickhouseColumn {name} (Nullable({shown_type}), is nullable: True)>'
    assert repr(wrapped_nullable) == expected_nullable

    # LowCardinality wrapper: rendered like the bare dtype, not nullable.
    wrapped_low_card = ClickhouseColumn(column=name, dtype=f'LowCardinality({dtype})')
    verify_column_types(wrapped_low_card, *flags)
    expected_low_card = f'<ClickhouseColumn {name} ({shown_type}, is nullable: False)>'
    assert repr(wrapped_low_card) == expected_low_card

    return plain


def verify_column_types(
    col: ClickhouseColumn, is_string: bool, is_numeric: bool, is_float: bool, is_int: bool
):
    """Assert that each type predicate of *col* equals the expected flag.

    Checked in order: ``is_string``, ``is_numeric``, ``is_float``,
    ``is_integer``.  The first mismatch raises ``AssertionError``.
    """
    expectations = (
        ('is_string', is_string),
        ('is_numeric', is_numeric),
        ('is_float', is_float),
        ('is_integer', is_int),
    )
    for predicate_name, expected in expectations:
        assert getattr(col, predicate_name)() == expected

0 comments on commit 6368cfd

Please sign in to comment.