diff --git a/databridge_etl_tools/oracle/oracle.py b/databridge_etl_tools/oracle/oracle.py index 71c411e..56ec701 100644 --- a/databridge_etl_tools/oracle/oracle.py +++ b/databridge_etl_tools/oracle/oracle.py @@ -176,50 +176,6 @@ def check_remove_nulls(self): writer.writerows(reader) os.replace(temp_file, self.csv_path) - def has_m_or_z(self): - # Assume innocence until proven guilty - m = False - z = False - cursor = self.conn.cursor() - - has_m_or_z_stmt = f''' - SELECT definition FROM sde.gdb_items - WHERE name = 'databridge.{self.table_schema}.{self.table_name}' - ''' - print('Running has_m_or_z_stmt: ' + has_m_or_z_stmt) - cursor.execute(has_m_or_z_stmt) - result = cursor.fetchone() - - if not result: - print('No XML file found sde.gdb_items definition col, this is unusual for a registered table!') - else: - xml_def = result[0] - if not xml_def: - print('No XML file found sde.gdb_items definition col, this is unusual for a registered table!') - else: - m_search = re.search("\D*<\/HasM>", xml_def) - if not m_search: - #print('No element found in xml definition, assuming False.') - pass - else: - if 'true' in m_search[0]: - print(m_search[0]) - m = True - z_search = re.search("\D*<\/HasZ>", xml_def) - if not z_search: - #print('No element found in xml definition, assuming False.') - pass - else: - if 'true' in z_search[0]: - print(z_search[0]) - z = True - - if m or z: - return True - else: - return False - - def extract(self): ''' Extract data from database and save as a CSV file. Any fields that contain @@ -234,49 +190,10 @@ def extract(self): self.logger.info('Note: petl can cause log messages to seemingly come out of order.') import geopetl - # Insert as a function within this function to it can be called easily by petl - def mz_to_xy(shape): - # Check if the shape includes an SRID (Extended WKT format) - srid = '' - if shape.startswith('SRID='): - srid, shape = shape.split(';', 1) # Separate SRID from the shape part - srid += ';' # Retain SRID for later - - # Extract the geometry type (e.g., LINESTRING, MULTILINESTRING) - # Split the type from the rest of the coordinates - xy_type = shape.split('(', 1)[0].strip() - # Remove any Z, M, ZM, or MZ suffix from the geometry type - xy_type = re.sub(r'\s?[ZM]+$', '', xy_type) - - # The rest of the shape after the geometry type, and remove any Z or M that could stil exist. - shape_coords = shape[len(xy_type):].strip().replace('Z','').replace('M','').replace('ZM','').replace('MZ','') - - # Function to remove M and Z values from coordinate strings - def process_coords(coords_str): - # Using regular expressions to match only X and Y coordinates, ignoring M and Z - coords = re.findall(r'-?\d+\.\d+', coords_str) - # Group coordinates into pairs (x, y) by skipping M and Z values - return ', '.join(' '.join(coords[i:i + 2]) for i in range(0, len(coords), 3)) - - # Match each nested set of parentheses to properly handle geometries - def process_shape(wkt_str): - return re.sub(r'\(([^()]*)\)', lambda m: f"({process_coords(m.group(1))})", wkt_str) - - # Process the main geometry - xy_wkt = process_shape(shape_coords) - - # Return the EWKT if SRID exists, otherwise just the WKT - return srid + xy_type + xy_wkt - - # Note: data isn't read just yet at this point self.logger.info('Initializing data var with etl.fromoraclesde()..') data = etl.fromoraclesde(self.conn, self.schema_table_name, geom_with_srid=True) - if self.has_m_or_z: - print('Table has M or Z values, converting to XY..') - data = data.convert('shape', lambda v: mz_to_xy(v) if v else None) - self.logger.info('Initialized.') @@ -316,8 +233,8 @@ def process_shape(wkt_str): etl.tocsv(data.progress(interval), self.csv_path, encoding='latin-1') # Used solely in pytest to ensure database is called only once. - #self.times_db_called = data.times_db_called - #self.logger.info(f'Times database queried: {self.times_db_called}') + self.times_db_called = data.times_db_called + self.logger.info(f'Times database queried: {self.times_db_called}') # Confirm CSV isn't empty try: @@ -393,7 +310,7 @@ def load(self): WHERE table_name = '{self.table_name.upper()}' AND owner = '{self.table_schema.upper()}' AND column_name not like 'SYS_%' - AND (column_name not like '%OBJECTID%' or column_name in ({','.join(self.nonoid_fields_w_objectid.upper())})) + AND (column_name not like '%OBJECTID%' or column_name in ({','.join([ f.upper() for f in self.nonoid_fields_w_objectid])})) ''' cursor.execute(cols_stmt) cols = cursor.fetchall()[0][0] diff --git a/pyproject.toml b/pyproject.toml index 09dba00..97e363b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ py_modules = ['databridge_etl_tools'] [project] name = "databridge-etl-tools" -version = "1.3.0" +version = "1.3.2" description = "Command line tools to extract and load SQL data to various endpoints" authors = [ {name = "citygeo", email = "maps@phila.gov"},