From f3baf576d7da13ff79dbfe52938f22834fb6c0d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mikko=20Lepp=C3=A4nen?=
Date: Wed, 24 Oct 2018 13:38:50 +0300
Subject: [PATCH] As Glue limits comments to 255 characters, we may need to truncate them.

---
 .../src/hive_metastore_migration.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
index e9f76bd..ed898e4 100644
--- a/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
+++ b/utilities/Hive_metastore_migration/src/hive_metastore_migration.py
@@ -414,6 +414,14 @@ def transform_ms_bucketing_cols(self, ms_bucketing_cols):
                                           payload_func=lambda row: row['BUCKET_COL_NAME'])
 
     def transform_ms_columns(self, ms_columns):
+        def extract_row(row):
+            def truncate(x):
+                return x[:255] if hasattr(x,"__getitem__") else x
+            return (
+                row['COLUMN_NAME'],
+                row['TYPE_NAME'],
+                truncate(row['COMMENT'])
+            )
         return self.transform_df_with_idx(df=ms_columns,
                                           id_col='CD_ID',
                                           idx='INTEGER_IDX',
@@ -422,8 +430,7 @@ def transform_ms_columns(self, ms_columns):
                                               StructField(name='name', dataType=StringType()),
                                               StructField(name='type', dataType=StringType()),
                                               StructField(name='comment', dataType=StringType())]),
-                                          payload_func=lambda row: (
-                                              row['COLUMN_NAME'], row['TYPE_NAME'], row['COMMENT']))
+                                          payload_func=extract_row)
 
     def transform_ms_skewed_col_names(self, ms_skewed_col_names):
         return self.transform_df_with_idx(df=ms_skewed_col_names,