From 7678a576098997d4a3a0e452d1ec224401ccbe9f Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Tue, 17 Sep 2024 17:10:33 -0700 Subject: [PATCH] [vpj] Ignore field props check during subset schema validation. (#1180) Previous issue: During subset schema validation, we will compare field strictly including the customized props. Such props could be JSON string provided by customers, without strict string order. And when we generate superset schema, we also did not check the props difference. The fix is avoiding the props checking when comparing field with the same name, between subset schema and superset schema. We will deep-copy fields from subset schema and superset schema and then do equality check. Co-authored-by: Hao Xu --- .../venice/utils/AvroSupersetSchemaUtils.java | 19 ++++++++++++----- .../schema/TestAvroSupersetSchemaUtils.java | 12 +++++++++++ .../linkedin/venice/utils/TestWriteUtils.java | 4 ++++ .../main/resources/valueSchema/NameV5.avsc | 21 +++++++++++++++++++ .../main/resources/valueSchema/NameV6.avsc | 21 +++++++++++++++++++ 5 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 internal/venice-test-common/src/main/resources/valueSchema/NameV5.avsc create mode 100644 internal/venice-test-common/src/main/resources/valueSchema/NameV6.avsc diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/AvroSupersetSchemaUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/AvroSupersetSchemaUtils.java index 9ec682571b..564507ac03 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/AvroSupersetSchemaUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/AvroSupersetSchemaUtils.java @@ -116,19 +116,22 @@ private static void copyFieldProperties(FieldBuilder fieldBuilder, Schema.Field }); } - private static FieldBuilder deepCopySchemaField(Schema.Field field) { + private static FieldBuilder deepCopySchemaFieldWithoutFieldProps(Schema.Field field) { FieldBuilder fieldBuilder = AvroCompatibilityHelper.newField(null) .setName(field.name()) .setSchema(field.schema()) .setDoc(field.doc()) .setOrder(field.order()); - copyFieldProperties(fieldBuilder, field); - // set default as AvroCompatibilityHelper builder might drop defaults if there is type mismatch if (field.hasDefaultValue()) { fieldBuilder.setDefault(getFieldDefault(field)); } + return fieldBuilder; + } + private static FieldBuilder deepCopySchemaField(Schema.Field field) { + FieldBuilder fieldBuilder = deepCopySchemaFieldWithoutFieldProps(field); + copyFieldProperties(fieldBuilder, field); return fieldBuilder; } @@ -192,6 +195,10 @@ public static MultiSchemaResponse.Schema getLatestUpdateSchemaFromSchemaResponse return updateSchema; } + /** + * * Validate if the Subset Value Schema is a subset of the Superset Value Schema, here the field props are not used to + * check if the field is same or not. + */ public static boolean validateSubsetValueSchema(Schema subsetValueSchema, String supersetSchemaStr) { Schema supersetSchema = AvroSchemaParseUtils.parseSchemaFromJSONLooseValidation(supersetSchemaStr); for (Schema.Field field: subsetValueSchema.getFields()) { @@ -199,11 +206,13 @@ public static boolean validateSubsetValueSchema(Schema subsetValueSchema, String if (fieldInSupersetSchema == null) { return false; } - if (!field.equals(fieldInSupersetSchema)) { + Schema.Field subsetValueSchemaWithoutFieldProps = deepCopySchemaFieldWithoutFieldProps(field).build(); + Schema.Field fieldInSupersetSchemaWithoutFieldProps = + deepCopySchemaFieldWithoutFieldProps(fieldInSupersetSchema).build(); + if (!subsetValueSchemaWithoutFieldProps.equals(fieldInSupersetSchemaWithoutFieldProps)) { return false; } } return true; } - } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java index f676cfd4bb..ed8766d851 100644 --- a/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/schema/TestAvroSupersetSchemaUtils.java @@ -4,6 +4,8 @@ import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V2_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V3_SCHEMA; import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V4_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V5_SCHEMA; +import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V6_SCHEMA; import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper; import com.linkedin.venice.controllerapi.MultiSchemaResponse; @@ -489,5 +491,15 @@ public void testValidateSubsetSchema() { AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V2_SCHEMA, NAME_RECORD_V3_SCHEMA.toString())); Assert.assertFalse( AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V3_SCHEMA, NAME_RECORD_V4_SCHEMA.toString())); + + // NAME_RECORD_V5_SCHEMA and NAME_RECORD_V6_SCHEMA are different in props for field. + Assert.assertNotEquals(NAME_RECORD_V5_SCHEMA, NAME_RECORD_V6_SCHEMA); + // Test validation skip comparing props when checking for subset schema. + Schema supersetSchemaForV5AndV4 = + AvroSupersetSchemaUtils.generateSuperSetSchema(NAME_RECORD_V5_SCHEMA, NAME_RECORD_V4_SCHEMA); + Assert.assertTrue( + AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V5_SCHEMA, supersetSchemaForV5AndV4.toString())); + Assert.assertTrue( + AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V6_SCHEMA, supersetSchemaForV5AndV4.toString())); } } diff --git a/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java index c7b92e245a..7f6dccf732 100644 --- a/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java +++ b/internal/venice-test-common/src/main/java/com/linkedin/venice/utils/TestWriteUtils.java @@ -95,6 +95,10 @@ public class TestWriteUtils { AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV3.avsc")); public static final Schema NAME_RECORD_V4_SCHEMA = AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV4.avsc")); + public static final Schema NAME_RECORD_V5_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV5.avsc")); + public static final Schema NAME_RECORD_V6_SCHEMA = + AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV6.avsc")); // ETL Schema public static final Schema ETL_KEY_SCHEMA = AvroCompatibilityHelper.parse(loadSchemaFileFromResource("etl/Key.avsc")); diff --git a/internal/venice-test-common/src/main/resources/valueSchema/NameV5.avsc b/internal/venice-test-common/src/main/resources/valueSchema/NameV5.avsc new file mode 100644 index 0000000000..963271223b --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/NameV5.avsc @@ -0,0 +1,21 @@ +{ + "type" : "record", + "name" : "nameRecord", + "namespace" : "example.avro", + "fields" : [ { + "name" : "firstName", + "type" : "string", + "default" : "", + "custom_prop" : "custom_prop_value_1, custom_prop_value_2" + }, { + "name" : "lastName", + "type" : "string", + "default" : "", + "custom_prop" : "custom_prop_value_1, custom_prop_value_2" + }, { + "name" : "age", + "type" : "int", + "default" : -1, + "custom_prop" : "custom_prop_value_1, custom_prop_value_2" + } ] +} \ No newline at end of file diff --git a/internal/venice-test-common/src/main/resources/valueSchema/NameV6.avsc b/internal/venice-test-common/src/main/resources/valueSchema/NameV6.avsc new file mode 100644 index 0000000000..838dc51859 --- /dev/null +++ b/internal/venice-test-common/src/main/resources/valueSchema/NameV6.avsc @@ -0,0 +1,21 @@ +{ + "type" : "record", + "name" : "nameRecord", + "namespace" : "example.avro", + "fields" : [ { + "name" : "firstName", + "type" : "string", + "default" : "", + "custom_prop" : "custom_prop_value_2, custom_prop_value_1" + }, { + "name" : "lastName", + "type" : "string", + "default" : "", + "custom_prop" : "custom_prop_value_2, custom_prop_value_1" + }, { + "name" : "age", + "type" : "int", + "default" : -1, + "custom_prop" : "custom_prop_value_2, custom_prop_value_1" + } ] +} \ No newline at end of file