Skip to content

Commit

Permalink
[vpj] Ignore field props check during subset schema validation. (#1180)
Browse files Browse the repository at this point in the history
Previous issue: During subset schema validation, we will compare field strictly including the customized props. Such props could be JSON string provided by customers, without strict string order. And when we generate superset schema, we also did not check the props difference.

The fix is avoiding the props checking when comparing field with the same name, between subset schema and superset schema. We will deep-copy fields from subset schema and superset schema and then do equality check.

Co-authored-by: Hao Xu <xhao@xhao-mn3.linkedin.biz>
  • Loading branch information
haoxu07 and Hao Xu authored Sep 18, 2024
1 parent 21f4d23 commit 7678a57
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -116,19 +116,22 @@ private static void copyFieldProperties(FieldBuilder fieldBuilder, Schema.Field
});
}

private static FieldBuilder deepCopySchemaField(Schema.Field field) {
private static FieldBuilder deepCopySchemaFieldWithoutFieldProps(Schema.Field field) {
FieldBuilder fieldBuilder = AvroCompatibilityHelper.newField(null)
.setName(field.name())
.setSchema(field.schema())
.setDoc(field.doc())
.setOrder(field.order());
copyFieldProperties(fieldBuilder, field);

// set default as AvroCompatibilityHelper builder might drop defaults if there is type mismatch
if (field.hasDefaultValue()) {
fieldBuilder.setDefault(getFieldDefault(field));
}
return fieldBuilder;
}

private static FieldBuilder deepCopySchemaField(Schema.Field field) {
FieldBuilder fieldBuilder = deepCopySchemaFieldWithoutFieldProps(field);
copyFieldProperties(fieldBuilder, field);
return fieldBuilder;
}

Expand Down Expand Up @@ -192,18 +195,24 @@ public static MultiSchemaResponse.Schema getLatestUpdateSchemaFromSchemaResponse
return updateSchema;
}

/**
* * Validate if the Subset Value Schema is a subset of the Superset Value Schema, here the field props are not used to
* check if the field is same or not.
*/
public static boolean validateSubsetValueSchema(Schema subsetValueSchema, String supersetSchemaStr) {
Schema supersetSchema = AvroSchemaParseUtils.parseSchemaFromJSONLooseValidation(supersetSchemaStr);
for (Schema.Field field: subsetValueSchema.getFields()) {
Schema.Field fieldInSupersetSchema = supersetSchema.getField(field.name());
if (fieldInSupersetSchema == null) {
return false;
}
if (!field.equals(fieldInSupersetSchema)) {
Schema.Field subsetValueSchemaWithoutFieldProps = deepCopySchemaFieldWithoutFieldProps(field).build();
Schema.Field fieldInSupersetSchemaWithoutFieldProps =
deepCopySchemaFieldWithoutFieldProps(fieldInSupersetSchema).build();
if (!subsetValueSchemaWithoutFieldProps.equals(fieldInSupersetSchemaWithoutFieldProps)) {
return false;
}
}
return true;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V2_SCHEMA;
import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V3_SCHEMA;
import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V4_SCHEMA;
import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V5_SCHEMA;
import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V6_SCHEMA;

import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
import com.linkedin.venice.controllerapi.MultiSchemaResponse;
Expand Down Expand Up @@ -489,5 +491,15 @@ public void testValidateSubsetSchema() {
AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V2_SCHEMA, NAME_RECORD_V3_SCHEMA.toString()));
Assert.assertFalse(
AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V3_SCHEMA, NAME_RECORD_V4_SCHEMA.toString()));

// NAME_RECORD_V5_SCHEMA and NAME_RECORD_V6_SCHEMA are different in props for field.
Assert.assertNotEquals(NAME_RECORD_V5_SCHEMA, NAME_RECORD_V6_SCHEMA);
// Test validation skip comparing props when checking for subset schema.
Schema supersetSchemaForV5AndV4 =
AvroSupersetSchemaUtils.generateSuperSetSchema(NAME_RECORD_V5_SCHEMA, NAME_RECORD_V4_SCHEMA);
Assert.assertTrue(
AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V5_SCHEMA, supersetSchemaForV5AndV4.toString()));
Assert.assertTrue(
AvroSupersetSchemaUtils.validateSubsetValueSchema(NAME_RECORD_V6_SCHEMA, supersetSchemaForV5AndV4.toString()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ public class TestWriteUtils {
AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV3.avsc"));
public static final Schema NAME_RECORD_V4_SCHEMA =
AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV4.avsc"));
public static final Schema NAME_RECORD_V5_SCHEMA =
AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV5.avsc"));
public static final Schema NAME_RECORD_V6_SCHEMA =
AvroCompatibilityHelper.parse(loadSchemaFileFromResource("valueSchema/NameV6.avsc"));

// ETL Schema
public static final Schema ETL_KEY_SCHEMA = AvroCompatibilityHelper.parse(loadSchemaFileFromResource("etl/Key.avsc"));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"type" : "record",
"name" : "nameRecord",
"namespace" : "example.avro",
"fields" : [ {
"name" : "firstName",
"type" : "string",
"default" : "",
"custom_prop" : "custom_prop_value_1, custom_prop_value_2"
}, {
"name" : "lastName",
"type" : "string",
"default" : "",
"custom_prop" : "custom_prop_value_1, custom_prop_value_2"
}, {
"name" : "age",
"type" : "int",
"default" : -1,
"custom_prop" : "custom_prop_value_1, custom_prop_value_2"
} ]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"type" : "record",
"name" : "nameRecord",
"namespace" : "example.avro",
"fields" : [ {
"name" : "firstName",
"type" : "string",
"default" : "",
"custom_prop" : "custom_prop_value_2, custom_prop_value_1"
}, {
"name" : "lastName",
"type" : "string",
"default" : "",
"custom_prop" : "custom_prop_value_2, custom_prop_value_1"
}, {
"name" : "age",
"type" : "int",
"default" : -1,
"custom_prop" : "custom_prop_value_2, custom_prop_value_1"
} ]
}

0 comments on commit 7678a57

Please sign in to comment.