From 0d79aa6b264ad507beecfd34f5265a998fdd894a Mon Sep 17 00:00:00 2001 From: Bingfeng Xia Date: Tue, 26 May 2020 16:00:35 -0700 Subject: [PATCH] Improve fast deserialization speed by avoiding field schema retrieval cost (#49) Change to use fields' schema directly instead of registering and then retrieving them from HashMap. Co-authored-by: Bingfeng Xia --- .../fastserde/FastDeserializerGenerator.java | 30 +++++++------------ .../avro/fastserde/SchemaAssistant.java | 4 +-- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java b/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java index b42883ef0..7a6925518 100644 --- a/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java +++ b/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/FastDeserializerGenerator.java @@ -9,7 +9,6 @@ import com.sun.codemodel.JDoLoop; import com.sun.codemodel.JExpr; import com.sun.codemodel.JExpression; -import com.sun.codemodel.JFieldVar; import com.sun.codemodel.JForLoop; import com.sun.codemodel.JInvocation; import com.sun.codemodel.JMethod; @@ -59,8 +58,7 @@ public class FastDeserializerGenerator extends FastDeserializerGeneratorBase< private static final Supplier EMPTY_SUPPLIER = () -> JExpr._null(); private boolean useGenericTypes; - private JMethod schemaMapMethod; - private JFieldVar schemaMapField; + private JMethod constructor; private Map schemaMap = new HashMap<>(); private Map schemaVarMap = new HashMap<>(); private Map deserializeMethodMap = new HashMap<>(); @@ -83,7 +81,7 @@ public FastDeserializer generateDeserializer() { deserializerClass = classPackage._class(className); JVar readerSchemaVar = deserializerClass.field(JMod.PRIVATE | JMod.FINAL, Schema.class, "readerSchema"); - JMethod constructor = deserializerClass.constructor(JMod.PUBLIC); + constructor = deserializerClass.constructor(JMod.PUBLIC); JVar constructorParam = constructor.param(Schema.class, "readerSchema"); constructor.body().assign(JExpr.refthis(readerSchemaVar.name()), constructorParam); @@ -99,14 +97,6 @@ public FastDeserializer generateDeserializer() { FieldAction fieldAction = FieldAction.fromValues(aliasedWriterSchema.getType(), true, resolvingGrammar); if (useGenericTypes) { - schemaMapField = - deserializerClass.field(JMod.PRIVATE, codeModel.ref(Map.class).narrow(Long.class).narrow(Schema.class), - "readerSchemaMap"); - schemaMapMethod = deserializerClass.method(JMod.PRIVATE | JMod.FINAL, void.class, "schemaMap"); - constructor.body().invoke(schemaMapMethod); - schemaMapMethod.body() - .assign(schemaMapField, - JExpr._new(codeModel.ref(HashMap.class).narrow(Long.class).narrow(Schema.class))); registerSchema(aliasedWriterSchema, readerSchemaVar); } @@ -276,7 +266,7 @@ private void processRecord(JVar recordSchemaVar, String recordName, final Schema JClass indexedRecordClass = codeModel.ref(IndexedRecord.class); JInvocation newRecord = JExpr._new(schemaAssistant.classFromSchema(recordWriterSchema, false)); if (useGenericTypes) { - JExpression recordSchema = schemaMapField.invoke("get").arg(JExpr.lit(Utils.getSchemaFingerprint(recordWriterSchema))); + JExpression recordSchema = schemaVarMap.get(Utils.getSchemaFingerprint(recordWriterSchema)); newRecord = newRecord.arg(recordSchema); JInvocation finalNewRecordInvocation = newRecord; @@ -1021,10 +1011,11 @@ private JVar declareSchemaVar(Schema valueSchema, String variableName, JInvocati if (schemaVarMap.get(schemaId) != null) { return schemaVarMap.get(schemaId); } else { - JVar schemaVar = schemaMapMethod.body() - .decl(codeModel.ref(Schema.class), getVariableName(StringUtils.uncapitalize(variableName)), getValueType); + JVar schemaVar = deserializerClass.field(JMod.PRIVATE | JMod.FINAL, Schema.class, + getVariableName(StringUtils.uncapitalize(variableName))); + constructor.body().assign(JExpr.refthis(schemaVar.name()), getValueType); + registerSchema(valueSchema, schemaId, schemaVar); - schemaVarMap.put(schemaId, schemaVar); return schemaVar; } } else { @@ -1040,7 +1031,7 @@ private void registerSchema(final Schema writerSchema, long schemaId, JVar schem if ((Schema.Type.RECORD.equals(writerSchema.getType()) || Schema.Type.ENUM.equals(writerSchema.getType()) || Schema.Type.ARRAY.equals(writerSchema.getType())) && schemaNotRegistered(writerSchema)) { schemaMap.put(schemaId, writerSchema); - schemaMapMethod.body().invoke(schemaMapField, "put").arg(JExpr.lit(schemaId)).arg(schemaVar); + schemaVarMap.put(schemaId, schemaVar); } } @@ -1087,7 +1078,8 @@ private JMethod createMethod(final Schema schema, boolean read) { return method; } - private JInvocation getSchemaExpr(Schema schema) { - return useGenericTypes ? schemaMapField.invoke("get").arg(JExpr.lit(Utils.getSchemaFingerprint(schema))) : null; + private JExpression getSchemaExpr(Schema schema) { + Long index = Utils.getSchemaFingerprint(schema); + return (useGenericTypes && schemaVarMap.containsKey(index)) ? schemaVarMap.get(index) : JExpr._null(); } } diff --git a/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java b/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java index c2f67753f..ac5289af8 100644 --- a/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java +++ b/avro-fastserde/src/main/java/com/linkedin/avro/fastserde/SchemaAssistant.java @@ -291,7 +291,7 @@ protected JClass defaultStringType() { return codeModel.ref(Utf8.class); } - public JExpression getEnumValueByName(Schema enumSchema, JExpression nameExpr, JInvocation getSchemaExpr) { + public JExpression getEnumValueByName(Schema enumSchema, JExpression nameExpr, JExpression getSchemaExpr) { if (useGenericTypes) { if (Utils.isAvro14()) { return JExpr._new(codeModel.ref(GenericData.EnumSymbol.class)).arg(nameExpr); @@ -303,7 +303,7 @@ public JExpression getEnumValueByName(Schema enumSchema, JExpression nameExpr, J } } - public JExpression getEnumValueByIndex(Schema enumSchema, JExpression indexExpr, JInvocation getSchemaExpr) { + public JExpression getEnumValueByIndex(Schema enumSchema, JExpression indexExpr, JExpression getSchemaExpr) { if (useGenericTypes) { if (Utils.isAvro14()) { return JExpr._new(codeModel.ref(GenericData.EnumSymbol.class))