-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Enricher interface * Support in realtime ingestion * Fix offline segment builder * Add enricher to mapper * Add columnsToExtract config * Complete enricher pipeline usage * Enrichment configs * Add transform function based enrichment * Change package name * Change package name * Review comments * Add table config validation * Review comments * Change to fieldToFunctionMap --------- Co-authored-by: Saurabh Dubey <saurabh.dubey@saurabhs-macbook-pro-1.tail8a064.ts.net> Co-authored-by: Saurabh Dubey <saurabh.dubey@Saurabhs-MacBook-Pro.local>
- Loading branch information
1 parent
3267a74
commit 07daa7b
Showing
25 changed files
with
777 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
...-local/src/main/java/org/apache/pinot/plugin/record/enricher/clp/CLPEncodingEnricher.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pinot.plugin.record.enricher.clp; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.yscope.clp.compressorfrontend.BuiltInVariableHandlingRuleVersions; | ||
import com.yscope.clp.compressorfrontend.EncodedMessage; | ||
import com.yscope.clp.compressorfrontend.MessageEncoder; | ||
import java.io.IOException; | ||
import java.util.List; | ||
import org.apache.pinot.spi.data.readers.GenericRow; | ||
import org.apache.pinot.spi.recordenricher.RecordEnricher; | ||
import org.apache.pinot.spi.utils.JsonUtils; | ||
import org.apache.pinot.sql.parsers.rewriter.ClpRewriter; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
||
/** | ||
* Enriches the record with CLP encoded fields. | ||
* For a column 'x', it adds three new columns to the record: | ||
* 1. 'x_logtype' - The logtype of the encoded message | ||
* 2. 'x_dictVars' - The dictionary variables of the encoded message | ||
* 3. 'x_encodedVars' - The encoded variables of the encoded message | ||
*/ | ||
public class CLPEncodingEnricher implements RecordEnricher { | ||
private static final Logger LOGGER = LoggerFactory.getLogger(CLPEncodingEnricher.class); | ||
private final ClpEnricherConfig _config; | ||
private final EncodedMessage _clpEncodedMessage; | ||
private final MessageEncoder _clpMessageEncoder; | ||
|
||
public CLPEncodingEnricher(JsonNode enricherProperties) throws IOException { | ||
_config = JsonUtils.jsonNodeToObject(enricherProperties, ClpEnricherConfig.class); | ||
_clpEncodedMessage = new EncodedMessage(); | ||
_clpMessageEncoder = new MessageEncoder(BuiltInVariableHandlingRuleVersions.VariablesSchemaV2, | ||
BuiltInVariableHandlingRuleVersions.VariableEncodingMethodsV1); | ||
} | ||
|
||
@Override | ||
public List<String> getInputColumns() { | ||
return _config.getFields(); | ||
} | ||
|
||
@Override | ||
public void enrich(GenericRow record) { | ||
try { | ||
for (String field : _config.getFields()) { | ||
Object value = record.getValue(field); | ||
if (value != null) { | ||
enrichWithClpEncodedFields(field, value, record); | ||
} | ||
} | ||
} catch (Exception e) { | ||
LOGGER.error("Failed to enrich record: {}", record); | ||
} | ||
} | ||
|
||
private void enrichWithClpEncodedFields(String key, Object value, GenericRow to) { | ||
String logtype = null; | ||
Object[] dictVars = null; | ||
Object[] encodedVars = null; | ||
if (null != value) { | ||
if (value instanceof String) { | ||
String valueAsString = (String) value; | ||
try { | ||
_clpMessageEncoder.encodeMessage(valueAsString, _clpEncodedMessage); | ||
logtype = _clpEncodedMessage.getLogTypeAsString(); | ||
encodedVars = _clpEncodedMessage.getEncodedVarsAsBoxedLongs(); | ||
dictVars = _clpEncodedMessage.getDictionaryVarsAsStrings(); | ||
} catch (IOException e) { | ||
LOGGER.error("Can't encode field with CLP. name: '{}', value: '{}', error: {}", key, valueAsString, | ||
e.getMessage()); | ||
} | ||
} else { | ||
LOGGER.error("Can't encode value of type {} with CLP. name: '{}', value: '{}'", | ||
value.getClass().getSimpleName(), key, value); | ||
} | ||
} | ||
|
||
to.putValue(key + ClpRewriter.LOGTYPE_COLUMN_SUFFIX, logtype); | ||
to.putValue(key + ClpRewriter.DICTIONARY_VARS_COLUMN_SUFFIX, dictVars); | ||
to.putValue(key + ClpRewriter.ENCODED_VARS_COLUMN_SUFFIX, encodedVars); | ||
} | ||
} |
51 changes: 51 additions & 0 deletions
51
...src/main/java/org/apache/pinot/plugin/record/enricher/clp/CLPEncodingEnricherFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pinot.plugin.record.enricher.clp; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.google.auto.service.AutoService; | ||
import java.io.IOException; | ||
import org.apache.pinot.spi.recordenricher.RecordEnricher; | ||
import org.apache.pinot.spi.recordenricher.RecordEnricherFactory; | ||
import org.apache.pinot.spi.recordenricher.RecordEnricherValidationConfig; | ||
import org.apache.pinot.spi.utils.JsonUtils; | ||
|
||
@AutoService(RecordEnricherFactory.class) | ||
public class CLPEncodingEnricherFactory implements RecordEnricherFactory { | ||
private static final String ENRICHER_TYPE = "clpEnricher"; | ||
@Override | ||
public String getEnricherType() { | ||
return ENRICHER_TYPE; | ||
} | ||
|
||
@Override | ||
public RecordEnricher createEnricher(JsonNode enricherProps) | ||
throws IOException { | ||
return new CLPEncodingEnricher(enricherProps); | ||
} | ||
|
||
@Override | ||
public void validateEnrichmentConfig(JsonNode enricherProps, RecordEnricherValidationConfig validationConfig) { | ||
try { | ||
ClpEnricherConfig config = JsonUtils.jsonNodeToObject(enricherProps, ClpEnricherConfig.class); | ||
} catch (IOException e) { | ||
throw new IllegalArgumentException("Failed to parse clp enricher config", e); | ||
} | ||
} | ||
} |
40 changes: 40 additions & 0 deletions
40
...nt-local/src/main/java/org/apache/pinot/plugin/record/enricher/clp/ClpEnricherConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.pinot.plugin.record.enricher.clp; | ||
|
||
import com.fasterxml.jackson.annotation.JsonCreator; | ||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import java.util.List; | ||
|
||
|
||
/** | ||
* Configuration for the CLP enricher. | ||
*/ | ||
public class ClpEnricherConfig { | ||
private final List<String> _fields; | ||
|
||
@JsonCreator | ||
public ClpEnricherConfig(@JsonProperty("fields") List<String> fields) { | ||
_fields = fields; | ||
} | ||
|
||
public List<String> getFields() { | ||
return _fields; | ||
} | ||
} |
66 changes: 66 additions & 0 deletions
66
...rc/main/java/org/apache/pinot/plugin/record/enricher/function/CustomFunctionEnricher.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/** | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.apache.pinot.plugin.record.enricher.function; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import org.apache.pinot.segment.local.function.FunctionEvaluator; | ||
import org.apache.pinot.segment.local.function.FunctionEvaluatorFactory; | ||
import org.apache.pinot.spi.data.readers.GenericRow; | ||
import org.apache.pinot.spi.recordenricher.RecordEnricher; | ||
import org.apache.pinot.spi.utils.JsonUtils; | ||
|
||
|
||
/** | ||
* Enriches the record with custom functions. | ||
*/ | ||
public class CustomFunctionEnricher implements RecordEnricher { | ||
private final Map<String, FunctionEvaluator> _fieldToFunctionEvaluator; | ||
private final List<String> _fieldsToExtract; | ||
|
||
public CustomFunctionEnricher(JsonNode enricherProps) throws IOException { | ||
CustomFunctionEnricherConfig config = JsonUtils.jsonNodeToObject(enricherProps, CustomFunctionEnricherConfig.class); | ||
_fieldToFunctionEvaluator = new LinkedHashMap<>(); | ||
_fieldsToExtract = new ArrayList<>(); | ||
for (Map.Entry<String, String> entry : config.getFieldToFunctionMap().entrySet()) { | ||
String column = entry.getKey(); | ||
String function = entry.getValue(); | ||
FunctionEvaluator functionEvaluator = FunctionEvaluatorFactory.getExpressionEvaluator(function); | ||
_fieldToFunctionEvaluator.put(column, functionEvaluator); | ||
_fieldsToExtract.addAll(functionEvaluator.getArguments()); | ||
} | ||
} | ||
|
||
@Override | ||
public List<String> getInputColumns() { | ||
return _fieldsToExtract; | ||
} | ||
|
||
@Override | ||
public void enrich(GenericRow record) { | ||
_fieldToFunctionEvaluator.forEach((field, evaluator) -> { | ||
record.putValue(field, evaluator.evaluate(record)); | ||
}); | ||
} | ||
} |
Oops, something went wrong.