From 16c88062126447b51ab15022a76aaa32de600225 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Sat, 15 Jun 2024 04:38:26 +0800 Subject: [PATCH] Add fingerprint ingest processor (#13724) * Add fingerprint ingest processor Signed-off-by: Gao Binlong * Ignore metadata fields Signed-off-by: Gao Binlong * Add sha3-256 hash method Signed-off-by: Gao Binlong * Remove unused code Signed-off-by: Gao Binlong * Add exclude_fields and remove include_all_fields Signed-off-by: Gao Binlong * Modify processor description Signed-off-by: Gao Binlong * Make FingerprintProcessor being final Signed-off-by: Gao Binlong * Optimize error message and check if field name is empty string Signed-off-by: Gao Binlong * Fix yaml test failure Signed-off-by: Gao Binlong * Prepend string length to the field value Signed-off-by: Gao Binlong * Append hash method with version number Signed-off-by: Gao Binlong * Update supported version in yml test file Signed-off-by: Gao Binlong * Add more comment Signed-off-by: Gao Binlong * Prepend hash method to the hash value and add more test cases Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../ingest/common/FingerprintProcessor.java | 279 +++++++ .../common/IngestCommonModulePlugin.java | 1 + .../FingerprintProcessorFactoryTests.java | 119 +++ .../common/FingerprintProcessorTests.java | 176 ++++ .../rest-api-spec/test/ingest/10_basic.yml | 16 + .../test/ingest/340_fingerprint_processor.yml | 786 ++++++++++++++++++ .../common/hash/MessageDigests.java | 13 + .../common/hash/MessageDigestsTests.java | 25 + 9 files changed, 1416 insertions(+) create mode 100644 modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java create mode 100644 modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java create mode 100644 modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java create mode 100644 modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b7e333174354..4e1d41ec6b152 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added +- Add fingerprint ingest processor ([#13724](https://github.com/opensearch-project/OpenSearch/pull/13724)) ### Dependencies - Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java new file mode 100644 index 0000000000000..c2f59bf586c81 --- /dev/null +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/FingerprintProcessor.java @@ -0,0 +1,279 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.common.Nullable; +import org.opensearch.common.hash.MessageDigests; +import org.opensearch.core.common.Strings; +import org.opensearch.ingest.AbstractProcessor; +import org.opensearch.ingest.ConfigurationUtils; +import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.Processor; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.Base64; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.opensearch.ingest.ConfigurationUtils.newConfigurationException; + +/** + * Processor that generating hash value for the specified fields or fields not in the specified excluded list + */ +public final class FingerprintProcessor extends AbstractProcessor { + public static final String TYPE = "fingerprint"; + // this processor is introduced in 2.16.0, we append the OpenSearch version to the hash method name to ensure + // that this processor always generates same hash value based on a specific hash method, if the processing logic + // of this processor changes in future version, the version number in the hash method should be increased correspondingly. + private static final Set HASH_METHODS = Set.of("MD5@2.16.0", "SHA-1@2.16.0", "SHA-256@2.16.0", "SHA3-256@2.16.0"); + + // fields used to generate hash value + private final List fields; + // all fields other than the excluded fields are used to generate hash value + private final List excludeFields; + // the target field to store the hash value, defaults to fingerprint + private final String targetField; + // hash method used to generate the hash value, defaults to SHA-1 + private final String hashMethod; + private final boolean ignoreMissing; + + FingerprintProcessor( + String tag, + String description, + @Nullable List fields, + @Nullable List excludeFields, + String targetField, + String hashMethod, + boolean ignoreMissing + ) { + super(tag, description); + if (fields != null && !fields.isEmpty()) { + if (fields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw new IllegalArgumentException("field name in [fields] cannot be null nor empty"); + } + if (excludeFields != null && !excludeFields.isEmpty()) { + throw new IllegalArgumentException("either fields or exclude_fields can be set"); + } + } + if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw new IllegalArgumentException("field name in [exclude_fields] cannot be null nor empty"); + } + + if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { + throw new IllegalArgumentException("hash method must be MD5@2.16.0, SHA-1@2.16.0 or SHA-256@2.16.0 or SHA3-256@2.16.0"); + } + this.fields = fields; + this.excludeFields = excludeFields; + this.targetField = targetField; + this.hashMethod = hashMethod; + this.ignoreMissing = ignoreMissing; + } + + public List getFields() { + return fields; + } + + public List getExcludeFields() { + return excludeFields; + } + + public String getTargetField() { + return targetField; + } + + public String getHashMethod() { + return hashMethod; + } + + public boolean isIgnoreMissing() { + return ignoreMissing; + } + + @Override + public IngestDocument execute(IngestDocument document) { + // we should deduplicate and sort the field names to make sure we can get consistent hash value + final List sortedFields; + Set existingFields = new HashSet<>(document.getSourceAndMetadata().keySet()); + Set metadataFields = document.getMetadata() + .keySet() + .stream() + .map(IngestDocument.Metadata::getFieldName) + .collect(Collectors.toSet()); + // metadata fields such as _index, _id and _routing are ignored + if (fields != null && !fields.isEmpty()) { + sortedFields = fields.stream() + .distinct() + .filter(field -> !metadataFields.contains(field)) + .sorted() + .collect(Collectors.toList()); + } else if (excludeFields != null && !excludeFields.isEmpty()) { + sortedFields = existingFields.stream() + .filter(field -> !metadataFields.contains(field) && !excludeFields.contains(field)) + .sorted() + .collect(Collectors.toList()); + } else { + sortedFields = existingFields.stream().filter(field -> !metadataFields.contains(field)).sorted().collect(Collectors.toList()); + } + assert (!sortedFields.isEmpty()); + + final StringBuilder concatenatedFields = new StringBuilder(); + sortedFields.forEach(field -> { + if (!document.hasField(field)) { + if (ignoreMissing) { + return; + } else { + throw new IllegalArgumentException("field [" + field + "] doesn't exist"); + } + } + + final Object value = document.getFieldValue(field, Object.class); + if (value instanceof Map) { + @SuppressWarnings("unchecked") + Map flattenedMap = toFlattenedMap((Map) value); + flattenedMap.entrySet().stream().sorted(Map.Entry.comparingByKey()).forEach(entry -> { + String fieldValue = String.valueOf(entry.getValue()); + concatenatedFields.append("|") + .append(field) + .append(".") + .append(entry.getKey()) + .append("|") + .append(fieldValue.length()) + .append(":") + .append(fieldValue); + }); + } else { + String fieldValue = String.valueOf(value); + concatenatedFields.append("|").append(field).append("|").append(fieldValue.length()).append(":").append(fieldValue); + } + }); + // if all specified fields don't exist and ignore_missing is true, then do nothing + if (concatenatedFields.length() == 0) { + return document; + } + concatenatedFields.append("|"); + + MessageDigest messageDigest = HashMethod.fromMethodName(hashMethod); + assert (messageDigest != null); + messageDigest.update(concatenatedFields.toString().getBytes(StandardCharsets.UTF_8)); + document.setFieldValue(targetField, hashMethod + ":" + Base64.getEncoder().encodeToString(messageDigest.digest())); + + return document; + } + + @Override + public String getType() { + return TYPE; + } + + /** + * Convert a map containing nested fields to a flattened map, + * for example, if the original map is + * { + * "a": { + * "b": 1, + * "c": 2 + * } + * }, then the converted map is + * { + * "a.b": 1, + * "a.c": 2 + * } + * @param map the original map which may contain nested fields + * @return a flattened map which has only one level fields + */ + @SuppressWarnings("unchecked") + private Map toFlattenedMap(Map map) { + Map flattenedMap = new HashMap<>(); + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() instanceof Map) { + toFlattenedMap((Map) entry.getValue()).forEach( + (key, value) -> flattenedMap.put(entry.getKey() + "." + key, value) + ); + } else { + flattenedMap.put(entry.getKey(), entry.getValue()); + } + } + return flattenedMap; + } + + /** + * The supported hash methods used to generate hash value + */ + enum HashMethod { + MD5(MessageDigests.md5()), + SHA1(MessageDigests.sha1()), + SHA256(MessageDigests.sha256()), + SHA3256(MessageDigests.sha3256()); + + private final MessageDigest messageDigest; + + HashMethod(MessageDigest messageDigest) { + this.messageDigest = messageDigest; + } + + public static MessageDigest fromMethodName(String methodName) { + String name = methodName.toUpperCase(Locale.ROOT); + switch (name) { + case "MD5@2.16.0": + return MD5.messageDigest; + case "SHA-1@2.16.0": + return SHA1.messageDigest; + case "SHA-256@2.16.0": + return SHA256.messageDigest; + case "SHA3-256@2.16.0": + return SHA3256.messageDigest; + default: + return null; + } + } + } + + public static final class Factory implements Processor.Factory { + @Override + public FingerprintProcessor create( + Map registry, + String processorTag, + String description, + Map config + ) throws Exception { + List fields = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "fields"); + List excludeFields = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "exclude_fields"); + if (fields != null && !fields.isEmpty()) { + if (fields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw newConfigurationException(TYPE, processorTag, "fields", "field name cannot be null nor empty"); + } + if (excludeFields != null && !excludeFields.isEmpty()) { + throw newConfigurationException(TYPE, processorTag, "fields", "either fields or exclude_fields can be set"); + } + } + if (excludeFields != null && !excludeFields.isEmpty() && excludeFields.stream().anyMatch(Strings::isNullOrEmpty)) { + throw newConfigurationException(TYPE, processorTag, "exclude_fields", "field name cannot be null nor empty"); + } + + String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", "fingerprint"); + String hashMethod = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "hash_method", "SHA-1@2.16.0"); + if (!HASH_METHODS.contains(hashMethod.toUpperCase(Locale.ROOT))) { + throw newConfigurationException( + TYPE, + processorTag, + "hash_method", + "hash method must be MD5@2.16.0, SHA-1@2.16.0, SHA-256@2.16.0 or SHA3-256@2.16.0" + ); + } + boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); + return new FingerprintProcessor(processorTag, description, fields, excludeFields, targetField, hashMethod, ignoreMissing); + } + } +} diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java index 0f8b248fd5af8..162934efa6778 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java @@ -109,6 +109,7 @@ public Map getProcessors(Processor.Parameters paramet processors.put(CopyProcessor.TYPE, new CopyProcessor.Factory(parameters.scriptService)); processors.put(RemoveByPatternProcessor.TYPE, new RemoveByPatternProcessor.Factory()); processors.put(CommunityIdProcessor.TYPE, new CommunityIdProcessor.Factory()); + processors.put(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()); return Collections.unmodifiableMap(processors); } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java new file mode 100644 index 0000000000000..74ad4cade7b37 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorFactoryTests.java @@ -0,0 +1,119 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.OpenSearchParseException; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.hamcrest.CoreMatchers.equalTo; + +public class FingerprintProcessorFactoryTests extends OpenSearchTestCase { + + private FingerprintProcessor.Factory factory; + + @Before + public void init() { + factory = new FingerprintProcessor.Factory(); + } + + public void testCreate() throws Exception { + Map config = new HashMap<>(); + + List fields = null; + List excludeFields = null; + if (randomBoolean()) { + fields = List.of(randomAlphaOfLength(10)); + config.put("fields", fields); + } else { + excludeFields = List.of(randomAlphaOfLength(10)); + config.put("exclude_fields", excludeFields); + } + + String targetField = null; + if (randomBoolean()) { + targetField = randomAlphaOfLength(10); + } + config.put("target_field", targetField); + + boolean ignoreMissing = randomBoolean(); + config.put("ignore_missing", ignoreMissing); + String processorTag = randomAlphaOfLength(10); + FingerprintProcessor fingerprintProcessor = factory.create(null, processorTag, null, config); + assertThat(fingerprintProcessor.getTag(), equalTo(processorTag)); + assertThat(fingerprintProcessor.getFields(), equalTo(fields)); + assertThat(fingerprintProcessor.getExcludeFields(), equalTo(excludeFields)); + assertThat(fingerprintProcessor.getTargetField(), equalTo(Objects.requireNonNullElse(targetField, "fingerprint"))); + assertThat(fingerprintProcessor.isIgnoreMissing(), equalTo(ignoreMissing)); + } + + public void testCreateWithFields() throws Exception { + Map config = new HashMap<>(); + config.put("fields", List.of(randomAlphaOfLength(10))); + config.put("exclude_fields", List.of(randomAlphaOfLength(10))); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[fields] either fields or exclude_fields can be set")); + } + + config = new HashMap<>(); + List fields = new ArrayList<>(); + if (randomBoolean()) { + fields.add(null); + } else { + fields.add(""); + } + config.put("fields", fields); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[fields] field name cannot be null nor empty")); + } + + config = new HashMap<>(); + List excludeFields = new ArrayList<>(); + if (randomBoolean()) { + excludeFields.add(null); + } else { + excludeFields.add(""); + } + config.put("exclude_fields", excludeFields); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat(e.getMessage(), equalTo("[exclude_fields] field name cannot be null nor empty")); + } + } + + public void testCreateWithHashMethod() throws Exception { + Map config = new HashMap<>(); + List fields = List.of(randomAlphaOfLength(10)); + config.put("fields", fields); + config.put("hash_method", randomAlphaOfLength(10)); + try { + factory.create(null, null, null, config); + fail("factory create should have failed"); + } catch (OpenSearchParseException e) { + assertThat( + e.getMessage(), + equalTo("[hash_method] hash method must be MD5@2.16.0, SHA-1@2.16.0, SHA-256@2.16.0 or SHA3-256@2.16.0") + ); + } + } +} diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java new file mode 100644 index 0000000000000..67a82f28fb763 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/FingerprintProcessorTests.java @@ -0,0 +1,176 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.Processor; +import org.opensearch.ingest.RandomDocumentPicks; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.equalTo; + +public class FingerprintProcessorTests extends OpenSearchTestCase { + private final List hashMethods = List.of("MD5@2.16.0", "SHA-1@2.16.0", "SHA-256@2.16.0", "SHA3-256@2.16.0"); + + public void testGenerateFingerprint() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + List fields = null; + List excludeFields = null; + if (randomBoolean()) { + fields = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 10); i++) { + fields.add(RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10))); + } + } else { + excludeFields = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 10); i++) { + excludeFields.add(RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10))); + } + } + + String targetField = "fingerprint"; + if (randomBoolean()) { + targetField = randomAlphaOfLength(10); + } + + String hashMethod = randomFrom(hashMethods); + Processor processor = createFingerprintProcessor(fields, excludeFields, targetField, hashMethod, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetField), equalTo(true)); + } + + public void testCreateFingerprintProcessorFailed() { + List fields = new ArrayList<>(); + if (randomBoolean()) { + fields.add(null); + } else { + fields.add(""); + } + fields.add(randomAlphaOfLength(10)); + + assertThrows( + "field name in [fields] cannot be null nor empty", + IllegalArgumentException.class, + () -> createFingerprintProcessor(fields, null, null, randomFrom(hashMethods), false) + ); + + List excludeFields = new ArrayList<>(); + if (randomBoolean()) { + excludeFields.add(null); + } else { + excludeFields.add(""); + } + excludeFields.add(randomAlphaOfLength(10)); + + assertThrows( + "field name in [exclude_fields] cannot be null nor empty", + IllegalArgumentException.class, + () -> createFingerprintProcessor(null, excludeFields, null, randomFrom(hashMethods), false) + ); + + assertThrows( + "either fields or exclude_fields can be set", + IllegalArgumentException.class, + () -> createFingerprintProcessor( + List.of(randomAlphaOfLength(10)), + List.of(randomAlphaOfLength(10)), + null, + randomFrom(hashMethods), + false + ) + ); + + assertThrows( + "hash method must be MD5@2.16.0, SHA-1@2.16.0, SHA-256@2.16.0 or SHA3-256@2.16.0", + IllegalArgumentException.class, + () -> createFingerprintProcessor(Collections.emptyList(), null, "fingerprint", randomAlphaOfLength(10), false) + ); + } + + public void testEmptyFieldAndExcludeFields() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + List fields = null; + List excludeFields = null; + if (randomBoolean()) { + fields = new ArrayList<>(); + } else { + excludeFields = new ArrayList<>(); + } + String targetField = "fingerprint"; + if (randomBoolean()) { + targetField = randomAlphaOfLength(10); + } + + String hashMethod = randomFrom(hashMethods); + Processor processor = createFingerprintProcessor(fields, excludeFields, targetField, hashMethod, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetField), equalTo(true)); + } + + public void testIgnoreMissing() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String nonExistingFieldName = RandomDocumentPicks.randomNonExistingFieldName(random(), ingestDocument); + List nonExistingFields = List.of(nonExistingFieldName); + Processor processor = createFingerprintProcessor(nonExistingFields, null, "fingerprint", randomFrom(hashMethods), false); + assertThrows( + "field [" + nonExistingFieldName + "] doesn't exist", + IllegalArgumentException.class, + () -> processor.execute(ingestDocument) + ); + + String targetField = "fingerprint"; + Processor processorWithIgnoreMissing = createFingerprintProcessor( + nonExistingFields, + null, + "fingerprint", + randomFrom(hashMethods), + true + ); + processorWithIgnoreMissing.execute(ingestDocument); + assertThat(ingestDocument.hasField(targetField), equalTo(false)); + } + + public void testIgnoreMetadataFields() throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + List metadataFields = ingestDocument.getMetadata() + .keySet() + .stream() + .map(IngestDocument.Metadata::getFieldName) + .collect(Collectors.toList()); + + String existingFieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, randomAlphaOfLength(10)); + List fields = List.of(existingFieldName, metadataFields.get(randomIntBetween(0, metadataFields.size() - 1))); + + String targetField = "fingerprint"; + String algorithm = randomFrom(hashMethods); + Processor processor = createFingerprintProcessor(fields, null, targetField, algorithm, false); + + processor.execute(ingestDocument); + String fingerprint = ingestDocument.getFieldValue(targetField, String.class); + + processor = createFingerprintProcessor(List.of(existingFieldName), null, targetField, algorithm, false); + processor.execute(ingestDocument); + assertThat(ingestDocument.getFieldValue(targetField, String.class), equalTo(fingerprint)); + } + + private FingerprintProcessor createFingerprintProcessor( + List fields, + List excludeFields, + String targetField, + String hashMethod, + boolean ignoreMissing + ) { + return new FingerprintProcessor(randomAlphaOfLength(10), null, fields, excludeFields, targetField, hashMethod, ignoreMissing); + } +} diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml index 2a816f0386667..9bf4faf53a999 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml @@ -86,3 +86,19 @@ - do: nodes.info: {} - contains: { nodes.$cluster_manager.ingest.processors: { type: community_id } } + +--- +"Fingerprint processor exists": + - skip: + version: " - 2.15.99" + features: contains + reason: "fingerprint processor was introduced in 2.16.0 and contains is a newly added assertion" + - do: + cluster.state: {} + + # Get cluster-manager node id + - set: { cluster_manager_node: cluster_manager } + + - do: + nodes.info: {} + - contains: { nodes.$cluster_manager.ingest.processors: { type: fingerprint } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml new file mode 100644 index 0000000000000..04568916239f4 --- /dev/null +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/340_fingerprint_processor.yml @@ -0,0 +1,786 @@ +--- +teardown: + - do: + ingest.delete_pipeline: + id: "1" + ignore: 404 + +--- +"Test creat fingerprint processor": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + catch: /field name cannot be null nor empty/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields": [null] + } + } + ] + } + - do: + catch: /field name cannot be null nor empty/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields": [""] + } + } + ] + } + - do: + catch: /either fields or exclude\_fields can be set/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields": ["foo"], + "exclude_fields": ["bar"] + } + } + ] + } + + - do: + catch: /hash method must be MD5@2.16.0\, SHA\-1@2.16.0, SHA\-256@2.16.0 or SHA3\-256@2.16.0/ + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields": ["foo"], + "hash_method": "non-existing" + } + } + ] + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo"], + "target_field" : "fingerprint_field", + "hash_method": "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + } + } + ] + } + - match: { acknowledged: true } + +--- +"Test fingerprint processor with ignore_missing": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo"] + } + } + ] + } + - match: { acknowledged: true } + + - do: + catch: /field \[foo\] doesn't exist/ + index: + index: test + id: 1 + pipeline: "1" + body: { + bar: "bar" + } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar"], + "ignore_missing" : true + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "foo" + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "SHA-1@2.16.0:YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } + +--- +"Test fingerprint processor with custom target field": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo"], + "target_field" : "target" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: "foo" + } + - do: + get: + index: test + id: 1 + - match: { _source.target: "SHA-1@2.16.0:YqpBTuHXCPV04j/7lGfWeUl8Tyo=" } + +--- +"Test fingerprint processor with non-primitive fields and SHA-1": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "SHA-1@2.16.0:KYJ4pc4ouFmAbgZGp7CfNoykZeo=" } + +--- +"Test fingerprint processor with non-primitive fields and MD5": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"], + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[], + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[], + "hash_method" : "MD5@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "MD5@2.16.0:NovpcJ+MYHzEZtCewcDPTQ==" } + + +--- +"Test fingerprint processor with non-primitive fields and SHA-256": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"], + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[], + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[], + "hash_method" : "SHA-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "SHA-256@2.16.0:Sdlg0BodM3n1my4BvaTfJCPrvHxfrxno0kCLfMaC+XY=" } + +--- +"Test fingerprint processor with non-primitive fields and SHA3-256": + - skip: + version: " - 2.15.99" + reason: "introduced in 2.16.0" + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields" : ["foo", "bar", "zoo"], + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 1 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 2 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 2 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "fields":[], + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 3 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 3 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" } + + - do: + ingest.put_pipeline: + id: "1" + body: > + { + "processors": [ + { + "fingerprint" : { + "exclude_fields":[], + "hash_method" : "SHA3-256@2.16.0" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 4 + pipeline: "1" + body: { + foo: [1, 2, 3], + bar: { + field: { + innerField: "inner" + } + }, + zoo: null + } + - do: + get: + index: test + id: 4 + - match: { _source.fingerprint: "SHA3-256@2.16.0:+GZCkMLEMkUA/4IrEZEZZYsVMbZdpJ92ppN3wUsFYOI=" } diff --git a/server/src/main/java/org/opensearch/common/hash/MessageDigests.java b/server/src/main/java/org/opensearch/common/hash/MessageDigests.java index f53f60a3a97a3..123bd3489bedb 100644 --- a/server/src/main/java/org/opensearch/common/hash/MessageDigests.java +++ b/server/src/main/java/org/opensearch/common/hash/MessageDigests.java @@ -58,6 +58,7 @@ private static ThreadLocal createThreadLocalMessageDigest(String private static final ThreadLocal MD5_DIGEST = createThreadLocalMessageDigest("MD5"); private static final ThreadLocal SHA_1_DIGEST = createThreadLocalMessageDigest("SHA-1"); private static final ThreadLocal SHA_256_DIGEST = createThreadLocalMessageDigest("SHA-256"); + private static final ThreadLocal SHA3_256_DIGEST = createThreadLocalMessageDigest("SHA3-256"); /** * Returns a {@link MessageDigest} instance for MD5 digests; note @@ -95,6 +96,18 @@ public static MessageDigest sha256() { return get(SHA_256_DIGEST); } + /** + * Returns a {@link MessageDigest} instance for SHA3-256 digests; + * note that the instance returned is thread local and must not be + * shared amongst threads. + * + * @return a thread local {@link MessageDigest} instance that + * provides SHA3-256 message digest functionality. + */ + public static MessageDigest sha3256() { + return get(SHA3_256_DIGEST); + } + private static MessageDigest get(ThreadLocal messageDigest) { MessageDigest instance = messageDigest.get(); instance.reset(); diff --git a/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java b/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java index 9e793e5487eb8..6b7cfb4c8932c 100644 --- a/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java +++ b/server/src/test/java/org/opensearch/common/hash/MessageDigestsTests.java @@ -91,6 +91,31 @@ public void testSha256() throws Exception { ); } + public void testSha3256() throws Exception { + assertHash("a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a", "", MessageDigests.sha3256()); + assertHash("3a985da74fe225b2045c172d6bd390bd855f086e3e9d525b46bfe24511431532", "abc", MessageDigests.sha3256()); + assertHash( + "41c0dba2a9d6240849100376a8235e2c82e1b9998a999e21db32dd97496d3376", + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + MessageDigests.sha3256() + ); + assertHash( + "5c8875ae474a3634ba4fd55ec85bffd661f32aca75c6d699d0cdcb6c115891c1", + new String(new char[1000000]).replace("\0", "a"), + MessageDigests.sha3256() + ); + assertHash( + "69070dda01975c8c120c3aada1b282394e7f032fa9cf32f4cb2259a0897dfc04", + "The quick brown fox jumps over the lazy dog", + MessageDigests.sha3256() + ); + assertHash( + "cc80b0b13ba89613d93f02ee7ccbe72ee26c6edfe577f22e63a1380221caedbc", + "The quick brown fox jumps over the lazy cog", + MessageDigests.sha3256() + ); + } + public void testToHexString() throws Exception { BigInteger expected = BigInteger.probablePrime(256, random()); byte[] bytes = expected.toByteArray();