diff --git a/.github/workflows/git.yml b/.github/workflows/git.yml
index 28db68023..9a01fbab3 100644
--- a/.github/workflows/git.yml
+++ b/.github/workflows/git.yml
@@ -10,3 +10,17 @@ jobs:
       - uses: actions/checkout@v3
       - name: Block Fixup Commit Merge
         uses: alexkappa/block-fixup-merge-action@v2
+
+      - name: Gen docs
+        run: make docs
+
+      - name: Check for uncommitted changes
+        run: |
+          if [[ -n $(git status -s) ]]; then
+            echo "There are uncommitted changes after the task:"
+            git status -s
+            exit 1
+          else
+            echo "No changes detected."
+          fi
+        shell: bash
diff --git a/Makefile b/Makefile
index 80be5a577..202d7930e 100644
--- a/Makefile
+++ b/Makefile
@@ -42,6 +42,10 @@ storage/gcs/build/distributions/gcs-$(VERSION).tgz:
 storage/azure/build/distributions/azure-$(VERSION).tgz:
 	./gradlew build :storage:azure:distTar -x test -x integrationTest -x e2e:test
 
+.PHONY: docs
+docs:
+	./gradlew :docs:genConfigDocs
+
 test: build
 	./gradlew test -x e2e:test
diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java
index 72528577d..53c2cbbe5 100644
--- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java
+++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java
@@ -33,4 +33,9 @@ public void ensureValid(final String name, final Object value) {
             throw new ConfigException(name + " value must not be empty");
         }
     }
+
+    @Override
+    public String toString() {
+        return "Non-empty password text";
+    }
 }
diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java
index fbf9a9578..d4c095e36 100644
--- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java
+++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java
@@ -44,5 +44,10 @@ public void ensureValid(final String name, final Object value) {
             validator.ensureValid(name, value);
         }
     }
+
+    @Override
+    public String toString() {
+        return "null or " + validator.toString();
+    }
 }
diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java
index d9440d7b0..5bc837e87 100644
--- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java
+++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java
@@ -40,5 +40,9 @@ public void ensureValid(final String name, final Object value) {
             throw new ConfigException(name + " should be a subclass of " + parentClass.getCanonicalName());
         }
     }
-
+
+    @Override
+    public String toString() {
+        return "Any implementation of " + parentClass.getName();
+    }
 }
diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java
index 03bf73367..39c5a3fd2 100644
--- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java
+++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java
@@ -43,4 +43,9 @@ public void ensureValid(final String name, final Object value) {
             }
         }
     }
+
+    @Override
+    public String toString() {
+        return "Valid URL as defined in rfc2396";
+    }
 }
diff --git a/config.rst b/config.rst
new file mode 100644
index 000000000..36d0c6c8b
--- /dev/null
+++ b/config.rst
@@ -0,0 +1,503 @@
+=================
+RemoteStorageManagerConfig
+=================
+``chunk.size``
+  Segment files are chunked into smaller parts to allow for faster processing (e.g. encryption, compression) and for range-fetching. It is recommended to benchmark this value, starting with 4MiB.
+
+  * Type: int
+  * Valid Values: [1,...,1073741823]
+  * Importance: high
+
+``storage.backend.class``
+  The storage backend implementation class
+
+  * Type: class
+  * Importance: high
+
+``compression.enabled``
+  Segments can be further compressed to optimize storage usage. Disabled by default.
+
+  * Type: boolean
+  * Default: false
+  * Importance: high
+
+``compression.heuristic.enabled``
+  Only compress segments where native compression has not been enabled. This is currently validated by looking into the first batch header. Only enabled if compression.enabled is enabled.
+
+  * Type: boolean
+  * Default: false
+  * Importance: high
+
+``encryption.enabled``
+  Segments and indexes can be encrypted, so objects are not accessible by accessing the remote storage. Disabled by default.
+
+  * Type: boolean
+  * Default: false
+  * Importance: high
+
+``key.prefix``
+  The object storage path prefix
+
+  * Type: string
+  * Default: ""
+  * Valid Values: non-null string
+  * Importance: high
+
+``upload.rate.limit.bytes.per.second``
+  Upper bound on bytes to upload (therefore read from disk) per second. Rate limit must be equal or larger than 1 MiB/sec as minimal upload throughput.
+
+  * Type: int
+  * Default: null
+  * Valid Values: null or [1048576,...]
+  * Importance: medium
+
+``custom.metadata.fields.include``
+  Custom Metadata to be stored along Remote Log Segment metadata on Remote Log Metadata Manager back-end. Allowed values: [REMOTE_SIZE, OBJECT_PREFIX, OBJECT_KEY]
+
+  * Type: list
+  * Default: ""
+  * Valid Values: [REMOTE_SIZE, OBJECT_PREFIX, OBJECT_KEY]
+  * Importance: low
+
+``key.prefix.mask``
+  Whether to mask path prefix in logs
+
+  * Type: boolean
+  * Default: false
+  * Importance: low
+
+``metrics.num.samples``
+  The number of samples maintained to compute metrics.
+
+  * Type: int
+  * Default: 2
+  * Valid Values: [1,...]
+  * Importance: low
+
+``metrics.recording.level``
+  The highest recording level for metrics.
+
+  * Type: string
+  * Default: INFO
+  * Valid Values: [INFO, DEBUG, TRACE]
+  * Importance: low
+
+``metrics.sample.window.ms``
+  The window of time a metrics sample is computed over.
+
+  * Type: long
+  * Default: 30000 (30 seconds)
+  * Valid Values: [1,...]
+  * Importance: low
+
+
+=================
+SegmentManifestCacheConfig
+=================
+Under ``fetch.manifest.cache.``
+
+``retention.ms``
+  Cache retention time in ms, where "-1" represents infinite retention
+
+  * Type: long
+  * Default: 3600000 (1 hour)
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``size``
+  Cache size in bytes, where "-1" represents unbounded cache
+
+  * Type: long
+  * Default: 1000
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``get.timeout.ms``
+  When getting an object from the cache, how long to wait before timing out. Defaults to 10 sec.
+
+  * Type: long
+  * Default: 10000 (10 seconds)
+  * Valid Values: [1,...,9223372036854775807]
+  * Importance: low
+
+``thread.pool.size``
+  Size of the thread pool used to schedule asynchronous fetching tasks; defaults to the number of processors.
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,1024]
+  * Importance: low
+
+
+=================
+SegmentIndexesCacheConfig
+=================
+Under ``fetch.indexes.cache.``
+
+``retention.ms``
+  Cache retention time in ms, where "-1" represents infinite retention
+
+  * Type: long
+  * Default: 600000 (10 minutes)
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``size``
+  Cache size in bytes, where "-1" represents unbounded cache
+
+  * Type: long
+  * Default: 10485760
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``get.timeout.ms``
+  When getting an object from the cache, how long to wait before timing out. Defaults to 10 sec.
+
+  * Type: long
+  * Default: 10000 (10 seconds)
+  * Valid Values: [1,...,9223372036854775807]
+  * Importance: low
+
+``thread.pool.size``
+  Size of the thread pool used to schedule asynchronous fetching tasks; defaults to the number of processors.
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,1024]
+  * Importance: low
+
+
+=================
+ChunkManagerFactoryConfig
+=================
+``fetch.chunk.cache.class``
+  Chunk cache implementation. There are 2 implementations included: io.aiven.kafka.tieredstorage.fetch.cache.MemoryChunkCache and io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache
+
+  * Type: class
+  * Default: null
+  * Valid Values: Any implementation of io.aiven.kafka.tieredstorage.fetch.cache.ChunkCache
+  * Importance: medium
+
+
+=================
+MemoryChunkCacheConfig
+=================
+Under ``fetch.chunk.cache.``
+
+``size``
+  Cache size in bytes, where "-1" represents unbounded cache
+
+  * Type: long
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``prefetch.max.size``
+  The amount of data that should be eagerly prefetched and cached
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,2147483647]
+  * Importance: medium
+
+``retention.ms``
+  Cache retention time in ms, where "-1" represents infinite retention
+
+  * Type: long
+  * Default: 600000 (10 minutes)
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``get.timeout.ms``
+  When getting an object from the cache, how long to wait before timing out. Defaults to 10 sec.
+
+  * Type: long
+  * Default: 10000 (10 seconds)
+  * Valid Values: [1,...,9223372036854775807]
+  * Importance: low
+
+``thread.pool.size``
+  Size of the thread pool used to schedule asynchronous fetching tasks; defaults to the number of processors.
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,1024]
+  * Importance: low
+
+
+=================
+DiskChunkCacheConfig
+=================
+Under ``fetch.chunk.cache.``
+
+``path``
+  Cache base directory. It is required to exist and be writable prior to the execution of the plugin.
+
+  * Type: string
+  * Importance: high
+
+``size``
+  Cache size in bytes, where "-1" represents unbounded cache
+
+  * Type: long
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``prefetch.max.size``
+  The amount of data that should be eagerly prefetched and cached
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,2147483647]
+  * Importance: medium
+
+``retention.ms``
+  Cache retention time in ms, where "-1" represents infinite retention
+
+  * Type: long
+  * Default: 600000 (10 minutes)
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``get.timeout.ms``
+  When getting an object from the cache, how long to wait before timing out. Defaults to 10 sec.
+
+  * Type: long
+  * Default: 10000 (10 seconds)
+  * Valid Values: [1,...,9223372036854775807]
+  * Importance: low
+
+``thread.pool.size``
+  Size of the thread pool used to schedule asynchronous fetching tasks; defaults to the number of processors.
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,1024]
+  * Importance: low
+
+
+=================
+Storage Backends
+=================
+Under ``storage.``
+
+-----------------
+AzureBlobStorageConfig
+-----------------
+``azure.container.name``
+  Azure container to store log segments
+
+  * Type: string
+  * Valid Values: non-empty string
+  * Importance: high
+
+``azure.account.name``
+  Azure account name
+
+  * Type: string
+  * Default: null
+  * Valid Values: null or non-empty string
+  * Importance: high
+
+``azure.account.key``
+  Azure account key
+
+  * Type: password
+  * Default: null
+  * Valid Values: null or Non-empty password text
+  * Importance: medium
+
+``azure.connection.string``
+  Azure connection string. Cannot be used together with azure.account.name, azure.account.key, and azure.endpoint.url
+
+  * Type: password
+  * Default: null
+  * Valid Values: null or Non-empty password text
+  * Importance: medium
+
+``azure.sas.token``
+  Azure SAS token
+
+  * Type: password
+  * Default: null
+  * Valid Values: null or Non-empty password text
+  * Importance: medium
+
+``azure.upload.block.size``
+  Size of blocks to use when uploading objects to Azure
+
+  * Type: int
+  * Default: 5242880
+  * Valid Values: [102400,...,2147483647]
+  * Importance: medium
+
+``azure.endpoint.url``
+  Custom Azure Blob Storage endpoint URL
+
+  * Type: string
+  * Default: null
+  * Valid Values: null or Valid URL as defined in rfc2396
+  * Importance: low
+
+
+-----------------
+GcsStorageConfig
+-----------------
+``gcs.bucket.name``
+  GCS bucket to store log segments
+
+  * Type: string
+  * Valid Values: non-empty string
+  * Importance: high
+
+``gcs.credentials.default``
+  Use the default GCP credentials. Cannot be set together with "gcs.credentials.json" or "gcs.credentials.path"
+
+  * Type: boolean
+  * Default: null
+  * Importance: medium
+
+``gcs.credentials.json``
+  GCP credentials as a JSON string. Cannot be set together with "gcs.credentials.path" or "gcs.credentials.default"
+
+  * Type: password
+  * Default: null
+  * Valid Values: Non-empty password text
+  * Importance: medium
+
+``gcs.credentials.path``
+  The path to a GCP credentials file. Cannot be set together with "gcs.credentials.json" or "gcs.credentials.default"
+
+  * Type: string
+  * Default: null
+  * Valid Values: non-empty string
+  * Importance: medium
+
+``gcs.resumable.upload.chunk.size``
+  The chunk size for resumable upload. Must be a multiple of 256 KiB (256 x 1024 bytes). Larger chunk sizes typically make uploads faster, but require bigger memory buffers. The recommended minimum is 8 MiB. The default is 15 MiB.
+
+  * Type: int
+  * Default: null
+  * Valid Values: [256 KiB...] values multiple of 262144 bytes
+  * Importance: medium
+
+``gcs.endpoint.url``
+  Custom GCS endpoint URL. To be used with custom GCS-compatible backends.
+
+  * Type: string
+  * Default: null
+  * Valid Values: Valid URL as defined in rfc2396
+  * Importance: low
+
+
+-----------------
+S3StorageConfig
+-----------------
+``s3.bucket.name``
+  S3 bucket to store log segments
+
+  * Type: string
+  * Valid Values: non-empty string
+  * Importance: high
+
+``s3.region``
+  AWS region where the S3 bucket is placed
+
+  * Type: string
+  * Importance: medium
+
+``aws.access.key.id``
+  AWS access key ID. To be used when static credentials are provided.
+
+  * Type: password
+  * Default: null
+  * Valid Values: Non-empty password text
+  * Importance: medium
+
+``aws.checksum.check.enabled``
+  This property is used to enable checksum validation done by AWS library. When set to "false", there will be no validation. It is disabled by default as Kafka already validates integrity of the files.
+
+  * Type: boolean
+  * Default: false
+  * Importance: medium
+
+``aws.secret.access.key``
+  AWS secret access key. To be used when static credentials are provided.
+
+  * Type: password
+  * Default: null
+  * Valid Values: Non-empty password text
+  * Importance: medium
+
+``s3.multipart.upload.part.size``
+  Size of parts in bytes to use when uploading. All parts but the last one will have this size. Valid values: between 5MiB and 2GiB
+
+  * Type: int
+  * Default: 5242880
+  * Valid Values: [5242880,...,2147483647]
+  * Importance: medium
+
+``aws.certificate.check.enabled``
+  This property is used to enable SSL certificate checking for AWS services. When set to "false", the SSL certificate checking for AWS services will be bypassed. Use with caution and always only in a test environment, as disabling certificate checking leaves the storage vulnerable to man-in-the-middle attacks.
+
+  * Type: boolean
+  * Default: true
+  * Importance: low
+
+``aws.credentials.provider.class``
+  AWS credentials provider. If not set, AWS SDK uses the default software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain
+
+  * Type: class
+  * Default: null
+  * Valid Values: Any implementation of software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
+  * Importance: low
+
+``s3.api.call.attempt.timeout``
+  AWS S3 API call attempt timeout in milliseconds
+
+  * Type: long
+  * Default: null
+  * Valid Values: null or [1,...,9223372036854775807]
+  * Importance: low
+
+``s3.api.call.timeout``
+  AWS S3 API call timeout in milliseconds
+
+  * Type: long
+  * Default: null
+  * Valid Values: null or [1,...,9223372036854775807]
+  * Importance: low
+
+``s3.endpoint.url``
+  Custom S3 endpoint URL. To be used with custom S3-compatible backends (e.g. minio).
+
+  * Type: string
+  * Default: null
+  * Valid Values: Valid URL as defined in rfc2396
+  * Importance: low
+
+``s3.path.style.access.enabled``
+  Whether to use path style access or virtual hosts. By default, an empty value means the S3 library will auto-detect. Amazon S3 uses virtual hosts by default (true), but other S3-compatible backends may differ (e.g. minio).
+
+  * Type: boolean
+  * Default: null
+  * Importance: low
+
+
+-----------------
+FileSystemStorageConfig
+-----------------
+> Only for development/testing purposes
+``root``
+  Root directory
+
+  * Type: string
+  * Importance: high
+
+``overwrite.enabled``
+  Enable overwriting existing files
+
+  * Type: boolean
+  * Default: false
+  * Importance: medium
+
+
diff --git a/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java b/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java
index 6eb88260f..5481d656b 100644
--- a/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java
+++ b/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java
@@ -41,9 +41,9 @@ import static org.apache.kafka.common.config.ConfigDef.ValidString.in;
 
 public class RemoteStorageManagerConfig extends AbstractConfig {
-    private static final String STORAGE_PREFIX = "storage.";
-    private static final String FETCH_INDEXES_CACHE_PREFIX = "fetch.indexes.cache.";
-    private static final String SEGMENT_MANIFEST_CACHE_PREFIX = "fetch.manifest.cache.";
+    public static final String STORAGE_PREFIX = "storage.";
+    public static final String FETCH_INDEXES_CACHE_PREFIX = "fetch.indexes.cache.";
+    public static final String SEGMENT_MANIFEST_CACHE_PREFIX = "fetch.manifest.cache.";
 
     private static final String STORAGE_BACKEND_CLASS_CONFIG = STORAGE_PREFIX + "backend.class";
     private static final String STORAGE_BACKEND_CLASS_DOC = "The storage backend implementation class";
@@ -55,28 +55,22 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
     private static final String OBJECT_KEY_PREFIX_MASK_DOC = "Whether to mask path prefix in logs";
 
     private static final String CHUNK_SIZE_CONFIG = "chunk.size";
-    private static final String CHUNK_SIZE_DOC = "The chunk size of log files";
+    private static final String CHUNK_SIZE_DOC = "Segment files are chunked into smaller parts to allow for faster "
+        + "processing (e.g. encryption, compression) and for range-fetching. "
+        + "It is recommended to benchmark this value, starting with 4MiB.";
 
     private static final String COMPRESSION_ENABLED_CONFIG = "compression.enabled";
-    private static final String COMPRESSION_ENABLED_DOC = "Whether to enable compression";
+    private static final String COMPRESSION_ENABLED_DOC = "Segments can be further compressed to optimize storage "
+        + "usage. Disabled by default.";
 
     private static final String COMPRESSION_HEURISTIC_ENABLED_CONFIG = "compression.heuristic.enabled";
-    private static final String COMPRESSION_HEURISTIC_ENABLED_DOC = "Whether to use compression heuristics "
-        + "when compression is enabled";
+    private static final String COMPRESSION_HEURISTIC_ENABLED_DOC = "Only compress segments where native compression "
+        + "has not been enabled. This is currently validated by looking into the first batch header. "
+        + "Only enabled if " + COMPRESSION_ENABLED_CONFIG + " is enabled.";
 
     private static final String ENCRYPTION_CONFIG = "encryption.enabled";
-    private static final String ENCRYPTION_DOC = "Whether to enable encryption";
-    // TODO add possibility to pass keys as strings
-
-
-    public static final String METRICS_NUM_SAMPLES_CONFIG = CommonClientConfigs.METRICS_NUM_SAMPLES_CONFIG;
-    private static final String METRICS_NUM_SAMPLES_DOC = CommonClientConfigs.METRICS_NUM_SAMPLES_DOC;
-
-    public static final String METRICS_SAMPLE_WINDOW_MS_CONFIG = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_CONFIG;
-    private static final String METRICS_SAMPLE_WINDOW_MS_DOC = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_DOC;
-
-    public static final String METRICS_RECORDING_LEVEL_CONFIG = CommonClientConfigs.METRICS_RECORDING_LEVEL_CONFIG;
-    private static final String METRICS_RECORDING_LEVEL_DOC = CommonClientConfigs.METRICS_RECORDING_LEVEL_DOC;
+    private static final String ENCRYPTION_DOC = "Segments and indexes can be encrypted, so objects are not accessible "
+        + "by accessing the remote storage. Disabled by default.";
 
     private static final String CUSTOM_METADATA_FIELDS_INCLUDE_CONFIG = "custom.metadata.fields.include";
     private static final String CUSTOM_METADATA_FIELDS_INCLUDE_DOC = "Custom Metadata to be stored along "
@@ -88,14 +82,19 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
         + "(therefore read from disk) per second. Rate limit must be equal or larger than 1 MiB/sec "
         + "as minimal upload throughput.";
 
-    private static final ConfigDef CONFIG;
+    public static final String METRICS_NUM_SAMPLES_CONFIG = CommonClientConfigs.METRICS_NUM_SAMPLES_CONFIG;
+    private static final String METRICS_NUM_SAMPLES_DOC = CommonClientConfigs.METRICS_NUM_SAMPLES_DOC;
+
+    public static final String METRICS_SAMPLE_WINDOW_MS_CONFIG = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_CONFIG;
+    private static final String METRICS_SAMPLE_WINDOW_MS_DOC = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_DOC;
 
-    static {
-        CONFIG = new ConfigDef();
+    public static final String METRICS_RECORDING_LEVEL_CONFIG = CommonClientConfigs.METRICS_RECORDING_LEVEL_CONFIG;
+    private static final String METRICS_RECORDING_LEVEL_DOC = CommonClientConfigs.METRICS_RECORDING_LEVEL_DOC;
 
-        // TODO checkers
+    public static ConfigDef configDef() {
+        final ConfigDef configDef = new ConfigDef();
 
-        CONFIG.define(
+        configDef.define(
             STORAGE_BACKEND_CLASS_CONFIG,
             ConfigDef.Type.CLASS,
             ConfigDef.NO_DEFAULT_VALUE,
@@ -103,7 +102,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             STORAGE_BACKEND_CLASS_DOC
         );
 
-        CONFIG.define(
+        configDef.define(
             OBJECT_KEY_PREFIX_CONFIG,
             ConfigDef.Type.STRING,
             "",
@@ -112,7 +111,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             OBJECT_KEY_PREFIX_DOC
         );
 
-        CONFIG.define(
+        configDef.define(
             OBJECT_KEY_PREFIX_MASK_CONFIG,
             ConfigDef.Type.BOOLEAN,
             false,
@@ -120,7 +119,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             OBJECT_KEY_PREFIX_MASK_DOC
         );
 
-        CONFIG.define(
+        configDef.define(
             CHUNK_SIZE_CONFIG,
             ConfigDef.Type.INT,
             ConfigDef.NO_DEFAULT_VALUE,
@@ -130,14 +129,14 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             CHUNK_SIZE_DOC
         );
 
-        CONFIG.define(
+        configDef.define(
             COMPRESSION_ENABLED_CONFIG,
             ConfigDef.Type.BOOLEAN,
             false,
             ConfigDef.Importance.HIGH,
             COMPRESSION_ENABLED_DOC
         );
-        CONFIG.define(
+        configDef.define(
             COMPRESSION_HEURISTIC_ENABLED_CONFIG,
             ConfigDef.Type.BOOLEAN,
             false,
@@ -145,7 +144,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             COMPRESSION_HEURISTIC_ENABLED_DOC
         );
 
-        CONFIG.define(
+        configDef.define(
             ENCRYPTION_CONFIG,
             ConfigDef.Type.BOOLEAN,
             false,
@@ -153,21 +152,21 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             ENCRYPTION_DOC
         );
 
-        CONFIG.define(
+        configDef.define(
             METRICS_SAMPLE_WINDOW_MS_CONFIG,
             ConfigDef.Type.LONG,
             30000,
             atLeast(1),
             ConfigDef.Importance.LOW,
             METRICS_SAMPLE_WINDOW_MS_DOC);
-        CONFIG.define(
+        configDef.define(
             METRICS_NUM_SAMPLES_CONFIG,
             ConfigDef.Type.INT,
             2,
             atLeast(1),
             ConfigDef.Importance.LOW,
             METRICS_NUM_SAMPLES_DOC);
-        CONFIG.define(
+        configDef.define(
             METRICS_RECORDING_LEVEL_CONFIG,
             ConfigDef.Type.STRING,
             Sensor.RecordingLevel.INFO.toString(),
@@ -177,14 +176,14 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             ConfigDef.Importance.LOW,
             METRICS_RECORDING_LEVEL_DOC);
 
-        CONFIG.define(CUSTOM_METADATA_FIELDS_INCLUDE_CONFIG,
+        configDef.define(CUSTOM_METADATA_FIELDS_INCLUDE_CONFIG,
             ConfigDef.Type.LIST,
             "",
             ConfigDef.ValidList.in(SegmentCustomMetadataField.names()),
             ConfigDef.Importance.LOW,
             CUSTOM_METADATA_FIELDS_INCLUDE_DOC);
 
-        CONFIG.define(
+        configDef.define(
             UPLOAD_RATE_LIMIT_BYTES_CONFIG,
             ConfigDef.Type.INT,
             null,
@@ -193,6 +192,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig {
             ConfigDef.Importance.MEDIUM,
             UPLOAD_RATE_LIMIT_BYTES_DOC
         );
+        return configDef;
     }
 
     public OptionalInt uploadRateLimit() {
@@ -296,7 +296,7 @@ private static String privateKeyFileConfig(final String keyPairId) {
     private final EncryptionConfig encryptionConfig;
 
     public RemoteStorageManagerConfig(final Map<String, ?> props) {
-        super(CONFIG, props);
+        super(configDef(), props);
         encryptionConfig = encryptionEnabled() ? EncryptionConfig.create(props) : null;
         validate();
     }
diff --git a/docs/build.gradle b/docs/build.gradle
new file mode 100644
index 000000000..7ae589ca6
--- /dev/null
+++ b/docs/build.gradle
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2024 Aiven Oy
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+plugins {
+    id 'java-library'
+}
+
+dependencies {
+    implementation "org.apache.kafka:kafka-clients:$kafkaVersion"
+    implementation project(":core")
+    implementation project(":storage:s3")
+    implementation project(":storage:gcs")
+    implementation project(":storage:azure")
+    implementation project(":storage:filesystem")
+}
+
+tasks.register('genConfigDocs', JavaExec) {
+    classpath = sourceSets.main.runtimeClasspath
+    mainClass = 'io.aiven.kafka.tieredstorage.misc.ConfigDocs'
+    standardOutput = new File("config.rst").newOutputStream()
+}
diff --git a/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java
new file mode 100644
index 000000000..d0dcfc5d7
--- /dev/null
+++ b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2024 Aiven Oy
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.aiven.kafka.tieredstorage.misc;
+
+import org.apache.kafka.common.config.ConfigDef;
+
+import io.aiven.kafka.tieredstorage.config.ChunkCacheConfig;
+import io.aiven.kafka.tieredstorage.config.ChunkManagerFactoryConfig;
+import io.aiven.kafka.tieredstorage.config.DiskChunkCacheConfig;
+import io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig;
+import io.aiven.kafka.tieredstorage.fetch.index.MemorySegmentIndexesCache;
+import io.aiven.kafka.tieredstorage.fetch.manifest.MemorySegmentManifestCache;
+import io.aiven.kafka.tieredstorage.storage.azure.AzureBlobStorageConfig;
+import io.aiven.kafka.tieredstorage.storage.filesystem.FileSystemStorageConfig;
+import io.aiven.kafka.tieredstorage.storage.gcs.GcsStorageConfig;
+import io.aiven.kafka.tieredstorage.storage.s3.S3StorageConfig;
+
+import static io.aiven.kafka.tieredstorage.config.ChunkManagerFactoryConfig.FETCH_CHUNK_CACHE_PREFIX;
+import static io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig.FETCH_INDEXES_CACHE_PREFIX;
+import static io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig.SEGMENT_MANIFEST_CACHE_PREFIX;
+import static io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig.STORAGE_PREFIX;
+
+/**
+ * Gather all config definitions across the project and generate a documentation page
+ **/
+public class ConfigDocs {
+    public static void main(final String[] args) {
+        printSectionTitle("RemoteStorageManagerConfig");
+        final var rsmConfigDef = RemoteStorageManagerConfig.configDef();
+        System.out.println(rsmConfigDef.toEnrichedRst());
+
+        printSectionTitle("SegmentManifestCacheConfig");
+        System.out.println("Under ``" + SEGMENT_MANIFEST_CACHE_PREFIX + "``\n");
+        final var segmentManifestCacheDef = MemorySegmentManifestCache.configDef();
+        System.out.println(segmentManifestCacheDef.toEnrichedRst());
+
+        printSectionTitle("SegmentIndexesCacheConfig");
+        System.out.println("Under ``" + FETCH_INDEXES_CACHE_PREFIX + "``\n");
+        final var segmentIndexesCacheDef = MemorySegmentIndexesCache.configDef();
+        System.out.println(segmentIndexesCacheDef.toEnrichedRst());
+
+        printSectionTitle("ChunkManagerFactoryConfig");
+        final var chunkCacheFactoryDef = ChunkManagerFactoryConfig.configDef();
+        System.out.println(chunkCacheFactoryDef.toEnrichedRst());
+
+        printSectionTitle("MemoryChunkCacheConfig");
+        System.out.println("Under ``" + FETCH_CHUNK_CACHE_PREFIX + "``\n");
+        final var memChunkCacheDef = ChunkCacheConfig.configDef(new ConfigDef());
+        System.out.println(memChunkCacheDef.toEnrichedRst());
+
+        printSectionTitle("DiskChunkCacheConfig");
+        System.out.println("Under ``" + FETCH_CHUNK_CACHE_PREFIX + "``\n");
+        final var diskChunkCacheDef = DiskChunkCacheConfig.configDef();
+        System.out.println(diskChunkCacheDef.toEnrichedRst());
+
+        printSectionTitle("Storage Backends");
+        System.out.println("Under ``" + STORAGE_PREFIX + "``\n");
+
+        printSubsectionTitle("AzureBlobStorageConfig");
+        final var azBlobStorageConfigDef = AzureBlobStorageConfig.configDef();
+        System.out.println(azBlobStorageConfigDef.toEnrichedRst());
+
+        printSubsectionTitle("GcsStorageConfig");
+        final var googleCloudConfigDef = GcsStorageConfig.configDef();
+        System.out.println(googleCloudConfigDef.toEnrichedRst());
+
+        printSubsectionTitle("S3StorageConfig");
+        final var s3StorageConfigDef = S3StorageConfig.configDef();
+        System.out.println(s3StorageConfigDef.toEnrichedRst());
+
+        printSubsectionTitle("FileSystemStorageConfig");
+        System.out.println("> Only for development/testing purposes");
+        final var fsStorageConfigDef = FileSystemStorageConfig.configDef();
+        System.out.println(fsStorageConfigDef.toEnrichedRst());
+    }
+
+    static void printSectionTitle(final String title) {
+        System.out.println("=================\n"
+            + title + "\n"
+            + "=================");
+    }
+
+    static void printSubsectionTitle(final String title) {
+        System.out.println("-----------------\n"
+            + title + "\n"
+            + "-----------------");
+    }
+}
diff --git a/settings.gradle b/settings.gradle
index 1d6840abc..959247562 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -24,3 +24,5 @@ include 'storage:gcs'
 include 'storage:s3'
 include 'e2e'
 include 'commons'
+include 'docs'
+
diff --git a/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java b/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java
index 9f9004ad1..f9dc408d6 100644
--- a/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java
+++ b/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java
@@ -54,10 +54,8 @@ public class AzureBlobStorageConfig extends AbstractConfig {
     static final int AZURE_UPLOAD_BLOCK_SIZE_MIN = 100 * 1024;
     static final int AZURE_UPLOAD_BLOCK_SIZE_MAX = Integer.MAX_VALUE;
 
-    private static final ConfigDef CONFIG;
-
-    static {
-        CONFIG = new ConfigDef()
+    public static final ConfigDef configDef() {
+        return new ConfigDef()
             .define(
                 AZURE_ACCOUNT_NAME_CONFIG,
                 ConfigDef.Type.STRING,
@@ -112,7 +110,7 @@ public class AzureBlobStorageConfig extends AbstractConfig {
     private ProxyConfig proxyConfig = null;
 
     public AzureBlobStorageConfig(final Map<String, ?> props) {
-        super(CONFIG, props);
+        super(configDef(), props);
         validate();
 
         final Map<String, Object> proxyProps = this.originalsWithPrefix(ProxyConfig.PROXY_PREFIX, true);
diff --git a/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java b/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java
index edf08e531..88c8e7134 100644
--- a/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java
+++ b/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java
@@ -22,8 +22,7 @@
 import org.apache.kafka.common.config.AbstractConfig;
 import org.apache.kafka.common.config.ConfigDef;
 
-class FileSystemStorageConfig extends AbstractConfig {
-    private static final ConfigDef CONFIG;
+public class FileSystemStorageConfig extends AbstractConfig {
     private static final String ROOT_CONFIG = "root";
     private static final String ROOT_DOC = "Root directory";
@@ -31,26 +30,26 @@ class FileSystemStorageConfig extends AbstractConfig {
     private static final String OVERWRITE_ENABLED_CONFIG = "overwrite.enabled";
     private static final String OVERWRITE_ENABLED_DOC = "Enable overwriting existing files";
 
-    static {
-        CONFIG = new ConfigDef();
-        CONFIG.define(
-            ROOT_CONFIG,
-            ConfigDef.Type.STRING,
-            ConfigDef.NO_DEFAULT_VALUE,
-            ConfigDef.Importance.HIGH,
-            ROOT_DOC
-        );
-        CONFIG.define(
-            OVERWRITE_ENABLED_CONFIG,
-            ConfigDef.Type.BOOLEAN,
-            false,
-            ConfigDef.Importance.MEDIUM,
-            OVERWRITE_ENABLED_DOC
-        );
+    public static final ConfigDef configDef() {
+        return new ConfigDef()
+            .define(
+                ROOT_CONFIG,
+                ConfigDef.Type.STRING,
+                ConfigDef.NO_DEFAULT_VALUE,
+                ConfigDef.Importance.HIGH,
+                ROOT_DOC
+            )
+            .define(
+                OVERWRITE_ENABLED_CONFIG,
+                ConfigDef.Type.BOOLEAN,
+                false,
+                ConfigDef.Importance.MEDIUM,
+                OVERWRITE_ENABLED_DOC
+            );
     }
 
     FileSystemStorageConfig(final Map<String, ?> props) {
-        super(CONFIG, props);
+        super(configDef(), props);
     }
 
     final Path root() {
diff --git a/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java b/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java
index 8a19c2f62..263d12cf5 100644
--- a/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java
+++ b/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java
@@ -30,7 +30,7 @@
 
 import com.google.auth.Credentials;
 
-class GcsStorageConfig extends AbstractConfig {
+public class GcsStorageConfig extends AbstractConfig {
     static final String GCS_BUCKET_NAME_CONFIG = "gcs.bucket.name";
     private static final String GCS_BUCKET_NAME_DOC = "GCS bucket to store log segments";
 
@@ -58,10 +58,8 @@ class GcsStorageConfig extends AbstractConfig {
         + "Cannot be set together with \"" + GCP_CREDENTIALS_JSON_CONFIG + "\" "
         + "or \"" + GCP_CREDENTIALS_PATH_CONFIG + "\"";
 
-    private static final ConfigDef CONFIG;
-
-    static {
-        CONFIG = new ConfigDef()
+    public static final ConfigDef configDef() {
+        return new ConfigDef()
             .define(
                 GCS_BUCKET_NAME_CONFIG,
                 ConfigDef.Type.STRING,
@@ -108,7 +106,7 @@ class GcsStorageConfig extends AbstractConfig {
     private ProxyConfig proxyConfig = null;
 
     public GcsStorageConfig(final Map<String, ?> props) {
-        super(CONFIG, props);
+        super(configDef(), props);
         validate();
 
         final Map<String, Object> proxyProps = this.originalsWithPrefix(ProxyConfig.PROXY_PREFIX, true);
@@ -181,5 +179,10 @@ public void ensureValid(final String name, final Object value) {
                 throw new ConfigException(name, value, "Value must be a multiple of 256 KiB (" + MULTIPLIER + " B)");
             }
         }
+
+        @Override
+        public String toString() {
+            return "[256 KiB...] values multiple of " + MULTIPLIER + " bytes";
+        }
     }
 }
diff --git a/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java b/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java
index cd13db260..0df700c98 100644
--- a/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java
+++ b/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java
@@ -90,10 +88,8 @@ public class S3StorageConfig extends AbstractConfig {
         + "It is disabled by default as Kafka already validates integrity of the files.";
 
 
-    private static final ConfigDef CONFIG;
-
-    static {
-        CONFIG = new ConfigDef()
+    public static ConfigDef configDef() {
+        return new ConfigDef()
             .define(
                 S3_BUCKET_NAME_CONFIG,
                 ConfigDef.Type.STRING,
@@ -177,7 +175,7 @@ public class S3StorageConfig extends AbstractConfig {
     }
 
     public S3StorageConfig(final Map<String, ?> props) {
-        super(CONFIG, props);
+        super(configDef(), props);
         validate();
     }
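
Reviewer note (not part of the patch): the ``toString()`` overrides added to the validators are what the generated ``Valid Values`` rows render, because ``ConfigDef.toEnrichedRst()`` prints each key's validator via its string form. A minimal, hypothetical sketch of the mechanism — the config key ``demo.password`` is invented for illustration, and ``Null.or(...)`` assumes the factory the ``Null`` validator exposes in this codebase::

    import org.apache.kafka.common.config.ConfigDef;

    import io.aiven.kafka.tieredstorage.config.validators.NonEmptyPassword;
    import io.aiven.kafka.tieredstorage.config.validators.Null;

    public class ValidatorDocsDemo {
        public static void main(final String[] args) {
            // Hypothetical single-key ConfigDef; "demo.password" is not a real config.
            final ConfigDef def = new ConfigDef().define(
                "demo.password",
                ConfigDef.Type.PASSWORD,
                null,
                Null.or(new NonEmptyPassword()),  // assumption: Null exposes an or(...) factory
                ConfigDef.Importance.MEDIUM,
                "A demo password");
            // toEnrichedRst() renders the validator's toString() in the "Valid Values" row,
            // so this prints "* Valid Values: null or Non-empty password text" instead of
            // an opaque object reference.
            System.out.println(def.toEnrichedRst());
        }
    }

Without these overrides, ``ConfigDef`` falls back to ``Object.toString()`` for the validator, which is why rows like ``null or Non-empty password text`` and ``[256 KiB...] values multiple of 262144 bytes`` now appear in ``config.rst``.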
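For context on how the documented keys compose at deployment time: the section prefixes in ``config.rst`` (``storage.``, ``fetch.chunk.cache.``, and so on) are prepended to the per-section keys, and the result is passed to the plugin through the broker's RSM pass-through prefix. A hypothetical broker snippet, assuming the commonly used ``rsm.config.`` prefix (set via ``remote.log.storage.manager.impl.prefix``; it may differ per deployment) and illustrative values::

    # Kafka tiered storage wiring (standard broker configs)
    remote.log.storage.manager.class.name=io.aiven.kafka.tieredstorage.RemoteStorageManager
    remote.log.storage.manager.impl.prefix=rsm.config.

    # RemoteStorageManagerConfig keys, passed through the prefix;
    # 4194304 = 4 MiB, the suggested starting point for benchmarking chunk.size
    rsm.config.chunk.size=4194304
    rsm.config.storage.backend.class=io.aiven.kafka.tieredstorage.storage.s3.S3Storage
    rsm.config.storage.s3.bucket.name=my-ts-bucket
    rsm.config.storage.s3.region=us-east-1

    # Cache keys documented under fetch.manifest.cache. / fetch.chunk.cache.
    rsm.config.fetch.manifest.cache.retention.ms=3600000
    rsm.config.fetch.chunk.cache.class=io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache
    rsm.config.fetch.chunk.cache.path=/var/cache/kafka-tiered-storage
    rsm.config.fetch.chunk.cache.size=1073741824

The bucket name and cache path above are placeholders; the ``DiskChunkCache`` class name is taken from the generated ``fetch.chunk.cache.class`` documentation in this patch.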