From 7378c04b0145239951fd4f35c8d378faedc76518 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 1 Oct 2024 16:08:15 +0300 Subject: [PATCH 1/4] refactor: improve RSM config descriptions --- .../config/RemoteStorageManagerConfig.java | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java b/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java index 6eb88260..5481d656 100644 --- a/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java +++ b/core/src/main/java/io/aiven/kafka/tieredstorage/config/RemoteStorageManagerConfig.java @@ -41,9 +41,9 @@ import static org.apache.kafka.common.config.ConfigDef.ValidString.in; public class RemoteStorageManagerConfig extends AbstractConfig { - private static final String STORAGE_PREFIX = "storage."; - private static final String FETCH_INDEXES_CACHE_PREFIX = "fetch.indexes.cache."; - private static final String SEGMENT_MANIFEST_CACHE_PREFIX = "fetch.manifest.cache."; + public static final String STORAGE_PREFIX = "storage."; + public static final String FETCH_INDEXES_CACHE_PREFIX = "fetch.indexes.cache."; + public static final String SEGMENT_MANIFEST_CACHE_PREFIX = "fetch.manifest.cache."; private static final String STORAGE_BACKEND_CLASS_CONFIG = STORAGE_PREFIX + "backend.class"; private static final String STORAGE_BACKEND_CLASS_DOC = "The storage backend implementation class"; @@ -55,28 +55,22 @@ public class RemoteStorageManagerConfig extends AbstractConfig { private static final String OBJECT_KEY_PREFIX_MASK_DOC = "Whether to mask path prefix in logs"; private static final String CHUNK_SIZE_CONFIG = "chunk.size"; - private static final String CHUNK_SIZE_DOC = "The chunk size of log files"; + private static final String CHUNK_SIZE_DOC = "Segment files are chunked into smaller parts to allow for faster " + + "processing (e.g. 
encryption, compression) and for range-fetching. " + + "It is recommended to benchmark this value, starting with 4MiB."; private static final String COMPRESSION_ENABLED_CONFIG = "compression.enabled"; - private static final String COMPRESSION_ENABLED_DOC = "Whether to enable compression"; + private static final String COMPRESSION_ENABLED_DOC = "Segments can be further compressed to optimize storage " + + "usage. Disabled by default."; private static final String COMPRESSION_HEURISTIC_ENABLED_CONFIG = "compression.heuristic.enabled"; - private static final String COMPRESSION_HEURISTIC_ENABLED_DOC = "Whether to use compression heuristics " - + "when compression is enabled"; + private static final String COMPRESSION_HEURISTIC_ENABLED_DOC = "Only compress segments where native compression " + + "has not been enabled. This is currently validated by looking into the first batch header. " + + "Only enabled if " + COMPRESSION_ENABLED_CONFIG + " is enabled."; private static final String ENCRYPTION_CONFIG = "encryption.enabled"; - private static final String ENCRYPTION_DOC = "Whether to enable encryption"; - // TODO add possibility to pass keys as strings - - - public static final String METRICS_NUM_SAMPLES_CONFIG = CommonClientConfigs.METRICS_NUM_SAMPLES_CONFIG; - private static final String METRICS_NUM_SAMPLES_DOC = CommonClientConfigs.METRICS_NUM_SAMPLES_DOC; - - public static final String METRICS_SAMPLE_WINDOW_MS_CONFIG = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_CONFIG; - private static final String METRICS_SAMPLE_WINDOW_MS_DOC = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_DOC; - - public static final String METRICS_RECORDING_LEVEL_CONFIG = CommonClientConfigs.METRICS_RECORDING_LEVEL_CONFIG; - private static final String METRICS_RECORDING_LEVEL_DOC = CommonClientConfigs.METRICS_RECORDING_LEVEL_DOC; + private static final String ENCRYPTION_DOC = "Segments and indexes can be encrypted, so objects are not accessible " + + "by accessing the remote storage. 
Disabled by default."; private static final String CUSTOM_METADATA_FIELDS_INCLUDE_CONFIG = "custom.metadata.fields.include"; private static final String CUSTOM_METADATA_FIELDS_INCLUDE_DOC = "Custom Metadata to be stored along " @@ -88,14 +82,19 @@ public class RemoteStorageManagerConfig extends AbstractConfig { + "(therefore read from disk) per second. Rate limit must be equal or larger than 1 MiB/sec " + "as minimal upload throughput."; - private static final ConfigDef CONFIG; + public static final String METRICS_NUM_SAMPLES_CONFIG = CommonClientConfigs.METRICS_NUM_SAMPLES_CONFIG; + private static final String METRICS_NUM_SAMPLES_DOC = CommonClientConfigs.METRICS_NUM_SAMPLES_DOC; + + public static final String METRICS_SAMPLE_WINDOW_MS_CONFIG = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_CONFIG; + private static final String METRICS_SAMPLE_WINDOW_MS_DOC = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_DOC; - static { - CONFIG = new ConfigDef(); + public static final String METRICS_RECORDING_LEVEL_CONFIG = CommonClientConfigs.METRICS_RECORDING_LEVEL_CONFIG; + private static final String METRICS_RECORDING_LEVEL_DOC = CommonClientConfigs.METRICS_RECORDING_LEVEL_DOC; - // TODO checkers + public static ConfigDef configDef() { + final ConfigDef configDef = new ConfigDef(); - CONFIG.define( + configDef.define( STORAGE_BACKEND_CLASS_CONFIG, ConfigDef.Type.CLASS, ConfigDef.NO_DEFAULT_VALUE, @@ -103,7 +102,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig { STORAGE_BACKEND_CLASS_DOC ); - CONFIG.define( + configDef.define( OBJECT_KEY_PREFIX_CONFIG, ConfigDef.Type.STRING, "", @@ -112,7 +111,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig { OBJECT_KEY_PREFIX_DOC ); - CONFIG.define( + configDef.define( OBJECT_KEY_PREFIX_MASK_CONFIG, ConfigDef.Type.BOOLEAN, false, @@ -120,7 +119,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig { OBJECT_KEY_PREFIX_MASK_DOC ); - CONFIG.define( + configDef.define( CHUNK_SIZE_CONFIG, 
ConfigDef.Type.INT, ConfigDef.NO_DEFAULT_VALUE, @@ -130,14 +129,14 @@ public class RemoteStorageManagerConfig extends AbstractConfig { CHUNK_SIZE_DOC ); - CONFIG.define( + configDef.define( COMPRESSION_ENABLED_CONFIG, ConfigDef.Type.BOOLEAN, false, ConfigDef.Importance.HIGH, COMPRESSION_ENABLED_DOC ); - CONFIG.define( + configDef.define( COMPRESSION_HEURISTIC_ENABLED_CONFIG, ConfigDef.Type.BOOLEAN, false, @@ -145,7 +144,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig { COMPRESSION_HEURISTIC_ENABLED_DOC ); - CONFIG.define( + configDef.define( ENCRYPTION_CONFIG, ConfigDef.Type.BOOLEAN, false, @@ -153,21 +152,21 @@ public class RemoteStorageManagerConfig extends AbstractConfig { ENCRYPTION_DOC ); - CONFIG.define( + configDef.define( METRICS_SAMPLE_WINDOW_MS_CONFIG, ConfigDef.Type.LONG, 30000, atLeast(1), ConfigDef.Importance.LOW, METRICS_SAMPLE_WINDOW_MS_DOC); - CONFIG.define( + configDef.define( METRICS_NUM_SAMPLES_CONFIG, ConfigDef.Type.INT, 2, atLeast(1), ConfigDef.Importance.LOW, METRICS_NUM_SAMPLES_DOC); - CONFIG.define( + configDef.define( METRICS_RECORDING_LEVEL_CONFIG, ConfigDef.Type.STRING, Sensor.RecordingLevel.INFO.toString(), @@ -177,14 +176,14 @@ public class RemoteStorageManagerConfig extends AbstractConfig { ConfigDef.Importance.LOW, METRICS_RECORDING_LEVEL_DOC); - CONFIG.define(CUSTOM_METADATA_FIELDS_INCLUDE_CONFIG, + configDef.define(CUSTOM_METADATA_FIELDS_INCLUDE_CONFIG, ConfigDef.Type.LIST, "", ConfigDef.ValidList.in(SegmentCustomMetadataField.names()), ConfigDef.Importance.LOW, CUSTOM_METADATA_FIELDS_INCLUDE_DOC); - CONFIG.define( + configDef.define( UPLOAD_RATE_LIMIT_BYTES_CONFIG, ConfigDef.Type.INT, null, @@ -193,6 +192,7 @@ public class RemoteStorageManagerConfig extends AbstractConfig { ConfigDef.Importance.MEDIUM, UPLOAD_RATE_LIMIT_BYTES_DOC ); + return configDef; } public OptionalInt uploadRateLimit() { @@ -296,7 +296,7 @@ private static String privateKeyFileConfig(final String keyPairId) { private final EncryptionConfig 
encryptionConfig; public RemoteStorageManagerConfig(final Map props) { - super(CONFIG, props); + super(configDef(), props); encryptionConfig = encryptionEnabled() ? EncryptionConfig.create(props) : null; validate(); } From cbf4d8073cf165dd3a9fdc03f3eb7016f8175fa0 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Thu, 3 Oct 2024 17:22:46 +0300 Subject: [PATCH 2/4] refactor: add toString to validators Needed to print the right valid values on docs --- .../tieredstorage/config/validators/NonEmptyPassword.java | 5 +++++ .../aiven/kafka/tieredstorage/config/validators/Null.java | 5 +++++ .../kafka/tieredstorage/config/validators/Subclass.java | 6 +++++- .../kafka/tieredstorage/config/validators/ValidUrl.java | 5 +++++ .../kafka/tieredstorage/storage/gcs/GcsStorageConfig.java | 5 +++++ 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java index 72528577..53c2cbbe 100644 --- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java +++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/NonEmptyPassword.java @@ -33,4 +33,9 @@ public void ensureValid(final String name, final Object value) { throw new ConfigException(name + " value must not be empty"); } } + + @Override + public String toString() { + return "Non-empty password text"; + } } diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java index fbf9a957..d4c095e3 100644 --- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java +++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Null.java @@ -44,5 +44,10 @@ public void ensureValid(final String name, final Object value) { validator.ensureValid(name, 
value); } } + + @Override + public String toString() { + return "null or " + validator.toString(); + } } diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java index d9440d7b..5bc837e8 100644 --- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java +++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/Subclass.java @@ -40,5 +40,9 @@ public void ensureValid(final String name, final Object value) { throw new ConfigException(name + " should be a subclass of " + parentClass.getCanonicalName()); } } - + + @Override + public String toString() { + return "Any implementation of " + parentClass.getName(); + } } diff --git a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java index 03bf7336..39c5a3fd 100644 --- a/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java +++ b/commons/src/main/java/io/aiven/kafka/tieredstorage/config/validators/ValidUrl.java @@ -43,4 +43,9 @@ public void ensureValid(final String name, final Object value) { } } } + + @Override + public String toString() { + return "Valid URL as defined in rfc2396"; + } } diff --git a/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java b/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java index 8a19c2f6..36820379 100644 --- a/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java +++ b/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java @@ -181,5 +181,10 @@ public void ensureValid(final String name, final Object value) { throw new ConfigException(name, value, "Value must be a multiple of 256 KiB (" + MULTIPLIER + " B)"); } } + + @Override + public String 
toString() { + return "[256 KiB...] values multiple of " + MULTIPLIER + " bytes"; + } } } From 1bfe8a236167282db86210285e5b464e9113b025 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Thu, 3 Oct 2024 17:23:27 +0300 Subject: [PATCH 3/4] refactor: expose configDef() on Storage configs Needed for documentation to be generated centrally. --- .../storage/azure/AzureBlobStorageConfig.java | 8 ++-- .../filesystem/FileSystemStorageConfig.java | 37 +++++++++---------- .../storage/gcs/GcsStorageConfig.java | 10 ++--- .../storage/s3/S3StorageConfig.java | 8 ++-- 4 files changed, 28 insertions(+), 35 deletions(-) diff --git a/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java b/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java index 9f9004ad..f9dc408d 100644 --- a/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java +++ b/storage/azure/src/main/java/io/aiven/kafka/tieredstorage/storage/azure/AzureBlobStorageConfig.java @@ -54,10 +54,8 @@ public class AzureBlobStorageConfig extends AbstractConfig { static final int AZURE_UPLOAD_BLOCK_SIZE_MIN = 100 * 1024; static final int AZURE_UPLOAD_BLOCK_SIZE_MAX = Integer.MAX_VALUE; - private static final ConfigDef CONFIG; - - static { - CONFIG = new ConfigDef() + public static final ConfigDef configDef() { + return new ConfigDef() .define( AZURE_ACCOUNT_NAME_CONFIG, ConfigDef.Type.STRING, @@ -112,7 +110,7 @@ public class AzureBlobStorageConfig extends AbstractConfig { private ProxyConfig proxyConfig = null; public AzureBlobStorageConfig(final Map props) { - super(CONFIG, props); + super(configDef(), props); validate(); final Map proxyProps = this.originalsWithPrefix(ProxyConfig.PROXY_PREFIX, true); diff --git a/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java 
b/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java index edf08e53..88c8e713 100644 --- a/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java +++ b/storage/filesystem/src/main/java/io/aiven/kafka/tieredstorage/storage/filesystem/FileSystemStorageConfig.java @@ -22,8 +22,7 @@ import org.apache.kafka.common.config.AbstractConfig; import org.apache.kafka.common.config.ConfigDef; -class FileSystemStorageConfig extends AbstractConfig { - private static final ConfigDef CONFIG; +public class FileSystemStorageConfig extends AbstractConfig { private static final String ROOT_CONFIG = "root"; private static final String ROOT_DOC = "Root directory"; @@ -31,26 +30,26 @@ class FileSystemStorageConfig extends AbstractConfig { private static final String OVERWRITE_ENABLED_CONFIG = "overwrite.enabled"; private static final String OVERWRITE_ENABLED_DOC = "Enable overwriting existing files"; - static { - CONFIG = new ConfigDef(); - CONFIG.define( - ROOT_CONFIG, - ConfigDef.Type.STRING, - ConfigDef.NO_DEFAULT_VALUE, - ConfigDef.Importance.HIGH, - ROOT_DOC - ); - CONFIG.define( - OVERWRITE_ENABLED_CONFIG, - ConfigDef.Type.BOOLEAN, - false, - ConfigDef.Importance.MEDIUM, - OVERWRITE_ENABLED_DOC - ); + public static final ConfigDef configDef() { + return new ConfigDef() + .define( + ROOT_CONFIG, + ConfigDef.Type.STRING, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.Importance.HIGH, + ROOT_DOC + ) + .define( + OVERWRITE_ENABLED_CONFIG, + ConfigDef.Type.BOOLEAN, + false, + ConfigDef.Importance.MEDIUM, + OVERWRITE_ENABLED_DOC + ); } FileSystemStorageConfig(final Map props) { - super(CONFIG, props); + super(configDef(), props); } final Path root() { diff --git a/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java b/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java index 36820379..263d12cf 100644 --- 
a/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java +++ b/storage/gcs/src/main/java/io/aiven/kafka/tieredstorage/storage/gcs/GcsStorageConfig.java @@ -30,7 +30,7 @@ import com.google.auth.Credentials; -class GcsStorageConfig extends AbstractConfig { +public class GcsStorageConfig extends AbstractConfig { static final String GCS_BUCKET_NAME_CONFIG = "gcs.bucket.name"; private static final String GCS_BUCKET_NAME_DOC = "GCS bucket to store log segments"; @@ -58,10 +58,8 @@ class GcsStorageConfig extends AbstractConfig { + "Cannot be set together with \"" + GCP_CREDENTIALS_JSON_CONFIG + "\" " + "or \"" + GCP_CREDENTIALS_PATH_CONFIG + "\""; - private static final ConfigDef CONFIG; - - static { - CONFIG = new ConfigDef() + public static final ConfigDef configDef() { + return new ConfigDef() .define( GCS_BUCKET_NAME_CONFIG, ConfigDef.Type.STRING, @@ -108,7 +106,7 @@ class GcsStorageConfig extends AbstractConfig { private ProxyConfig proxyConfig = null; public GcsStorageConfig(final Map props) { - super(CONFIG, props); + super(configDef(), props); validate(); final Map proxyProps = this.originalsWithPrefix(ProxyConfig.PROXY_PREFIX, true); diff --git a/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java b/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java index cd13db26..0df700c9 100644 --- a/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java +++ b/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java @@ -90,10 +90,8 @@ public class S3StorageConfig extends AbstractConfig { + "It is disabled by default as Kafka already validates integrity of the files."; - private static final ConfigDef CONFIG; - - static { - CONFIG = new ConfigDef() + public static ConfigDef configDef() { + return new ConfigDef() .define( S3_BUCKET_NAME_CONFIG, ConfigDef.Type.STRING, @@ -177,7 +175,7 @@ public class S3StorageConfig 
extends AbstractConfig { } public S3StorageConfig(final Map props) { - super(CONFIG, props); + super(configDef(), props); validate(); } From fa952723d61a7e7ae3bb001d77c08bcee6108e6c Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 1 Oct 2024 16:08:44 +0300 Subject: [PATCH 4/4] docs: Add documentation module Add module to generate config documentation --- .github/workflows/git.yml | 14 + Makefile | 4 + config.rst | 503 ++++++++++++++++++ docs/build.gradle | 33 ++ .../kafka/tieredstorage/misc/ConfigDocs.java | 102 ++++ settings.gradle | 2 + 6 files changed, 658 insertions(+) create mode 100644 config.rst create mode 100644 docs/build.gradle create mode 100644 docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java diff --git a/.github/workflows/git.yml b/.github/workflows/git.yml index 28db6802..9a01fbab 100644 --- a/.github/workflows/git.yml +++ b/.github/workflows/git.yml @@ -10,3 +10,17 @@ jobs: - uses: actions/checkout@v3 - name: Block Fixup Commit Merge uses: alexkappa/block-fixup-merge-action@v2 + + - name: Gen docs + run: make docs + + - name: Check for uncommitted changes + run: | + if [[ -n $(git status -s) ]]; then + echo "There are uncommitted changes after the task:" + git status -s + exit 1 + else + echo "No changes detected." 
+ fi + shell: bash diff --git a/Makefile b/Makefile index 80be5a57..202d7930 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,10 @@ storage/gcs/build/distributions/gcs-$(VERSION).tgz: storage/azure/build/distributions/azure-$(VERSION).tgz: ./gradlew build :storage:azure:distTar -x test -x integrationTest -x e2e:test +.PHONY: docs +docs: + ./gradlew :docs:genConfigDocs + test: build ./gradlew test -x e2e:test diff --git a/config.rst b/config.rst new file mode 100644 index 00000000..36d0c6c8 --- /dev/null +++ b/config.rst @@ -0,0 +1,503 @@ +================= +RemoteStorageManagerConfig +================= +``chunk.size`` + Segment files are chunked into smaller parts to allow for faster processing (e.g. encryption, compression) and for range-fetching. It is recommended to benchmark this value, starting with 4MiB. + + * Type: int + * Valid Values: [1,...,1073741823] + * Importance: high + +``storage.backend.class`` + The storage backend implementation class + + * Type: class + * Importance: high + +``compression.enabled`` + Segments can be further compressed to optimize storage usage. Disabled by default. + + * Type: boolean + * Default: false + * Importance: high + +``compression.heuristic.enabled`` + Only compress segments where native compression has not been enabled. This is currently validated by looking into the first batch header. Only enabled if compression.enabled is enabled. + + * Type: boolean + * Default: false + * Importance: high + +``encryption.enabled`` + Segments and indexes can be encrypted, so objects are not accessible by accessing the remote storage. Disabled by default. + + * Type: boolean + * Default: false + * Importance: high + +``key.prefix`` + The object storage path prefix + + * Type: string + * Default: "" + * Valid Values: non-null string + * Importance: high + +``upload.rate.limit.bytes.per.second`` + Upper bound on bytes to upload (therefore read from disk) per second. 
Rate limit must be equal or larger than 1 MiB/sec as minimal upload throughput. + + * Type: int + * Default: null + * Valid Values: null or [1048576,...] + * Importance: medium + +``custom.metadata.fields.include`` + Custom Metadata to be stored along Remote Log Segment metadata on Remote Log Metadata Manager back-end. Allowed values: [REMOTE_SIZE, OBJECT_PREFIX, OBJECT_KEY] + + * Type: list + * Default: "" + * Valid Values: [REMOTE_SIZE, OBJECT_PREFIX, OBJECT_KEY] + * Importance: low + +``key.prefix.mask`` + Whether to mask path prefix in logs + + * Type: boolean + * Default: false + * Importance: low + +``metrics.num.samples`` + The number of samples maintained to compute metrics. + + * Type: int + * Default: 2 + * Valid Values: [1,...] + * Importance: low + +``metrics.recording.level`` + The highest recording level for metrics. + + * Type: string + * Default: INFO + * Valid Values: [INFO, DEBUG, TRACE] + * Importance: low + +``metrics.sample.window.ms`` + The window of time a metrics sample is computed over. + + * Type: long + * Default: 30000 (30 seconds) + * Valid Values: [1,...] + * Importance: low + + +================= +SegmentManifestCacheConfig +================= +Under ``fetch.manifest.cache.`` + +``retention.ms`` + Cache retention time ms, where "-1" represents infinite retention + + * Type: long + * Default: 3600000 (1 hour) + * Valid Values: [-1,...,9223372036854775807] + * Importance: medium + +``size`` + Cache size in bytes, where "-1" represents unbounded cache + + * Type: long + * Default: 1000 + * Valid Values: [-1,...,9223372036854775807] + * Importance: medium + +``get.timeout.ms`` + When getting an object from the fetch, how long to wait before timing out. Defaults to 10 sec. + + * Type: long + * Default: 10000 (10 seconds) + * Valid Values: [1,...,9223372036854775807] + * Importance: low + +``thread.pool.size`` + Size for the thread pool used to schedule asynchronous fetching tasks, default to number of processors. 
+ + * Type: int + * Default: 0 + * Valid Values: [0,...,1024] + * Importance: low + + +================= +SegmentIndexesCacheConfig +================= +Under ``fetch.indexes.cache.`` + +``retention.ms`` + Cache retention time ms, where "-1" represents infinite retention + + * Type: long + * Default: 600000 (10 minutes) + * Valid Values: [-1,...,9223372036854775807] + * Importance: medium + +``size`` + Cache size in bytes, where "-1" represents unbounded cache + + * Type: long + * Default: 10485760 + * Valid Values: [-1,...,9223372036854775807] + * Importance: medium + +``get.timeout.ms`` + When getting an object from the fetch, how long to wait before timing out. Defaults to 10 sec. + + * Type: long + * Default: 10000 (10 seconds) + * Valid Values: [1,...,9223372036854775807] + * Importance: low + +``thread.pool.size`` + Size for the thread pool used to schedule asynchronous fetching tasks, default to number of processors. + + * Type: int + * Default: 0 + * Valid Values: [0,...,1024] + * Importance: low + + +================= +ChunkManagerFactoryConfig +================= +``fetch.chunk.cache.class`` + Chunk cache implementation. 
There are 2 implementations included: io.aiven.kafka.tieredstorage.fetch.cache.MemoryChunkCache and io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache + + * Type: class + * Default: null + * Valid Values: Any implementation of io.aiven.kafka.tieredstorage.fetch.cache.ChunkCache + * Importance: medium + + +================= +MemoryChunkCacheConfig +================= +Under ``fetch.chunk.cache.`` + +``size`` + Cache size in bytes, where "-1" represents unbounded cache + + * Type: long + * Valid Values: [-1,...,9223372036854775807] + * Importance: medium + +``prefetch.max.size`` + The amount of data that should be eagerly prefetched and cached + + * Type: int + * Default: 0 + * Valid Values: [0,...,2147483647] + * Importance: medium + +``retention.ms`` + Cache retention time ms, where "-1" represents infinite retention + + * Type: long + * Default: 600000 (10 minutes) + * Valid Values: [-1,...,9223372036854775807] + * Importance: medium + +``get.timeout.ms`` + When getting an object from the fetch, how long to wait before timing out. Defaults to 10 sec. + + * Type: long + * Default: 10000 (10 seconds) + * Valid Values: [1,...,9223372036854775807] + * Importance: low + +``thread.pool.size`` + Size for the thread pool used to schedule asynchronous fetching tasks, default to number of processors. + + * Type: int + * Default: 0 + * Valid Values: [0,...,1024] + * Importance: low + + +================= +DiskChunkCacheConfig +================= +Under ``fetch.chunk.cache.`` + +``path`` + Cache base directory. It is required to exist and be writable prior to the execution of the plugin. 
+
+  * Type: string
+  * Importance: high
+
+``size``
+  Cache size in bytes, where "-1" represents unbounded cache
+
+  * Type: long
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``prefetch.max.size``
+  The amount of data that should be eagerly prefetched and cached
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,2147483647]
+  * Importance: medium
+
+``retention.ms``
+  Cache retention time ms, where "-1" represents infinite retention
+
+  * Type: long
+  * Default: 600000 (10 minutes)
+  * Valid Values: [-1,...,9223372036854775807]
+  * Importance: medium
+
+``get.timeout.ms``
+  When getting an object from the fetch, how long to wait before timing out. Defaults to 10 sec.
+
+  * Type: long
+  * Default: 10000 (10 seconds)
+  * Valid Values: [1,...,9223372036854775807]
+  * Importance: low
+
+``thread.pool.size``
+  Size for the thread pool used to schedule asynchronous fetching tasks, default to number of processors.
+
+  * Type: int
+  * Default: 0
+  * Valid Values: [0,...,1024]
+  * Importance: low
+
+
+=================
+Storage Backends
+=================
+Under ``storage.``
+
+-----------------
+AzureBlobStorageConfig
+-----------------
+``azure.container.name``
+  Azure container to store log segments
+
+  * Type: string
+  * Valid Values: non-empty string
+  * Importance: high
+
+``azure.account.name``
+  Azure account name
+
+  * Type: string
+  * Default: null
+  * Valid Values: null or non-empty string
+  * Importance: high
+
+``azure.account.key``
+  Azure account key
+
+  * Type: password
+  * Default: null
+  * Valid Values: null or Non-empty password text
+  * Importance: medium
+
+``azure.connection.string``
+  Azure connection string. 
Cannot be used together with azure.account.name, azure.account.key, and azure.endpoint.url
+
+  * Type: password
+  * Default: null
+  * Valid Values: null or Non-empty password text
+  * Importance: medium
+
+``azure.sas.token``
+  Azure SAS token
+
+  * Type: password
+  * Default: null
+  * Valid Values: null or Non-empty password text
+  * Importance: medium
+
+``azure.upload.block.size``
+  Size of blocks to use when uploading objects to Azure
+
+  * Type: int
+  * Default: 5242880
+  * Valid Values: [102400,...,2147483647]
+  * Importance: medium
+
+``azure.endpoint.url``
+  Custom Azure Blob Storage endpoint URL
+
+  * Type: string
+  * Default: null
+  * Valid Values: null or Valid URL as defined in rfc2396
+  * Importance: low
+
+
+-----------------
+GcsStorageConfig
+-----------------
+``gcs.bucket.name``
+  GCS bucket to store log segments
+
+  * Type: string
+  * Valid Values: non-empty string
+  * Importance: high
+
+``gcs.credentials.default``
+  Use the default GCP credentials. Cannot be set together with "gcs.credentials.json" or "gcs.credentials.path"
+
+  * Type: boolean
+  * Default: null
+  * Importance: medium
+
+``gcs.credentials.json``
+  GCP credentials as a JSON string. Cannot be set together with "gcs.credentials.path" or "gcs.credentials.default"
+
+  * Type: password
+  * Default: null
+  * Valid Values: Non-empty password text
+  * Importance: medium
+
+``gcs.credentials.path``
+  The path to a GCP credentials file. Cannot be set together with "gcs.credentials.json" or "gcs.credentials.default"
+
+  * Type: string
+  * Default: null
+  * Valid Values: non-empty string
+  * Importance: medium
+
+``gcs.resumable.upload.chunk.size``
+  The chunk size for resumable upload. Must be a multiple of 256 KiB (256 x 1024 bytes). Larger chunk sizes typically make uploads faster, but requires bigger memory buffers. The recommended minimum is 8 MiB. The default is 15 MiB.
+
+  * Type: int
+  * Default: null
+  * Valid Values: [256 KiB...] 
values multiple of 262144 bytes + * Importance: medium + +``gcs.endpoint.url`` + Custom GCS endpoint URL. To be used with custom GCS-compatible backends. + + * Type: string + * Default: null + * Valid Values: Valid URL as defined in rfc2396 + * Importance: low + + +----------------- +S3StorageConfig +----------------- +``s3.bucket.name`` + S3 bucket to store log segments + + * Type: string + * Valid Values: non-empty string + * Importance: high + +``s3.region`` + AWS region where S3 bucket is placed + + * Type: string + * Importance: medium + +``aws.access.key.id`` + AWS access key ID. To be used when static credentials are provided. + + * Type: password + * Default: null + * Valid Values: Non-empty password text + * Importance: medium + +``aws.checksum.check.enabled`` + This property is used to enable checksum validation done by AWS library. When set to "false", there will be no validation. It is disabled by default as Kafka already validates integrity of the files. + + * Type: boolean + * Default: false + * Importance: medium + +``aws.secret.access.key`` + AWS secret access key. To be used when static credentials are provided. + + * Type: password + * Default: null + * Valid Values: Non-empty password text + * Importance: medium + +``s3.multipart.upload.part.size`` + Size of parts in bytes to use when uploading. All parts but the last one will have this size. Valid values: between 5MiB and 2GiB + + * Type: int + * Default: 5242880 + * Valid Values: [5242880,...,2147483647] + * Importance: medium + +``aws.certificate.check.enabled`` + This property is used to enable SSL certificate checking for AWS services. When set to "false", the SSL certificate checking for AWS services will be bypassed. Use with caution and always only in a test environment, as disabling certificate lead the storage to be vulnerable to man-in-the-middle attacks. + + * Type: boolean + * Default: true + * Importance: low + +``aws.credentials.provider.class`` + AWS credentials provider. 
If not set, AWS SDK uses the default software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain + + * Type: class + * Default: null + * Valid Values: Any implementation of software.amazon.awssdk.auth.credentials.AwsCredentialsProvider + * Importance: low + +``s3.api.call.attempt.timeout`` + AWS S3 API call attempt timeout in milliseconds + + * Type: long + * Default: null + * Valid Values: null or [1,...,9223372036854775807] + * Importance: low + +``s3.api.call.timeout`` + AWS S3 API call timeout in milliseconds + + * Type: long + * Default: null + * Valid Values: null or [1,...,9223372036854775807] + * Importance: low + +``s3.endpoint.url`` + Custom S3 endpoint URL. To be used with custom S3-compatible backends (e.g. minio). + + * Type: string + * Default: null + * Valid Values: Valid URL as defined in rfc2396 + * Importance: low + +``s3.path.style.access.enabled`` + Whether to use path style access or virtual hosts. By default, empty value means S3 library will auto-detect. Amazon S3 uses virtual hosts by default (true), but other S3-compatible backends may differ (e.g. minio). + + * Type: boolean + * Default: null + * Importance: low + + +----------------- +FilesystemStorageConfig +----------------- +> Only for development/testing purposes +``root`` + Root directory + + * Type: string + * Importance: high + +``overwrite.enabled`` + Enable overwriting existing files + + * Type: boolean + * Default: false + * Importance: medium + + diff --git a/docs/build.gradle b/docs/build.gradle new file mode 100644 index 00000000..7ae589ca --- /dev/null +++ b/docs/build.gradle @@ -0,0 +1,33 @@ +/* + * Copyright 2024 Aiven Oy + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+plugins {
+    id 'java-library'
+}
+
+dependencies {
+    implementation "org.apache.kafka:kafka-clients:$kafkaVersion"
+    implementation project(":core")
+    implementation project(":storage:s3")
+    implementation project(":storage:gcs")
+    implementation project(":storage:azure")
+    implementation project(":storage:filesystem")
+}
+
+tasks.register('genConfigDocs', JavaExec) {
+    classpath = sourceSets.main.runtimeClasspath
+    mainClass = 'io.aiven.kafka.tieredstorage.misc.ConfigDocs'
+    standardOutput = project.file("config.rst").newOutputStream() // resolve against the docs project dir, not the JVM working dir
+}
diff --git a/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java
new file mode 100644
index 00000000..d0dcfc5d
--- /dev/null
+++ b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2024 Aiven Oy
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.aiven.kafka.tieredstorage.misc;
+
+import org.apache.kafka.common.config.ConfigDef;
+
+import io.aiven.kafka.tieredstorage.config.ChunkCacheConfig;
+import io.aiven.kafka.tieredstorage.config.ChunkManagerFactoryConfig;
+import io.aiven.kafka.tieredstorage.config.DiskChunkCacheConfig;
+import io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig;
+import io.aiven.kafka.tieredstorage.fetch.index.MemorySegmentIndexesCache;
+import io.aiven.kafka.tieredstorage.fetch.manifest.MemorySegmentManifestCache;
+import io.aiven.kafka.tieredstorage.storage.azure.AzureBlobStorageConfig;
+import io.aiven.kafka.tieredstorage.storage.filesystem.FileSystemStorageConfig;
+import io.aiven.kafka.tieredstorage.storage.gcs.GcsStorageConfig;
+import io.aiven.kafka.tieredstorage.storage.s3.S3StorageConfig;
+
+import static io.aiven.kafka.tieredstorage.config.ChunkManagerFactoryConfig.FETCH_CHUNK_CACHE_PREFIX;
+import static io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig.FETCH_INDEXES_CACHE_PREFIX;
+import static io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig.SEGMENT_MANIFEST_CACHE_PREFIX;
+import static io.aiven.kafka.tieredstorage.config.RemoteStorageManagerConfig.STORAGE_PREFIX;
+
+/**
+ * Gather all config definitions across the project and generate a documentation page
+ **/
+public class ConfigDocs {
+    public static void main(final String[] args) {
+        printSectionTitle("RemoteStorageManagerConfig");
+        final var rsmConfigDef = RemoteStorageManagerConfig.configDef();
+        System.out.println(rsmConfigDef.toEnrichedRst());
+
+        printSectionTitle("SegmentManifestCacheConfig");
+        System.out.println("Under ``" + SEGMENT_MANIFEST_CACHE_PREFIX + "``\n");
+        final var segmentManifestCacheDef = MemorySegmentManifestCache.configDef();
+        System.out.println(segmentManifestCacheDef.toEnrichedRst());
+
+        printSectionTitle("SegmentIndexesCacheConfig");
+        System.out.println("Under ``" + FETCH_INDEXES_CACHE_PREFIX + "``\n");
+        final var segmentIndexesCacheDef = MemorySegmentIndexesCache.configDef();
+        System.out.println(segmentIndexesCacheDef.toEnrichedRst());
+
+        printSectionTitle("ChunkManagerFactoryConfig");
+        final var chunkCacheFactoryDef = ChunkManagerFactoryConfig.configDef();
+        System.out.println(chunkCacheFactoryDef.toEnrichedRst());
+
+        printSectionTitle("MemoryChunkCacheConfig");
+        System.out.println("Under ``" + FETCH_CHUNK_CACHE_PREFIX + "``\n");
+        final var memChunkCacheDef = ChunkCacheConfig.configDef(new ConfigDef());
+        System.out.println(memChunkCacheDef.toEnrichedRst());
+
+        printSectionTitle("DiskChunkCacheConfig");
+        System.out.println("Under ``" + FETCH_CHUNK_CACHE_PREFIX + "``\n");
+        final var diskChunkCacheDef = DiskChunkCacheConfig.configDef();
+        System.out.println(diskChunkCacheDef.toEnrichedRst());
+
+        printSectionTitle("Storage Backends");
+        System.out.println("Under ``" + STORAGE_PREFIX + "``\n");
+
+        printSubsectionTitle("AzureBlobStorageConfig"); // fixed: title now matches the config class name
+        final var azBlobStorageConfigDef = AzureBlobStorageConfig.configDef();
+        System.out.println(azBlobStorageConfigDef.toEnrichedRst());
+
+        printSubsectionTitle("GcsStorageConfig"); // fixed: was a copy-pasted Azure title over the GCS section
+        final var googleCloudConfigDef = GcsStorageConfig.configDef();
+        System.out.println(googleCloudConfigDef.toEnrichedRst());
+
+        printSubsectionTitle("S3StorageConfig");
+        final var s3StorageConfigDef = S3StorageConfig.configDef();
+        System.out.println(s3StorageConfigDef.toEnrichedRst());
+
+        printSubsectionTitle("FilesystemStorageConfig");
+        System.out.println("> Only for development/testing purposes");
+        final var fsStorageConfigDef = FileSystemStorageConfig.configDef();
+        System.out.println(fsStorageConfigDef.toEnrichedRst());
+    }
+
+    static void printSectionTitle(final String title) {
+        System.out.println("=".repeat(title.length()) + "\n" // RST requires adornment >= title length
+            + title + "\n"
+            + "=".repeat(title.length()));
+    }
+
+    static void printSubsectionTitle(final String title) {
+        System.out.println("-".repeat(title.length()) + "\n" // RST requires adornment >= title length
+            + title + "\n"
+            + "-".repeat(title.length()));
+    }
+}
diff --git a/settings.gradle b/settings.gradle index 1d6840ab..95924756 100644 --- a/settings.gradle +++ b/settings.gradle @@ -24,3 +24,5 @@ include 'storage:gcs' include 'storage:s3' include 'e2e' include 'commons' +include 'docs' +