From 7ed75fcdaac7f9d1cb2cf98defd789eb5bd9f954 Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Tue, 13 Aug 2024 18:07:55 -0400 Subject: [PATCH] Update release notes for 2.1.3 * Update gems * Fix links to javadoc.io * Remove draft status from 2.1.3 release notes * Line wrap items * Remove some internal, non-user facing items from the notes * Add some missing notable items * Organize items by configurability, observable behavior, bug fix, metrics, and other * Add note about JDK 17 requirement for the build * Add link to the git history --- Gemfile.lock | 6 +- _config.yml | 4 +- _posts/release/2024-08-12-accumulo-2.1.3.md | 233 ++++++++++++-------- 3 files changed, 142 insertions(+), 101 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 8b40c8c78..7d4df355b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -4,14 +4,14 @@ GEM addressable (2.8.7) public_suffix (>= 2.0.2, < 7.0) colorator (1.1.0) - concurrent-ruby (1.3.3) + concurrent-ruby (1.3.4) em-websocket (0.5.3) eventmachine (>= 0.12.9) http_parser.rb (~> 0) eventmachine (1.2.7) ffi (1.17.0-x86_64-linux-gnu) forwardable-extended (2.6.0) - google-protobuf (3.25.3) + google-protobuf (3.25.3-x86_64-linux) http_parser.rb (0.8.0) i18n (1.14.5) concurrent-ruby (~> 1.0) @@ -52,7 +52,7 @@ GEM rb-fsevent (0.11.2) rb-inotify (0.11.1) ffi (~> 1.0) - rexml (3.3.3) + rexml (3.3.5) strscan rouge (4.3.0) safe_yaml (1.0.5) diff --git a/_config.yml b/_config.yml index 791ea7af1..7aa94713c 100644 --- a/_config.yml +++ b/_config.yml @@ -16,7 +16,7 @@ url: "https://accumulo.apache.org" # the base hostname & protocol for your site latest_release: 2.1.3 javadoc_version: 2.1.3 num_home_posts: 5 -javadoc_base: "https://static.javadoc.io/org.apache.accumulo" +javadoc_base: "https://www.javadoc.io/static/org.apache.accumulo" docs_baseurl: "/docs/2.x" # Build settings @@ -75,7 +75,7 @@ defaults: latest_release: "2.1.3" javadoc_version: "2.1.3" docs_baseurl: "/docs/2.x" - javadoc_base: "https://static.javadoc.io/org.apache.accumulo" + javadoc_base: "https://www.javadoc.io/static/org.apache.accumulo" skiph1fortitle: "true" gh_branch: "main" diff --git a/_posts/release/2024-08-12-accumulo-2.1.3.md b/_posts/release/2024-08-12-accumulo-2.1.3.md index cf934d7f1..054b1ee1b 100644 --- a/_posts/release/2024-08-12-accumulo-2.1.3.md +++ b/_posts/release/2024-08-12-accumulo-2.1.3.md @@ -1,7 +1,6 @@ --- title: Apache Accumulo 2.1.3 sortableversion: '02.01.03' -draft: true LTM: true --- ## About @@ -10,115 +9,155 @@ Apache Accumulo 2.1.3 is a patch release of the 2.1 LTM line. It contains bug fixes and minor enhancements. This version supersedes 2.1.2. Users upgrading to 2.1 should upgrade directly to this version instead of 2.1.2. -Included here are some highlights of the most interesting bugs fixed and -features added in 2.1.3. For the full set of changes, please see the commit -history or issue tracker. +## Notable Changes -### Notable Improvements +The full set of changes are too numerous to be useful here. Included here are some highlights of the +most interesting bugs fixed and features added in 2.1.3. For the full set of changes, please see the +commit history or issue tracker milestone links below. -Improvements that affect performance: +### Configurable Improvements + +Many notable improvements have been added that include a means for users to configure them via new +or existing properties to improve system performance or behavior. These include: * {% ghi 3722 %} Adds properties {% plink general.file.name.allocation.batch.size.min %} and - {% plink general.file.name.allocation.batch.size.max%} that allow the batch size - for unique filename allocation in ZooKeeper to be configurable. In a system that requires large numbers - of unique names, larger batch sizes can reduce ZooKeeper contention because more file names can be + {% plink general.file.name.allocation.batch.size.max%} that allow the batch size for unique + filename allocation in ZooKeeper to be configurable. In a system that requires large numbers of + unique names, larger batch sizes can reduce ZooKeeper contention because more file names can be reserved with a single ZooKeeper call. -* {% ghi 3733 %} Avoid creating server side threads when failed writes are cancelled. In versions 2.1.2 - and earlier, the thrift close call creates a new thread to cancel the thrift session. With 2.1.3, a - new thrift method is available to test if a session is reserved and deletes it if it is not reserved - without creating an additional thread. If the new method is not available it falls back to the previous - close method to preserve interoperability between 2.x versions. -* {% ghi 3738 %} Adds parameter {% plink gc.remove.in.use.candidates %}, that enables the Garbage Collector - to remove candidates that have active tablet file references. This is expected to increase the speed of - subsequent GC Runs. -* {% ghi 3756 %} Added new RPC named cancelUpdate that reduces the amount of threads waiting to close failed - batch write sessions. -* {% ghi 4682 %} Changed the ScanServer file reference format in the metadata table to sort by UUID to increase performance. -* {% ghi 4536 %} Created alternate ScanServerSelector implementation that tries to use scan servers on the same host to leverage - shared off-heap-cache usage. -* {% ghi 4544 %} Made scan servers refresh cached list of tablet files before expiration. Added new property - {% plink sserver.cache.metadata.refresh.percent %} to control when refresh happens. -* {% ghi 3813 %} Made compactors use cached address for compaction coordinator when getting next compaction job. - This lowers load on zookeeper when running many compactors. -* {% ghi 3706 %} Avoid unnescessary copying of hadoop config that was causing Accumulo GC slowdown. -* {% ghi 4709 %} Modified Manager balancer code such that the root, metadata, and user tables will be balanced - separately, and in that order. For example, balancing for user tables will not occur while the metadata table is unbalanced. +* {% ghi 3738 %} Adds an experimental property, {% plink gc.remove.in.use.candidates %}, that + enables the Garbage Collector to remove candidates that have active tablet file references, with + the expectation that a new deletion candidate for the file will reappear when the reference is no + longer in use. This is expected to speed up subsequent GC runs by skipping checks for candidates + that were previously known to still be in use. This experimental property defaults to `false`, but + is expected to become the default behavior in future releases. +* {% ghi 4544 %} Made scan servers refresh their cached list of tablet files before expiration using + a new property, {% plink sserver.cache.metadata.refresh.percent %}, to control when the refresh + happens. +* {% ghi 4536 %} Created a ScanServerSelector implementation available to users using the + `scan.server.selector.impl` client property called + `org.apache.accumulo.core.spi.scan.ConfigurableScanServerHostSelector` that tries to use scan + servers on the same host to leverage shared off-heap-cache usage. +* {% ghi 3737 %},{% ghi 3783 %} Addressed some common transport layer errors about max message sizes + being exceeded. The max message size configuration was greatly simplified by removing any + experimental max message size properties, and deprecating the other non-experimental ones, all in + favor of a new common single property, {% plink rpc.message.size.max %}, for all server types. The + default value is now very large to avoid users experiencing errors, but can be constrained by + setting a smaller value. If the new property is set, it overrides any of the deprecated properties + values that might be set. However, if it is not set, the deprecated property values may still be + in effect. It is recommended that users remove the use of the deprecated properties, so that their + servers will use the new property, whose default value should be sufficient for most users. The + deprecated properties will be removed in version 3.1. A related bug was also fixed where the + configured max message size was not being used in some cases. +* {% ghi 3751 %} Added property {% plink rpc.backlog %} to configure backlog size for Thrift server + sockets. +* {% ghi 4468 %} GrepIterator received several new options to control which portions of the Key + and/or Value are matched. The default behavior preserves the previous behavior of matching + anywhere except in the ColumnVisibility. A related change was made to the shell's `grep` command, + which uses this iterator internally, so that it will also match on ColumnVisibility, which it + didn't do before. This means that any existing uses of the GrepIterator will not see any change in + behavior. However, users should be aware that any scripted uses of the shell's `grep` command may + see additional results that were not previously shown if the matched term is found only in the + ColumnVisibility. +* {% ghi 4348 %} When using the generate-splits utility, detect non-printable characters and throw + an error if base64 encoding option was not used; this is a change from the previous behavior which + would have attempted to use a custom hex-encoding for non-printable characters, which could be + unreliable and hide binary data in split points without the user noticing; users are advised to + always use the base64 option whenever there is a chance of a non-printable character appearing in + a split point +* {% ghi 4223 %} Added properties {% plink compactor.wait.time.job.min %} and + {% plink compactor.wait.time.job.max %} to control compactor polling times. +* {% ghi 3725 %} Changed the gc batch size option, {% plink gc.candidate.batch.size %}, to support + memory percentages in addition to fixed memory sizes, and updated the default value accordingly. + +Some notable performance-related improvements may affect the behavior of the system in noticeable +ways, but are not directly configurable by the user. These include: + +* {% ghi 4309 %} Optimized the logic for getting a random TabletServer connection which + substantially improved Shell startup times, as well as startup times of any other Accumulo client. +* {% ghi 3813 %} Reduced the load on ZooKeeper when running many compactors. +* {% ghi 4709 %} Modified Manager balancer code such that the root, metadata, and user tables will + be balanced separately, and in that order. For example, balancing for user tables will not occur + while the metadata table is unbalanced. +* {% ghi 4682 %} Changed the ScanServer file reference format in the metadata table to sort by UUID + to increase performance. ### Notable Bug Fixes -* {% ghi 3721 %} Fixes issue with writes happening in a retry after batch writer was closed. This +* {% ghi 4283 %} Fixed a bug with client side iterators that prevented them from accessing the + plugin environment. +* {% ghi 3721 %} Fixed an issue with writes happening in a retry after batch writer was closed. This strengthens metadata consistency. -* {% ghi 3749 %} Fixes issue where deleting a compaction pool with running compactions would - leave the tserver in a bad state. -* {% ghi 3748 %} Fixes bug where wal could remained locked if an exception occurred. -* {% ghi 3747 %} Adds validation to prevent possible deadlock when acquiring wal locks. -* {% ghi 3737 %} Use custom Transport to set Transport message and frame size. This fixes - a bug where Accumulo would not change the max message size allowed. -* {% ghi #608 %}, {% ghi 3755 %} Add validation to GC that checks that the scanner used by GC to determine - candidates for deletion returned a complete row as a mitigation for {% ghi #608 %} where - garbage collector removes file that are referenced and in-use. -* {% ghi 3744 %} Fixed bug regarding improperly created GCRun logger name. -* {% ghi 3737 %} Adds a custom Transport Factory to set transport message and frame size to avoid infinite loops - as described in {% ghi 3731 %}. -* {% ghi 3750 %} Fixed issue when deleting a compaction thread pool that would leave tablets in a bad state. -* {% ghi 4117 %} Fixed a bug in compaction properties where the replacement `maxOpen` property was being - ignored in favor of the deprecated `open.max` property. +* {% ghi 3749 %},{% ghi 3750 %} Fixed an issue where deleting a compaction pool with running + compactions would leave the tserver in a bad state. +* {% ghi 3748 %} Fixed a bug where a wal could remained locked if an exception occurred. +* {% ghi 608 %}, {% ghi 3755 %} Add validation to GC that checks that the scanner used by GC to + determine candidates for deletion returned a complete row as a mitigation for {% ghi 608 %} which + observed referenced files being removed when they are still in use. +* {% ghi 3744 %} Fixed a bug regarding improperly created GCRun logger name. +* {% ghi 3737 %} Adds a custom Transport Factory to set transport message and frame size to avoid + infinite loops as described in {% ghi 3731 %}. +* {% ghi 4117 %} Fixed a bug in compaction properties where the replacement `maxOpen` property was + being ignored in favor of the deprecated `open.max` property. * {% ghi 4681 %} Stopped listing all compactors in each compactor to reduce load on Zookeeper. -* {% ghi 4309 %} Optimized logic for getting a random TabletServer connection which improved Shell startup times. -* {% ghi 3873 %} Increase Thrift max message size defaults which avoids errors with large key values. Added - property {% plink rpc.message.size.max %}. Removed experimental properties `sserver.server.message.size.max`, - `compactor.message.size.max`,`compaction.coordinator.message.size.max`. -* {% ghi 3966 %} Changed the default value of the the property {% plink table.majc.compaction.strategy %} - to empty string to fix a compatibility bug with old and new compaction plugins. -* {% ghi 4554 %} Fixed a race condition that could cause duplicate compactions to run. While harmless in - terms of data, the duplicate compactions could waste significant compute resources. -* {% ghi 4127 %} Updated new compaction plugins to honor {% plink table.file.max %} property. Implemented - a much more efficient algorithm than old compaction plugins had for this property. -* {% ghi 4485 %} Interrupt compactions on tablet unload. This prevents long running compactions from blocking - tablet migration. -* {% ghi 3512 %} Fixed issue with improperly cleaned up scans preventing metadata tablet unload. -* {% ghi 4456 %} Setting empty property value no longer deletes property -* {% ghi 4000 %} Fixed bug that could cause bulk import to lose files when errors happened in the tablet server. +* {% ghi 3966 %} Changed the default value of the the property + {% plink table.majc.compaction.strategy %} to empty string to fix a compatibility bug with old and + new compaction plugins. +* {% ghi 4554 %} Fixed a race condition that could cause duplicate compactions to run. While + harmless in terms of data, the duplicate compactions could waste significant compute resources. +* {% ghi 4127 %} Updated new compaction plugins to honor {% plink table.file.max %} property using a + much more efficient algorithm than old compaction plugins had for this property. +* {% ghi 4485 %} Interrupt compactions on tablet unload. This prevents long running compactions from + blocking tablet migration. +* {% ghi 3512 %} Fixed an issue with improperly cleaned up scans preventing metadata tablet unload. +* {% ghi 4456 %} Fixed an issue where setting an empty property value deleted the property +* {% ghi 4000 %} Fixed a bug that could cause bulk import to lose files when errors happened in the + tablet server. * {% ghi 4462 %} Fixed bug that prevented listing Fate operations in some situations. -* {% ghi 4573 %} Modified CredentialProviderToken to no longer store password in serialized form. -* {% ghi 4684 %} Fixed an issue that was causing table creation to get progressively slower when creating a lot of tables. - -### Improvements that help with administration: - -* {% ghi 3697 %} Allow `ACCUMULO_JAVA_PREFIX` option in `accumulo-env.sh` so it can be passed - as an array. This simplifies passing user options when starting Accumulo processes, for example - `numactl` parameters. -* {% ghi 3751 %} Added property {% plink rpc.backlog %} to configure backlog size for - Thrift server sockets. -* {% ghi 3745 %} Adds prefix to gc deletion log messages. This makes it easier to isolate the deletion +* {% ghi 4573 %} Modified CredentialProviderToken serialization so that it no longer stores the + resolved password in serialized form if the user were to serialize it. +* {% ghi 4684 %} Fixed an issue that was causing table creation to get progressively slower when + creating a lot of tables. + +### Metrics Improvements + +* {% ghi 4461 %}, {% ghi 4522 %}, {% ghi 4577 %}, {% ghi 4492 %}, {% ghi 4740 %}, {% ghi 4470 %} + Added new metrics. See {% jlink org.apache.accumulo.core.metrics.MetricsProducer %} for a full + list. +* {% ghi 4459 %} Added the ability to multiple instances of `MeterRegistryFactory` to export metrics + to several destinations at the same time. See {% plink general.micrometer.factory %} +* {% ghi 4622 %}, {% ghi 4716 %} Added metric to report when a server is idle or busy and added a + property, {% plink general.metrics.process.idle %}, to make the threshold configurable. +* {% ghi 3998 %} Added instance name tag to metrics + +### Other Improvements + +* {% ghi 4532 %} Add an option, `timeToWaitForScanServers`, to the `ConfigurableScanServerSelector` + implementation of the `scan.server.selector.impl` client property to cause scans to wait for scan + servers to be online before scanning +* {% ghi 3697 %} Support and document `ACCUMULO_JAVA_PREFIX` option in `accumulo-env.sh` as either + an array or as a scalar, to better support things like `numactl` in calling scripts. +* {% ghi 3745 %} Adds a prefix to gc deletion log messages to it easier to isolate the deletion actions of the garbage collector for analysis. -* {% ghi 3724 %} Adds logging of transactions when metadata and in-memory differences are detected. -* {% ghi 3725 %} Changed the gc batch size from bytes to memory percentage value. Modified default value of - property {% plink gc.candidate.batch.size %}. -* {% ghi 3684 %} Consolidated y/n prompts in the shell. Users can now exit out of multi-table delete operations - without accepting prompts for each one. +* {% ghi 3684 %} Consolidated y/n prompts in the shell. Users can now exit out of multi-table delete + operations without accepting prompts for each one. * {% ghi 3726 %} Adjusted reauthentication messages from the shell to assist with troubleshooting. -* {% ghi 4461 %}, {% ghi 4522 %}, {% ghi 4577 %} Added various metrics for scan servers to determine scan reservations and usage -* {% ghi 4492 %} Emit new metrics for the caches in tablet and scan servers. -* {% ghi 4459 %} Added the ability to specify multiple MeterRegistryFactorys to allow for various metric exporters - to be used at the same time. Updated documentation on the property {% plink general.micrometer.factory %} -* {% ghi 4622 %} Added compactor busy and server idle metrics {% ghi 4740 %} to allow for proper scale-in operations. - Added property {% plink general.metrics.process.idle %} to configure idle time for metrics. -* {% ghi 3927 %} Added a new JSON property type that validates the value is json. Updated the properties - {% plink monitor.resources.external %} and {% plink tserver.compaction.major.service.meta.planner.opts.executors %} to use this new type. -* {% ghi 4223 %} Added properties {% plink compactor.wait.time.job.min %} and {% plink compactor.wait.time.job.max %} to - control the min and max times compactors use when polling for work. -* {% ghi 3998 %} Added instance name tag to metrics. This is useful for the case when metrics from multiple Accumulo instances - are flowing into a single metrics system. -* {% ghi 4763 %} Improved the accumulo-cluster script and cluster.yaml file for the use case of starting and stopping specific - groups of compactors and scan servers. -* {% ghi 4487 %} Scan server properties can now be set in the shell. -* {% ghi 4768 %} Modified thread pool names so that the user will be able to easily find their location in the source code - when looking at thread pool metrics. This change affects the thread names in the output of a jstack on the process. -* {% ghi 4470 %} Added new metrics to indicate how many migrations are remaining. -* {% ghi 4558 %} Added a log message in the Manager when it has been waiting over 15 minutes for a tablet to unload. -* {% ghi 4495 %} Added `accumulo admin serviceStatus` command to quickly get system process status from the command line. +* {% ghi 3927 %} Added validation of JSON property types +* {% ghi 4763 %} Improved the accumulo-cluster script and cluster.yaml file for the use case of + starting and stopping specific groups of compactors and scan servers. +* {% ghi 4487 %} Scan server properties can now be set in the system configuration in ZooKeeper (via + the API or the shell) +* {% ghi 4768 %} Better thread names to make it easier to correlate thread dumps with their related + source code +* {% ghi 4558 %} Added a log message in the Manager when it has been waiting over 15 minutes for a + tablet to unload. +* {% ghi 4495 %} Added `accumulo admin serviceStatus` command to quickly get system process status + from the command line. + + +## Requirements +Accumulo 2.1.3 now requires JDK 17 to build, but still supports Java 11 runtime. ## Upgrading @@ -126,6 +165,7 @@ View the [Upgrading Accumulo documentation][upgrade] for guidance. ## Useful Links +* [All Changes since 2.1.2][all-changes] * [All tickets related to this release][milestone] This release also contains bug fixes from 1.10.4, which was released after 2.1.2. @@ -133,3 +173,4 @@ This release also contains bug fixes from 1.10.4, which was released after 2.1.2 [upgrade]: /docs/2.x/administration/upgrading [milestone]: https://github.com/apache/accumulo/milestone/17 +[all-changes]: https://github.com/apache/accumulo/compare/rel/2.1.2...apache:rel/2.1.3