From 01d4157a7a3314344fb60372a15a31187b517a0a Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 6 Jan 2025 11:32:51 -0500 Subject: [PATCH] Fix flakiness on TestFilestreamMetadataUpdatedOnRename For some reason this test became flaky. The root of the flakiness is not in the test itself but in how a rename operation is detected. Even though this test uses `os.Rename`, it does not seem to be an atomic operation. https://www.man7.org/linux/man-pages/man2/rename.2.html does not make it clear whether 'renameat' (used by `os.Rename`) is atomic. On a flaky execution, the file is actually perceived as removed and then a new file is created, both with the same inode. This happens on a system that does not reuse inodes as soon as they're freed. Because the file is detected as removed, its state is also removed. Then when more data is added, only the offset of the new data is tracked by the registry, causing the test to fail. A workaround for this is to not remove the state when the file is removed, hence `clean_removed: false` is set in the test config. --- .../input/filestream/input_integration_test.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/filebeat/input/filestream/input_integration_test.go b/filebeat/input/filestream/input_integration_test.go index 80327d8bcf2c..0761f39264b6 100644 --- a/filebeat/input/filestream/input_integration_test.go +++ b/filebeat/input/filestream/input_integration_test.go @@ -97,6 +97,23 @@ func TestFilestreamMetadataUpdatedOnRename(t *testing.T) { "id": id, "paths": []string{env.abspath(testlogName) + "*"}, "prospector.scanner.check_interval": "1ms", + // For some reason this test became flaky. The root of the flakiness + // is not in the test itself but in how a rename operation is detected. + // Even though this test uses `os.Rename`, it does not seem to be an atomic + // operation. https://www.man7.org/linux/man-pages/man2/rename.2.html + // does not make it clear whether 'renameat' (used by `os.Rename`) is + // atomic. 
+ // + // On a flaky execution, the file is actually perceived as removed + // and then a new file is created, both with the same inode. This + // happens on a system that does not reuse inodes as soon as they're + // freed. Because the file is detected as removed, its state is also + // removed. Then when more data is added, only the offset of the new + // data is tracked by the registry, causing the test to fail. + // + // A workaround for this is to not remove the state when the file is + // removed, hence `clean_removed: false` is set here. + "clean_removed": false, }) testline := []byte("log line\n")