Deflaking tests to remove chance of false positives causing tests to …

…fail (#34) * Deflaking tests to remove chance of false positives causing tests to fail Signed-off-by: zackcam <zackcam@amazon.com> * Update tests/test_bloom_command.py Co-authored-by: KarthikSubbarao <karthikrs2021@gmail.com> Signed-off-by: zackcam <zackcam@amazon.com> --------- Signed-off-by: zackcam <zackcam@amazon.com> Co-authored-by: KarthikSubbarao <karthikrs2021@gmail.com>
valkey-io · Dec 17, 2024 · 98ccdc6 · 98ccdc6
1 parent 3589d30
commit 98ccdc6
Show file tree

Hide file tree

Showing 10 changed files with 164 additions and 56 deletions.
diff --git a/src/bloom/utils.rs b/src/bloom/utils.rs
@@ -598,12 +598,14 @@ mod tests {
     /// Loops until the capacity of the provided bloom filter is reached and adds a new item to it in every iteration.
     /// The item name is rand_prefix + the index (starting from starting_item_idx).
     /// With every add operation, fp_count is tracked as we expect the add operation to return 1, since it is a new item.
+    /// There is an option to pass in an expected error and assert that we throw that error
     /// Returns the number of errors (false positives) and the final item index.
     fn add_items_till_capacity(
         bf: &mut BloomFilterType,
         capacity_needed: i64,
         starting_item_idx: i64,
         rand_prefix: &String,
+        expected_error: Option<BloomError>,
     ) -> (i64, i64) {
         let mut new_item_idx = starting_item_idx;
         let mut fp_count = 0;
@@ -616,14 +618,26 @@ mod tests {
                     fp_count += 1;
                 }
                 Ok(1) => {
+                    if let Some(err) = expected_error {
+                        panic!(
+                            "Expected error on the bloom object during during item add: {:?}",
+                            err
+                        );
+                    }
                     cardinality += 1;
                 }
                 Ok(i64::MIN..=-1_i64) | Ok(2_i64..=i64::MAX) => {
                     panic!("We do not expect add_item to return any Integer other than 0 or 1.")
                 }
-                Err(e) => {
-                    panic!("We do not expect add_item to throw errors on this scalable filter test, {:?}", e);
-                }
+                Err(e) => match &expected_error {
+                    Some(expected) => {
+                        assert_eq!(&e, expected, "Error doesn't match the expected error");
+                        break;
+                    }
+                    None => {
+                        panic!("Unexpected error when adding items: {:?}", e);
+                    }
+                },
             };
             new_item_idx += 1;
         }
@@ -781,10 +795,14 @@ mod tests {
         )
         .expect("Expect bloom creation to succeed");
         let (error_count, add_operation_idx) =
-            add_items_till_capacity(&mut bf, initial_capacity, 1, &rand_prefix);
-        assert_eq!(
-            bf.add_item(b"new_item", true),
-            Err(BloomError::NonScalingFilterFull)
+            add_items_till_capacity(&mut bf, initial_capacity, 1, &rand_prefix, None);
+        // Check adding to a full non scaling filter will throw an error
+        add_items_till_capacity(
+            &mut bf,
+            initial_capacity + 1,
+            1,
+            &rand_prefix,
+            Some(BloomError::NonScalingFilterFull),
         );
         assert_eq!(bf.capacity(), initial_capacity);
         assert_eq!(bf.cardinality(), initial_capacity);
@@ -860,6 +878,7 @@ mod tests {
                 expected_total_capacity,
                 add_operation_idx + 1,
                 &rand_prefix,
+                None,
             );
             add_operation_idx = new_add_operation_idx;
             total_error_count += error_count;

diff --git a/tests/test_aofrewrite.py b/tests/test_aofrewrite.py
@@ -1,4 +1,4 @@
-import time
+from util.waiters import *
 from valkeytests.valkey_test_case import ValkeyAction
 from valkey_bloom_test_case import ValkeyBloomTestCaseBase
 from valkeytests.conftest import resource_port_tracker
@@ -61,7 +61,6 @@ def test_aofrewrite_bloomfilter_metrics(self):
         self.client.bgrewriteaof()
         self.server.wait_for_action_done(ValkeyAction.AOF_REWRITE)
         # restart server
-        time.sleep(1)
         self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True)
         assert self.server.is_alive()
         restored_server_digest = self.client.debug_digest()
@@ -78,4 +77,5 @@ def test_aofrewrite_bloomfilter_metrics(self):
 
         # Delete the scaled bloomfilter to check both filters are deleted and metrics stats are set accordingly
         self.client.execute_command('DEL key1')
-        self.verify_bloom_metrics(self.client.execute_command("INFO bf"), 0, 0, 0, 0, 0)
+        wait_for_equal(lambda: self.client.execute_command('BF.EXISTS key1 item_prefix1'), 0)
+        self.verify_bloom_metrics(self.client.execute_command("INFO bf"), 0, 0, 0, 0, 0)
diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -31,7 +31,7 @@ def test_basic(self):
         bf_exists_result = client.execute_command('BF.EXISTS filter1 item1')
         assert bf_exists_result == 1
         bf_exists_result = client.execute_command('BF.EXISTS filter1 item2')
-        assert bf_exists_result == 0
+        assert bf_exists_result == 0 or bf_exists_result == 1
 
     def test_copy_and_exists_cmd(self):
         client = self.server.get_new_client()

diff --git a/tests/test_bloom_acl_category.py b/tests/test_bloom_acl_category.py
@@ -9,11 +9,11 @@ def test_bloom_acl_category(self):
         bloom_commands = [
             ('BF.ADD add_key item', 1),
             ('BF.EXISTS add_key item', 1),
-            ('BF.MADD madd_key item1 item2 item3', [1, 1, 1]),
-            ('BF.MEXISTS madd_key item2 item3 item4', [1, 1, 0]),
+            ('BF.CARD add_key', 1),
+            ('BF.MADD madd_key item1 item2 item3', 3),
+            ('BF.MEXISTS madd_key item2 item3 item4', 3),
             ('BF.INSERT insert_key ITEMS item', [1]),
             ('BF.INFO insert_key filters', 1),
-            ('BF.CARD madd_key', 3),
             ('BF.RESERVE reserve_key 0.01 1000', b'OK'),
         ]
         client = self.server.get_new_client()
@@ -42,7 +42,12 @@ def test_bloom_acl_category(self):
             cmd_name = cmd[0].split()[0]
             try:
                 result = client.execute_command(cmd[0])
-                assert result == cmd[1], f"{cmd_name} should work for default user"
+                if cmd[0].startswith("BF.M"):
+                    assert len(result) == cmd[1]
+                    # The first add in a new bloom object should always return 1. For MEXISTS the first item we check will have been added as well so should exist
+                    assert result[0] == 1
+                else:
+                    assert result == cmd[1], f"{cmd_name} should work for default user"
             except Exception as e:
                 assert False, f"bloomuser should be able to execute {cmd_name}: {str(e)}"
 

diff --git a/tests/test_bloom_command.py b/tests/test_bloom_command.py
@@ -25,10 +25,6 @@ def test_bloom_command_error(self):
         # test set up
         assert self.client.execute_command('BF.ADD key item') == 1
         assert self.client.execute_command('BF.RESERVE bf 0.01 1000') == b'OK'
-        # non scaling filter
-        assert self.client.execute_command('BF.RESERVE bf_non 0.01 2 NONSCALING') == b'OK'
-        assert self.client.execute_command('BF.ADD bf_non 0') == 1
-        assert self.client.execute_command('BF.ADD bf_non 1') == 1
 
         basic_error_test_cases = [
             # not found
@@ -93,27 +89,24 @@ def test_bloom_command_behavior(self):
         basic_behavior_test_case = [
             ('BF.ADD key item', 1),
             ('BF.ADD key item', 0),
-            ('BF.ADD key item1', 1),
             ('BF.EXISTS key item', 1),
-            ('BF.EXISTS key item2', 0),
-            ('BF.MADD key item item2', [0, 1]),
+            ('BF.MADD key item item2', 2),
             ('BF.EXISTS key item', 1),
             ('BF.EXISTS key item2', 1),
-            ('BF.EXISTS key item3', 0),
-            ('BF.MADD hello world1 world2 world3', [1, 1, 1]),
-            ('BF.MADD hello world1 world2 world3 world4', [0, 0, 0, 1]),
-            ('BF.MEXISTS hello world5', [0]),
-            ('BF.MADD hello world5', [1]),
-            ('BF.MEXISTS hello world5 world6 world7', [1, 0, 0]),
-            ('BF.INSERT TEST ITEMS ITEM', [1]),
-            ('BF.INSERT TEST CAPACITY 1000 ITEMS ITEM', [0]),
-            ('BF.INSERT TEST CAPACITY 200 error 0.50 ITEMS ITEM ITEM1 ITEM2', [0, 1, 1]),
-            ('BF.INSERT TEST CAPACITY 300 ERROR 0.50 EXPANSION 1 ITEMS ITEM FOO', [0, 1]),
-            ('BF.INSERT TEST ERROR 0.50 EXPANSION 3 NOCREATE items BOO', [1]), 
-            ('BF.INSERT TEST ERROR 0.50 EXPANSION 1 NOCREATE NONSCALING items BOO', [0]),
-            ('BF.INSERT TEST_EXPANSION EXPANSION 9 ITEMS ITEM', [1]),
-            ('BF.INSERT TEST_CAPACITY CAPACITY 2000 ITEMS ITEM', [1]),
-            ('BF.INSERT TEST_ITEMS ITEMS 1 2 3 EXPANSION 2', [1, 1, 1, 1, 0]),
+            ('BF.MADD hello world1 world2 world3', 3),
+            ('BF.MADD hello world1 world2 world3 world4', 4),
+            ('BF.MEXISTS hello world5', 1),
+            ('BF.MADD hello world5', 1),
+            ('BF.MEXISTS hello world5 world6 world7', 3),
+            ('BF.INSERT TEST ITEMS ITEM', 1),
+            ('BF.INSERT TEST CAPACITY 1000 ITEMS ITEM', 1),
+            ('BF.INSERT TEST CAPACITY 200 error 0.50 ITEMS ITEM ITEM1 ITEM2', 3),
+            ('BF.INSERT TEST CAPACITY 300 ERROR 0.50 EXPANSION 1 ITEMS ITEM FOO', 2),
+            ('BF.INSERT TEST ERROR 0.50 EXPANSION 3 NOCREATE items BOO', 1), 
+            ('BF.INSERT TEST ERROR 0.50 EXPANSION 1 NOCREATE NONSCALING items BOO', 1),
+            ('BF.INSERT TEST_EXPANSION EXPANSION 9 ITEMS ITEM', 1),
+            ('BF.INSERT TEST_CAPACITY CAPACITY 2000 ITEMS ITEM', 1),
+            ('BF.INSERT TEST_ITEMS ITEMS 1 2 3 EXPANSION 2', 5),
             ('BF.INFO TEST Capacity', 100),
             ('BF.INFO TEST ITEMS', 5),
             ('BF.INFO TEST filters', 1),
@@ -125,6 +118,7 @@ def test_bloom_command_behavior(self):
             ('BF.CARD TEST', 5),
             ('bf.card HELLO', 0),
             ('BF.RESERVE bf 0.01 1000', b'OK'),
+            ('BF.EXISTS bf non_existant', 0),
             ('BF.RESERVE bf_exp 0.01 1000 EXPANSION 2', b'OK'),
             ('BF.RESERVE bf_non 0.01 1000 NONSCALING', b'OK'),
             ('bf.info bf_exp expansion', 2),
@@ -133,7 +127,14 @@ def test_bloom_command_behavior(self):
 
         for test_case in basic_behavior_test_case:
             cmd = test_case[0]
+            # For non multi commands, this is the verbatim expected result. 
+            # For multi commands, test_case[1] contains the number of item add/exists results which are expected to be 0 or 1.
             expected_result = test_case[1]
+            # For Cardinality commands we want to add items till we are at the number of items we expect then check Cardinality worked
+            if cmd.upper().startswith("BF.CARD"):
+                self.add_items_till_capacity(self.client, cmd.split()[-1], expected_result, 1, "item_prefix")
+            # For multi commands expected result is actually the length of the expected return. While for other commands this we have the literal
+            # expected result
             self.verify_command_success_reply(self.client, cmd, expected_result)
 
         # test bf.info

diff --git a/tests/test_bloom_metrics.py b/tests/test_bloom_metrics.py
@@ -21,11 +21,11 @@ def test_basic_command_metrics(self):
         # Check that other commands don't influence metrics
         assert(self.client.execute_command('BF.EXISTS key item1') == 1)
         assert(self.client.execute_command('BF.ADD key item2') == 1)
-        assert(self.client.execute_command('BF.MADD key item3 item4')== [1, 1])
-        assert(self.client.execute_command('BF.MEXISTS key item3 item5')== [1, 0])
-        assert(self.client.execute_command('BF.CARD key') == 4)
+        assert(len(self.client.execute_command('BF.MADD key item3 item4')) == 2)
+        assert(len(self.client.execute_command('BF.MEXISTS key item3 item5')) == 2)
+        assert(1 <= self.client.execute_command('BF.CARD key') <= 4)
         self.client.execute_command("BF.INFO key")
-        assert(self.client.execute_command('BF.INSERT key ITEMS item5 item6')== [1, 1])
+        assert(len(self.client.execute_command('BF.INSERT key ITEMS item5 item6'))== 2)
 
         self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE, 1, 1, 6, DEFAULT_BLOOM_FILTER_CAPACITY)
         self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE, 1, 1, 6, DEFAULT_BLOOM_FILTER_CAPACITY)
@@ -79,9 +79,6 @@ def test_basic_command_metrics(self):
 
     def test_scaled_bloomfilter_metrics(self):
         self.client.execute_command('BF.RESERVE key1 0.001 7000')
-        # We use a number greater than 7000 in order to have a buffer for any false positives
-        variables = [f"key{i+1}" for i in range(7500)]
-
         # Get original size to compare against size after scaled
         info_obj = self.client.execute_command('BF.INFO key1')
         # Add keys until bloomfilter will scale out

diff --git a/tests/test_correctness.py b/tests/test_correctness.py
@@ -15,8 +15,7 @@ def test_non_scaling_filter(self):
         assert client.execute_command(f'BF.RESERVE {filter_name} {expected_fp_rate} {capacity} NONSCALING') == b"OK"
         # Add items and fill the filter to capacity.
         error_count, add_operation_idx = self.add_items_till_capacity(client, filter_name, capacity, 1, item_prefix)
-        with pytest.raises(Exception, match="non scaling filter is full"):
-            client.execute_command(f'BF.ADD {filter_name} new_item')
+        self.add_items_till_scaling_failure(client, filter_name, add_operation_idx, item_prefix)
         # Validate that is is filled.
         info = client.execute_command(f'BF.INFO {filter_name}')
         it = iter(info)

diff --git a/tests/test_save_and_restore.py b/tests/test_save_and_restore.py
@@ -85,7 +85,6 @@ def test_restore_failed_large_bloom_filter(self):
         assert bf_exists_result_1 == 1
         bf_info_result_1 = client.execute_command('BF.INFO testSave')
         assert(len(bf_info_result_1)) != 0
-        curr_item_count_1 = client.info_obj().num_keys()
 
         # Save rdb and try to load this on a sever. Validate module data type load fails and server does not startup.
         client.bgsave()

diff --git a/tests/test_valkeypy_bloom_compatibility.py b/tests/test_valkeypy_bloom_compatibility.py
@@ -33,32 +33,88 @@ def test_create(self):
         assert decoded_r.bf().create("bloom_e", 0.01, 1000, expansion=1)
         assert decoded_r.bf().create("bloom_ns", 0.01, 1000, noScale=True)
 
+# valkey-bloom start
+
+    # def test_bf_add(self):
+    #     decoded_r = self.server.get_new_client()
+    #     assert decoded_r.bf().create("bloom", 0.01, 1000)
+    #     assert 1 == decoded_r.bf().add("bloom", "foo")
+    #     assert 0 == decoded_r.bf().add("bloom", "foo")
+    #     assert [0] == self.intlist(decoded_r.bf().madd("bloom", "foo"))
+    #     assert [0, 1] == decoded_r.bf().madd("bloom", "foo", "bar")
+    #     assert [0, 0, 1] == decoded_r.bf().madd("bloom", "foo", "bar", "baz")
+    #     assert 1 == decoded_r.bf().exists("bloom", "foo")
+    #     assert 0 == decoded_r.bf().exists("bloom", "noexist")
+    #     assert [1, 0] == self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist"))
+
     def test_bf_add(self):
         decoded_r = self.server.get_new_client()
         assert decoded_r.bf().create("bloom", 0.01, 1000)
+        assert 0 == decoded_r.bf().exists("bloom", "noexist")
         assert 1 == decoded_r.bf().add("bloom", "foo")
         assert 0 == decoded_r.bf().add("bloom", "foo")
         assert [0] == self.intlist(decoded_r.bf().madd("bloom", "foo"))
-        assert [0, 1] == decoded_r.bf().madd("bloom", "foo", "bar")
-        assert [0, 0, 1] == decoded_r.bf().madd("bloom", "foo", "bar", "baz")
+        madd_return = decoded_r.bf().madd("bloom", "foo", "bar")
+        assert 0 == madd_return[0]
+        assert 2 == len(madd_return)
+        self.check_return_of_multi_commands(madd_return)
+        madd_return = decoded_r.bf().madd("bloom", "foo", "bar", "baz")
+        assert 3 == len(madd_return)
+        self.check_return_of_multi_commands(madd_return)
         assert 1 == decoded_r.bf().exists("bloom", "foo")
-        assert 0 == decoded_r.bf().exists("bloom", "noexist")
-        assert [1, 0] == self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist"))
+        mexists_return = self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist"))
+        assert 1 == mexists_return[0]
 
+    # def test_bf_insert(self):
+    #     decoded_r = self.server.get_new_client()
+    #     assert decoded_r.bf().create("bloom", 0.01, 1000)
+    #     assert [1] == self.intlist(decoded_r.bf().insert("bloom", ["foo"]))
+    #     assert [0, 1] == self.intlist(decoded_r.bf().insert("bloom", ["foo", "bar"]))
+    #     assert [1] == self.intlist(decoded_r.bf().insert("captest", ["foo"], capacity=10))
+    #     assert [1] == self.intlist(decoded_r.bf().insert("errtest", ["foo"], error=0.01))
+    #     assert 1 == decoded_r.bf().exists("bloom", "foo")
+    #     assert 0 == decoded_r.bf().exists("bloom", "noexist")
+    #     assert [1, 0] == self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist"))
+    #     info = decoded_r.bf().info("bloom")
+    #     self.assert_resp_response(
+    #         decoded_r,
+    #         2,
+    #         info.get("insertedNum"),
+    #         info.get("Number of items inserted"),
+    #     )
+    #     self.assert_resp_response(
+    #         decoded_r,
+    #         1000,
+    #         info.get("capacity"),
+    #         info.get("Capacity"),
+    #     )
+    #     self.assert_resp_response(
+    #         decoded_r,
+    #         1,
+    #         info.get("filterNum"),
+    #         info.get("Number of filters"),
+    #     )
     def test_bf_insert(self):
         decoded_r = self.server.get_new_client()
         assert decoded_r.bf().create("bloom", 0.01, 1000)
         assert [1] == self.intlist(decoded_r.bf().insert("bloom", ["foo"]))
-        assert [0, 1] == self.intlist(decoded_r.bf().insert("bloom", ["foo", "bar"]))
+        bloom_insert_return = self.intlist(decoded_r.bf().insert("bloom", ["foo", "bar"]))
+        assert 2 == len(bloom_insert_return)
+        assert 0 == bloom_insert_return[0]
+        num_items_inserted = -1
+        if bloom_insert_return[1] == 1 or bloom_insert_return[1] == 0:
+            # We have inserted either 1 or 2 items. If this returned 1 that means we inserted a new item and have two items
+            # otherwise we had a false positive and only have inserted 1 item 
+            num_items_inserted = bloom_insert_return[1] + 1
         assert [1] == self.intlist(decoded_r.bf().insert("captest", ["foo"], capacity=10))
         assert [1] == self.intlist(decoded_r.bf().insert("errtest", ["foo"], error=0.01))
         assert 1 == decoded_r.bf().exists("bloom", "foo")
-        assert 0 == decoded_r.bf().exists("bloom", "noexist")
-        assert [1, 0] == self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist"))
+        mexists_return = self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist"))
+        assert 1 == mexists_return[0]
         info = decoded_r.bf().info("bloom")
         self.assert_resp_response(
             decoded_r,
-            2,
+            num_items_inserted,
             info.get("insertedNum"),
             info.get("Number of items inserted"),
         )
@@ -75,6 +131,12 @@ def test_bf_insert(self):
             info.get("Number of filters"),
         )
 
+    def check_return_of_multi_commands(self, returned_count):
+        for value in returned_count:
+            assert value in [0, 1], f"Returned Value: {value} is not 0 or 1"
+
+# valkey-bloom end
+
     def test_bf_info(self):
         decoded_r = self.server.get_new_client()
         expansion = 4