Fix composite search bug and other improvements.

- Fix bug where composite search failed to focus on reproducibility shifts with the new system
- Increase use of binary metadata cache for Chromium to improve efficiency
- Update tooltips as part of the UI overhaul
- Add composite search test
DistriNet · Apr 8, 2024 · 3c9b14b · 3c9b14b
1 parent fb59fe6
commit 3c9b14b
Show file tree

Hide file tree

Showing 15 changed files with 119 additions and 33 deletions.
diff --git a/bci/database/mongo/mongodb.py b/bci/database/mongo/mongodb.py
@@ -4,11 +4,14 @@
 from abc import ABC
 from datetime import datetime, timezone
 
+from flatten_dict import flatten
 from pymongo import MongoClient
 from pymongo.collection import Collection
 from pymongo.errors import ServerSelectionTimeoutError
 
-from bci.evaluations.logic import DatabaseConnectionParameters, PlotParameters, TestParameters, TestResult, WorkerParameters
+from bci.evaluations.logic import (DatabaseConnectionParameters,
+                                   PlotParameters, TestParameters, TestResult,
+                                   WorkerParameters)
 from bci.version_control.states.state import State
 
 logger = logging.getLogger(__name__)
@@ -181,7 +184,7 @@ def get_binary_availability_collection(browser_name: str):
     @staticmethod
     def has_binary_available_online(browser: str, state: State):
         collection = MongoDB.get_binary_availability_collection(browser)
-        document = collection.find_one({'state': state.to_dict()})
+        document = collection.find_one({'state': state.to_dict(make_complete=False)})
         if document is None:
             return None
         return document["binary_online"]
@@ -202,6 +205,19 @@ def get_stored_binary_availability(browser):
             result.sort('build_id', -1)
         return result
 
+    @staticmethod
+    def get_complete_state_dict_from_binary_availability_cache(state: State):
+        collection = MongoDB.get_binary_availability_collection(state.browser_name)
+        # We have to flatten the state dictionary to ignore missing attributes.
+        state_dict = {
+            'state': state.to_dict(make_complete=False)
+        }
+        query = flatten(state_dict, reducer='dot')
+        document = collection.find_one(query)
+        if document is None:
+            return None
+        return document['state']
+
     @staticmethod
     def store_binary_availability_online_cache(browser: str, state: State, binary_online: bool, url: str = None):
         collection = MongoDB.get_binary_availability_collection(browser)

diff --git a/bci/evaluations/logic.py b/bci/evaluations/logic.py
@@ -253,7 +253,7 @@ def padded_browser_version(self):
 
     @property
     def reproduced(self):
-        entry_if_reproduced = {'val': 'reproduced', 'var': 'OK'}
+        entry_if_reproduced = {'var': 'reproduced', 'val': 'OK'}
         reproduced_in_req_vars = [entry for entry in self.data['req_vars'] if entry == entry_if_reproduced] != []
         reproduced_in_log_vars = [entry for entry in self.data['log_vars'] if entry == entry_if_reproduced] != []
         return reproduced_in_req_vars or reproduced_in_log_vars

diff --git a/bci/evaluations/outcome_checker.py b/bci/evaluations/outcome_checker.py
@@ -11,11 +11,20 @@ def __init__(self, sequence_config: SequenceConfiguration):
 
     @abstractmethod
     def get_outcome(self, result: TestResult) -> bool:
+        '''
+        Returns the outcome of the test result.
+
+        - None in case of an error.
+        - True if the test was reproduced.
+        - False if the test was not reproduced.
+        '''
+        if result.is_dirty:
+            return None
         if result.reproduced:
             return True
         # Backwards compatibility
         if self.sequence_config.target_mech_id:
-            return (outcome := self.get_outcome_for_proxy(result))
+            return self.get_outcome_for_proxy(result)
 
     def get_outcome_for_proxy(self, result: TestResult) -> bool | None:
         target_mech_id = self.sequence_config.target_mech_id

diff --git a/bci/search_strategy/sequence_elem.py b/bci/search_strategy/sequence_elem.py
@@ -31,7 +31,8 @@ def update_outcome(self, outcome: bool):
             raise AttributeError(f"Outcome was already set to DONE for {repr(self)}")
         if outcome is None:
             self.state = ElemState.ERROR
-        self.state = ElemState.DONE
+        else:
+            self.state = ElemState.DONE
         self.outcome = outcome
 
     def get_deep_copy(self, index=None):

diff --git a/bci/ui/frontend/src/App.vue b/bci/ui/frontend/src/App.vue
@@ -422,7 +422,7 @@ export default {
 
       <!-- Experiments -->
       <div class="form-section flex flex-col grow h-0">
-        <section-header section="experiments" class="w-1/2" left></section-header>
+        <section-header section="experiments" class="w-1/2"></section-header>
 
         <select class="mb-2" v-model="selected.project">
           <option disabled value="">Select a project</option>

diff --git a/bci/ui/frontend/src/components/section-header.vue b/bci/ui/frontend/src/components/section-header.vue
@@ -13,7 +13,7 @@
           },
           "browser_rev_range": {
             "title": "Browser revision range",
-            "tooltip": "Specify a browser range based on revision numebers. The browser version range will be ignored if this is set."
+            "tooltip": "Define a binary range based on revision numbers. The browser version range defined above will be disregarded if this option is used."
           },
           "db_collection": {
             "title": "Database collection",
@@ -22,31 +22,31 @@
           "eval_range": {
             "title": "Browser version range",
             "tooltip":
-              "Specify the scope of revisions to be evaluated by setting the boundaries using either browser release version numbers or revision numbers."
+              "Specify which binaries you want to evaluate by selecting the browser and its version range. Enabling deep search extends the evaluation to include all available revision binaries, offering a more comprehensive analysis."
           },
           "eval_settings": {
             "title": "Evaluation settings",
-            "tooltip": "Customize the evaluation process by selecting the automation mode, search strategy, and the number of parallel containers."
+            "tooltip": "Customize the evaluation process by selecting the search strategy, sequence limit and the number of parallel containers."
           },
           "experiments": {
             "title": "Experiments",
-            "tooltip": "Choose the experiments to be conducted. All available experiments within the selected project are shown here. Note that if multiple experiments are chosen, binary search and composite search will only target the experiment of which the reproduction id is provided."
+            "tooltip": "Pick a project from the dropdown menu to access all available experiments. Then, mark the experiments you wish to conduct. Keep in mind that if multiple experiments are chosen, only a binary sequence will be performed. For a binary search or composite search, select only one experiment."
           },
           "parallel_containers": {
             "title": "Number of parallel containers",
-            "tooltip": "Specify the number of concurrent containers allowed to run for evaluating each revision binary. To disable concurrency and conduct all experiments in the main container, enter '1'. This will prevent new containers from being spawned during the evaluation process."
+            "tooltip": "Specify the maximum number of concurrent containers allowed to run. To disable concurrency, simply input '1'."
           },
           "reproduction_id": {
             "title": "Reproduction id",
-            "tooltip": "Experiments can have one or more boolean outcomes. Each outcome is identified by a reproduction ID, which is linked to a boolean value indicating reproducibility. In most cases, experiments have only one outcome, and the reproduction ID is set to be the same as the experiment ID by default."
+            "tooltip": "This option will be deprecated. Experiments can have one or more boolean outcomes. Each outcome is identified by a reproduction ID, which is linked to a boolean value indicating reproducibility. In most cases, experiments have only one outcome, and the reproduction ID is set to be the same as the experiment ID by default."
           },
           "results": {
             "title": "Results",
-            "tooltip": "The evaluation results can be visualized in a Gantt chart. This chart is generated based on the selected fields. Use the dropdown menu to choose the experiment for which you want to display the data. Squares represent (approximations of) release binaries, while dots represent revision binaries. Note that the zoom level within the Gantt chart widget will reset each time the chart is refreshed."
+            "tooltip": "Choose an experiment from the dropdown menu to visualize its results in the Gantt chart below. Squares represent (approximate) release binaries, while dots represent revision binaries. Disable auto-refresh to prevent chart updates while navigating the chart."
           },
           "search_strategy": {
             "title": "Search strategy",
-            "tooltip": "Configure the evaluation to perform either a general sweep on revision binaries, a targeted search for introductions and fixes, or a combination of the two."
+            "tooltip": "Select a search strategy. Additional information on each specific search strategy is available with each option."
           }
         }
       }

diff --git a/bci/ui/frontend/src/components/tooltip.vue b/bci/ui/frontend/src/components/tooltip.vue
@@ -4,16 +4,16 @@
       return {
         tooltips: {
           "bin_seq": {
-            "tooltip": "Perform a general sweep over the evaluation range. Experiment outcomes will not influence the selection of the next revision to evaluate."
+            "tooltip": "Binaries are selected uniformly over the specified evaluation range. Experiment outcomes do not influence the next binary to be evaluated."
           },
           "bin_search": {
-            "tooltip": "Perform a focused search to identify either an introducing or fixing revision. Should only be performed within a range where a shift in reproducibility has been observed."
+            "tooltip": "Perform a search to identify either an introducing or fixing revision. This should only be performed within a range where one shift in reproducibility has been observed."
           },
           "comp_search": {
-            "tooltip": "Combines the two strategies above. First, a general sweep is performed until the sequence limit is reached. Then, for each shift in reproducibility that can be observed, a targeted search is conducted to identify an introducing or fixing revision."
+            "tooltip": "Combines the two strategies above. First, binaries are selected uniformly over the evaluation range, until the sequence limit is reached. Then, for each shift in reproducibility that can be observed, a search is conducted to identify the introducing or fixing binary."
           },
           "deep_search": {
-            "tooltip": "Evaluate at revision level to find code changes that introduced or fixed a bug. If unchecked, only browser releases (or base positions of releases in case of Chromium) will be evaluated."
+            "tooltip": "Opt to evaluate at the revision level to pinpoint code changes that introduced or fixed a bug. If unchecked, only browser releases (or base positions of releases in the case of Chromium) will be analyzed."
           },
           "mech_id": {
             "tooltip": "Experiments can have one or more boolean outcomes. Each outcome is identified by a reproduction ID, which is linked to a boolean value indicating reproducibility. In most cases, experiments have only one outcome, and the reproduction ID is set to be the same as the experiment ID by default."
@@ -22,7 +22,7 @@
             "tooltip": "TBD"
           },
           "sequence_limit": {
-            "tooltip": "Specify the maximum number of revisions to be evaluated in the general sweep."
+            "tooltip": "Specify the maximum number of binaries to be evaluated during the binary sequence stage."
           },
         }
       }

diff --git a/bci/version_control/states/revisions/base.py b/bci/version_control/states/revisions/base.py
@@ -1,5 +1,6 @@
-from abc import abstractmethod
 import re
+from abc import abstractmethod
+
 from bci.version_control.states.state import State
 
 
@@ -29,18 +30,36 @@ def browser_name(self):
     def name(self):
         return f'{self.revision_number}'
 
-    def to_dict(self) -> dict:
-        return {
-            'type': 'revision',
-            'browser_name': self.browser_name,
-            'revision_id': self.revision_id,
-            'revision_number': self.revision_number
-        }
+    def to_dict(self, make_complete: bool = True) -> dict:
+        '''
+        Returns a dictionary representation of the state.
+        If make_complete is True, any missing information will be fetched.
+        For example, only the revision id might be known, but not the revision number.
+        '''
+        if make_complete:
+            return {
+                'type': 'revision',
+                'browser_name': self.browser_name,
+                'revision_id': self.revision_id,
+                'revision_number': self.revision_number
+            }
+        else:
+            state_dict = {
+                'type': 'revision',
+                'browser_name': self.browser_name
+            }
+            if self._revision_id is not None:
+                state_dict['revision_id'] = self._revision_id
+            if self._revision_number is not None:
+                state_dict['revision_number'] = self._revision_number
+            return state_dict
 
     @staticmethod
     def from_dict(data: dict) -> State:
-        from bci.version_control.states.revisions.chromium import ChromiumRevision
-        from bci.version_control.states.revisions.firefox import FirefoxRevision
+        from bci.version_control.states.revisions.chromium import \
+            ChromiumRevision
+        from bci.version_control.states.revisions.firefox import \
+            FirefoxRevision
         match data['browser_name']:
             case 'chromium':
                 return ChromiumRevision(

diff --git a/bci/version_control/states/revisions/chromium.py b/bci/version_control/states/revisions/chromium.py
@@ -29,7 +29,11 @@ def get_online_binary_url(self):
         return "https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/%s%%2F%s%%2Fchrome-%s.zip?alt=media" % ('Linux_x64', self._revision_number, 'linux')
 
     def _fetch_revision_id(self) -> str:
+        if state := MongoDB.get_complete_state_dict_from_binary_availability_cache(self):
+            return state['revision_id']
         return PARSER.get_rev_id(self._revision_number)
 
     def _fetch_revision_number(self) -> int:
+        if state := MongoDB.get_complete_state_dict_from_binary_availability_cache(self):
+            return state['revision_number']
         return PARSER.get_rev_number(self._revision_id)
diff --git a/bci/version_control/states/state.py b/bci/version_control/states/state.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from abc import abstractmethod, abstractproperty
-import re
 
 
 class EvaluationResult:

diff --git a/bci/version_control/states/versions/base.py b/bci/version_control/states/versions/base.py
@@ -1,4 +1,5 @@
 from abc import abstractmethod, abstractproperty
+
 from bci.version_control.states.state import State
 
 
@@ -26,7 +27,7 @@ def name(self):
     def browser_name(self):
         pass
 
-    def to_dict(self) -> dict:
+    def to_dict(self, make_complete: bool = True) -> dict:
         return {
             'type': 'version',
             'browser_name': self.browser_name,
@@ -37,7 +38,8 @@ def to_dict(self) -> dict:
 
     @staticmethod
     def from_dict(data: dict) -> State:
-        from bci.version_control.states.versions.chromium import ChromiumVersion
+        from bci.version_control.states.versions.chromium import \
+            ChromiumVersion
         from bci.version_control.states.versions.firefox import FirefoxVersion
         match data['browser_name']:
             case 'chromium':

diff --git a/requirements.in b/requirements.in
@@ -1,6 +1,7 @@
 bokeh
 docker
 Flask
+flatten-dict
 gunicorn
 mitmproxy
 pymongo

diff --git a/requirements.txt b/requirements.txt
@@ -43,6 +43,8 @@ flask==3.0.2
     # via
     #   -r requirements.in
     #   mitmproxy
+flatten-dict==0.4.2
+    # via -r requirements.in
 gunicorn==21.2.0
     # via -r requirements.in
 h11==0.14.0
@@ -139,7 +141,9 @@ ruamel-yaml-clib==0.2.8
 service-identity==24.1.0
     # via aioquic
 six==1.16.0
-    # via python-dateutil
+    # via
+    #   flatten-dict
+    #   python-dateutil
 sortedcontainers==2.4.0
     # via mitmproxy
 tornado==6.4

diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -90,6 +90,8 @@ flask==3.0.2
     # via
     #   -r requirements.txt
     #   mitmproxy
+flatten-dict==0.4.2
+    # via -r requirements.txt
 gunicorn==21.2.0
     # via -r requirements.txt
 h11==0.14.0
@@ -286,6 +288,7 @@ service-identity==24.1.0
 six==1.16.0
     # via
     #   -r requirements.txt
+    #   flatten-dict
     #   python-dateutil
 sortedcontainers==2.4.0
     # via

diff --git a/test/sequence/test_composite_search.py b/test/sequence/test_composite_search.py
@@ -42,3 +42,31 @@ def outcome(x) -> bool:
                 assert seq.sequence_strategy_finished
             assert expected_elem_search == elem_search
             self.assertRaises(SequenceFinished, seq.next)
+
+    def test_composite_search(self):
+        with patch('bci.search_strategy.sequence_elem.SequenceElem.is_available', self.always_true):
+            def outcome(x) -> bool:
+                return x < 22 or x > 60
+
+            values = list(range(100))
+            seq = CompositeSearch(values, 2, 10, NArySequence, NArySearch)
+            seq.is_available = self.always_true
+            expected_sequence_part = [0, 99, 50, 26, 75, 14, 39, 63, 88, 8]
+            expected_search_part = [21, 24, 23, 22, 57, 61, 60]
+
+            actual_sequence_part = []
+            for _ in range(10):
+                elem = seq.next()
+                seq.update_outcome(elem, outcome(elem))
+                actual_sequence_part.append(elem)
+            assert expected_sequence_part == actual_sequence_part
+
+            actual_search_part = []
+            while True:
+                try:
+                    elem = seq.next()
+                    seq.update_outcome(elem, outcome(elem))
+                    actual_search_part.append(elem)
+                except SequenceFinished:
+                    break
+            assert expected_search_part == actual_search_part