Removed 'all', used is_in, static variable filtering prints warning if no valid rows
justin13601 · Aug 10, 2024 · 9d5e9dc · 9d5e9dc
1 parent 020a3a7
commit 9d5e9dc
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 21 deletions.
diff --git a/sample_configs/inhospital_mortality.yaml b/sample_configs/inhospital_mortality.yaml
@@ -2,7 +2,7 @@
 predicates:
   admission:
     code:
-      regex: ^event_type.*
+      any: ["event_type//ADMISSION", "event_type//DISCHARGE"]
   discharge:
     code: event_type//DISCHARGE
   death:

diff --git a/src/aces/config.py b/src/aces/config.py
@@ -67,49 +67,49 @@ def MEDS_eval_expr(self) -> pl.Expr:
             [(col("code")) == (String(BP//diastolic))].all_horizontal([[(col("chamber")) ==
                (String(atrial))]])
 
-            >>> cfg = PlainPredicateConfig(code={'regex': None, 'any': None, 'all': None})
+            >>> cfg = PlainPredicateConfig(code={'regex': None, 'any': None})
             >>> expr = cfg.MEDS_eval_expr() # doctest: +NORMALIZE_WHITESPACE
             Traceback (most recent call last):
                 ...
-            ValueError: Only one of 'regex', 'any', or 'all' can be specified in the code field!
-            Got: ['regex', 'any', 'all'].
+            ValueError: Only one of 'regex' or 'any' can be specified in the code field!
+            Got: ['regex', 'any'].
             >>> cfg = PlainPredicateConfig(code={'foo': None})
             >>> expr = cfg.MEDS_eval_expr() # doctest: +NORMALIZE_WHITESPACE
             Traceback (most recent call last):
                 ...
             ValueError: Invalid specification in the code field! Got: {'foo': None}.
-            Expected one of 'regex', 'any', or 'all'.
+            Expected one of 'regex', 'any'.
             >>> cfg = PlainPredicateConfig(code={'regex': ''})
             >>> expr = cfg.MEDS_eval_expr() # doctest: +NORMALIZE_WHITESPACE
             Traceback (most recent call last):
                 ...
             ValueError: Invalid specification in the code field! Got: {'regex': ''}.
             Expected a non-empty string for 'regex'.
-            >>> cfg = PlainPredicateConfig(code={'all': []})
+            >>> cfg = PlainPredicateConfig(code={'any': []})
             >>> expr = cfg.MEDS_eval_expr() # doctest: +NORMALIZE_WHITESPACE
             Traceback (most recent call last):
                 ...
-            ValueError: Invalid specification in the code field! Got: {'all': []}.
-            Expected a list of strings for 'all'.
+            ValueError: Invalid specification in the code field! Got: {'any': []}.
+            Expected a list of strings for 'any'.
 
             >>> cfg = PlainPredicateConfig(code={'regex': '^foo.*'})
             >>> expr = cfg.MEDS_eval_expr() # doctest: +NORMALIZE_WHITESPACE
             >>> print(expr) # doctest: +NORMALIZE_WHITESPACE
             col("code").str.contains([String(^foo.*)])
-            >>> cfg = PlainPredicateConfig(code={'all': ['foo', 'bar']})
+            >>> cfg = PlainPredicateConfig(code={'any': ['foo', 'bar']})
             >>> expr = cfg.MEDS_eval_expr() # doctest: +NORMALIZE_WHITESPACE
             >>> print(expr) # doctest: +NORMALIZE_WHITESPACE
             [(col("code")) == (String(foo))].all_horizontal([[(col("code")) == (String(bar))]])
             >>> cfg = PlainPredicateConfig(code={'any': ['foo', 'bar']})
             >>> expr = cfg.MEDS_eval_expr() # doctest: +NORMALIZE_WHITESPACE
             >>> print(expr) # doctest: +NORMALIZE_WHITESPACE
-            [(col("code")) == (String(foo))].any_horizontal([[(col("code")) == (String(bar))]])
+            col("code").is_in([Series])
         """
         criteria = []
         if isinstance(self.code, dict):
             if len(self.code) > 1:
                 raise ValueError(
-                    "Only one of 'regex', 'any', or 'all' can be specified in the code field! "
+                    "Only one of 'regex' or 'any' can be specified in the code field! "
                     f"Got: {list(self.code.keys())}."
                 )
 
@@ -121,24 +121,19 @@ def MEDS_eval_expr(self) -> pl.Expr:
                         "Expected a non-empty string for 'regex'."
                     )
                 criteria.append(pl.col("code").str.contains(self.code["regex"]))
-            elif "any" in self.code or "all" in self.code:  # 'all' is redundant? it shouldn't be possible...?
-                logic = list(self.code.keys())[0]
-                if not self.code[logic] or not isinstance(self.code[logic], list):
+            elif "any" in self.code:
+                if not self.code["any"] or not isinstance(self.code["any"], list):
                     raise ValueError(
                         "Invalid specification in the code field! "
                         f"Got: {self.code}. "
-                        f"Expected a list of strings for '{logic}'."
+                        f"Expected a list of strings for 'any'."
                     )
-                criteria.append(
-                    pl.all_horizontal([pl.col("code") == code for code in self.code[logic]])
-                    if logic == "all"
-                    else pl.any_horizontal([pl.col("code") == code for code in self.code[logic]])
-                )
+                criteria.append(pl.Expr.is_in(pl.col("code"), self.code["any"]))
             else:
                 raise ValueError(
                     "Invalid specification in the code field! "
                     f"Got: {self.code}. "
-                    "Expected one of 'regex', 'any', or 'all'."
+                    "Expected one of 'regex', 'any'."
                 )
         else:
             criteria.append(pl.col("code") == self.code)

diff --git a/src/aces/query.py b/src/aces/query.py
@@ -65,6 +65,10 @@ def query(cfg: TaskExtractorConfig, predicates_df: pl.DataFrame) -> pl.DataFrame
         logger.info("No static variable criteria specified, removing all rows with null timestamps...")
         predicates_df = predicates_df.drop_nulls(subset=["subject_id", "timestamp"])
 
+    if predicates_df.is_empty():
+        logger.warning("No valid rows found after filtering patient demographics. Exiting.")
+        return pl.DataFrame()
+
     logger.info("Identifying possible trigger nodes based on the specified trigger event...")
     prospective_root_anchors = check_constraints({cfg.trigger.predicate: (1, None)}, predicates_df).select(
         "subject_id", pl.col("timestamp").alias("subtree_anchor_timestamp")