From ad2791e0af0fcf8153e553e92d8ef5a158e31ab7 Mon Sep 17 00:00:00 2001 From: Oleksandr Yakushev Date: Tue, 30 Jul 2024 22:15:54 -0400 Subject: [PATCH] Perform batch hydration in O(N) This hadn't been noticed previously since N is generally small, eg. 50 or 60 at most. Working with a Metabase dashboard that loaded 10000 fields (100 each from 100 tables) in a single `select` + `hydrate`, I found this was consuming about 3 seconds. The slow path was effectively a very slow `conj`: taking the `acc` vector, and then for each `annotated-instances` doing: ```clojure (recur (vec (concat acc nil [(first hydrated-instances)])) ...) ``` which is pouring the whole vector into a lazy sequence and back into a vector, once for each instance in the list. The new version first creates an "index" of instances that need hydration, and later uses that index to map the instances in the hydrated instances batch to their places in the initial collection. Co-authored-by: Braden Shepherdson --- src/toucan2/tools/hydrate.clj | 59 ++++++++++------------------- test/toucan2/tools/hydrate_test.clj | 39 ++++++++----------- 2 files changed, 36 insertions(+), 62 deletions(-) diff --git a/src/toucan2/tools/hydrate.clj b/src/toucan2/tools/hydrate.clj index 2322d59..5b0516c 100644 --- a/src/toucan2/tools/hydrate.clj +++ b/src/toucan2/tools/hydrate.clj @@ -430,48 +430,29 @@ ;;; `:needs-hydration?`, replace it with the first hydrated instance; repeat this process until we have spliced all ;;; the hydrated instances back in. -(defn- merge-hydrated-instances - "Merge the annotated instances as returned by [[annotate-instances]] and the hydrated instances as returned - by [[hydrate-annotated-instances]] back into a single un-annotated sequence. - - (merge-hydrated-instances - [{:needs-hydration? false, :instance {:x 1, :y 1}} - {:needs-hydration? false, :instance {:x 2, :y 2}} - {:needs-hydration? true, :instance {:x 3}} - {:needs-hydration? true, :instance {:x 4}} - {:needs-hydration? false, :instance {:x 5, :y 5}} - {:needs-hydration? true, :instance {:x 6}}] - [{:x 3, :y 3} {:x 4, :y 4} {:x 6, :y 6}]) - => - [{:x 1, :y 1} - {:x 2, :y 2} - {:x 3, :y 3} - {:x 4, :y 4} - {:x 5, :y 5} - {:x 6, :y 6}]" - [annotated-instances hydrated-instances] - (loop [acc [], annotated-instances annotated-instances, hydrated-instances hydrated-instances] - (if (empty? hydrated-instances) - (concat acc (map :instance annotated-instances)) - (let [[not-hydrated [_needed-hydration & more]] (split-with (complement :needs-hydration?) annotated-instances)] - (recur (vec (concat acc (map :instance not-hydrated) [(first hydrated-instances)])) - more - (rest hydrated-instances)))))) - -(defn- annotate-instances [model k instances] - (for [instance instances] - {:needs-hydration? (needs-hydration? model k instance) - :instance instance})) - -(defn- hydrate-annotated-instances [model k annotated-instances] - (when-let [instances-that-need-hydration (not-empty (map :instance (filter :needs-hydration? annotated-instances)))] - (batched-hydrate model k instances-that-need-hydration))) +(defn- index-instances-needing-hydration + "Given a list of instances, return a list of the same length and order where each element is a tuple `[i instance]`. + For instances that need hydration, `i` represents the serial number of instances needing hydration. For instances that + don't need hydration, `i` is nil." + [model k instances] + (let [idx (volatile! -1)] + (mapv (fn [instance] + [(when (needs-hydration? model k instance) + (vswap! idx inc)) + instance]) + instances))) (m/defmethod hydrate-with-strategy ::multimethod-batched [model _strategy k instances] - (let [annotated-instances (annotate-instances model k instances) - hydrated-instances (hydrate-annotated-instances model k annotated-instances)] - (merge-hydrated-instances annotated-instances hydrated-instances))) + (let [indexed (index-instances-needing-hydration model k instances) + instances-to-hydrate (not-empty (map second (filter first indexed))) + hydrated-instances (when instances-to-hydrate + (vec (batched-hydrate model k instances-to-hydrate)))] + (mapv (fn [[i unhydrated-instance]] + (if i + (nth hydrated-instances i) + unhydrated-instance)) + indexed))) ;;; Method-Based Simple Hydration (using impls of [[simple-hydrate]]) diff --git a/test/toucan2/tools/hydrate_test.clj b/test/toucan2/tools/hydrate_test.clj index e60c6ca..3b93183 100644 --- a/test/toucan2/tools/hydrate_test.clj +++ b/test/toucan2/tools/hydrate_test.clj @@ -352,29 +352,22 @@ (is (= {:f [:a 100]} (hydrate/hydrate {:f [:a 100]} :p))))) -(deftest ^:parallel merge-hydrated-instances-test - (doseq [[a b c d e f :as order] (take 1 (math.combo/permutations (range 1 7))) - :let [annotated-instances (map (fn [i] - (case (long i) - 1 {:needs-hydration? false, :instance {:x 1, :y 1}} - 2 {:needs-hydration? false, :instance {:x 2, :y 2}} - 3 {:needs-hydration? true, :instance {:x 3}} - 4 {:needs-hydration? true, :instance {:x 4}} - 5 {:needs-hydration? false, :instance {:x 5, :y 5}} - 6 {:needs-hydration? true, :instance {:x 6}})) - order) - hydrated-instances (->> order - (filter #{3 4 6}) - (map (fn [i] - {:x i, :y i})))]] - (testing (pr-str order) - (is (= [{:x a, :y a} - {:x b, :y b} - {:x c, :y c} - {:x d, :y d} - {:x e, :y e} - {:x f, :y f}] - (#'hydrate/merge-hydrated-instances annotated-instances hydrated-instances)))))) +(deftest ^:parallel index-instances-needing-hydration-test + (let [instances [{:x 1, :y 1} + {:x 2, :y 2} + {:x 3} + {:x 4} + {:x 5, :y 5} + {:x 6}]] + (with-redefs [hydrate/needs-hydration? (fn [_ _ instance] + (= (count instance) 1))] + (is (= [[nil {:x 1, :y 1}] + [nil {:x 2, :y 2}] + [0 {:x 3}] + [1 {:x 4}] + [nil {:x 5, :y 5}] + [2 {:x 6}]] + (#'hydrate/index-instances-needing-hydration nil nil instances)))))) (m/defmethod hydrate/batched-hydrate [:default ::is-bird?] [_model _k rows]