diff --git a/src/erdos/algo/leapfrog/triejoin.clj b/src/erdos/algo/leapfrog/triejoin.clj index d91cb37..c736275 100644 --- a/src/erdos/algo/leapfrog/triejoin.clj +++ b/src/erdos/algo/leapfrog/triejoin.clj @@ -395,15 +395,20 @@ :when (= (count route) (count variables))] (zipmap variables route))) -#_ (defn singleton - "Returns an iterator over a single value." - [value] - (reify' {} - (->next [_] nil) - (get-key [_] value) - (->seek [t k] (when (= k value) t)) - (trie-open [_] nil))) + "Returns a TrieIterator carrying a single key. trie-open returns child + (default nil). Metadata defaults to that of child. When the singleton is + used as a leaf, annotation-value is exposed via the Annotated protocol." + ([value] (singleton value nil nil)) + ([value child] (singleton value child nil)) + ([value child annotation-value] + (reify' (meta child) + (->next [_] nil) + (get-key [_] value) + (->seek [t k] (when (compare<= k value) t)) + (trie-open [_] child) + (annotation [_] annotation-value) + (toString [_] (str ""))))) (defn seek-to @@ -415,7 +420,7 @@ iterator))) -(defn- reorder-1 [ctx new-var-order old-var-order old-iterator] +(defn- reorder-1 [ctx new-var-order old-var-order old-iterator annotation-value] (assert (map? ctx)) (assert (= (set new-var-order) (into (set old-var-order) (keys ctx)))) @@ -427,42 +432,51 @@ old-iterator (= (first new-var-order) (first old-var-order)) - ((fn ctor [old-iterator] - (when old-iterator - (reify' (meta old-iterator) - (get-key [_] (get-key old-iterator)) - (->next [_] (ctor (->next old-iterator))) - (->seek [_ k] (ctor (->seek old-iterator k))) - (trie-open [_] - (reorder-1 ctx (next new-var-order) (next old-var-order) (trie-open old-iterator)))))) - old-iterator) + (let [leaf? (empty? (next new-var-order))] + ((fn ctor [old-iterator] + (when old-iterator + (reify' (meta old-iterator) + (get-key [_] (get-key old-iterator)) + (->next [_] (ctor (->next old-iterator))) + (->seek [_ k] (ctor (->seek old-iterator k))) + (trie-open [_] + (let [next-old (next old-var-order) + next-ann (if (empty? next-old) (annotation old-iterator) annotation-value)] + (reorder-1 ctx (next new-var-order) next-old (trie-open old-iterator) next-ann))) + (annotation [_] (when leaf? (annotation old-iterator)))))) + old-iterator)) (contains? ctx (first new-var-order)) - (reify' {} - (get-key [_] (get ctx (first new-var-order))) - (->next [_] nil) - (->seek [_ k] (when (= k (get ctx (first new-var-order))) k)) - (trie-open [_] - ;; TODO: inline this check to the top of reorder-1 - (reorder-1 (dissoc ctx (first new-var-order)) - (next new-var-order) - old-var-order - old-iterator))) + (let [leaf? (empty? (next new-var-order))] + (reify' {} + (get-key [_] (get ctx (first new-var-order))) + (->next [_] nil) + (->seek [_ k] (when (= k (get ctx (first new-var-order))) k)) + (trie-open [_] + ;; TODO: inline this check to the top of reorder-1 + (reorder-1 (dissoc ctx (first new-var-order)) + (next new-var-order) + old-var-order + old-iterator + annotation-value)) + (annotation [_] (when leaf? annotation-value)))) :else - (->> (iterate ->next old-iterator) - (take-while some?) - (keep #(reorder-1 (assoc ctx (first old-var-order) (get-key %)) - new-var-order - (next old-var-order) - (trie-open %))) - (map #(hash-map :variables new-var-order :trie-iterator %)) - (union-iterator)))) + (let [advance-leaf? (empty? (next old-var-order))] + (-> (->> (iterate ->next old-iterator) + (take-while some?) + (keep #(reorder-1 (assoc ctx (first old-var-order) (get-key %)) + new-var-order + (next old-var-order) + (trie-open %) + (if advance-leaf? (annotation %) annotation-value))) + (map #(hash-map :variables new-var-order :trie-iterator %))) + (union-iterator (fn [a _] a)))))) (defn reorder [new-var-order iter] {:variables new-var-order - :trie-iterator (reorder-1 {} new-var-order (:variables iter) (:trie-iterator iter))}) + :trie-iterator (reorder-1 {} new-var-order (:variables iter) (:trie-iterator iter) nil)}) (defn eager-iterator [{:keys [trie-iterator variables]}] @@ -476,6 +490,7 @@ (get-key [_] (get-key iter)) (->seek [_ k] (ctor (->seek iter k) depth)) (trie-open [_] child) + (annotation [_] (when (zero? depth) (annotation iter))) (toString [_] "")))))) trie-iterator (dec (count variables)))) @@ -526,3 +541,112 @@ {:variables (:variables rel) :trie-iterator (filtering-iterator rel filter-variables predicate)}) + +(defn- insert-after-last [variables subseq-variables target-key] + (if (empty? subseq-variables) + (vec (cons target-key variables)) + (let [last-sub (last subseq-variables) + [head tail] (split-with #(not= last-sub %) variables)] + (vec (concat head [last-sub target-key] (rest tail)))))) + + +(defn mapping-iterator [{:keys [variables trie-iterator]} subseq-variables function target-key] + (assert (subseq? subseq-variables variables)) + (assert (not-any? #{target-key} variables)) + (if (empty? subseq-variables) + (singleton (function) trie-iterator) + ((fn ctor [iter vars-rem subseq-rem bindings] + (cond + (nil? iter) nil + (empty? subseq-rem) iter + + :else + (reify' (meta iter) + (->next [_] (ctor (->next iter) vars-rem subseq-rem bindings)) + (get-key [_] (get-key iter)) + (->seek [_ k] (ctor (->seek iter k) vars-rem subseq-rem bindings)) + (trie-open [_] + (let [bind-here? (= (first vars-rem) (first subseq-rem)) + bindings' (if bind-here? (conj bindings (get-key iter)) bindings) + subseq' (if bind-here? (next subseq-rem) subseq-rem)] + (if (and bind-here? (empty? subseq')) + (let [child (trie-open iter)] + (singleton (apply function bindings') + child + (when (nil? child) (annotation iter)))) + (ctor (trie-open iter) (next vars-rem) subseq' bindings')))) + (toString [_] "")))) + trie-iterator variables subseq-variables []))) + + +(defn mapping + "Returns a relation map {:variables … :trie-iterator …} that wraps rel, + inserting a new level into the variable ordering immediately after the + last element of subseq-variables (or at the beginning when + subseq-variables is empty). The new level's key is produced by applying + function to the bindings of subseq-variables along the path. + subseq-variables must be a subseq of rel's :variables, target-key must + not already appear in :variables, and function's arity must match + subseq-variables." + [rel subseq-variables function target-key] + {:variables (insert-after-last (:variables rel) subseq-variables target-key) + :trie-iterator (mapping-iterator rel subseq-variables function target-key)}) + + +(defn- collect-annotation [vars-rem iter combine-fn] + (when iter + (if (empty? (next vars-rem)) + (annotation iter) + (some->> (trie-open iter) + (iterate ->next) + (take-while some?) + (keep #(collect-annotation (next vars-rem) % combine-fn)) + not-empty + (reduce combine-fn))))) + + +(defn omit-iterator + "Returns a TrieIterator over subseq-variables that projects rel onto + subseq-variables, omitting layers not present in subseq-variables. + Annotations of original leaves whose projections coincide are reduced + via combine-fn." + [iter-map subseq-variables combine-fn] + (assert (subseq? subseq-variables (:variables iter-map))) + ((fn ctor [iter vars-rem subseq-rem] + (cond + (nil? iter) nil + + (= (first vars-rem) (first subseq-rem)) + (let [new-leaf? (empty? (next subseq-rem))] + (reify' (meta iter) + (get-key [_] (get-key iter)) + (->next [_] (ctor (->next iter) vars-rem subseq-rem)) + (->seek [_ k] (ctor (->seek iter k) vars-rem subseq-rem)) + (trie-open [_] + (when-not new-leaf? + (ctor (trie-open iter) (next vars-rem) (next subseq-rem)))) + (annotation [_] + (when new-leaf? (collect-annotation vars-rem iter combine-fn))) + (toString [_] ""))) + + :else + (union-iterator + (for [it (take-while some? (iterate ->next iter)) + :let [child (ctor (trie-open it) (next vars-rem) subseq-rem)] + :when child] + {:variables subseq-rem, :trie-iterator child}) + combine-fn))) + (eager-iterator iter-map) (:variables iter-map) subseq-variables)) + + +(defn omit + "Returns a relation map {:variables subseq-variables :trie-iterator …} + that projects rel onto subseq-variables, dropping the variables of rel + not present in subseq-variables. subseq-variables must be a subsequence + of rel's :variables. Annotations of original tuples whose projections + coincide are reduced via combine-fn. Phantom paths in the source are + pruned via `eager` so they're not promoted to complete tuples." + [rel subseq-variables combine-fn] + {:variables subseq-variables + :trie-iterator (omit-iterator rel subseq-variables combine-fn)}) + diff --git a/test/erdos/algo/leapfrog/triejoin_test.clj b/test/erdos/algo/leapfrog/triejoin_test.clj index 096f958..2687545 100644 --- a/test/erdos/algo/leapfrog/triejoin_test.clj +++ b/test/erdos/algo/leapfrog/triejoin_test.clj @@ -347,6 +347,65 @@ (trie-routes (filtering-iterator rel [:a] even?))))))) +(deftest mapping-tests + (testing "empty subseq prepends a constant level" + (let [rel (test-trie-iter [:a :b] [[1 10] [2 20]]) + out (mapping rel [] (constantly 99) :c)] + (is (= [:c :a :b] (:variables out))) + (is (= [[99 1 10] [99 2 20]] + (trie-routes (:trie-iterator out)))))) + + (testing "single-arg mapping inserts level immediately after subseq variable" + (let [rel (test-trie-iter [:n] [[1] [2] [3]]) + out (mapping rel [:n] #(* % %) :sq)] + (is (= [:n :sq] (:variables out))) + (is (= [[1 1] [2 4] [3 9]] + (trie-routes (:trie-iterator out)))))) + + (testing "multi-arg mapping passes bindings spread as args" + (let [rel (test-trie-iter [:a :b] [[1 10] [2 20] [3 30]]) + out (mapping rel [:a :b] + :sum)] + (is (= [:a :b :sum] (:variables out))) + (is (= [[1 10 11] [2 20 22] [3 30 33]] + (trie-routes (:trie-iterator out)))))) + + (testing "non-prefix subseq inserts after the last subseq variable" + (let [rel (test-trie-iter [:a :b :c] [[1 10 100] [2 20 200]]) + out (mapping rel [:a :c] + :sum)] + (is (= [:a :b :c :sum] (:variables out))) + (is (= [[1 10 100 101] [2 20 200 202]] + (trie-routes (:trie-iterator out)))))) + + (testing "insertion position depends on last subseq variable" + (let [rel (test-trie-iter [:a :b :c] [[1 10 100]])] + (is (= [:m :a :b :c] (:variables (mapping rel [] (constantly 0) :m)))) + (is (= [:a :m :b :c] (:variables (mapping rel [:a] (constantly 0) :m)))) + (is (= [:a :b :m :c] (:variables (mapping rel [:a :b] (constantly 0) :m)))) + (is (= [:a :b :c :m] (:variables (mapping rel [:a :b :c] (constantly 0) :m)))))) + + (testing "->seek on inserted singleton level honors compare<= semantics" + (let [rel (test-trie-iter [:n] [[5]]) + out (mapping rel [:n] #(* 2 %) :double) + singleton (-> out :trie-iterator trie-open)] + (is (= 10 (get-key singleton))) + (is (= 10 (-> singleton (->seek 5) get-key))) + (is (= 10 (-> singleton (->seek 10) get-key))) + (is (nil? (->seek singleton 11))) + (is (nil? (->next singleton))))) + + (testing "mapping composes with relations to populate the new variable" + (let [rel (test-trie-iter [:a :b] [[1 10] [2 20]]) + out (mapping rel [:a :b] * :prod)] + (is (= [{:a 1 :b 10 :prod 10} + {:a 2 :b 20 :prod 40}] + (relations out))))) + + (testing "mapping-iterator returns the trie-iterator directly" + (let [rel (test-trie-iter [:n] [[1] [2] [3]])] + (is (= [[1 1] [2 4] [3 9]] + (trie-routes (mapping-iterator rel [:n] #(* % %) :sq))))))) + + (deftest annotated-trie-iterator-tests (testing "Map input stores annotation at the leaf, accessible via Annotated" (let [iter (trie-iterator {[1 2 3] :foo})] @@ -403,3 +462,141 @@ (is (= #{[1 2 9] [1 3 9]} (set (trie-routes (:trie-iterator j)))))))) + +(deftest annotated-mapping-tests + (testing "mapping inserted at the new leaf carries the underlying leaf's annotation" + (let [rel {:variables [:a :b] :trie-iterator (trie-iterator {[1 10] :alpha + [2 20] :beta})} + out (mapping rel [:a :b] + :sum) + leaf (-> out :trie-iterator trie-open trie-open)] + (is (= [:a :b :sum] (:variables out))) + (is (= 11 (get-key leaf))) + (is (= :alpha (annotation leaf))))) + + (testing "mapping inserted before the original leaf preserves leaf annotations" + (let [rel {:variables [:a :b] :trie-iterator (trie-iterator {[1 10] :alpha + [2 20] :beta})} + out (mapping rel [:a] #(* 2 %) :double) + leaf (-> out :trie-iterator trie-open trie-open)] + (is (= [:a :double :b] (:variables out))) + (is (= 10 (get-key leaf))) + (is (= :alpha (annotation leaf))))) + + (testing "mapping with empty subseq preserves leaf annotations" + (let [rel {:variables [:a :b] :trie-iterator (trie-iterator {[1 10] :alpha})} + out (mapping rel [] (constantly 99) :c) + leaf (-> out :trie-iterator trie-open trie-open)] + (is (= [:c :a :b] (:variables out))) + (is (= :alpha (annotation leaf))))) + + (testing "mapping over a single-variable rel becomes a leaf-level singleton with annotation" + (let [rel {:variables [:n] :trie-iterator (trie-iterator {[5] :marker})} + out (mapping rel [:n] #(* % %) :sq) + leaf (-> out :trie-iterator trie-open)] + (is (= [:n :sq] (:variables out))) + (is (= 25 (get-key leaf))) + (is (= :marker (annotation leaf))))) + + (testing "mapping inserted in the middle preserves leaf annotation" + (let [rel {:variables [:a :b :c] :trie-iterator (trie-iterator {[1 2 3] :gamma})} + out (mapping rel [:a :b] + :m) + leaf (-> out :trie-iterator trie-open trie-open trie-open)] + (is (= [:a :b :m :c] (:variables out))) + (is (= 3 (get-key leaf))) + (is (= :gamma (annotation leaf)))))) + + +(deftest annotated-reorder-tests + (testing "no-op reorder preserves annotation" + (let [rel {:variables [:a :b] :trie-iterator (trie-iterator {[1 2] :alpha})} + out (reorder [:a :b] rel) + leaf (-> out :trie-iterator trie-open)] + (is (= [:a :b] (:variables out))) + (is (= :alpha (annotation leaf))))) + + (testing "swapping two variables preserves the annotation of each tuple" + (let [rel {:variables [:a :b] :trie-iterator (trie-iterator {[1 10] :x + [1 20] :y + [2 10] :z})} + out (reorder [:b :a] rel)] + (is (= [:b :a] (:variables out))) + (is (= {[10 1] :x, [10 2] :z, [20 1] :y} + (into {} + (for [b-iter (iterate ->next (:trie-iterator out)) + :while b-iter + :let [a-iter (trie-open b-iter)] + leaf (iterate ->next a-iter) + :while leaf] + [[(get-key b-iter) (get-key leaf)] (annotation leaf)])))))) + + (testing "reorder swapping last two of three variables preserves leaf annotations" + (let [rel {:variables [:a :b :c] :trie-iterator (trie-iterator {[1 2 3] :p + [1 4 5] :q})} + out (reorder [:a :c :b] rel)] + (is (= [:a :c :b] (:variables out))) + (is (= {[1 3 2] :p, [1 5 4] :q} + (into {} + (for [a-iter (iterate ->next (:trie-iterator out)) + :while a-iter + :let [c-iter (trie-open a-iter)] + c-iter (iterate ->next c-iter) + :while c-iter + :let [b-iter (trie-open c-iter)] + leaf (iterate ->next b-iter) + :while leaf] + [[(get-key a-iter) (get-key c-iter) (get-key leaf)] + (annotation leaf)]))))))) + + +(defn- annotated-tuples + "Walks rel's trie and returns a {path → leaf-annotation} map." + [{:keys [trie-iterator]}] + (letfn [(walk [iter prefix] + (mapcat (fn [it] + (let [path (conj prefix (get-key it))] + (if-let [child (trie-open it)] + (walk child path) + [[path (annotation it)]]))) + (take-while some? (iterate ->next iter))))] + (into {} (walk trie-iterator [])))) + +(deftest omit-tests + (testing "identity omit preserves all tuples" + (let [out (omit (test-trie-iter [:a :b] [[1 10] [2 20]]) [:a :b] +)] + (is (= [:a :b] (:variables out))) + (is (= {[1 10] nil, [2 20] nil} (annotated-tuples out))))) + + (testing "omitting the trailing variable combines merged annotations" + (let [out (omit (test-trie-iter [:a :b] {[1 10] 1, [1 20] 2, [2 30] 4}) [:a] +)] + (is (= [:a] (:variables out))) + (is (= {[1] 3, [2] 4} (annotated-tuples out))))) + + (testing "omitting the leading variable combines across collapsed paths" + (let [out (omit (test-trie-iter [:a :b] {[1 10] 5, [2 10] 7, [3 20] 9}) [:b] +)] + (is (= [:b] (:variables out))) + (is (= {[10] 12, [20] 9} (annotated-tuples out))))) + + (testing "omitting a middle variable combines across the omitted layer" + (let [out (omit (test-trie-iter [:a :b :c] {[1 5 10] 100, [1 6 10] 200, [1 6 20] 50}) [:a :c] +)] + (is (= [:a :c] (:variables out))) + (is (= {[1 10] 300, [1 20] 50} (annotated-tuples out))))) + + (testing "combine-fn is not invoked when projections are unique" + (let [calls (atom 0) + cb (fn [a b] (swap! calls inc) (+ a b))] + (annotated-tuples (omit (test-trie-iter [:a :b] {[1 10] 1, [2 20] 2}) [:a] cb)) + (is (= 0 @calls)))) + + (testing "phantom paths from antijoin are not promoted to complete tuples" + (let [base (test-trie-iter [:a :b :c] [[1 5 10] [2 5 10] [2 6 20] [3 7 30]]) + rel (trie-antijoin base (test-trie-iter [:c] [[10]]))] + (is (= [[1 5] [2 5] [2 6 20] [3 7 30]] + (trie-routes (:trie-iterator rel))) + "sanity: rel has phantom paths [1 5] and [2 5]") + (is (= [{:b 6 :c 20} {:b 7 :c 30}] + (relations (omit rel [:b :c] +)))) + (is (= [{:a 2 :c 20} {:a 3 :c 30}] + (relations (omit rel [:a :c] +)))) + (is (= [{:a 2 :b 6} {:a 3 :b 7}] + (relations (omit rel [:a :b] +))))))) +