From 219c996c5e3a1cf2072504e03449b22823faae3b Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Wed, 6 May 2026 20:13:52 +0200 Subject: [PATCH 1/3] feat: filtering iterator --- src/erdos/algo/leapfrog/triejoin.clj | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/erdos/algo/leapfrog/triejoin.clj b/src/erdos/algo/leapfrog/triejoin.clj index c80ba8b..adeeb15 100644 --- a/src/erdos/algo/leapfrog/triejoin.clj +++ b/src/erdos/algo/leapfrog/triejoin.clj @@ -421,3 +421,40 @@ {:variables (:variables rel) :trie-iterator (eager-iterator rel)}) + +(defn filtering-iterator "Wraps the :trie-iterator of a relation map: at the level of the last variable in filter-variables, keys for which predicate (applied to the keys bound to filter-variables along the path, in order) is falsy are skipped. Asserts (subseq? filter-variables variables). Returns nil for a nil iterator and the original iterator, unwrapped, when filter-variables is empty." [{:keys [variables trie-iterator]} filter-variables predicate] + (assert (subseq? filter-variables variables)) + ((fn ctor [iter vars-rem filter-rem bindings] + (cond + (nil? iter) nil + (empty? filter-rem) iter + + (and (= (first vars-rem) (first filter-rem)) + (nil? (next filter-rem)) + (not (apply predicate (conj bindings (get-key iter))))) + (recur (->next iter) vars-rem filter-rem bindings) + + :else + (reify' (meta iter) + (->next [_] (ctor (->next iter) vars-rem filter-rem bindings)) + (get-key [_] (get-key iter)) + (->seek [_ k] (ctor (->seek iter k) vars-rem filter-rem bindings)) + (trie-open [_] + (let [filter-here? (= (first vars-rem) (first filter-rem))] + (ctor (trie-open iter) (next vars-rem) + (if filter-here? (next filter-rem) filter-rem) + (if filter-here? (conj bindings (get-key iter)) bindings)))) + (toString [_] "")))) + trie-iterator variables filter-variables [])) + + +(defn filtering + "Returns a relation map {:variables … :trie-iterator …} that wraps rel, + restricting the trie so a subtree under a path is only emitted when + predicate returns truthy for the keys bound to filter-variables along that + path; keys above the last filter variable are kept even if left childless. + filter-variables must be a subseq of rel's :variables; predicate gets one key per filter variable, in order." 
+ [rel filter-variables predicate] + {:variables (:variables rel) + :trie-iterator (filtering-iterator rel filter-variables predicate)}) + From 5a41140812b1dd0460ec5adf1b075d7d6e63a9a4 Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Wed, 6 May 2026 20:14:36 +0200 Subject: [PATCH 2/3] Add naive filter test for filtering matches --- test/erdos/algo/leapfrog/triejoin_gen_test.clj | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/erdos/algo/leapfrog/triejoin_gen_test.clj b/test/erdos/algo/leapfrog/triejoin_gen_test.clj index 08a1fa5..e3533dc 100644 --- a/test/erdos/algo/leapfrog/triejoin_gen_test.clj +++ b/test/erdos/algo/leapfrog/triejoin_gen_test.clj @@ -139,3 +139,15 @@ perm (gen/shuffle variables)] (= (tree->maps rel) (tree->maps (reorder perm rel))))) + + +(defspec filtering-matches-naive-filter + 1000 + (for-props [variables gen-variables + filter-vars (gen-subsequence-of variables) + rel (gen-n-tuple-trie variables)] + (= (->> (relations rel) + (filter (fn [m] (even? (reduce + (map m filter-vars)))))) + (->> (filtering rel filter-vars (fn [& xs] (even? (reduce + xs)))) + (relations))))) + From 771aa0b4155d57e44df91123358d02702405b64d Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Wed, 6 May 2026 20:15:19 +0200 Subject: [PATCH 3/3] Add filtering tests for triejoin functionality --- test/erdos/algo/leapfrog/triejoin_test.clj | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/test/erdos/algo/leapfrog/triejoin_test.clj b/test/erdos/algo/leapfrog/triejoin_test.clj index 73f5e73..b326e13 100644 --- a/test/erdos/algo/leapfrog/triejoin_test.clj +++ b/test/erdos/algo/leapfrog/triejoin_test.clj @@ -290,3 +290,41 @@ (testing "empty relation yields empty sequence" (is (empty? 
+ (relations {:variables [:a :b] :trie-iterator nil}))))) + + +(deftest filtering-tests + (testing "predicate always true is a no-op over the relation" + (is (= (trie-routes (:trie-iterator exponents)) + (trie-routes (:trie-iterator (filtering exponents [:n1] (constantly true)))))) + (is (= (trie-routes (:trie-iterator exponents)) + (trie-routes (:trie-iterator (filtering exponents [:n1 :n2] (constantly true))))))) + + (testing "predicate always false prunes all child subtrees from filter-var depth" + (let [rel (test-trie-iter [:a :b] [[1 10] [2 20] [3 30]]) + out (filtering rel [:a] (constantly false))] + (is (empty? (trie-routes (:trie-iterator out)))))) + + (testing "filtering on prefix variable: keys past predicate-false branch have no children" + (let [out (filtering exponents [:n1] odd?)] + (is (= [[1 1 1] [3 9 27]] + (trie-routes (:trie-iterator out)))))) + + (testing "filtering on multiple variables uses bindings in filter-var order" + (let [out (filtering exponents [:n1 :n2] <)] + (is (= [[1] [2 4 8] [3 9 27] [4 16 64]] + (trie-routes (:trie-iterator out)))))) + + (testing "filter variables can be a non-prefix subseq of node variables" + (let [out (filtering exponents [:n2] even?)] + (is (= [[1] [2 4 8] [3] [4 16 64]] + (trie-routes (:trie-iterator out)))))) + + (testing "filtering composes with eager to drop phantom paths" + (is (= [{:n1 2 :n2 4 :n3 8} {:n1 3 :n2 9 :n3 27} {:n1 4 :n2 16 :n3 64}] + (relations (eager (filtering exponents [:n1 :n2] <)))))) + + (testing "filtering-iterator returns the trie-iterator directly" + (let [rel (test-trie-iter [:a :b] [[1 10] [2 20] [3 30] [4 40]])] + (is (= [[2 20] [4 40]] + (trie-routes (filtering-iterator rel [:a] even?))))))) +