From f3bf93550940cdbf7da3c24a2c379a38dd2d72c4 Mon Sep 17 00:00:00 2001 From: Nathan Marz Date: Mon, 6 Jun 2016 20:29:45 -0400 Subject: [PATCH] new semantics for select* for increased performance, new select-any operation with maximal query performance, needs more tests --- scripts/benchmarks.clj | 24 ++- src/clj/com/rpl/specter.cljx | 40 +++-- src/clj/com/rpl/specter/impl.cljx | 188 ++++++++++++++++------ src/clj/com/rpl/specter/macros.clj | 23 ++- src/clj/com/rpl/specter/util_macros.clj | 15 ++ src/java/com/rpl/specter/MutableCell.java | 17 ++ 6 files changed, 236 insertions(+), 71 deletions(-) create mode 100644 src/clj/com/rpl/specter/util_macros.clj create mode 100644 src/java/com/rpl/specter/MutableCell.java diff --git a/scripts/benchmarks.clj b/scripts/benchmarks.clj index e93357e..13ad435 100644 --- a/scripts/benchmarks.clj +++ b/scripts/benchmarks.clj @@ -26,8 +26,10 @@ (defn average-time-ms [iters amt-per-iter afn] (avg - (for [i (range iters)] - (time-ms amt-per-iter afn)))) + ;; treat 1st run as warmup + (next + (for [i (range (inc iters))] + (time-ms amt-per-iter afn))))) (defn compare-benchmark [amt-per-iter afn-map] (let [results (transform [ALL LAST] @@ -42,7 +44,7 @@ ))) (defmacro run-benchmark [name amt-per-iter & exprs] - (let [afn-map (->> exprs (map (fn [e] [`(quote ~e) `(fn [] ~e)])) (into {}))] + (let [afn-map (->> exprs shuffle (map (fn [e] [`(quote ~e) `(fn [] ~e)])) (into {}))] `(do (println "Benchmark:" ~name) (compare-benchmark ~amt-per-iter ~afn-map) @@ -54,12 +56,11 @@ (let [data {:a {:b {:c 1}}} p (comp-paths :a :b :c)] (run-benchmark "get value in nested map" 10000000 + (select-any [:a :b :c] data) + (compiled-select-any p data) (get-in data [:a :b :c]) - (select [:a :b :c] data) - (compiled-select p data) - (-> data :a :b :c vector) - ) - ) + (-> data :a :b :c) + )) ;; because below 1.7 there is no update function @@ -87,6 +88,13 @@ (transform ALL inc data) )) +(let [data [1 2 3 4 5 6 7 8 9 10]] + (run-benchmark "filter a sequence" 1000000 + (doall (filter even? data)) + (select [ALL even?] data) + (select-first (filterer even?) data) + )) + (defn- update-pair [[k v]] [k (inc v)]) diff --git a/src/clj/com/rpl/specter.cljx b/src/clj/com/rpl/specter.cljx index 9d7bac2..ba1ce3e 100644 --- a/src/clj/com/rpl/specter.cljx +++ b/src/clj/com/rpl/specter.cljx @@ -9,6 +9,8 @@ defnav defpathedfn ]] + [com.rpl.specter.util-macros :refer + [doseqres]] ) (:use [com.rpl.specter.protocols :only [Navigator]] #+clj [com.rpl.specter.macros :only @@ -18,6 +20,7 @@ defcollector defnav defpathedfn]] + #+clj [com.rpl.specter.util-macros :only [doseqres]] ) (:require [com.rpl.specter.impl :as i] [clojure.set :as set]) @@ -76,10 +79,19 @@ (defn select-first* - "Returns first element found. Not any more efficient than select, just a convenience" + "Returns first element found." [path structure] (compiled-select-first (i/comp-paths* path) structure)) +(def ^{:doc "Version of select-any that takes in a path pre-compiled with comp-paths"} + compiled-select-any i/compiled-select-any*) + +(def NONE i/NONE) + +(defn select-any* + "Returns any element found." + [path structure] + (compiled-select-any (i/comp-paths* path) structure)) ;; Transformation functions @@ -143,7 +155,7 @@ STOP [] (select* [this structure next-fn] - nil ) + NONE ) (transform* [this structure next-fn] structure )) @@ -165,7 +177,9 @@ (defnav MAP-VALS [] (select* [this structure next-fn] - (doall (mapcat next-fn (vals structure)))) + (doseqres NONE [v (vals structure)] + (next-fn v) + )) (transform* [this structure next-fn] (i/map-vals-transform structure next-fn) )) @@ -212,11 +226,9 @@ continuous-subseqs [pred] (select* [this structure next-fn] - (doall - (mapcat - (fn [[s e]] (i/srange-select structure s e next-fn)) - (i/matching-ranges structure pred) - ))) + (doseqres NONE [[s e] (i/matching-ranges structure pred)] + (i/srange-select structure s e next-fn) + )) (transform* [this structure next-fn] (reduce (fn [structure [s e]] @@ -322,7 +334,8 @@ [k] (select* [this structure next-fn] (if (contains? structure k) - (next-fn (get structure k)))) + (next-fn (get structure k)) + NONE)) (transform* [this structure next-fn] (if (contains? structure k) (assoc structure k (next-fn (get structure k))) @@ -561,11 +574,10 @@ [& paths] (variable-pathed-nav [compiled-paths paths] (select* [this structure next-fn] - (->> compiled-paths - (mapcat #(compiled-select % structure)) - (mapcat next-fn) - doall - )) + (doseqres NONE [c compiled-paths] + (doseqres NONE [e (compiled-select c structure)] + (next-fn e) + ))) (transform* [this structure next-fn] (reduce (fn [structure path] diff --git a/src/clj/com/rpl/specter/impl.cljx b/src/clj/com/rpl/specter/impl.cljx index c1db2ed..4840b0b 100644 --- a/src/clj/com/rpl/specter/impl.cljx +++ b/src/clj/com/rpl/specter/impl.cljx @@ -3,9 +3,12 @@ [com.rpl.specter.prot-opt-invoke :refer [mk-optimized-invocation]] [com.rpl.specter.defhelpers :refer [define-ParamsNeededPath]] + [com.rpl.specter.util-macros :refer [doseqres]] ) (:use [com.rpl.specter.protocols :only - [select* transform* collect-val]]) + [select* transform* collect-val]] + [com.rpl.specter.util-macros :only [doseqres]] +) (:require [com.rpl.specter.protocols :as p] [clojure.walk :as walk] #+clj [clojure.core.reducers :as r] @@ -14,9 +17,17 @@ #+clj [riddley.walk :as riddley] ) #+clj - (:import [com.rpl.specter Util]) + (:import [com.rpl.specter Util MutableCell]) ) +(def NONE ::NONE) + +#+clj +(def kw-identical? identical?) + +#+cljs +(def kw-identical? keyword-identical?) + (defn spy [e] (println "SPY:") (println (pr-str e)) @@ -92,15 +103,18 @@ (dotimes [_ iters] (afn)))) -(deftype ExecutorFunctions [type select-executor transform-executor]) +(deftype ExecutorFunctions [type traverse-executor transform-executor]) (def RichPathExecutor (->ExecutorFunctions :richpath - (fn [params params-idx selector structure] + (fn [params params-idx selector result-fn structure] (selector params params-idx [] structure (fn [_ _ vals structure] - (if-not (identical? [] vals) [(conj vals structure)] [structure])))) + (result-fn + (if (identical? vals []) + structure + (conj vals structure)))))) (fn [params params-idx transformer transform-fn structure] (transformer params params-idx [] structure (fn [_ _ vals structure] @@ -112,8 +126,8 @@ (def LeanPathExecutor (->ExecutorFunctions :leanpath - (fn [params params-idx selector structure] - (selector structure (fn [structure] [structure]))) + (fn [params params-idx selector result-fn structure] + (selector structure result-fn)) (fn [params params-idx transformer transform-fn structure] (transformer structure transform-fn)) )) @@ -420,26 +434,44 @@ ;; cell implementation idea taken from prismatic schema library +#+cljs (defprotocol PMutableCell - #+clj (get_cell [cell]) (set_cell [cell x])) +#+cljs (deftype MutableCell [^:volatile-mutable q] PMutableCell - #+clj (get_cell [cell] q) (set_cell [this x] (set! q x))) +#+cljs (defn mutable-cell ([] (mutable-cell nil)) ([init] (MutableCell. init))) +#+cljs (defn set-cell! [cell val] (set_cell cell val)) +#+cljs (defn get-cell [cell] #+clj (get_cell cell) #+cljs (.-q cell) ) + +#+clj +(defn mutable-cell + ([] (mutable-cell nil)) + ([v] (MutableCell. v))) + +#+clj +(defn get-cell [^MutableCell c] + (.get c)) + +#+clj +(defn set-cell! [^MutableCell c v] + (.set c v)) + + (defn update-cell! [cell afn] (let [ret (afn (get-cell cell))] (set-cell! cell ret) @@ -538,16 +570,94 @@ ret )))) -(defn- conj-all! [cell elems] - (set-cell! cell (concat (get-cell cell) elems))) +(defn transform-fns-field [^CompiledPath path] + (.-transform-fns path)) -(defn compiled-select* - [^com.rpl.specter.impl.CompiledPath path structure] - (let [^com.rpl.specter.impl.TransformFunctions tfns (.-transform-fns path) - ^com.rpl.specter.impl.ExecutorFunctions ex (.-executors tfns)] - ((.-select-executor ex) (.-params path) (.-params-idx path) (.-selector tfns) structure) +(defn executors-field [^TransformFunctions tfns] + (.-executors tfns)) + +(defn traverse-executor-field [^ExecutorFunctions ex] + (.-traverse-executor ex)) + +(defn params-field [^CompiledPath path] + (.-params path)) + +(defn params-idx-field [^CompiledPath path] + (.-params-idx path)) + +(defn selector-field [^TransformFunctions tfns] + (.-selector tfns)) + +;; amazingly doing this as a macro shows a big effect in the +;; benchmark for getting a value out of a nested map +(defmacro compiled-traverse* + [path result-fn structure] + `(let [tfns# (transform-fns-field ~path) + ex# (executors-field tfns#)] + ((traverse-executor-field ex#) + (params-field ~path) + (params-idx-field ~path) + (selector-field tfns#) + ~result-fn + ~structure) )) +(defn compiled-select* [path structure] + (let [res (mutable-cell (transient [])) + result-fn (fn [structure] + (let [curr (get-cell res)] + (set-cell! res (conj! curr structure)) + ))] + (compiled-traverse* path result-fn structure) + (persistent! (get-cell res)) + )) + +(defn compiled-select-one* [path structure] + (let [res (mutable-cell ::NOTHING) + result-fn (fn [structure] + (let [curr (get-cell res)] + (if (kw-identical? curr ::NOTHING) + (set-cell! res structure) + (throw-illegal "More than one element found in structure: " structure) + )))] + (compiled-traverse* path result-fn structure) + (let [ret (get-cell res)] + (if (kw-identical? ret ::NOTHING) + nil + ret + )))) + +(defn compiled-select-one!* [path structure] + (let [res (mutable-cell ::NOTHING) + result-fn (fn [structure] + (let [curr (get-cell res)] + (if (kw-identical? curr ::NOTHING) + (set-cell! res structure) + (throw-illegal "More than one element found in structure: " structure) + )))] + (compiled-traverse* path result-fn structure) + (let [ret (get-cell res)] + (if (kw-identical? ::NOTHING ret) + (throw-illegal "Found no elements for select-one! on " structure)) + ret + ))) + +(defn compiled-select-first* [path structure] + (let [res (mutable-cell ::NOTHING) + result-fn (fn [structure] + (let [curr (get-cell res)] + (if (kw-identical? curr ::NOTHING) + (set-cell! res structure))))] + (compiled-traverse* path result-fn structure) + (let [ret (get-cell res)] + (if (kw-identical? ret ::NOTHING) + nil + ret + )))) + +(defn compiled-select-any* [path structure] + (compiled-traverse* path identity structure)) + (defn compiled-transform* [^com.rpl.specter.impl.CompiledPath path transform-fn structure] (let [^com.rpl.specter.impl.TransformFunctions tfns (.-transform-fns path) @@ -565,14 +675,17 @@ [compiled-path structure] (not (not-selected?* compiled-path structure))) -;; returns vector of all results (defn walk-select [pred continue-fn structure] - (let [ret (mutable-cell []) + (let [ret (mutable-cell NONE) walker (fn this [structure] (if (pred structure) - (conj-all! ret (continue-fn structure)) - (walk/walk this identity structure)) - )] + (let [r (continue-fn structure)] + (if-not (kw-identical? r NONE) + (set-cell! ret r)) + r + ) + (walk/walk this identity structure) + ))] (walker structure) (get-cell ret) )) @@ -584,13 +697,9 @@ (assoc structure akey (next-fn (get structure akey)) )) -#+clj (defn all-select [structure next-fn] - (into [] (r/mapcat next-fn structure))) - -#+cljs -(defn all-select [structure next-fn] - (into [] (mapcat #(next-fn %)) structure)) + (doseqres NONE [e structure] + (next-fn e))) #+cljs (defn queue? [coll] @@ -758,7 +867,8 @@ PosNavigator (select* [this structure next-fn] (if-not (fast-empty? structure) - (next-fn ((.-getter this) structure)))) + (next-fn ((.-getter this) structure)) + NONE)) (transform* [this structure next-fn] (if (fast-empty? structure) structure @@ -825,8 +935,7 @@ (let [apath (if (then-tester structure) late-then late-else)] - (doall (mapcat next-fn (compiled-select* apath structure))) - )) + (compiled-traverse* apath next-fn structure))) (defn if-transform [structure next-fn then-tester late-then late-else] (let [apath (if (then-tester structure) @@ -837,7 +946,8 @@ (defn filter-select [afn structure next-fn] (if (afn structure) - (next-fn structure))) + (next-fn structure) + NONE)) (defn filter-transform [afn structure next-fn] (if (afn structure) @@ -945,6 +1055,7 @@ (let [afn (aget ^objects params params-idx)] (if (afn structure) (next-fn params (inc params-idx) vals structure) + NONE ))) (fn [params params-idx vals structure next-fn] (let [afn (aget ^objects params params-idx)] @@ -1181,21 +1292,6 @@ )) -(defn compiled-select-one* [path structure] - (let [res (compiled-select* path structure)] - (when (> (count res) 1) - (throw-illegal "More than one element found for params: " path structure)) - (first res) - )) - -(defn compiled-select-one!* [path structure] - (let [res (compiled-select* path structure)] - (when (not= 1 (count res)) (throw-illegal "Expected exactly one element for params: " path structure)) - (first res) - )) - -(defn compiled-select-first* [path structure] - (first (compiled-select* path structure))) (defn compiled-setval* [path val structure] (compiled-transform* path (fn [_] val) structure)) diff --git a/src/clj/com/rpl/specter/macros.clj b/src/clj/com/rpl/specter/macros.clj index 4846233..1db55aa 100644 --- a/src/clj/com/rpl/specter/macros.clj +++ b/src/clj/com/rpl/specter/macros.clj @@ -1,5 +1,6 @@ (ns com.rpl.specter.macros - (:require [com.rpl.specter.impl :as i]) + (:require [com.rpl.specter.impl :as i] + [clojure.walk :as cljwalk]) ) (defn ^:no-doc gensyms [amt] @@ -456,7 +457,15 @@ (-> &env :locals keys set) ;cljs (-> &env keys set) ;clj ) - used-locals (vec (i/walk-select local-syms vector path)) + used-locals-cell (i/mutable-cell []) + _ (cljwalk/postwalk + (fn [e] + (if (local-syms e) + (i/update-cell! used-locals-cell #(conj % e)) + e + )) + path) + used-locals (i/get-cell used-locals-cell) ;; note: very important to use riddley's macroexpand-all here, so that ;; &env is preserved in any potential nested calls to select (like via @@ -549,13 +558,21 @@ `(i/compiled-select-one* (path ~apath) ~structure)) (defmacro select-first - "Returns first element found. Not any more efficient than select, just a convenience. + "Returns first element found. This macro will attempt to do inline factoring and caching of the path, falling back to compiling the path on every invocation it it's not possible to factor/cache the path." [apath structure] `(i/compiled-select-first* (path ~apath) ~structure)) +(defmacro select-any + "Returns first element found. + This macro will attempt to do inline factoring and caching of the path, falling + back to compiling the path on every invocation it it's not possible to + factor/cache the path." + [apath structure] + `(i/compiled-select-any* (path ~apath) ~structure)) + (defmacro transform "Navigates to each value specified by the path and replaces it by the result of running the transform-fn on it. diff --git a/src/clj/com/rpl/specter/util_macros.clj b/src/clj/com/rpl/specter/util_macros.clj new file mode 100644 index 0000000..3a015e5 --- /dev/null +++ b/src/clj/com/rpl/specter/util_macros.clj @@ -0,0 +1,15 @@ +(ns com.rpl.specter.util-macros) + +(defmacro doseqres [backup-res-kw [n aseq] & body] + (let [platform (if (contains? &env :locals) :cljs :clj) + idfn (if (= platform :clj) 'identical? 'keyword-identical?)] + `(reduce + (fn [curr# ~n] + (let [ret# (do ~@body)] + (if (~idfn ret# ~backup-res-kw) + curr# + ret# + ))) + ~backup-res-kw + ~aseq + ))) diff --git a/src/java/com/rpl/specter/MutableCell.java b/src/java/com/rpl/specter/MutableCell.java new file mode 100644 index 0000000..0d14a6b --- /dev/null +++ b/src/java/com/rpl/specter/MutableCell.java @@ -0,0 +1,17 @@ +package com.rpl.specter; + +public class MutableCell { + private Object o; + + public MutableCell(Object o) { + this.o = o; + } + + public Object get() { + return o; + } + + public void set(Object o) { + this.o = o; + } +}