new semantics for select* for increased performance, new select-any operation with maximal query performance, needs more tests

This commit is contained in:
Nathan Marz 2016-06-06 20:29:45 -04:00
parent 4379a3dc9c
commit f3bf935509
6 changed files with 236 additions and 71 deletions

View file

@ -26,8 +26,10 @@
(defn average-time-ms [iters amt-per-iter afn]
(avg
(for [i (range iters)]
(time-ms amt-per-iter afn))))
;; treat 1st run as warmup
(next
(for [i (range (inc iters))]
(time-ms amt-per-iter afn)))))
(defn compare-benchmark [amt-per-iter afn-map]
(let [results (transform [ALL LAST]
@ -42,7 +44,7 @@
)))
(defmacro run-benchmark [name amt-per-iter & exprs]
(let [afn-map (->> exprs (map (fn [e] [`(quote ~e) `(fn [] ~e)])) (into {}))]
(let [afn-map (->> exprs shuffle (map (fn [e] [`(quote ~e) `(fn [] ~e)])) (into {}))]
`(do
(println "Benchmark:" ~name)
(compare-benchmark ~amt-per-iter ~afn-map)
@ -54,12 +56,11 @@
(let [data {:a {:b {:c 1}}}
p (comp-paths :a :b :c)]
(run-benchmark "get value in nested map" 10000000
(select-any [:a :b :c] data)
(compiled-select-any p data)
(get-in data [:a :b :c])
(select [:a :b :c] data)
(compiled-select p data)
(-> data :a :b :c vector)
)
)
(-> data :a :b :c)
))
;; because below 1.7 there is no update function
@ -87,6 +88,13 @@
(transform ALL inc data)
))
(let [data [1 2 3 4 5 6 7 8 9 10]]
(run-benchmark "filter a sequence" 1000000
(doall (filter even? data))
(select [ALL even?] data)
(select-first (filterer even?) data)
))
(defn- update-pair [[k v]]
[k (inc v)])

View file

@ -9,6 +9,8 @@
defnav
defpathedfn
]]
[com.rpl.specter.util-macros :refer
[doseqres]]
)
(:use [com.rpl.specter.protocols :only [Navigator]]
#+clj [com.rpl.specter.macros :only
@ -18,6 +20,7 @@
defcollector
defnav
defpathedfn]]
#+clj [com.rpl.specter.util-macros :only [doseqres]]
)
(:require [com.rpl.specter.impl :as i]
[clojure.set :as set])
@ -76,10 +79,19 @@
(defn select-first*
"Returns first element found. Not any more efficient than select, just a convenience"
"Returns first element found."
[path structure]
(compiled-select-first (i/comp-paths* path) structure))
(def ^{:doc "Version of select-any that takes in a path pre-compiled with comp-paths"}
compiled-select-any i/compiled-select-any*)
(def NONE i/NONE)
(defn select-any*
"Returns any element found."
[path structure]
(compiled-select-any (i/comp-paths* path) structure))
;; Transformation functions
@ -143,7 +155,7 @@
STOP
[]
(select* [this structure next-fn]
nil )
NONE )
(transform* [this structure next-fn]
structure
))
@ -165,7 +177,9 @@
(defnav MAP-VALS []
(select* [this structure next-fn]
(doall (mapcat next-fn (vals structure))))
(doseqres NONE [v (vals structure)]
(next-fn v)
))
(transform* [this structure next-fn]
(i/map-vals-transform structure next-fn)
))
@ -212,11 +226,9 @@
continuous-subseqs
[pred]
(select* [this structure next-fn]
(doall
(mapcat
(fn [[s e]] (i/srange-select structure s e next-fn))
(i/matching-ranges structure pred)
)))
(doseqres NONE [[s e] (i/matching-ranges structure pred)]
(i/srange-select structure s e next-fn)
))
(transform* [this structure next-fn]
(reduce
(fn [structure [s e]]
@ -322,7 +334,8 @@
[k]
(select* [this structure next-fn]
(if (contains? structure k)
(next-fn (get structure k))))
(next-fn (get structure k))
NONE))
(transform* [this structure next-fn]
(if (contains? structure k)
(assoc structure k (next-fn (get structure k)))
@ -561,11 +574,10 @@
[& paths]
(variable-pathed-nav [compiled-paths paths]
(select* [this structure next-fn]
(->> compiled-paths
(mapcat #(compiled-select % structure))
(mapcat next-fn)
doall
))
(doseqres NONE [c compiled-paths]
(doseqres NONE [e (compiled-select c structure)]
(next-fn e)
)))
(transform* [this structure next-fn]
(reduce
(fn [structure path]

View file

@ -3,9 +3,12 @@
[com.rpl.specter.prot-opt-invoke
:refer [mk-optimized-invocation]]
[com.rpl.specter.defhelpers :refer [define-ParamsNeededPath]]
[com.rpl.specter.util-macros :refer [doseqres]]
)
(:use [com.rpl.specter.protocols :only
[select* transform* collect-val]])
[select* transform* collect-val]]
[com.rpl.specter.util-macros :only [doseqres]]
)
(:require [com.rpl.specter.protocols :as p]
[clojure.walk :as walk]
#+clj [clojure.core.reducers :as r]
@ -14,9 +17,17 @@
#+clj [riddley.walk :as riddley]
)
#+clj
(:import [com.rpl.specter Util])
(:import [com.rpl.specter Util MutableCell])
)
(def NONE ::NONE)
#+clj
(def kw-identical? identical?)
#+cljs
(def kw-identical? keyword-identical?)
(defn spy [e]
(println "SPY:")
(println (pr-str e))
@ -92,15 +103,18 @@
(dotimes [_ iters]
(afn))))
(deftype ExecutorFunctions [type select-executor transform-executor])
(deftype ExecutorFunctions [type traverse-executor transform-executor])
(def RichPathExecutor
(->ExecutorFunctions
:richpath
(fn [params params-idx selector structure]
(fn [params params-idx selector result-fn structure]
(selector params params-idx [] structure
(fn [_ _ vals structure]
(if-not (identical? [] vals) [(conj vals structure)] [structure]))))
(result-fn
(if (identical? vals [])
structure
(conj vals structure))))))
(fn [params params-idx transformer transform-fn structure]
(transformer params params-idx [] structure
(fn [_ _ vals structure]
@ -112,8 +126,8 @@
(def LeanPathExecutor
(->ExecutorFunctions
:leanpath
(fn [params params-idx selector structure]
(selector structure (fn [structure] [structure])))
(fn [params params-idx selector result-fn structure]
(selector structure result-fn))
(fn [params params-idx transformer transform-fn structure]
(transformer structure transform-fn))
))
@ -420,26 +434,44 @@
;; cell implementation idea taken from prismatic schema library
#+cljs
(defprotocol PMutableCell
#+clj (get_cell [cell])
(set_cell [cell x]))
#+cljs
(deftype MutableCell [^:volatile-mutable q]
PMutableCell
#+clj (get_cell [cell] q)
(set_cell [this x] (set! q x)))
#+cljs
(defn mutable-cell
([] (mutable-cell nil))
([init] (MutableCell. init)))
#+cljs
(defn set-cell! [cell val]
(set_cell cell val))
#+cljs
(defn get-cell [cell]
#+clj (get_cell cell) #+cljs (.-q cell)
)
#+clj
(defn mutable-cell
([] (mutable-cell nil))
([v] (MutableCell. v)))
#+clj
(defn get-cell [^MutableCell c]
(.get c))
#+clj
(defn set-cell! [^MutableCell c v]
(.set c v))
(defn update-cell! [cell afn]
(let [ret (afn (get-cell cell))]
(set-cell! cell ret)
@ -538,16 +570,94 @@
ret
))))
(defn- conj-all! [cell elems]
(set-cell! cell (concat (get-cell cell) elems)))
(defn transform-fns-field [^CompiledPath path]
(.-transform-fns path))
(defn compiled-select*
[^com.rpl.specter.impl.CompiledPath path structure]
(let [^com.rpl.specter.impl.TransformFunctions tfns (.-transform-fns path)
^com.rpl.specter.impl.ExecutorFunctions ex (.-executors tfns)]
((.-select-executor ex) (.-params path) (.-params-idx path) (.-selector tfns) structure)
(defn executors-field [^TransformFunctions tfns]
(.-executors tfns))
(defn traverse-executor-field [^ExecutorFunctions ex]
(.-traverse-executor ex))
(defn params-field [^CompiledPath path]
(.-params path))
(defn params-idx-field [^CompiledPath path]
(.-params-idx path))
(defn selector-field [^TransformFunctions tfns]
(.-selector tfns))
;; amazingly doing this as a macro shows a big effect in the
;; benchmark for getting a value out of a nested map
(defmacro compiled-traverse*
[path result-fn structure]
`(let [tfns# (transform-fns-field ~path)
ex# (executors-field tfns#)]
((traverse-executor-field ex#)
(params-field ~path)
(params-idx-field ~path)
(selector-field tfns#)
~result-fn
~structure)
))
(defn compiled-select* [path structure]
(let [res (mutable-cell (transient []))
result-fn (fn [structure]
(let [curr (get-cell res)]
(set-cell! res (conj! curr structure))
))]
(compiled-traverse* path result-fn structure)
(persistent! (get-cell res))
))
(defn compiled-select-one* [path structure]
(let [res (mutable-cell ::NOTHING)
result-fn (fn [structure]
(let [curr (get-cell res)]
(if (kw-identical? curr ::NOTHING)
(set-cell! res structure)
(throw-illegal "More than one element found in structure: " structure)
)))]
(compiled-traverse* path result-fn structure)
(let [ret (get-cell res)]
(if (kw-identical? ret ::NOTHING)
nil
ret
))))
(defn compiled-select-one!* [path structure]
(let [res (mutable-cell ::NOTHING)
result-fn (fn [structure]
(let [curr (get-cell res)]
(if (kw-identical? curr ::NOTHING)
(set-cell! res structure)
(throw-illegal "More than one element found in structure: " structure)
)))]
(compiled-traverse* path result-fn structure)
(let [ret (get-cell res)]
(if (kw-identical? ::NOTHING ret)
(throw-illegal "Found no elements for select-one! on " structure))
ret
)))
(defn compiled-select-first* [path structure]
(let [res (mutable-cell ::NOTHING)
result-fn (fn [structure]
(let [curr (get-cell res)]
(if (kw-identical? curr ::NOTHING)
(set-cell! res structure))))]
(compiled-traverse* path result-fn structure)
(let [ret (get-cell res)]
(if (kw-identical? ret ::NOTHING)
nil
ret
))))
(defn compiled-select-any* [path structure]
(compiled-traverse* path identity structure))
(defn compiled-transform*
[^com.rpl.specter.impl.CompiledPath path transform-fn structure]
(let [^com.rpl.specter.impl.TransformFunctions tfns (.-transform-fns path)
@ -565,14 +675,17 @@
[compiled-path structure]
(not (not-selected?* compiled-path structure)))
;; returns vector of all results
(defn walk-select [pred continue-fn structure]
(let [ret (mutable-cell [])
(let [ret (mutable-cell NONE)
walker (fn this [structure]
(if (pred structure)
(conj-all! ret (continue-fn structure))
(walk/walk this identity structure))
)]
(let [r (continue-fn structure)]
(if-not (kw-identical? r NONE)
(set-cell! ret r))
r
)
(walk/walk this identity structure)
))]
(walker structure)
(get-cell ret)
))
@ -584,13 +697,9 @@
(assoc structure akey (next-fn (get structure akey))
))
#+clj
(defn all-select [structure next-fn]
(into [] (r/mapcat next-fn structure)))
#+cljs
(defn all-select [structure next-fn]
(into [] (mapcat #(next-fn %)) structure))
(doseqres NONE [e structure]
(next-fn e)))
#+cljs
(defn queue? [coll]
@ -758,7 +867,8 @@
PosNavigator
(select* [this structure next-fn]
(if-not (fast-empty? structure)
(next-fn ((.-getter this) structure))))
(next-fn ((.-getter this) structure))
NONE))
(transform* [this structure next-fn]
(if (fast-empty? structure)
structure
@ -825,8 +935,7 @@
(let [apath (if (then-tester structure)
late-then
late-else)]
(doall (mapcat next-fn (compiled-select* apath structure)))
))
(compiled-traverse* apath next-fn structure)))
(defn if-transform [structure next-fn then-tester late-then late-else]
(let [apath (if (then-tester structure)
@ -837,7 +946,8 @@
(defn filter-select [afn structure next-fn]
(if (afn structure)
(next-fn structure)))
(next-fn structure)
NONE))
(defn filter-transform [afn structure next-fn]
(if (afn structure)
@ -945,6 +1055,7 @@
(let [afn (aget ^objects params params-idx)]
(if (afn structure)
(next-fn params (inc params-idx) vals structure)
NONE
)))
(fn [params params-idx vals structure next-fn]
(let [afn (aget ^objects params params-idx)]
@ -1181,21 +1292,6 @@
))
(defn compiled-select-one* [path structure]
(let [res (compiled-select* path structure)]
(when (> (count res) 1)
(throw-illegal "More than one element found for params: " path structure))
(first res)
))
(defn compiled-select-one!* [path structure]
(let [res (compiled-select* path structure)]
(when (not= 1 (count res)) (throw-illegal "Expected exactly one element for params: " path structure))
(first res)
))
(defn compiled-select-first* [path structure]
(first (compiled-select* path structure)))
(defn compiled-setval* [path val structure]
(compiled-transform* path (fn [_] val) structure))

View file

@ -1,5 +1,6 @@
(ns com.rpl.specter.macros
(:require [com.rpl.specter.impl :as i])
(:require [com.rpl.specter.impl :as i]
[clojure.walk :as cljwalk])
)
(defn ^:no-doc gensyms [amt]
@ -456,7 +457,15 @@
(-> &env :locals keys set) ;cljs
(-> &env keys set) ;clj
)
used-locals (vec (i/walk-select local-syms vector path))
used-locals-cell (i/mutable-cell [])
_ (cljwalk/postwalk
(fn [e]
(if (local-syms e)
(i/update-cell! used-locals-cell #(conj % e))
e
))
path)
used-locals (i/get-cell used-locals-cell)
;; note: very important to use riddley's macroexpand-all here, so that
;; &env is preserved in any potential nested calls to select (like via
@ -549,13 +558,21 @@
`(i/compiled-select-one* (path ~apath) ~structure))
(defmacro select-first
"Returns first element found. Not any more efficient than select, just a convenience.
"Returns first element found.
This macro will attempt to do inline factoring and caching of the path, falling
back to compiling the path on every invocation it it's not possible to
factor/cache the path."
[apath structure]
`(i/compiled-select-first* (path ~apath) ~structure))
(defmacro select-any
"Returns first element found.
This macro will attempt to do inline factoring and caching of the path, falling
back to compiling the path on every invocation it it's not possible to
factor/cache the path."
[apath structure]
`(i/compiled-select-any* (path ~apath) ~structure))
(defmacro transform
"Navigates to each value specified by the path and replaces it by the result of running
the transform-fn on it.

View file

@ -0,0 +1,15 @@
(ns com.rpl.specter.util-macros)
(defmacro doseqres [backup-res-kw [n aseq] & body]
(let [platform (if (contains? &env :locals) :cljs :clj)
idfn (if (= platform :clj) 'identical? 'keyword-identical?)]
`(reduce
(fn [curr# ~n]
(let [ret# (do ~@body)]
(if (~idfn ret# ~backup-res-kw)
curr#
ret#
)))
~backup-res-kw
~aseq
)))

View file

@ -0,0 +1,17 @@
package com.rpl.specter;
public class MutableCell {
private Object o;
public MutableCell(Object o) {
this.o = o;
}
public Object get() {
return o;
}
public void set(Object o) {
this.o = o;
}
}