Improve performance of removing elements from sequences

Merging `InsertBeforeIndex` protocol into new `IndexedOps` protocol for optimized indexed based operations (which now includes inserting before and removing at)

Updating `nthpath` and `keypath` to use new helper fns

Adding tests and benchmarks
This commit is contained in:
Jeff Evans 2021-05-25 13:47:54 -05:00
parent e8225f0e58
commit 2ace3be456
3 changed files with 119 additions and 35 deletions

View file

@ -112,8 +112,8 @@
(run-benchmark "transform values of a list"
(transform ALL inc data)
(doall (sequence (map inc) data))
(reverse (into '() (map inc) data))
))
(reverse (into '() (map inc) data))))
(let [data {:a 1 :b 2 :c 3 :d 4}]
(run-benchmark "transform values of a small map"
@ -127,8 +127,8 @@
(into {} (map (fn [e] [(key e) (inc (val e))]) data))
(into {} (map (fn [e] [(key e) (inc (val e))])) data)
(map-vals-map-iterable data inc)
(map-vals-map-iterable-transient data inc)
))
(map-vals-map-iterable-transient data inc)))
(let [data (->> (for [i (range 1000)] [i i]) (into {}))]
@ -152,8 +152,8 @@
(first data)
(select-any ALL data)
(select-any FIRST data)
(select-first ALL data)
))
(select-first ALL data)))
(let [data [1 2 3 4 5]]
(run-benchmark "map a function over a vector"
@ -192,8 +192,8 @@
(run-benchmark "prepend to a vector"
(vec (cons 0 data))
(setval BEFORE-ELEM 0 data)
(into [0] data)
))
(into [0] data)))
(declarepath TreeValues)
@ -314,8 +314,8 @@
(map (fn [[k v]] [(keyword (str *ns*) (name k)) v]))
data)
(reduce-kv (fn [m k v] (assoc m (keyword (str *ns*) (name k)) v)) {} data)
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)
))
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)))
(let [data (->> (for [i (range 1000)] [(keyword (str i)) i]) (into {}))]
@ -324,8 +324,8 @@
(map (fn [[k v]] [(keyword (str *ns*) (name k)) v]))
data)
(reduce-kv (fn [m k v] (assoc m (keyword (str *ns*) (name k)) v)) {} data)
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)
))
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)))
(defnav walker-old [afn]
(select* [this structure next-fn]
@ -336,8 +336,8 @@
(let [data {:a [1 2 {:c '(3 4) :d {:e [1 2 3] 7 8 9 10}}]}]
(run-benchmark "walker vs. clojure.walk version"
(transform (walker number?) inc data)
(transform (walker-old number?) inc data)
))
(transform (walker-old number?) inc data)))
(let [size 1000
middle-idx (/ size 2)
@ -354,4 +354,29 @@
(run-benchmark "before-index at 0 vs. srange vs. cons (list)"
(setval (before-index 0) v data-lst)
(setval (srange 0 0) [v] data-lst)
(cons v data-lst)))
(cons v data-lst))
(run-benchmark "set keypath and nthpath at index to NONE versus srange in middle (vector)"
(setval (nthpath middle-idx) NONE data-vec)
(setval (keypath middle-idx) NONE data-vec)
(setval (srange middle-idx (inc middle-idx)) [] data-vec))
(run-benchmark "set keypath and nthpath at index to NONE versus srange in middle (list)"
;; this case still needs to be optimized in nthpath*
(setval (nthpath middle-idx) NONE data-lst)
(setval (keypath middle-idx) NONE data-lst)
(setval (srange middle-idx (inc middle-idx)) [] data-lst))
(run-benchmark "set keypath and nthpath at beginning to NONE versus srange and subvec (vector)"
(setval (nthpath 0) NONE data-vec)
(setval (keypath 0) NONE data-vec)
(setval (srange 0 1) [] data-vec)
(subvec data-vec 1))
(run-benchmark "set keypath and nthpath at beginning to NONE versus srange and rest (list)"
;; this case still needs to be optimized in nthpath*
(setval (nthpath 0) NONE data-lst)
(setval (keypath 0) NONE data-lst)
(setval (srange 0 1) [] data-lst)
(rest data-lst))
(run-benchmark "set keypath and nthpath at end to NONE versus srange and subvec (vector)"
(setval (nthpath (dec size)) NONE data-vec)
(setval (keypath (dec size)) NONE data-vec)
(setval (srange (dec size) size) [] data-vec)
(subvec data-vec 0 (dec size))))

View file

@ -492,9 +492,6 @@
(defprotocol FastEmpty
(fast-empty? [s]))
(defprotocol InsertBeforeIndex
(insert-before-idx [aseq idx val]))
(defnav PosNavigator [getter updater]
(select* [this structure next-fn]
(if-not (fast-empty? structure)
@ -643,7 +640,29 @@
(nth s (-> s count dec))
))
(defprotocol IndexedOps
"Fast indexed operations on sequential types"
(insert-before-idx [aseq idx val])
(remove-at-idx [aseq idx]))
;; helper fns for indexed operations
(defn- insert-before-index-list [lst idx v]
;; an implementation that is most efficient for list style structures
(let [[front back] (split-at idx lst)]
(concat front (cons v back))))
(defn- remove-at-index-vec [aseq idx]
(condp = idx
0 (subvec aseq 1)
(vec-count aseq) (subvec aseq 0 (vec-count aseq))
(into (subvec aseq 0 idx) (subvec aseq (inc idx)))))
(defn- remove-at-index-list [lst idx]
;; an implementation that is most efficient for list style structures
(condp = idx
0 (rest lst)
(let [[front back] (split-at idx lst)]
(concat front (rest back)))))
(extend-protocol FastEmpty
nil
@ -664,7 +683,7 @@
(let [newv (next-fn vals (get structure key))]
(if (identical? newv i/NONE)
(if (sequential? structure)
(i/srange-transform* structure key (inc key) (fn [_] []))
(remove-at-idx structure key)
(dissoc structure key))
(assoc structure key newv))))
@ -704,8 +723,8 @@
(if (vector? structure)
(let [newv (next-fn vals (nth structure i))]
(if (identical? newv i/NONE)
(i/srange-transform* structure i (inc i) (fn [_] []))
(assoc structure i newv)))
(remove-at-index-vec structure i)
(assoc structure i newv)))
(i/srange-transform* ; can make this much more efficient with alternate impl
structure
i
@ -726,35 +745,60 @@
(end-fn structure)
))
(defn- insert-before-index-list [lst idx val]
;; an implementation that is most efficient for list style structures
(let [[front back] (split-at idx lst)]
(concat front (cons val back))))
(extend-protocol InsertBeforeIndex
(extend-protocol IndexedOps
nil
(insert-before-idx [_ idx val]
(if (= 0 idx)
(list val)
(throw (ex-info "For a nil structure, can only insert before index 0"
{:insertion-index idx}))))
(remove-at-idx [_ _]
;; removing from nil structure at any index should just be nil?
nil)
#?(:clj java.lang.String :cljs string)
(insert-before-idx [aseq idx val]
(apply str (insert-before-index-list aseq idx val)))
(insert-before-idx [aseq idx v]
(apply str (insert-before-index-list aseq idx v)))
(remove-at-idx [s idx]
(str (subs s 0 idx) (subs s idx)))
#?(:clj clojure.lang.LazySeq :cljs cljs.core/LazySeq)
(insert-before-idx [aseq idx val]
(insert-before-index-list aseq idx val))
(insert-before-idx [aseq idx v]
(insert-before-index-list aseq idx v))
(remove-at-idx [aseq idx]
(remove-at-index-list aseq idx))
#?(:clj clojure.lang.IPersistentVector :cljs cljs.core/PersistentVector)
(insert-before-idx [aseq idx val]
(let [front (subvec aseq 0 idx)
back (subvec aseq idx)]
(into (conj front val) back)))
(remove-at-idx [aseq idx]
(remove-at-index-vec aseq idx))
;; TODO: incorporate this into the transients namespace instead/in addition to?
#?(:clj clojure.lang.ITransientVector :cljs cljs.core/TransientVector)
(insert-before-idx [aseq idx val]
(loop [v aseq prev-val val curr-idx idx]
(if
(= curr-idx (transient-vec-count v))
(assoc! v curr-idx prev-val)
(let [curr-val (nth v curr-idx)]
(recur (assoc! v curr-idx prev-val) curr-val (inc curr-idx))))))
(remove-at-idx [aseq idx]
(loop [v aseq curr-idx idx]
(if
(< curr-idx (dec (transient-vec-count v)))
(let [next-val (nth v (inc curr-idx))]
(recur (assoc! v curr-idx next-val) (inc curr-idx)))
(pop! v))))
#?(:clj clojure.lang.IPersistentList :cljs cljs.core/List)
(insert-before-idx [aseq idx val]
(cond (= idx 0)
(if (= idx 0)
(cons val aseq)
:else (insert-before-index-list aseq idx val))))
(insert-before-index-list aseq idx val)))
(remove-at-idx [aseq idx]
(if (= idx 0)
(rest aseq)
(remove-at-index-list aseq idx))))

View file

@ -1,6 +1,6 @@
(ns com.rpl.specter.core-test
#?(:cljs (:require-macros
[cljs.test :refer [is deftest]]
[cljs.test :refer [is deftest testing]]
[clojure.test.check.clojure-test :refer [defspec]]
[com.rpl.specter.cljs-test-helpers :refer [for-all+]]
[com.rpl.specter.test-helpers :refer [ic-test]]
@ -13,7 +13,7 @@
defdynamicnav traverse-all satisfies-protpath? end-fn
vtransform]]))
(:use
#?(:clj [clojure.test :only [deftest is]])
#?(:clj [clojure.test :only [deftest is testing]])
#?(:clj [clojure.test.check.clojure-test :only [defspec]])
#?(:clj [com.rpl.specter.test-helpers :only [for-all+ ic-test]])
#?(:clj [com.rpl.specter
@ -33,6 +33,7 @@
#?(:cljs [clojure.test.check.generators :as gen])
#?(:cljs [clojure.test.check.properties :as prop :include-macros true])
[com.rpl.specter :as s]
[com.rpl.specter.navs :as n]
[com.rpl.specter.transients :as t]
[clojure.set :as set]))
@ -1331,6 +1332,7 @@
(deftest nthpath-test
(is (predand= vector? [1 2 -3 4] (transform (s/nthpath 2) - [1 2 3 4])))
(is (predand= vector? [1 2 4] (setval (s/nthpath 2) s/NONE [1 2 3 4])))
(is (predand= vector? [1 2 3] (setval (s/nthpath 3) s/NONE [1 2 3 4])))
(is (predand= (complement vector?) '(1 -2 3 4) (transform (s/nthpath 1) - '(1 2 3 4))))
(is (predand= (complement vector?) '(1 2 4) (setval (s/nthpath 2) s/NONE '(1 2 3 4))))
(is (= [0 1 [2 4 4]] (transform (s/nthpath 2 1) inc [0 1 [2 3 4]])))
@ -1702,3 +1704,16 @@
(is (satisfies-protpath? FooPP "a"))
(is (not (satisfies-protpath? FooPP 1)))
)))
;; adding a separate test because these are not yet exercised by the rest of the suite
(deftest indexed-opts-transient-vectors-test
(testing "IndexedOps fns work properly for transient vectors"
(let [v (vec (range 10))]
(doseq [[f args exp] [[n/remove-at-idx [0] (vec (range 1 10))]
[n/remove-at-idx [5] [0 1 2 3 4 6 7 8 9]]
[n/remove-at-idx [9] (vec (range 9))]
[n/insert-before-idx [0 -1] (vec (range -1 10))]
[n/insert-before-idx [7 -1] [0 1 2 3 4 5 6 -1 7 8 9]]
[n/insert-before-idx [10 10] (vec (range 11))]]]
(is (= exp (-> (apply f (cons (transient v) args))
persistent!)))))))