From c2c46167ec664a89b83521f8f9a17a066fc31eb7 Mon Sep 17 00:00:00 2001
From: Peter Taoussanis
Date: Tue, 4 Dec 2012 13:16:29 +0700
Subject: [PATCH] Swap hash-map thaw implementation for perf & to avoid OOM errors

Thanks to moonranger for pointing out the OOM issue.
---
 benchmarks/benchmarks.clj | 5 +++++
 src/taoensso/nippy.clj | 13 +++++++++++--
 src/taoensso/nippy/utils.clj | 8 ++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/benchmarks/benchmarks.clj b/benchmarks/benchmarks.clj
index e97de81..4f56786 100644
--- a/benchmarks/benchmarks.clj
+++ b/benchmarks/benchmarks.clj
@@ -34,6 +34,11 @@
   ;; :nippy {:freeze 3751, :thaw 4184, :round 7769}}
   ;; (float (/ 59545 7769)) = 7.6644354
 
+  ;; Clojure 1.4.0, Nippy 1.0.0
+  ;; {:reader {:freeze 22595, :thaw 31148, :round 54059}
+  ;; :nippy {:freeze 3324, :thaw 3725, :round 6918}}
+  ;; (float (/ 54059 6918)) = 7.814253
+
   ;;; Data size
   (let [frozen (reader-freeze data)] (count (.getBytes frozen "UTF8")))
   (let [frozen (freeze-to-bytes data)] (count frozen))
diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj
index 0eb8706..3b4d10d 100644
--- a/src/taoensso/nippy.clj
+++ b/src/taoensso/nippy.clj
@@ -175,6 +175,12 @@
   [^DataInputStream s]
   (repeatedly (.readInt s) (partial thaw-from-stream!* s)))
 
+(defn coll-thaw-pairs!
+  "Helper to thaw pair-based collection types (e.g. hash maps)."
+  [^DataInputStream s]
+  (repeatedly (/ (.readInt s) 2)
+              (fn [] [(thaw-from-stream!* s) (thaw-from-stream!* s)])))
+
 (defn- thaw-from-stream!*
   [^DataInputStream s]
   (let [type-id (.readByte s)]
@@ -190,10 +196,13 @@
      id-string (String. (read-bytes! s) "UTF-8")
      id-keyword (keyword (.readUTF s))
 
-     id-list (apply list (coll-thaw! s))
+     id-list (apply list (coll-thaw! s)) ; TODO OOMs for big colls
      id-vector (into [] (coll-thaw! s))
      id-set (into #{} (coll-thaw! s))
-     id-map (apply hash-map (coll-thaw! s))
+     ;; id-map (apply hash-map (coll-thaw! s)) ; OOMs for big colls
+     ;; id-map (into {} (map vec (partition 2 x))) ; ~6.4x time
+     ;; id-map (into {} (utils/pairs (coll-thaw! s))) ; ~1.8x time
+     id-map (into {} (coll-thaw-pairs! s)) ; ~0.8x time
      id-coll (doall (coll-thaw! s))
      id-queue (into (PersistentQueue/EMPTY) (coll-thaw! s))
 
diff --git a/src/taoensso/nippy/utils.clj b/src/taoensso/nippy/utils.clj
index b70941e..17be404 100644
--- a/src/taoensso/nippy/utils.clj
+++ b/src/taoensso/nippy/utils.clj
@@ -13,6 +13,14 @@
          clauses)
      ~(when default default))))
 
+(defn pairs
+  "Like (partition 2 coll) but faster and returns lazy seq of vector pairs."
+  [coll]
+  (lazy-seq
+   (when-let [s (seq coll)]
+     (let [n (next s)]
+       (cons [(first s) (first n)] (pairs (next n)))))))
+
 (defmacro time-ns
   "Returns number of nanoseconds it takes to execute body."
   [& body]