diff --git a/README.md b/README.md index 27f6cf7..df21094 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Current [semantic](http://semver.org/) version: Clojure's [rich data types](http://clojure.org/datatypes) are *awesome*. And its [reader](http://clojure.org/reader) allows you to take your data just about anywhere. But the reader can be painfully slow when you've got a lot of data to crunch (like when you're serializing to a database). -Nippy is an attempt to provide a drop-in, high-performance alternative to the reader. It's a fork of [Deep-Freeze](https://github.com/halgari/deep-freeze). +Nippy is an attempt to provide a drop-in, high-performance alternative to the reader. It's a fork of [Deep-Freeze](https://github.com/halgari/deep-freeze) and is used as the [Carmine Redis client](https://github.com/ptaoussanis/carmine) serializer. ## What's In The Box? * Simple, **high-performance** all-Clojure de/serializer. @@ -19,7 +19,7 @@ Nippy is an attempt to provide a drop-in, high-performance alternative to the re ## Status [![Build Status](https://secure.travis-ci.org/ptaoussanis/nippy.png?branch=master)](http://travis-ci.org/ptaoussanis/nippy) -Nippy is relatively mature and is used as the [Carmine Redis client](https://github.com/ptaoussanis/carmine) serializer. The API is expected to remain more or less stable. To run tests against all supported Clojure versions, use: +Nippy is still currently *experimental*. It **has not yet been thoroughly tested in production** and its API is subject to change. 
To run tests against all supported Clojure versions, use: ```bash lein2 all test @@ -47,13 +47,71 @@ and `require` the library: ### De/Serializing -TODO +As an example of what Nippy can do, let's take a look at its own reference stress data: + +```clojure +nippy/stress-data +=> +{:bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :nil nil + :boolean true + + :char-utf8 \ಬ + :string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" + :string-long (apply str (range 1000)) + :keyword :keyword + + :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10))) + :list-quoted '(1 2 3 4 5 (6 7 8 (9 10))) + :list-empty (list) + :vector [1 2 3 4 5 [6 7 8 [9 10]]] + :vector-empty [] + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}} + :map-empty {} + :set #{1 2 3 4 5 #{6 7 8 #{9 10}}} + :set-empty #{} + :meta (with-meta {:a :A} {:metakey :metaval}) + :queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g)) + :queue-empty (PersistentQueue/EMPTY) + :coll (repeatedly 1000 rand) + + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :bigint (bigint 31415926535897932384626433832795) + + :float (float 3.14) + :double (double 3.14) + :bigdec (bigdec 3.1415926535897932384626433832795) + + :ratio 22/7} +``` + +Serialize it: + +```clojure +(def frozen-stress-data (nippy/freeze-to-bytes nippy/stress-data)) +=> #<byte[] [B@3253bcf3> +``` + +Deserialize it: + +```clojure +(nippy/thaw-from-bytes frozen-stress-data) +=> {:bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :nil nil + :boolean true + <...> } +``` + +Couldn't be simpler! ## Performance -TODO +![Performance comparison chart](https://github.com/ptaoussanis/nippy/raw/master/benchmarks/chart1.png) -![Performance comparison chart]() +![Data size chart](https://github.com/ptaoussanis/nippy/raw/master/benchmarks/chart2.png) [Detailed benchmark information](https://docs.google.com/spreadsheet/ccc?key=0AuSXb68FH4uhdE5kTTlocGZKSXppWG9sRzA5Y2pMVkE&pli=1#gid=0) is available on Google Docs. 
diff --git a/benchmarks/benchmarks.clj b/benchmarks/benchmarks.clj new file mode 100644 index 0000000..d83c1ad --- /dev/null +++ b/benchmarks/benchmarks.clj @@ -0,0 +1,48 @@ +(ns taoensso.nippy.benchmarks + {:author "Peter Taoussanis"} + (:use [taoensso.nippy :as nippy :only (freeze-to-bytes thaw-from-bytes)])) + +;; Remove stuff from stress-data that breaks reader +(def bench-data (dissoc nippy/stress-data :queue :queue-empty :bytes)) + +(defn reader-freeze [x] (binding [*print-dup* false] (pr-str x))) +(defn reader-thaw [x] (binding [*read-eval* false] (read-string x))) + +(def roundtrip (comp thaw-from-bytes freeze-to-bytes)) +(def reader-roundtrip (comp reader-thaw reader-freeze)) + +(defmacro time-requests + "Warms up, then executes given number of requests and returns total execution + times in msecs." + [num-requests & body] + `(do (dotimes [_# (int (/ ~num-requests 4))] ~@body) ; Warm-up + (let [start-time# (System/nanoTime)] + (dotimes [_# ~num-requests] ~@body) + (Math/round (/ (- (System/nanoTime) start-time#) 1000000.0))))) + +(comment + + ;;; Times + (println + "---\n" + (let [num 10000] + {:reader {:freeze (time-requests num (reader-freeze bench-data)) + :thaw (let [frozen (reader-freeze bench-data)] + (time-requests num (reader-thaw frozen))) + :round (time-requests num (reader-roundtrip bench-data))} + + :nippy {:freeze (time-requests num (freeze-to-bytes bench-data)) + :thaw (let [frozen (freeze-to-bytes bench-data)] + (time-requests num (thaw-from-bytes frozen))) + :round (time-requests num (roundtrip bench-data))}})) + + ;; Clojure 1.3.0, Nippy 0.9.0 + ;; {:reader {:freeze 23573, :thaw 31923, :round 53253}, + ;; :nippy {:freeze 3805, :thaw 3789, :round 7522}} + ;; (float (/ 53253 7522)) = 7.079633 + + ;;; Data size + (let [frozen (reader-freeze bench-data)] (count (.getBytes frozen "UTF8"))) + (let [frozen (freeze-to-bytes bench-data)] (count frozen)) + ;; 22711, 12168 + ) \ No newline at end of file diff --git a/benchmarks/chart1.png 
b/benchmarks/chart1.png new file mode 100644 index 0000000..f32eca4 Binary files /dev/null and b/benchmarks/chart1.png differ diff --git a/benchmarks/chart2.png b/benchmarks/chart2.png new file mode 100644 index 0000000..3ceabc3 Binary files /dev/null and b/benchmarks/chart2.png differ diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index c8c86ef..d3e85ae 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -1 +1,283 @@ -(ns taoensso.nippy) +(ns taoensso.nippy + "Simple, high-performance Clojure serialization library. Adapted from + Deep-Freeze." + {:author "Peter Taoussanis"} + (:require [taoensso.nippy.utils :as utils]) + (:import [java.io DataInputStream DataOutputStream ByteArrayOutputStream + ByteArrayInputStream] + [org.xerial.snappy Snappy] + [clojure.lang IPersistentList IPersistentVector IPersistentMap + IPersistentSet PersistentQueue IPersistentCollection Keyword + BigInt Ratio])) + +;;;; Define type IDs + +(def ^:const schema-header "\u0000~0.9.0") + +(def ^:const id-reader (int 1)) ; Fallback: *print-dup* pr-str output +(def ^:const id-bytes (int 2)) +(def ^:const id-nil (int 3)) +(def ^:const id-boolean (int 4)) + +(def ^:const id-char (int 10)) +(def ^:const id-string (int 11)) +(def ^:const id-keyword (int 12)) + +(def ^:const id-list (int 20)) +(def ^:const id-vector (int 21)) +(def ^:const id-old-map (int 22)) ; DEPRECATED as of 0.9.0 +(def ^:const id-set (int 23)) +(def ^:const id-coll (int 24)) ; Fallback: non-specific collection +(def ^:const id-meta (int 25)) +(def ^:const id-queue (int 26)) +(def ^:const id-map (int 27)) + +(def ^:const id-byte (int 40)) +(def ^:const id-short (int 41)) +(def ^:const id-integer (int 42)) +(def ^:const id-long (int 43)) +(def ^:const id-bigint (int 44)) + +(def ^:const id-float (int 60)) +(def ^:const id-double (int 61)) +(def ^:const id-bigdec (int 62)) + +(def ^:const id-ratio (int 70)) + +;;;; Shared low-level stream stuff + +(defn- write-id! 
[^DataOutputStream stream ^Integer id] (.writeByte stream id)) + +(defn- write-bytes! + [^DataOutputStream stream ^bytes ba] + (let [size (alength ba)] + (.writeInt stream size) ; Encode size of byte array + (.write stream ba 0 size))) + +(defn- read-bytes! + ^bytes [^DataInputStream stream] + (let [size (.readInt stream) + ba (byte-array size)] + (.read stream ba 0 size) ba)) + +(defn- write-as-bytes! + "Write arbitrary object as bytes using reflection." + [^DataOutputStream stream obj] + (write-bytes! stream (.toByteArray obj))) + +(defn- read-biginteger! + "Wrapper around read-bytes! for common case of reading to a BigInteger. + Note that as of Clojure 1.3, java.math.BigInteger ≠ clojure.lang.BigInt." + ^BigInteger [^DataInputStream stream] + (BigInteger. (read-bytes! stream))) + +;;;; Freezing + +(defprotocol Freezable (freeze [this stream])) + +(comment (meta '^:DataOutputStream s)) + +(defmacro freezer + "Helper to extend Freezable protocol." + [type id & body] + `(extend-type ~type + ~'Freezable + (~'freeze [~'x ~(with-meta 's {:tag 'DataOutputStream})] + (write-id! ~'s ~id) + ~@body))) + +(defmacro coll-freezer + "Helper to freeze simple collection types." + [type id & body] + `(freezer + ~type ~id + (.writeInt ~'s (count ~'x)) ; Encode collection length + (doseq [i# ~'x] (freeze-to-stream!* ~'s i#)))) + +(freezer (Class/forName "[B") id-bytes (write-bytes! 
s x)) +(freezer nil id-nil) +(freezer Boolean id-boolean (.writeBoolean s x)) + +(freezer Character id-char (.writeChar s (int x))) +(freezer String id-string (.writeUTF s x)) +(freezer Keyword id-keyword (.writeUTF s (name x))) + +(declare freeze-to-stream!*) + +(coll-freezer IPersistentList id-list) +(coll-freezer IPersistentVector id-vector) +(freezer IPersistentMap id-map + (.writeInt s (* 2 (count x))) ; Encode num kvs + (doseq [[k v] x] + (freeze-to-stream!* s k) + (freeze-to-stream!* s v))) +(coll-freezer IPersistentSet id-set) +(coll-freezer PersistentQueue id-queue) +(coll-freezer IPersistentCollection id-coll) ; Must be LAST collection freezer! + +(freezer Byte id-byte (.writeByte s x)) +(freezer Short id-short (.writeShort s x)) +(freezer Integer id-integer (.writeInt s x)) +(freezer Long id-long (.writeLong s x)) +(freezer BigInt id-bigint (write-as-bytes! s (.toBigInteger x))) +(freezer BigInteger id-bigint (write-as-bytes! s x)) + +(freezer Float id-float (.writeFloat s x)) +(freezer Double id-double (.writeDouble s x)) +(freezer BigDecimal id-bigdec + (write-as-bytes! s (.unscaledValue x)) + (.writeInt s (.scale x))) + +(freezer Ratio id-ratio + (write-as-bytes! s (.numerator x)) + (write-as-bytes! s (.denominator x))) + +;; Use Clojure's own reader as final fallback +(freezer Object id-reader (.writeUTF s (pr-str x))) + +(defn- freeze-to-stream!* [^DataOutputStream s x] + (if-let [m (meta x)] + (do (write-id! s id-meta) + (freeze-to-stream!* s m))) + (freeze x s)) + +(defn freeze-to-stream! + "Serializes x to given output stream." + [data-output-stream x] + (binding [*print-dup* true] ; For `pr-str` + (freeze-to-stream!* data-output-stream schema-header) + (freeze-to-stream!* data-output-stream x))) + +(defn freeze-to-bytes + "Serializes x to a byte array and returns the array." + (^bytes [x] (freeze-to-bytes x true)) + (^bytes [x compress?] + (let [ba (ByteArrayOutputStream.) + stream (DataOutputStream. ba)] + (freeze-to-stream! 
stream x) + (let [ba (.toByteArray ba)] + (if compress? (Snappy/compress ba) ba))))) + +;;;; Thawing + +(declare thaw-from-stream!*) + +(defn coll-thaw! + "Helper to thaw simple collection types." + [^DataInputStream s] + (repeatedly (.readInt s) (partial thaw-from-stream!* s))) + +(defn- thaw-from-stream!* + [^DataInputStream s] + (let [type-id (.readByte s)] + (utils/case-eval + type-id + + id-reader (read-string (.readUTF s)) + id-bytes (read-bytes! s) + id-nil nil + id-boolean (.readBoolean s) + + id-char (.readChar s) + id-string (.readUTF s) + id-keyword (keyword (.readUTF s)) + + id-list (apply list (coll-thaw! s)) + id-vector (into [] (coll-thaw! s)) + id-set (into #{} (coll-thaw! s)) + id-map (apply hash-map (coll-thaw! s)) + id-coll (doall (coll-thaw! s)) + id-queue (into (PersistentQueue/EMPTY) (coll-thaw! s)) + + ;; DEPRECATED as of 0.9.0 + id-old-map (apply hash-map (repeatedly (* 2 (.readInt s)) + (partial thaw-from-stream!* s))) + + id-meta (let [m (thaw-from-stream!* s)] (with-meta (thaw-from-stream!* s) m)) + + id-byte (.readByte s) + id-short (.readShort s) + id-integer (.readInt s) + id-long (.readLong s) + id-bigint (bigint (read-biginteger! s)) + + id-float (.readFloat s) + id-double (.readDouble s) + id-bigdec (BigDecimal. (read-biginteger! s) (.readInt s)) + + id-ratio (/ (bigint (read-biginteger! s)) + (bigint (read-biginteger! s))) + + (throw (Exception. (str "Failed to thaw unknown type ID: " type-id)))))) + +;; TODO Scheduled for Carmine version 1.0.0 +;; (defn thaw-from-stream! +;; "Deserializes an object from given input stream." +;; [data-input-stream] +;; (binding [*read-eval* false] ; For `read-string` injection safety - NB!!! +;; (let [schema-header (thaw-from-stream!* data-input-stream)] +;; (thaw-from-stream!* data-input-stream)))) + +;; DEPRECATED: Includes temporary support for older versions of serialization +;; schema that didn't include a version header. 
This is for people that used +;; Carmine < 0.8.3 and haven't yet migrated their databases. +(defn thaw-from-stream! + "Deserializes an object from given input stream." + [data-input-stream] + (binding [*read-eval* false] ; For `read-string` injection safety - NB!!! + (let [maybe-schema-header (thaw-from-stream!* data-input-stream)] + (if (and (string? maybe-schema-header) + (.startsWith ^String maybe-schema-header "\u0000~")) + (thaw-from-stream!* data-input-stream) + maybe-schema-header)))) + +(defn thaw-from-bytes + "Deserializes an object from given byte array." + ([ba] (thaw-from-bytes ba true)) + ([ba compressed?] + (->> (if compressed? (Snappy/uncompress ba) ba) + (ByteArrayInputStream.) + (DataInputStream.) + (thaw-from-stream!)))) + +(def stress-data + "Reference data used for tests & benchmarks." + {;; Breaks reader, roundtrip equality + :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + + :nil nil + :boolean true + + :char-utf8 \ಬ + :string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" + :string-long (apply str (range 1000)) + :keyword :keyword + + :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10))) + :list-quoted '(1 2 3 4 5 (6 7 8 (9 10))) + :list-empty (list) + :vector [1 2 3 4 5 [6 7 8 [9 10]]] + :vector-empty [] + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}} + :map-empty {} + :set #{1 2 3 4 5 #{6 7 8 #{9 10}}} + :set-empty #{} + :meta (with-meta {:a :A} {:metakey :metaval}) + + ;; Breaks reader + :queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g)) + :queue-empty (PersistentQueue/EMPTY) + + :coll (repeatedly 1000 rand) + + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :bigint (bigint 31415926535897932384626433832795) + + :float (float 3.14) + :double (double 3.14) + :bigdec (bigdec 3.1415926535897932384626433832795) + + :ratio 22/7}) \ No newline at end of file diff --git a/src/taoensso/nippy/utils.clj b/src/taoensso/nippy/utils.clj new file mode 100644 index 0000000..6668f1c --- /dev/null +++ b/src/taoensso/nippy/utils.clj @@ -0,0 
+1,13 @@ +(ns taoensso.nippy.utils + {:author "Peter Taoussanis"}) + +(defmacro case-eval + "Like `case` but evaluates test constants for their compile-time value." + [e & clauses] + (let [;; Don't evaluate default expression! + default (when (odd? (count clauses)) (last clauses)) + clauses (if default (butlast clauses) clauses)] + `(case ~e + ~@(map-indexed (fn [i# form#] (if (even? i#) (eval form#) form#)) + clauses) + ~(when default default)))) \ No newline at end of file diff --git a/test/test_nippy/main.clj b/test/test_nippy/main.clj index d3fbca6..ec4e41a 100644 --- a/test/test_nippy/main.clj +++ b/test/test_nippy/main.clj @@ -1,4 +1,10 @@ (ns test-nippy.main - (:use [clojure.test])) + (:use [clojure.test]) + (:require [taoensso.nippy :as nippy])) -(deftest test-nothing) +;; Remove stuff from stress-data that breaks roundtrip equality +(def test-data (dissoc nippy/stress-data :bytes)) + +(def roundtrip (comp nippy/thaw-from-bytes nippy/freeze-to-bytes)) + +(deftest test-roundtrip (is (= test-data (roundtrip test-data)))) \ No newline at end of file