Initial source, tests, benchmarks.
This commit is contained in:
parent
53332644bb
commit
146f82e079
7 changed files with 415 additions and 8 deletions
68
README.md
68
README.md
|
|
@ -8,7 +8,7 @@ Current [semantic](http://semver.org/) version:
|
|||
|
||||
Clojure's [rich data types](http://clojure.org/datatypes) are *awesome*. And its [reader](http://clojure.org/reader) allows you to take your data just about anywhere. But the reader can be painfully slow when you've got a lot of data to crunch (like when you're serializing to a database).
|
||||
|
||||
Nippy is an attempt to provide a drop-in, high-performance alternative to the reader. It's a fork of [Deep-Freeze](https://github.com/halgari/deep-freeze).
|
||||
Nippy is an attempt to provide a drop-in, high-performance alternative to the reader. It's a fork of [Deep-Freeze](https://github.com/halgari/deep-freeze) and is used as the [Carmine Redis client](https://github.com/ptaoussanis/carmine) serializer.
|
||||
|
||||
## What's In The Box?
|
||||
* Simple, **high-performance** all-Clojure de/serializer.
|
||||
|
|
@ -19,7 +19,7 @@ Nippy is an attempt to provide a drop-in, high-performance alternative to the re
|
|||
|
||||
## Status [Build status](http://travis-ci.org/ptaoussanis/nippy)
|
||||
|
||||
Nippy is relatively mature and is used as the [Carmine Redis client](https://github.com/ptaoussanis/carmine) serializer. The API is expected to remain more or less stable. To run tests against all supported Clojure versions, use:
|
||||
Nippy is currently *experimental*. It **has not yet been thoroughly tested in production** and its API is subject to change. To run tests against all supported Clojure versions, use:
|
||||
|
||||
```bash
|
||||
lein2 all test
|
||||
|
|
@ -47,13 +47,71 @@ and `require` the library:
|
|||
|
||||
### De/Serializing
|
||||
|
||||
TODO
|
||||
As an example of what Nippy can do, let's take a look at its own reference stress data:
|
||||
|
||||
```clojure
|
||||
nippy/stress-data
|
||||
=>
|
||||
{:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
|
||||
:nil nil
|
||||
:boolean true
|
||||
|
||||
:char-utf8 \ಬ
|
||||
:string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
|
||||
:string-long (apply str (range 1000))
|
||||
:keyword :keyword
|
||||
|
||||
:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
|
||||
:list-quoted '(1 2 3 4 5 (6 7 8 (9 10)))
|
||||
:list-empty (list)
|
||||
:vector [1 2 3 4 5 [6 7 8 [9 10]]]
|
||||
:vector-empty []
|
||||
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
|
||||
:map-empty {}
|
||||
:set #{1 2 3 4 5 #{6 7 8 #{9 10}}}
|
||||
:set-empty #{}
|
||||
:meta (with-meta {:a :A} {:metakey :metaval})
|
||||
:queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
|
||||
:queue-empty (PersistentQueue/EMPTY)
|
||||
:coll (repeatedly 1000 rand)
|
||||
|
||||
:byte (byte 16)
|
||||
:short (short 42)
|
||||
:integer (int 3)
|
||||
:long (long 3)
|
||||
:bigint (bigint 31415926535897932384626433832795)
|
||||
|
||||
:float (float 3.14)
|
||||
:double (double 3.14)
|
||||
:bigdec (bigdec 3.1415926535897932384626433832795)
|
||||
|
||||
:ratio 22/7}
|
||||
```
|
||||
|
||||
Serialize it:
|
||||
|
||||
```clojure
|
||||
(def frozen-stress-data (nippy/freeze-to-bytes nippy/stress-data))
|
||||
=> #<byte[] [B@3253bcf3>
|
||||
```
|
||||
|
||||
Deserialize it:
|
||||
|
||||
```clojure
|
||||
(nippy/thaw-from-bytes frozen-stress-data)
|
||||
=> {:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
|
||||
:nil nil
|
||||
:boolean true
|
||||
<...> }
|
||||
```
|
||||
|
||||
Couldn't be simpler!
|
||||
|
||||
## Performance
|
||||
|
||||
TODO
|
||||

|
||||
|
||||
![Performance comparison chart](benchmarks/chart1.png)
|
||||

|
||||
|
||||
[Detailed benchmark information](https://docs.google.com/spreadsheet/ccc?key=0AuSXb68FH4uhdE5kTTlocGZKSXppWG9sRzA5Y2pMVkE&pli=1#gid=0) is available on Google Docs.
|
||||
|
||||
|
|
|
|||
48
benchmarks/benchmarks.clj
Normal file
48
benchmarks/benchmarks.clj
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
(ns taoensso.nippy.benchmarks
|
||||
{:author "Peter Taoussanis"}
|
||||
(:use [taoensso.nippy :as nippy :only (freeze-to-bytes thaw-from-bytes)]))
|
||||
|
||||
;; Remove stuff from stress-data that breaks reader
|
||||
(def bench-data (dissoc nippy/stress-data :queue :queue-empty :bytes))
|
||||
|
||||
(defn reader-freeze
  "Baseline serializer for the benchmarks: prints x to a string with `pr-str`
  while `*print-dup*` is bound to false."
  [x]
  (binding [*print-dup* false]
    (pr-str x)))
|
||||
(defn reader-thaw
  "Baseline deserializer for the benchmarks: reads string x with `read-string`
  while `*read-eval*` is bound to false."
  [x]
  (binding [*read-eval* false]
    (read-string x)))
|
||||
|
||||
;; Nippy round trip: freezes its argument(s) with `freeze-to-bytes`, then feeds
;; the resulting byte array to `thaw-from-bytes`.
(def roundtrip (comp thaw-from-bytes freeze-to-bytes))
|
||||
;; Reader round trip: `reader-freeze` to a string, then `reader-thaw` it back.
(def reader-roundtrip (comp reader-thaw reader-freeze))
|
||||
|
||||
(defmacro time-requests
  "Runs `body` a quarter of `num-requests` times as a warm-up, then times
  `num-requests` further executions of `body` and returns the elapsed time
  rounded to whole milliseconds."
  [num-requests & body]
  (let [warm-up `(dotimes [_# (int (/ ~num-requests 4))] ~@body)
        timed   `(dotimes [_# ~num-requests] ~@body)]
    `(do ~warm-up
         (let [t0# (System/nanoTime)]
           ~timed
           ;; nanoseconds -> milliseconds
           (Math/round (/ (- (System/nanoTime) t0#) 1000000.0))))))
|
||||
|
||||
(comment
|
||||
|
||||
;;; Times
|
||||
(println
|
||||
"---\n"
|
||||
(let [num 10000]
|
||||
{:reader {:freeze (time-requests num (reader-freeze bench-data))
|
||||
:thaw (let [frozen (reader-freeze bench-data)]
|
||||
(time-requests num (reader-thaw frozen)))
|
||||
:round (time-requests num (reader-roundtrip bench-data))}
|
||||
|
||||
:nippy {:freeze (time-requests num (freeze-to-bytes bench-data))
|
||||
:thaw (let [frozen (freeze-to-bytes bench-data)]
|
||||
(time-requests num (thaw-from-bytes frozen)))
|
||||
:round (time-requests num (roundtrip bench-data))}}))
|
||||
|
||||
;; Clojure 1.3.0, Nippy 0.9.0
|
||||
;; {:reader {:freeze 23573, :thaw 31923, :round 53253},
|
||||
;; :nippy {:freeze 3805, :thaw 3789, :round 7522}}
|
||||
;; (float (/ 53253 7522)) = 7.079633
|
||||
|
||||
;;; Data size
|
||||
(let [frozen (reader-freeze bench-data)] (count (.getBytes frozen "UTF8")))
|
||||
(let [frozen (freeze-to-bytes bench-data)] (count frozen))
|
||||
;; 22711, 12168
|
||||
)
|
||||
BIN
benchmarks/chart1.png
Normal file
BIN
benchmarks/chart1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.7 KiB |
BIN
benchmarks/chart2.png
Normal file
BIN
benchmarks/chart2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.9 KiB |
|
|
@ -1 +1,283 @@
|
|||
(ns taoensso.nippy)
|
||||
(ns taoensso.nippy
|
||||
"Simple, high-performance Clojure serialization library. Adapted from
|
||||
Deep-Freeze."
|
||||
{:author "Peter Taoussanis"}
|
||||
(:require [taoensso.nippy.utils :as utils])
|
||||
(:import [java.io DataInputStream DataOutputStream ByteArrayOutputStream
|
||||
ByteArrayInputStream]
|
||||
[org.xerial.snappy Snappy]
|
||||
[clojure.lang IPersistentList IPersistentVector IPersistentMap
|
||||
IPersistentSet PersistentQueue IPersistentCollection Keyword
|
||||
BigInt Ratio]))
|
||||
|
||||
;;;; Define type IDs
|
||||
|
||||
(def ^:const schema-header "\u0000~0.9.0")
|
||||
|
||||
(def ^:const id-reader (int 1)) ; Fallback: *print-dup* pr-str output
|
||||
(def ^:const id-bytes (int 2))
|
||||
(def ^:const id-nil (int 3))
|
||||
(def ^:const id-boolean (int 4))
|
||||
|
||||
(def ^:const id-char (int 10))
|
||||
(def ^:const id-string (int 11))
|
||||
(def ^:const id-keyword (int 12))
|
||||
|
||||
(def ^:const id-list (int 20))
|
||||
(def ^:const id-vector (int 21))
|
||||
(def ^:const id-old-map (int 22)) ; DEPRECATED as of 0.9.0
|
||||
(def ^:const id-set (int 23))
|
||||
(def ^:const id-coll (int 24)) ; Fallback: non-specific collection
|
||||
(def ^:const id-meta (int 25))
|
||||
(def ^:const id-queue (int 26))
|
||||
(def ^:const id-map (int 27))
|
||||
|
||||
(def ^:const id-byte (int 40))
|
||||
(def ^:const id-short (int 41))
|
||||
(def ^:const id-integer (int 42))
|
||||
(def ^:const id-long (int 43))
|
||||
(def ^:const id-bigint (int 44))
|
||||
|
||||
(def ^:const id-float (int 60))
|
||||
(def ^:const id-double (int 61))
|
||||
(def ^:const id-bigdec (int 62))
|
||||
|
||||
(def ^:const id-ratio (int 70))
|
||||
|
||||
;;;; Shared low-level stream stuff
|
||||
|
||||
;; Writes the type `id` to `stream` with `.writeByte`; every frozen value is
;; prefixed with such an ID so the thawer knows how to decode what follows.
(defn- write-id! [^DataOutputStream stream ^Integer id] (.writeByte stream id))
|
||||
|
||||
(defn- write-bytes!
  "Writes byte array `ba` to `stream` as an int length prefix followed by the
  array's contents."
  [^DataOutputStream stream ^bytes ba]
  ;; Length first, so the reader knows how many bytes to consume
  (.writeInt stream (alength ba))
  (.write stream ba 0 (alength ba)))
|
||||
|
||||
(defn- read-bytes!
  "Reads a length-prefixed byte array from `stream`: an int size followed by
  exactly that many bytes (the layout produced by `write-bytes!`).

  Uses `.readFully` rather than `.read`: a single `.read` call may return
  fewer bytes than requested, which would silently leave the tail of the array
  zero-filled and the stream mis-positioned for whatever is decoded next.
  Throws java.io.EOFException if the stream ends before `size` bytes arrive."
  ^bytes [^DataInputStream stream]
  (let [size (.readInt stream)
        ba   (byte-array size)]
    (.readFully stream ba 0 size)
    ba))
|
||||
|
||||
(defn- write-as-bytes!
  "Converts `obj` to a byte array via a reflective `.toByteArray` call, then
  writes it to `stream` in length-prefixed form."
  [^DataOutputStream stream obj]
  (let [ba (.toByteArray obj)]
    (write-bytes! stream ba)))
|
||||
|
||||
(defn- read-biginteger!
  "Reads a length-prefixed byte array from `stream` and builds a
  java.math.BigInteger from it.
  Note that as of Clojure 1.3, java.math.BigInteger ≠ clojure.lang.BigInt."
  ^BigInteger [^DataInputStream stream]
  (let [^bytes raw (read-bytes! stream)]
    (BigInteger. raw)))
|
||||
|
||||
;;;; Freezing
|
||||
|
||||
;; Protocol implemented (via the `freezer` macro) by every type that can be
;; serialized: `freeze` writes `this` to the given output `stream`.
(defprotocol Freezable (freeze [this stream]))
|
||||
|
||||
(comment (meta '^:DataOutputStream s))
|
||||
|
||||
(defmacro freezer
  "Extends the Freezable protocol to `type`. The generated `freeze` first
  writes type `id` to the stream and then runs `body`, in which the value
  being frozen is bound to `x` and the DataOutputStream-hinted stream to `s`
  (both names are deliberately unhygienic)."
  [type id & body]
  (let [stream-sym (with-meta 's {:tag 'DataOutputStream})]
    `(extend-type ~type
       ~'Freezable
       (~'freeze [~'x ~stream-sym]
         (write-id! ~'s ~id)
         ~@body))))
|
||||
|
||||
(defmacro coll-freezer
  "Defines a `freezer` for a simple collection `type`: writes the element
  count as an int, then freezes each element in iteration order.
  Any extra `body` forms are accepted but not used."
  [type id & body]
  `(freezer ~type ~id
     ;; Element count goes first so the thawer knows how many items follow
     (.writeInt ~'s (count ~'x))
     (doseq [item# ~'x]
       (freeze-to-stream!* ~'s item#))))
|
||||
|
||||
(freezer (Class/forName "[B") id-bytes (write-bytes! s x))
|
||||
(freezer nil id-nil)
|
||||
(freezer Boolean id-boolean (.writeBoolean s x))
|
||||
|
||||
(freezer Character id-char (.writeChar s (int x)))
|
||||
(freezer String id-string (.writeUTF s x))
|
||||
;; Keywords are written as "ns/name" (or just "name" when unqualified) so that
;; the namespace survives a round trip; `(name x)` alone silently dropped it
;; (:a/b thawed as :b). Thawing applies `keyword` to the string, which already
;; parses the "ns/name" form, and unqualified keywords are encoded as before.
(freezer Keyword id-keyword
         (.writeUTF s (if-let [kw-ns (namespace x)]
                        (str kw-ns "/" (name x))
                        (name x))))
|
||||
|
||||
(declare freeze-to-stream!*)
|
||||
|
||||
(coll-freezer IPersistentList id-list)
|
||||
(coll-freezer IPersistentVector id-vector)
|
||||
(freezer IPersistentMap id-map
|
||||
(.writeInt s (* 2 (count x))) ; Encode num kvs
|
||||
(doseq [[k v] x]
|
||||
(freeze-to-stream!* s k)
|
||||
(freeze-to-stream!* s v)))
|
||||
(coll-freezer IPersistentSet id-set)
|
||||
(coll-freezer PersistentQueue id-queue)
|
||||
(coll-freezer IPersistentCollection id-coll) ; Must be LAST collection freezer!
|
||||
|
||||
(freezer Byte id-byte (.writeByte s x))
|
||||
(freezer Short id-short (.writeShort s x))
|
||||
(freezer Integer id-integer (.writeInt s x))
|
||||
(freezer Long id-long (.writeLong s x))
|
||||
(freezer BigInt id-bigint (write-as-bytes! s (.toBigInteger x)))
|
||||
(freezer BigInteger id-bigint (write-as-bytes! s x))
|
||||
|
||||
(freezer Float id-float (.writeFloat s x))
|
||||
(freezer Double id-double (.writeDouble s x))
|
||||
(freezer BigDecimal id-bigdec
|
||||
(write-as-bytes! s (.unscaledValue x))
|
||||
(.writeInt s (.scale x)))
|
||||
|
||||
(freezer Ratio id-ratio
|
||||
(write-as-bytes! s (.numerator x))
|
||||
(write-as-bytes! s (.denominator x)))
|
||||
|
||||
;; Use Clojure's own reader as final fallback
|
||||
(freezer Object id-reader (.writeUTF s (pr-str x)))
|
||||
|
||||
(defn- freeze-to-stream!*
  "Freezes `x` to stream `s`. When `x` carries metadata, the `id-meta` marker
  and the frozen metadata map are written first, followed by `x` itself."
  [^DataOutputStream s x]
  (when-let [md (meta x)]
    (write-id! s id-meta)
    (freeze-to-stream!* s md))
  (freeze x s))
|
||||
|
||||
(defn freeze-to-stream!
  "Serializes x to given output stream: writes the schema header first, then
  x. `*print-dup*` is bound to true for the duration so that values handled by
  the `pr-str` fallback are printed in that form."
  [data-output-stream x]
  (let [write! (partial freeze-to-stream!* data-output-stream)]
    (binding [*print-dup* true] ; For `pr-str`
      (write! schema-header)
      (write! x))))
|
||||
|
||||
(defn freeze-to-bytes
  "Serializes x to a byte array and returns the array. The result is
  Snappy-compressed unless `compress?` is given as false."
  (^bytes [x] (freeze-to-bytes x true))
  (^bytes [x compress?]
     (let [buffer (ByteArrayOutputStream.)]
       (freeze-to-stream! (DataOutputStream. buffer) x)
       (let [raw (.toByteArray buffer)]
         (if compress?
           (Snappy/compress raw)
           raw)))))
|
||||
|
||||
;;;; Thawing
|
||||
|
||||
(declare thaw-from-stream!*)
|
||||
|
||||
(defn coll-thaw!
  "Helper to thaw simple collection types: reads an int element count from
  `s` and returns a lazy seq that thaws that many values from `s` as it is
  realized."
  [^DataInputStream s]
  (let [n (.readInt s)]
    (repeatedly n #(thaw-from-stream!* s))))
|
||||
|
||||
(defn- thaw-from-stream!*
  "Reads one type-ID byte from `s`, then decodes and returns the value that
  follows it according to that ID. Throws an Exception when the ID is not
  recognised."
  [^DataInputStream s]
  (let [tag       (.readByte s)
        thaw-next (fn [] (thaw-from-stream!* s))]
    (utils/case-eval
     tag

     ;; Reader fallback, raw bytes, nil, booleans
     id-reader  (read-string (.readUTF s))
     id-bytes   (read-bytes! s)
     id-nil     nil
     id-boolean (.readBoolean s)

     ;; Text
     id-char    (.readChar s)
     id-string  (.readUTF s)
     id-keyword (keyword (.readUTF s))

     ;; Collections (each is a count followed by that many frozen items)
     id-list   (apply list (coll-thaw! s))
     id-vector (into [] (coll-thaw! s))
     id-set    (into #{} (coll-thaw! s))
     id-map    (apply hash-map (coll-thaw! s))
     id-coll   (doall (coll-thaw! s))
     id-queue  (into (PersistentQueue/EMPTY) (coll-thaw! s))

     ;; DEPRECATED as of 0.9.0: the stored count is the number of kv pairs
     id-old-map (apply hash-map (repeatedly (* 2 (.readInt s)) thaw-next))

     ;; Metadata map comes first, then the value it is attached to
     id-meta (let [m (thaw-next)
                   v (thaw-next)]
               (with-meta v m))

     ;; Integers
     id-byte    (.readByte s)
     id-short   (.readShort s)
     id-integer (.readInt s)
     id-long    (.readLong s)
     id-bigint  (bigint (read-biginteger! s))

     ;; Floating point & decimals
     id-float  (.readFloat s)
     id-double (.readDouble s)
     id-bigdec (BigDecimal. (read-biginteger! s) (.readInt s))

     ;; Numerator first, then denominator
     id-ratio (/ (bigint (read-biginteger! s))
                 (bigint (read-biginteger! s)))

     (throw (Exception. (str "Failed to thaw unknown type ID: " tag))))))
|
||||
|
||||
;; TODO Scheduled for Carmine version 1.0.0
|
||||
;; (defn thaw-from-stream!
|
||||
;; "Deserializes an object from given input stream."
|
||||
;; [data-input-stream]
|
||||
;; (binding [*read-eval* false] ; For `read-string` injection safety - NB!!!
|
||||
;; (let [schema-header (thaw-from-stream!* data-input-stream)]
|
||||
;; (thaw-from-stream!* data-input-stream))))
|
||||
|
||||
;; DEPRECATED: Includes temporary support for older versions of serialization
|
||||
;; schema that didn't include a version header. This is for people that used
|
||||
;; Carmine < 0.8.3 and haven't yet migrated their databases.
|
||||
(defn thaw-from-stream!
  "Deserializes an object from given input stream. If the first thawed value
  is a schema-header string (prefix \"\\u0000~\") it is discarded and the next
  value is returned; otherwise the first value is itself the payload, written
  by an older, header-less version of the schema."
  [data-input-stream]
  (binding [*read-eval* false] ; For `read-string` injection safety - NB!!!
    (let [head    (thaw-from-stream!* data-input-stream)
          header? (and (string? head)
                       (.startsWith ^String head "\u0000~"))]
      (if header?
        (thaw-from-stream!* data-input-stream)
        head))))
|
||||
|
||||
(defn thaw-from-bytes
  "Deserializes an object from given byte array. The array is Snappy-
  uncompressed first unless `compressed?` is given as false."
  ([ba] (thaw-from-bytes ba true))
  ([ba compressed?]
     (let [raw (if compressed? (Snappy/uncompress ba) ba)]
       (thaw-from-stream!
        (DataInputStream. (ByteArrayInputStream. raw))))))
|
||||
|
||||
(def stress-data
|
||||
"Reference data used for tests & benchmarks."
|
||||
{;; Breaks reader, roundtrip equality
|
||||
:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
|
||||
|
||||
:nil nil
|
||||
:boolean true
|
||||
|
||||
:char-utf8 \ಬ
|
||||
:string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
|
||||
:string-long (apply str (range 1000))
|
||||
:keyword :keyword
|
||||
|
||||
:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
|
||||
:list-quoted '(1 2 3 4 5 (6 7 8 (9 10)))
|
||||
:list-empty (list)
|
||||
:vector [1 2 3 4 5 [6 7 8 [9 10]]]
|
||||
:vector-empty []
|
||||
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
|
||||
:map-empty {}
|
||||
:set #{1 2 3 4 5 #{6 7 8 #{9 10}}}
|
||||
:set-empty #{}
|
||||
:meta (with-meta {:a :A} {:metakey :metaval})
|
||||
|
||||
;; Breaks reader
|
||||
:queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
|
||||
:queue-empty (PersistentQueue/EMPTY)
|
||||
|
||||
:coll (repeatedly 1000 rand)
|
||||
|
||||
:byte (byte 16)
|
||||
:short (short 42)
|
||||
:integer (int 3)
|
||||
:long (long 3)
|
||||
:bigint (bigint 31415926535897932384626433832795)
|
||||
|
||||
:float (float 3.14)
|
||||
:double (double 3.14)
|
||||
:bigdec (bigdec 3.1415926535897932384626433832795)
|
||||
|
||||
:ratio 22/7})
|
||||
13
src/taoensso/nippy/utils.clj
Normal file
13
src/taoensso/nippy/utils.clj
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
(ns taoensso.nippy.utils
|
||||
{:author "Peter Taoussanis"})
|
||||
|
||||
(defmacro case-eval
  "Like `case` but evaluates test constants for their compile-time value.

  `clauses` are test/result pairs, optionally followed by a single default
  expression. Each test form is `eval`ed at macro-expansion time and the
  result is used as the `case` constant; result expressions and the default
  are emitted unevaluated. When no default is supplied the expansion falls
  back to nil rather than throwing.

  Whether a default is present is decided purely by the clause count, so a
  literal `nil` or `false` default is treated as a default instead of being
  mistaken for a missing one and evaluated as a test constant."
  [e & clauses]
  (let [has-default? (odd? (count clauses))
        ;; Don't evaluate default expression!
        default      (when has-default? (last clauses))
        pairs        (if has-default? (butlast clauses) clauses)]
    `(case ~e
       ~@(map-indexed (fn [i form] (if (even? i) (eval form) form))
                      pairs)
       ~default)))
|
||||
|
|
@ -1,4 +1,10 @@
|
|||
(ns test-nippy.main
|
||||
(:use [clojure.test]))
|
||||
(:use [clojure.test])
|
||||
(:require [taoensso.nippy :as nippy]))
|
||||
|
||||
(deftest test-nothing)
|
||||
;; Remove stuff from stress-data that breaks roundtrip equality
|
||||
(def test-data (dissoc nippy/stress-data :bytes))
|
||||
|
||||
(def roundtrip (comp nippy/thaw-from-bytes nippy/freeze-to-bytes))
|
||||
|
||||
(deftest test-roundtrip (is (= test-data (roundtrip test-data))))
|
||||
Loading…
Reference in a new issue