nippy/src/taoensso/nippy.clj

302 lines
10 KiB
Clojure
Raw Normal View History

2012-07-06 19:12:59 +00:00
(ns taoensso.nippy
"Simple, high-performance Clojure serialization library. Adapted from
Deep-Freeze."
{:author "Peter Taoussanis"}
(:require [taoensso.nippy.utils :as utils])
(:import [java.io DataInputStream DataOutputStream ByteArrayOutputStream
ByteArrayInputStream]
[org.xerial.snappy Snappy]
[clojure.lang IPersistentList IPersistentVector IPersistentMap
IPersistentSet PersistentQueue IPersistentCollection Keyword
BigInt Ratio]))
;;;; Define type IDs
(def ^:const schema-header "\u0000~0.9.0")
;; 1
2012-07-06 19:12:59 +00:00
(def ^:const id-bytes (int 2))
(def ^:const id-nil (int 3))
(def ^:const id-boolean (int 4))
(def ^:const id-reader (int 5)) ; Fallback: *print-dup* pr-str output
2012-07-06 19:12:59 +00:00
(def ^:const id-char (int 10))
;; 11
2012-07-06 19:12:59 +00:00
(def ^:const id-keyword (int 12))
(def ^:const id-string (int 13))
2012-07-06 19:12:59 +00:00
(def ^:const id-list (int 20))
(def ^:const id-vector (int 21))
;; 22
2012-07-06 19:12:59 +00:00
(def ^:const id-set (int 23))
(def ^:const id-coll (int 24)) ; Fallback: non-specific collection
(def ^:const id-meta (int 25))
(def ^:const id-queue (int 26))
(def ^:const id-map (int 27))
(def ^:const id-byte (int 40))
(def ^:const id-short (int 41))
(def ^:const id-integer (int 42))
(def ^:const id-long (int 43))
(def ^:const id-bigint (int 44))
(def ^:const id-float (int 60))
(def ^:const id-double (int 61))
(def ^:const id-bigdec (int 62))
(def ^:const id-ratio (int 70))
2012-07-20 18:56:30 +00:00
;;; DEPRECATED (old types will be supported only for thawing)
(def ^:const id-old-reader (int 1)) ; as of 0.9.2, for +64k support
(def ^:const id-old-string (int 11)) ; as of 0.9.2, for +64k support
(def ^:const id-old-map (int 22)) ; as of 0.9.0, for more efficient thaw
2012-07-06 19:12:59 +00:00
;;;; Shared low-level stream stuff
(defn- write-id! [^DataOutputStream stream ^Integer id] (.writeByte stream id))
(defn- write-bytes!
"Writes arbitrary byte data, preceded by its length."
2012-07-06 19:12:59 +00:00
[^DataOutputStream stream ^bytes ba]
(let [size (alength ba)]
(.writeInt stream size) ; Encode size of byte array
(.write stream ba 0 size)))
(defn- write-biginteger!
"Wrapper around `write-bytes!` for common case of writing a BigInteger."
[^DataOutputStream stream ^BigInteger x]
(write-bytes! stream (.toByteArray x)))
2012-07-06 19:12:59 +00:00
(defn- read-bytes!
"Reads arbitrary byte data, preceded by its length."
2012-07-06 19:12:59 +00:00
^bytes [^DataInputStream stream]
(let [size (.readInt stream)
ba (byte-array size)]
(.read stream ba 0 size) ba))
(defn- read-biginteger!
"Wrapper around `read-bytes!` for common case of reading a BigInteger.
2012-07-06 19:12:59 +00:00
Note that as of Clojure 1.3, java.math.BigInteger clojure.lang.BigInt."
^BigInteger [^DataInputStream stream]
(BigInteger. (read-bytes! stream)))
;;;; Freezing
(defprotocol Freezable (freeze [this stream]))
(comment (meta '^:DataOutputStream s))
(defmacro freezer
"Helper to extend Freezable protocol."
[type id & body]
`(extend-type ~type
~'Freezable
(~'freeze [~'x ~(with-meta 's {:tag 'DataOutputStream})]
(write-id! ~'s ~id)
~@body)))
(defmacro coll-freezer
2012-07-07 12:08:42 +00:00
"Helper to extend Freezable protocol to simple collection types."
2012-07-06 19:12:59 +00:00
[type id & body]
`(freezer
~type ~id
(.writeInt ~'s (count ~'x)) ; Encode collection length
(doseq [i# ~'x] (freeze-to-stream!* ~'s i#))))
(freezer (Class/forName "[B") id-bytes (write-bytes! s x))
(freezer nil id-nil)
(freezer Boolean id-boolean (.writeBoolean s x))
(freezer Character id-char (.writeChar s (int x)))
(freezer String id-string (write-bytes! s (.getBytes x "UTF-8")))
2012-07-08 06:00:34 +00:00
(freezer Keyword id-keyword (.writeUTF s (if-let [ns (namespace x)]
(str ns "/" (name x))
(name x))))
2012-07-06 19:12:59 +00:00
(declare freeze-to-stream!*)
(coll-freezer IPersistentList id-list)
(coll-freezer IPersistentVector id-vector)
(freezer IPersistentMap id-map
(.writeInt s (* 2 (count x))) ; Encode num kvs
(doseq [[k v] x]
(freeze-to-stream!* s k)
(freeze-to-stream!* s v)))
(coll-freezer IPersistentSet id-set)
(coll-freezer PersistentQueue id-queue)
(coll-freezer IPersistentCollection id-coll) ; Must be LAST collection freezer!
(freezer Byte id-byte (.writeByte s x))
(freezer Short id-short (.writeShort s x))
(freezer Integer id-integer (.writeInt s x))
(freezer Long id-long (.writeLong s x))
(freezer BigInt id-bigint (write-biginteger! s (.toBigInteger x)))
(freezer BigInteger id-bigint (write-biginteger! s x))
2012-07-06 19:12:59 +00:00
(freezer Float id-float (.writeFloat s x))
(freezer Double id-double (.writeDouble s x))
(freezer BigDecimal id-bigdec
(write-biginteger! s (.unscaledValue x))
2012-07-06 19:12:59 +00:00
(.writeInt s (.scale x)))
(freezer Ratio id-ratio
(write-biginteger! s (.numerator x))
(write-biginteger! s (.denominator x)))
2012-07-06 19:12:59 +00:00
;; Use Clojure's own reader as final fallback
(freezer Object id-reader (write-bytes! s (.getBytes (pr-str x) "UTF-8")))
2012-07-06 19:12:59 +00:00
(defn- freeze-to-stream!* [^DataOutputStream s x]
(if-let [m (meta x)]
(do (write-id! s id-meta)
(freeze-to-stream!* s m)))
(freeze x s))
(defn freeze-to-stream!
"Serializes x to given output stream."
[data-output-stream x]
(binding [*print-dup* true] ; For `pr-str`
(freeze-to-stream!* data-output-stream schema-header)
(freeze-to-stream!* data-output-stream x)))
(defn freeze-to-bytes
"Serializes x to a byte array and returns the array."
(^bytes [x] (freeze-to-bytes x true))
(^bytes [x compress?]
(let [ba (ByteArrayOutputStream.)
stream (DataOutputStream. ba)]
(freeze-to-stream! stream x)
(let [ba (.toByteArray ba)]
(if compress? (Snappy/compress ba) ba)))))
;;;; Thawing
(declare thaw-from-stream!*)
(defn coll-thaw!
"Helper to thaw simple collection types."
[^DataInputStream s]
(repeatedly (.readInt s) (partial thaw-from-stream!* s)))
(defn- thaw-from-stream!*
[^DataInputStream s]
(let [type-id (.readByte s)]
(utils/case-eval
type-id
id-reader (read-string (String. (read-bytes! s) "UTF-8"))
2012-07-06 19:12:59 +00:00
id-bytes (read-bytes! s)
id-nil nil
id-boolean (.readBoolean s)
id-char (.readChar s)
id-string (String. (read-bytes! s) "UTF-8")
2012-07-06 19:12:59 +00:00
id-keyword (keyword (.readUTF s))
id-list (apply list (coll-thaw! s))
id-vector (into [] (coll-thaw! s))
id-set (into #{} (coll-thaw! s))
id-map (apply hash-map (coll-thaw! s))
id-coll (doall (coll-thaw! s))
id-queue (into (PersistentQueue/EMPTY) (coll-thaw! s))
id-meta (let [m (thaw-from-stream!* s)] (with-meta (thaw-from-stream!* s) m))
id-byte (.readByte s)
id-short (.readShort s)
id-integer (.readInt s)
id-long (.readLong s)
id-bigint (bigint (read-biginteger! s))
id-float (.readFloat s)
id-double (.readDouble s)
id-bigdec (BigDecimal. (read-biginteger! s) (.readInt s))
id-ratio (/ (bigint (read-biginteger! s))
(bigint (read-biginteger! s)))
;;; DEPRECATED
id-old-reader (read-string (.readUTF s))
id-old-string (.readUTF s)
id-old-map (apply hash-map (repeatedly (* 2 (.readInt s))
(partial thaw-from-stream!* s)))
2012-07-06 19:12:59 +00:00
(throw (Exception. (str "Failed to thaw unknown type ID: " type-id))))))
;; TODO Scheduled for Carmine version 1.0.0
;; (defn thaw-from-stream!
;; "Deserializes an object from given input stream."
;; [data-input-stream]
;; (binding [*read-eval* false] ; For `read-string` injection safety - NB!!!
;; (let [schema-header (thaw-from-stream!* data-input-stream)]
;; (thaw-from-stream!* data-input-stream))))
;; DEPRECATED: Includes temporary support for older versions of serialization
;; schema that didn't include a version header. This is for people that used
;; Carmine < 0.8.3 and haven't yet migrated their databases.
(defn thaw-from-stream!
"Deserializes an object from given input stream."
[data-input-stream]
(binding [*read-eval* false] ; For `read-string` injection safety - NB!!!
(let [maybe-schema-header (thaw-from-stream!* data-input-stream)]
(if (and (string? maybe-schema-header)
(.startsWith ^String maybe-schema-header "\u0000~"))
(thaw-from-stream!* data-input-stream)
maybe-schema-header))))
(defn thaw-from-bytes
"Deserializes an object from given byte array."
([ba] (thaw-from-bytes ba true))
([ba compressed?]
(->> (if compressed? (Snappy/uncompress ba) ba)
(ByteArrayInputStream.)
(DataInputStream.)
(thaw-from-stream!))))
(def stress-data
"Reference data used for tests & benchmarks."
{;; Breaks reader, roundtrip equality
2012-07-07 12:08:42 +00:00
:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
2012-07-06 19:12:59 +00:00
:nil nil
:boolean true
:char-utf8 \ಬ
:string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
:string-long (apply str (range 1000))
:keyword :keyword
2012-07-08 11:34:45 +00:00
:keyword-ns ::keyword
2012-07-06 19:12:59 +00:00
:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
:list-quoted '(1 2 3 4 5 (6 7 8 (9 10)))
:list-empty (list)
:vector [1 2 3 4 5 [6 7 8 [9 10]]]
:vector-empty []
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
:map-empty {}
:set #{1 2 3 4 5 #{6 7 8 #{9 10}}}
:set-empty #{}
:meta (with-meta {:a :A} {:metakey :metaval})
;; Breaks reader
:queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
:queue-empty (PersistentQueue/EMPTY)
:coll (repeatedly 1000 rand)
:byte (byte 16)
:short (short 42)
:integer (int 3)
:long (long 3)
:bigint (bigint 31415926535897932384626433832795)
:float (float 3.14)
:double (double 3.14)
:bigdec (bigdec 3.1415926535897932384626433832795)
2012-07-08 05:49:22 +00:00
:ratio 22/7
;; Clojure 1.4+
;; :tagged-uuid (java.util.UUID/randomUUID)
;; :tagged-date (java.util.Date.)
})