nippy/src/taoensso/nippy.clj

940 lines
36 KiB
Clojure
Raw Normal View History

2012-07-06 19:12:59 +00:00
(ns taoensso.nippy
2015-04-19 03:48:01 +00:00
"High-performance JVM Clojure serialization library. Originally adapted from
2015-09-17 03:55:09 +00:00
Deep-Freeze (https://goo.gl/OePPGr)."
{:author "Peter Taoussanis (@ptaoussanis)"}
2015-09-29 07:30:25 +00:00
(:require [taoensso.encore :as enc]
[taoensso.nippy
(utils :as utils)
(compression :as compression)
(encryption :as encryption)])
2013-10-19 05:50:21 +00:00
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
DataOutputStream Serializable ObjectOutputStream ObjectInputStream
DataOutput DataInput]
2013-08-06 16:56:43 +00:00
[java.lang.reflect Method]
[java.util Date UUID]
[clojure.lang Keyword BigInt Ratio
APersistentMap APersistentVector APersistentSet
IPersistentMap ; IPersistentVector IPersistentSet IPersistentList
PersistentQueue PersistentTreeMap PersistentTreeSet PersistentList ; LazySeq
IRecord ISeq]))
2012-07-06 19:12:59 +00:00
2015-09-29 07:30:25 +00:00
(if (vector? enc/encore-version)
(enc/assert-min-encore-version [2 16 0])
(enc/assert-min-encore-version 2.16))
2015-02-18 10:22:37 +00:00
;;;; Nippy data format
2015-09-28 09:25:43 +00:00
;; * 4-byte header (Nippy v2.x+) (may be disabled but incl. by default) [1]
;; { * 1-byte type id.
;; * Arb-length payload. } ...
;;
;; [1] Inclusion of header is *strongly* recommended. Purpose:
2015-09-28 09:25:43 +00:00
;; * Sanity check (confirm that data appears to be Nippy data)
;; * Nippy version check (=> supports changes to data schema over time)
;; * Supports :auto thaw compressor, encryptor
2015-04-19 03:48:01 +00:00
;; * Supports :auto freeze compressor (since this depends on :auto thaw
2015-09-28 09:25:43 +00:00
;; compressor)
;;
(def ^:private ^:const head-version 1)
2013-06-13 15:40:44 +00:00
(def ^:private head-sig (.getBytes "NPY" "UTF-8"))
2015-09-28 09:25:43 +00:00
(def ^:private ^:const head-meta "Final byte stores version-dependent metadata"
{(byte 0) {:version 1 :compressor-id nil :encryptor-id nil}
(byte 4) {:version 1 :compressor-id nil :encryptor-id :else}
(byte 5) {:version 1 :compressor-id :else :encryptor-id nil}
(byte 6) {:version 1 :compressor-id :else :encryptor-id :else}
;;
(byte 2) {:version 1 :compressor-id nil :encryptor-id :aes128-sha512}
;;
(byte 1) {:version 1 :compressor-id :snappy :encryptor-id nil}
(byte 3) {:version 1 :compressor-id :snappy :encryptor-id :aes128-sha512}
(byte 7) {:version 1 :compressor-id :snappy :encryptor-id :else}
;;
2015-04-19 03:48:01 +00:00
;;; :lz4 used for both lz4 and lz4hc compressor (the two are compatible)
(byte 8) {:version 1 :compressor-id :lz4 :encryptor-id nil}
(byte 9) {:version 1 :compressor-id :lz4 :encryptor-id :aes128-sha512}
(byte 10) {:version 1 :compressor-id :lz4 :encryptor-id :else}
;;
(byte 11) {:version 1 :compressor-id :lzma2 :encryptor-id nil}
(byte 12) {:version 1 :compressor-id :lzma2 :encryptor-id :aes128-sha512}
(byte 13) {:version 1 :compressor-id :lzma2 :encryptor-id :else}})
2013-06-12 18:14:46 +00:00
2015-09-29 13:10:09 +00:00
(defmacro when-debug [& body] (when #_true false `(do ~@body)))
2013-10-31 06:15:22 +00:00
2013-06-12 18:14:46 +00:00
;;;; Data type IDs
2012-07-06 19:12:59 +00:00
(do
;; ** Negative ids reserved for user-defined types **
;;
2015-09-29 12:59:53 +00:00
(def ^:const id-reserved (byte 0))
;; 1 ; Deprecated
(def ^:const id-bytes (byte 2))
(def ^:const id-nil (byte 3))
(def ^:const id-boolean (byte 4))
(def ^:const id-reader (byte 5)) ; Fallback #2
(def ^:const id-serializable (byte 6)) ; Fallback #1
(def ^:const id-char (byte 10))
;; 11 ; Deprecated
; ; 12 ; Deprecated
(def ^:const id-string (byte 13))
(def ^:const id-keyword (byte 14))
(def ^:const id-list (byte 20))
2015-09-29 13:10:09 +00:00
(def ^:const id-vec (byte 21))
2015-09-29 12:59:53 +00:00
;; 22 ; Deprecated
(def ^:const id-set (byte 23))
(def ^:const id-seq (byte 24))
(def ^:const id-meta (byte 25))
(def ^:const id-queue (byte 26))
;; 27 ; Deprecated
(def ^:const id-sorted-set (byte 28))
;; 29 ; Deprecated
(def ^:const id-map (byte 30))
(def ^:const id-sorted-map (byte 31))
(def ^:const id-byte (byte 40))
(def ^:const id-short (byte 41))
(def ^:const id-integer (byte 42))
(def ^:const id-long (byte 43))
(def ^:const id-bigint (byte 44))
(def ^:const id-biginteger (byte 45))
(def ^:const id-float (byte 60))
(def ^:const id-double (byte 61))
(def ^:const id-bigdec (byte 62))
(def ^:const id-ratio (byte 70))
(def ^:const id-record (byte 80))
;; (def ^:const id-type (byte 81)) ; TODO?
(def ^:const id-prefixed-custom (byte 82))
(def ^:const id-date (byte 90))
(def ^:const id-uuid (byte 91))
2014-01-22 07:37:38 +00:00
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
2015-09-29 12:59:53 +00:00
(def ^:const id-byte-as-long (byte 100)) ; 1 vs 8 bytes
(def ^:const id-short-as-long (byte 101)) ; 2 vs 8 bytes
(def ^:const id-int-as-long (byte 102)) ; 4 vs 8 bytes
2014-01-22 08:42:57 +00:00
;;
2015-09-29 12:59:53 +00:00
(def ^:const id-sm-string (byte 105)) ; 1 vs 4 byte length prefix
(def ^:const id-sm-keyword (byte 106)) ; ''
2014-01-22 08:42:57 +00:00
;;
2015-09-29 13:10:09 +00:00
(def ^:const id-sm-vec (byte 110)) ; ''
2015-09-29 12:59:53 +00:00
(def ^:const id-sm-set (byte 111)) ; ''
(def ^:const id-sm-map (byte 112)) ; ''
;;
;; TODO Additional optimizations (types) for 2-vecs and 3-vecs?
2014-01-22 08:42:57 +00:00
2014-01-22 07:37:38 +00:00
;;; DEPRECATED (old types will be supported only for thawing)
2015-09-29 12:59:53 +00:00
(def ^:const id-reader-depr1 (byte 1)) ; v0.9.2+ for +64k support
(def ^:const id-string-depr1 (byte 11)) ; v0.9.2+ for +64k support
(def ^:const id-map-depr1 (byte 22)) ; v0.9.0+ for more efficient thaw
(def ^:const id-keyword-depr1 (byte 12)) ; v2.0.0-alpha5+ for str consistecy
(def ^:const id-map-depr2 (byte 27)) ; v2.11+ for count/2
(def ^:const id-sorted-map-depr1 (byte 29)) ; v2.11+ for count/2
2014-01-22 07:37:38 +00:00
)
;;;; Ns imports (mostly for convenience of lib consumers)
(do
(enc/defalias compress compression/compress)
(enc/defalias decompress compression/decompress)
(enc/defalias snappy-compressor compression/snappy-compressor)
(enc/defalias lzma2-compressor compression/lzma2-compressor)
(enc/defalias lz4-compressor compression/lz4-compressor)
(enc/defalias lz4hc-compressor compression/lz4hc-compressor)
(enc/defalias encrypt encryption/encrypt)
(enc/defalias decrypt encryption/decrypt)
(enc/defalias aes128-encryptor encryption/aes128-encryptor)
(enc/defalias freezable? utils/freezable?))
;;;; Freezing
(defprotocol Freezable
"Implementation detail. Be careful about extending to interfaces,
Ref. http://goo.gl/6gGRlU."
(-freeze-to-out [this out]))
(defmacro write-id [out id] `(.writeByte ~out ~id))
(defn write-bytes [^DataOutput out ^bytes ba]
(let [len (alength ba)]
(.writeInt out len)
(.write out ba 0 len)))
(defn write-sm-bytes [^DataOutput out ^bytes ba]
(let [len (alength ba)]
(byte len) ; Safety check
(.writeByte out len)
(.write out ba 0 len)))
2015-09-29 13:10:09 +00:00
(defn write-biginteger [out ^BigInteger n] (write-bytes out (.toByteArray n)))
(defn write-utf8 [out ^String s] (write-bytes out (.getBytes s "UTF-8")))
(defn write-sm-utf8 [out ^String s] (write-sm-bytes out (.getBytes s "UTF-8")))
(defn byte-sized? [^long n] (<= n 127 #_Byte/MAX_VALUE))
(defn short-sized? [^long n] (<= n 32767 #_Short/MAX_VALUE))
(defn write-ided-bytes [^DataOutput out id-sm id ^bytes ba]
(if (byte-sized? (alength ba))
(do (write-id out id-sm)
(write-sm-bytes out ba))
(do (write-id out id)
(write-bytes out ba))))
2015-09-29 13:10:09 +00:00
(defn write-ided-string [out ^String s]
(write-ided-bytes out id-sm-string id-string (.getBytes s "UTF-8")))
2015-09-29 13:10:09 +00:00
(defn write-ided-keyword [out kw]
(let [^String s (if-let [ns (namespace kw)] (str ns "/" (name kw)) (name kw))]
(write-ided-bytes out id-sm-keyword id-keyword (.getBytes s "UTF-8"))))
(defn write-ided-long [^DataOutput out ^long n]
(cond
2015-09-29 13:10:09 +00:00
(and (<= n 127 #_Byte/MAX_VALUE)
(>= n -128 #_Byte/MIN_VALUE))
(do (write-id out id-byte-as-long)
(.writeByte out n))
2015-09-29 13:10:09 +00:00
(and (<= n 32767 #_Short/MAX_VALUE)
(>= n -32768 #_Short/MIN_VALUE))
(do (write-id out id-short-as-long)
(.writeShort out n))
2015-09-29 13:05:41 +00:00
(and (<= n 2147483647 #_Integer/MAX_VALUE)
2015-09-29 13:10:09 +00:00
(>= -2147483648 #_Integer/MIN_VALUE))
(do (write-id out id-int-as-long)
(.writeInt out n))
:else
(do (write-id out id-long)
(.writeLong out n))))
(defn freeze-to-out!
"Serializes arg (any Clojure data type) to a DataOutput"
;; Basically just wraps `-freeze-to-out` with different arg order + metadata support
[^DataOutput data-output x]
(when-let [m (meta x)]
(write-id data-output id-meta)
(-freeze-to-out m data-output))
(-freeze-to-out x data-output))
2012-07-06 19:12:59 +00:00
(defn write-ided-coll [^DataOutput out ?id-sm id coll]
(if (counted? coll)
(let [cnt (count coll)]
(if (and ?id-sm (byte-sized? cnt))
(do (write-id out ?id-sm)
(.writeByte out cnt))
(do (write-id out id)
(.writeInt out cnt)))
(enc/run!* (fn [in] (freeze-to-out! out in)) coll))
(let [bas (ByteArrayOutputStream. 64)
sout (DataOutputStream. bas)
cnt (reduce (fn [^long cnt in]
(freeze-to-out! sout in)
(unchecked-inc cnt))
0 coll)
ba (.toByteArray bas)]
(if (and ?id-sm (byte-sized? cnt))
(do (write-id out ?id-sm)
(.writeByte out cnt))
(do (write-id out id)
(.writeInt out cnt)))
(.write out ba 0 (alength ba)))))
(defn write-ided-kvs [^DataOutput out ?id-sm id coll]
(let [cnt (count coll)]
(if (and ?id-sm (byte-sized? cnt))
(do (write-id out ?id-sm)
(.writeByte out cnt))
(do (write-id out id)
(.writeInt out cnt)))
(enc/run-kv!
(fn [k v]
(freeze-to-out! out k)
(freeze-to-out! out v))
coll)))
2015-09-29 13:10:09 +00:00
(defn write-ided-vec [out v] (write-ided-coll out id-sm-vec id-vec v))
(defn write-ided-set [out s] (write-ided-coll out id-sm-set id-set s))
(defn write-ided-map [out m] (write-ided-kvs out id-sm-map id-map m))
(defmacro ^:private freezer* [type & body]
`(extend-type ~type
Freezable
(~'-freeze-to-out [~'x ~(with-meta 'out {:tag 'DataOutput})]
~@body)))
(defmacro ^:private freezer [type id & body]
`(extend-type ~type
Freezable
(~'-freeze-to-out [~'x ~(with-meta 'out {:tag 'DataOutput})]
(write-id ~'out ~id)
~@body)))
2012-07-06 19:12:59 +00:00
(freezer nil id-nil)
(freezer (Class/forName "[B") id-bytes (write-bytes out x))
(freezer Boolean id-boolean (.writeBoolean out x))
(freezer Character id-char (.writeChar out (int x)))
(freezer* String (write-ided-string out x))
(freezer* Keyword (write-ided-keyword out x))
2015-09-29 13:10:09 +00:00
(freezer* PersistentQueue (write-ided-coll out nil id-queue x))
(freezer* PersistentTreeSet (write-ided-coll out nil id-sorted-set x))
(freezer* PersistentTreeMap (write-ided-kvs out nil id-sorted-map x))
(freezer* APersistentMap (write-ided-kvs out id-sm-map id-map x))
(freezer* APersistentVector (write-ided-coll out id-sm-vec id-vec x))
(freezer* APersistentSet (write-ided-coll out id-sm-set id-set x))
;; No APersistentList:
2015-09-29 13:10:09 +00:00
(freezer* PersistentList (write-ided-coll out nil id-list x))
(freezer* (type '()) (write-ided-coll out nil id-list x))
;; Nb low-level interface!! Acts as fallback for seqs that don't have a
;; concrete implementation. Will conflict with any other coll interfaces!
2015-09-29 13:10:09 +00:00
(freezer* ISeq (write-ided-coll out nil id-seq x))
(freezer IRecord id-record
(write-utf8 out (.getName (class x))) ; Reflect
2015-09-29 13:10:09 +00:00
(-freeze-to-out (into {} x) out))
2014-01-22 07:14:26 +00:00
(freezer Byte id-byte (.writeByte out x))
(freezer Short id-short (.writeShort out x))
(freezer Integer id-integer (.writeInt out x))
(freezer* Long (write-ided-long out x))
(freezer BigInt id-bigint (write-biginteger out (.toBigInteger x)))
(freezer BigInteger id-biginteger (write-biginteger out x))
(freezer Float id-float (.writeFloat out x))
(freezer Double id-double (.writeDouble out x))
(freezer BigDecimal id-bigdec
(write-biginteger out (.unscaledValue x))
(.writeInt out (.scale x)))
2012-07-06 19:12:59 +00:00
(freezer Ratio id-ratio
(write-biginteger out (.numerator x))
(write-biginteger out (.denominator x)))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(freezer Date id-date (.writeLong out (.getTime x)))
(freezer UUID id-uuid
(.writeLong out (.getMostSignificantBits x))
(.writeLong out (.getLeastSignificantBits x)))
2015-09-29 07:30:25 +00:00
(enc/defonce* ^:dynamic *final-freeze-fallback* nil)
(defn freeze-fallback-as-str [out x]
2015-09-29 07:30:25 +00:00
(-freeze-to-out {:nippy/unfreezable (enc/pr-edn x) :type (type x)} out))
(comment
(require '[clojure.core.async :as async])
(binding [*final-freeze-fallback* freeze-fallback-as-str]
(-> (async/chan) (freeze) (thaw))))
;; Fallbacks. Note that we'll extend *only* to (lowly) Object to prevent
;; interfering with higher-level implementations, Ref. http://goo.gl/6f7SKl
(extend-type Object
Freezable
(-freeze-to-out [x ^DataOutput out]
(cond
(utils/serializable? x) ; Fallback #1: Java's Serializable interface
2015-09-29 13:10:09 +00:00
(do (when-debug (println (format "DEBUG - Serializable fallback: %s" (type x))))
(write-id out id-serializable)
2014-01-22 07:14:26 +00:00
(write-utf8 out (.getName (class x))) ; Reflect
(.writeObject (ObjectOutputStream. out) x))
(utils/readable? x) ; Fallback #2: Clojure's Reader
2015-09-29 13:10:09 +00:00
(do (when-debug (println (format "DEBUG - Reader fallback: %s" (type x))))
(write-id out id-reader)
2015-09-29 07:30:25 +00:00
(write-utf8 out (enc/pr-edn x)))
:else ; Fallback #3: *final-freeze-fallback*
(if-let [ffb *final-freeze-fallback*]
(ffb x out)
(throw (ex-info (format "Unfreezable type: %s %s" (type x) (str x))
{:type (type x)
2015-09-29 07:30:25 +00:00
:as-str (enc/pr-edn x)}))))))
2013-08-07 09:19:11 +00:00
2013-06-13 15:40:44 +00:00
(def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))
(def ^:private get-head-ba
(memoize
(fn [head-meta]
(when-let [meta-id (get head-meta-id (assoc head-meta :version head-version))]
2015-09-29 07:30:25 +00:00
(enc/ba-concat head-sig (byte-array [meta-id]))))))
2013-06-13 15:40:44 +00:00
(defn- wrap-header [data-ba head-meta]
(if-let [head-ba (get-head-ba head-meta)]
2015-09-29 07:30:25 +00:00
(enc/ba-concat head-ba data-ba)
(throw (ex-info (format "Unrecognized header meta: %s" head-meta)
{:head-meta head-meta}))))
2013-06-13 15:40:44 +00:00
(comment (wrap-header (.getBytes "foo") {:compressor-id :lz4
:encryptor-id nil}))
2015-04-19 03:48:01 +00:00
(defn default-freeze-compressor-selector
"Strategy:
* Prioritize speed, but allow lz4.
* Skip lz4 unless it's likely that lz4's space benefit will outweigh its
space overhead."
[^bytes ba]
(let [ba-len (alength ba)]
(cond
2015-09-28 09:25:43 +00:00
;; (> ba-len 8192) lzma2-compressor
;; (> ba-len 4098) lz4hc-compressor
(> ba-len 1024) lz4-compressor
2015-04-19 03:48:01 +00:00
:else nil)))
2015-09-29 07:30:25 +00:00
(enc/defonce* ^:dynamic *default-freeze-compressor-selector*
"(fn selector [^bytes ba])->compressor used by `(freeze <x> {:compressor :auto})."
default-freeze-compressor-selector)
(defn set-default-freeze-compressor-selector!
2015-09-28 09:25:43 +00:00
"Sets root binding of `*default-freeze-compressor-selector*`"
[selector]
(alter-var-root #'*default-freeze-compressor-selector* (constantly selector)))
(defn freeze
"Serializes arg (any Clojure data type) to a byte array. To freeze custom
types, extend the Clojure reader or see `extend-freeze`."
2015-09-26 04:31:49 +00:00
(^bytes [x] (freeze x nil))
(^bytes [x {:keys [compressor encryptor password skip-header?]
:or {compressor :auto
2015-09-29 13:10:09 +00:00
encryptor aes128-encryptor}}]
(let [encryptor (when password encryptor)
zero-copy-mode? (and (nil? compressor) (nil? encryptor))
baos (ByteArrayOutputStream. 64)
2015-09-26 04:31:49 +00:00
dos (DataOutputStream. baos)]
(if zero-copy-mode?
(do ; Optimized case
(when-not skip-header? ; Avoid `wrap-header`'s array copy:
(let [head-ba (get-head-ba {:compressor-id nil :encryptor-id nil})]
(.write dos head-ba 0 4)))
(freeze-to-out! dos x)
(.toByteArray baos))
(do
(freeze-to-out! dos x)
(let [ba (.toByteArray baos)
compressor
(if (identical? compressor :auto)
(if skip-header?
lz4-compressor
(*default-freeze-compressor-selector* ba))
(if (fn? compressor)
(compressor ba) ; Assume compressor selector fn
compressor ; Assume compressor
))
ba (if compressor (compress compressor ba) ba)
ba (if encryptor (encrypt encryptor password ba) ba)]
(if skip-header?
ba
(wrap-header ba
{:compressor-id (when-let [c compressor]
(or (compression/standard-header-ids
(compression/header-id c)) :else))
:encryptor-id (when-let [e encryptor]
(or (encryption/standard-header-ids
(encryption/header-id e)) :else))}))))))))
2012-07-06 19:12:59 +00:00
;;;; Thawing
(declare thaw-from-in!)
(defn read-bytes ^bytes [^DataInput in]
2015-09-29 13:10:09 +00:00
(let [len (.readInt in)
ba (byte-array len)]
(.readFully in ba 0 len)
ba))
(defn read-sm-bytes ^bytes [^DataInput in]
2015-09-29 13:10:09 +00:00
(let [len (.readByte in)
ba (byte-array len)]
(.readFully in ba 0 len)
ba))
(defn read-biginteger ^BigInteger [^DataInput in] (BigInteger. (read-bytes in)))
(defn read-utf8 ^String [^DataInput in] (String. (read-bytes in) "UTF-8"))
(defn read-sm-utf8 ^String [^DataInput in] (String. (read-sm-bytes in) "UTF-8"))
(defn read-coll [^DataInput in to-coll]
(enc/repeatedly-into to-coll (.readInt in) (fn [] (thaw-from-in! in))))
(defn read-sm-coll [^DataInput in to-coll]
(enc/repeatedly-into to-coll (.readByte in) (fn [] (thaw-from-in! in))))
(defn read-kvs [^DataInput in to-coll]
(enc/repeatedly-into to-coll (.readInt in)
(fn [] [(thaw-from-in! in) (thaw-from-in! in)])))
(defn read-sm-kvs [^DataInput in to-coll]
(enc/repeatedly-into to-coll (.readByte in)
(fn [] [(thaw-from-in! in) (thaw-from-in! in)])))
(defn read-kvs-depr1 [^DataInput in to-coll]
(enc/repeatedly-into to-coll (quot (.readInt in) 2)
(fn [] [(thaw-from-in! in) (thaw-from-in! in)])))
2015-04-17 12:33:55 +00:00
2015-05-29 07:13:35 +00:00
(def ^:private class-method-sig (into-array Class [IPersistentMap]))
2015-09-29 07:30:25 +00:00
(enc/defonce* ^:dynamic *custom-readers* "{<hash-or-byte-id> (fn [data-input])}" nil)
2015-06-01 04:53:55 +00:00
(defn swap-custom-readers! [f] (alter-var-root #'*custom-readers* f))
(defn- read-custom! [type-id in]
2015-06-01 04:07:50 +00:00
(if-let [custom-reader (get *custom-readers* type-id)]
(try
(custom-reader in)
(catch Exception e
(throw
(ex-info
(format "Reader exception for custom type with internal id: %s"
type-id) {:internal-type-id type-id} e))))
(throw
(ex-info
(format "No reader provided for custom type with internal id: %s"
type-id)
{:internal-type-id type-id}))))
(defn thaw-from-in!
"Deserializes a frozen object from given DataInput to its original Clojure
data type"
[^DataInput data-input]
(let [in data-input
type-id (.readByte in)]
2015-09-29 13:10:09 +00:00
(when-debug (println (format "DEBUG - thawing type-id: %s" type-id)))
(try
2015-09-29 07:30:25 +00:00
(enc/case-eval type-id
id-reader
2014-01-22 07:14:26 +00:00
(let [edn (read-utf8 in)]
(try
2015-09-29 07:30:25 +00:00
(enc/read-edn {:readers *data-readers*} edn)
(catch Exception e
{:type :reader
:throwable e
:nippy/unthawable edn})))
id-serializable
2014-01-22 07:14:26 +00:00
(let [class-name (read-utf8 in)]
(try
(let [content (.readObject (ObjectInputStream. in))]
(try
(let [class (Class/forName class-name)]
(cast class content))
(catch Exception e
{:type :serializable
:throwable e
:nippy/unthawable {:class-name class-name :content content}})))
(catch Exception e
{:type :serializable
:throwable e
:nippy/unthawable {:class-name class-name :content nil}})))
id-record
(let [class-name (read-utf8 in)
content (thaw-from-in! in)]
(try
(let [class (Class/forName class-name)
method (.getMethod class "create" class-method-sig)]
(.invoke method class (into-array Object [content])))
(catch Exception e
{:type :record
:throwable e
:nippy/unthawable {:class-name class-name :content content}})))
id-nil nil
id-bytes (read-bytes in)
2014-01-22 07:14:26 +00:00
id-boolean (.readBoolean in)
id-char (.readChar in)
2015-09-28 03:17:07 +00:00
id-string (read-utf8 in)
2014-01-22 07:14:26 +00:00
id-keyword (keyword (read-utf8 in))
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
id-sm-string (read-sm-utf8 in)
id-sm-keyword (keyword (read-sm-utf8 in))
2014-01-22 08:42:57 +00:00
2014-01-22 07:14:26 +00:00
id-queue (read-coll in (PersistentQueue/EMPTY))
id-sorted-set (read-coll in (sorted-set))
id-sorted-map (read-kvs in (sorted-map))
2015-09-29 13:10:09 +00:00
id-vec (read-coll in [])
id-sm-vec (read-sm-coll in [])
id-set (read-coll in #{})
id-sm-set (read-sm-coll in #{})
id-map (read-kvs in {})
id-sm-map (read-sm-kvs in {})
id-list (into '() (rseq (read-coll in [])))
id-seq (or (seq (read-coll in []))
(lazy-seq nil) ; Empty coll
)
id-meta (let [m (thaw-from-in! in)] (with-meta (thaw-from-in! in) m))
2014-01-22 07:14:26 +00:00
id-byte (.readByte in)
id-short (.readShort in)
id-integer (.readInt in)
id-long (.readLong in)
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
id-byte-as-long (long (.readByte in))
id-short-as-long (long (.readShort in))
id-int-as-long (long (.readInt in))
id-bigint (bigint (read-biginteger in))
id-biginteger (read-biginteger in)
2014-01-22 07:14:26 +00:00
id-float (.readFloat in)
id-double (.readDouble in)
id-bigdec (BigDecimal. (read-biginteger in) (.readInt in))
2015-09-17 07:05:00 +00:00
id-ratio (clojure.lang.Ratio.
(read-biginteger in)
(read-biginteger in))
2014-01-22 07:14:26 +00:00
id-date (Date. (.readLong in))
id-uuid (UUID. (.readLong in) (.readLong in))
;;; DEPRECATED
id-sorted-map-depr1 (read-kvs-depr1 in (sorted-map))
id-map-depr2 (read-kvs-depr1 in {})
2015-09-29 07:30:25 +00:00
id-reader-depr1 (enc/read-edn (.readUTF in))
id-string-depr1 (.readUTF in)
id-keyword-depr1 (keyword (.readUTF in))
id-map-depr1 (apply hash-map
(enc/repeatedly-into [] (* 2 (.readInt in))
(fn [] (thaw-from-in! in))))
id-prefixed-custom ; Prefixed custom type
(let [hash-id (.readShort in)]
(read-custom! hash-id in))
(read-custom! type-id in) ; Unprefixed custom type (catchall)
)
(catch Exception e
(throw (ex-info (format "Thaw failed against type-id: %s" type-id)
{:type-id type-id} e))))))
2012-07-06 19:12:59 +00:00
2013-06-13 15:40:44 +00:00
(defn- try-parse-header [ba]
2015-09-29 07:30:25 +00:00
(when-let [[head-ba data-ba] (enc/ba-split ba 4)]
(let [[head-sig* [meta-id]] (enc/ba-split head-ba 3)]
(when (enc/ba= head-sig* head-sig) ; Header appears to be well-formed
[data-ba (get head-meta meta-id {:unrecognized-meta? true})]))))
(defn- get-auto-compressor [compressor-id]
(case compressor-id
nil nil
:snappy snappy-compressor
:lzma2 lzma2-compressor
:lz4 lz4-compressor
:no-header (throw (ex-info ":auto not supported on headerless data." {}))
:else (throw (ex-info ":auto not supported for non-standard compressors." {}))
(throw (ex-info (format "Unrecognized :auto compressor id: %s" compressor-id)
{:compressor-id compressor-id}))))
(defn- get-auto-encryptor [encryptor-id]
(case encryptor-id
nil nil
:aes128-sha512 aes128-encryptor
:no-header (throw (ex-info ":auto not supported on headerless data." {}))
2014-11-03 09:43:41 +00:00
:else (throw (ex-info ":auto not supported for non-standard encryptors." {}))
(throw (ex-info (format "Unrecognized :auto encryptor id: %s" encryptor-id)
{:encryptor-id encryptor-id}))))
2013-06-13 15:40:44 +00:00
(defn thaw
"Deserializes a frozen object from given byte array to its original Clojure
data type. Supports data frozen with current and all previous versions of
Nippy. To thaw custom types, extend the Clojure reader or see `extend-thaw`.
Options include:
2015-09-28 09:25:43 +00:00
:compressor - An ICompressor, :auto (requires Nippy header), or nil
:encryptor - An IEncryptor, :auto (requires Nippy header), or nil"
2015-09-26 04:31:49 +00:00
([ba] (thaw ba nil))
([^bytes ba
{:keys [v1-compatibility? compressor encryptor password]
:or {v1-compatibility? true ; Recommend disabling when possible
compressor :auto
encryptor :auto}
:as opts}]
(assert (not (:headerless-meta opts))
":headerless-meta `thaw` opt removed in Nippy v2.7+")
(let [ex (fn [msg & [e]] (throw (ex-info (format "Thaw failed: %s" msg)
{:opts (merge opts
{:compressor compressor
:encryptor encryptor})}
e)))
thaw-data
(fn [data-ba compressor-id encryptor-id]
(let [compressor (if (identical? compressor :auto)
(get-auto-compressor compressor-id)
compressor)
encryptor (if (identical? encryptor :auto)
(get-auto-encryptor encryptor-id)
encryptor)]
(when (and encryptor (not password))
(ex "Password required for decryption."))
(try
(let [ba data-ba
ba (if encryptor (decrypt encryptor password ba) ba)
ba (if compressor (decompress compressor ba) ba)
dis (DataInputStream. (ByteArrayInputStream. ba))]
(thaw-from-in! dis))
(catch Exception e
(ex "Decryption/decompression failure, or data unfrozen/damaged."
e)))))
;; This is hackish and can actually currently result in JVM core dumps
;; due to buggy Snappy behaviour, Ref. http://goo.gl/mh7Rpy.
thaw-nippy-v1-data
(fn [data-ba]
(if-not v1-compatibility?
(throw (Exception. "v1 compatibility disabled"))
(try (thaw-data data-ba :snappy nil)
(catch Exception _
2015-09-26 04:31:49 +00:00
(thaw-data data-ba nil nil)))))]
(if-let [[data-ba {:keys [compressor-id encryptor-id unrecognized-meta?]
:as head-meta}] (try-parse-header ba)]
2015-09-26 04:31:49 +00:00
;; A well-formed header _appears_ to be present (it's possible though
;; unlikely that this is a fluke and data is actually headerless):
(try (thaw-data data-ba compressor-id encryptor-id)
(catch Exception e
(try (thaw-nippy-v1-data data-ba)
(catch Exception _
(if unrecognized-meta?
(ex "Unrecognized (but apparently well-formed) header. Data frozen with newer Nippy version?"
e)
(throw e))))))
;; Well-formed header definitely not present
(try (thaw-nippy-v1-data ba)
(catch Exception _
(thaw-data ba :no-header :no-header)))))))
(comment (thaw (freeze "hello"))
(thaw (freeze "hello" {:compressor nil}))
(thaw (freeze "hello" {:password [:salted "p"]})) ; ex: no pwd
(thaw (freeze "hello") {:password [:salted "p"]}))
2012-07-06 19:12:59 +00:00
;;;; Custom types
(defn- assert-custom-type-id [custom-type-id]
(assert (or (keyword? custom-type-id)
(and (integer? custom-type-id) (<= 1 custom-type-id 128)))))
2014-07-06 06:47:38 +00:00
(defn- coerce-custom-type-id
2015-09-28 09:25:43 +00:00
"* +ive byte id -> -ive byte id (for unprefixed custom types)
* Keyword id -> Short hash id (for prefixed custom types)"
2015-04-19 03:48:01 +00:00
[custom-type-id]
(assert-custom-type-id custom-type-id)
(if-not (keyword? custom-type-id)
2015-04-19 03:48:01 +00:00
(int (- ^long custom-type-id))
2015-04-17 12:33:55 +00:00
(let [^long hash-id (hash custom-type-id)
short-hash-id (if (pos? hash-id)
(mod hash-id Short/MAX_VALUE)
(mod hash-id Short/MIN_VALUE))]
;; Make sure hash ids can't collide with byte ids (unlikely anyway):
(assert (not (<= -128 short-hash-id -1))
"Custom type id hash collision; please choose a different id")
(int short-hash-id))))
(comment (coerce-custom-type-id 77)
(coerce-custom-type-id :foo/bar))
(defmacro extend-freeze
"Extends Nippy to support freezing of a custom type (ideally concrete) with
given id of form:
2015-09-28 09:25:43 +00:00
* Keyword - 2 byte overhead, resistent to id collisions
* Integer [1, 128] - no overhead, subject to id collisions
(defrecord MyType [data])
(extend-freeze MyType :foo/my-type [x data-output] ; Keyword id
(.writeUTF [data-output] (:data x)))
;; or
(extend-freeze MyType 1 [x data-output] ; Byte id
2014-01-22 07:14:26 +00:00
(.writeUTF [data-output] (:data x)))"
[type custom-type-id [x out] & body]
(assert-custom-type-id custom-type-id)
`(extend-type ~type Freezable
(~'-freeze-to-out [~x ~(with-meta out {:tag 'java.io.DataOutput})]
(if-not ~(keyword? custom-type-id)
;; Unprefixed [cust byte id][payload]:
(write-id ~out ~(coerce-custom-type-id custom-type-id))
;; Prefixed [const byte id][cust hash id][payload]:
(do (write-id ~out id-prefixed-custom)
(.writeShort ~out ~(coerce-custom-type-id custom-type-id))))
~@body)))
(defmacro extend-thaw
"Extends Nippy to support thawing of a custom type with given id:
(extend-thaw :foo/my-type [data-input] ; Keyword id
(->MyType (.readUTF data-input)))
;; or
(extend-thaw 1 [data-input] ; Byte id
2014-01-22 07:14:26 +00:00
(->MyType (.readUTF data-input)))"
[custom-type-id [in] & body]
(assert-custom-type-id custom-type-id)
`(do
(when (contains? *custom-readers* ~(coerce-custom-type-id custom-type-id))
(println (format "Warning: resetting Nippy thaw for custom type with id: %s"
~custom-type-id)))
(swap-custom-readers!
(fn [m#]
(assoc m#
~(coerce-custom-type-id custom-type-id)
(fn [~(with-meta in {:tag 'java.io.DataInput})]
~@body))))))
(comment
2015-06-01 04:53:55 +00:00
*custom-readers*
(defrecord MyType [data])
(extend-freeze MyType 1 [x out] (.writeUTF out (:data x)))
(extend-thaw 1 [in] (->MyType (.readUTF in)))
(thaw (freeze (->MyType "Joe"))))
2013-06-12 18:14:46 +00:00
;;;; Stress data
2012-07-06 19:12:59 +00:00
2013-10-24 06:33:54 +00:00
(defrecord StressRecord [data])
2015-09-28 09:25:43 +00:00
(def stress-data "Reference data used for tests & benchmarks"
{:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
:nil nil
:boolean true
:char-utf8 \ಬ
:string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
:string-long (apply str (range 1000))
:keyword :keyword
:keyword-ns ::keyword
;;; Try reflect real-world data:
:lotsa-small-numbers (vec (range 200))
:lotsa-small-keywords (->> (java.util.Locale/getISOLanguages)
(mapv keyword))
:lotsa-small-strings (->> (java.util.Locale/getISOCountries)
(mapv #(.getDisplayCountry
(java.util.Locale. "en" %))))
:queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
:queue-empty (PersistentQueue/EMPTY)
:sorted-set (sorted-set 1 2 3 4 5)
:sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3)
:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
:list-quoted '(1 2 3 4 5 (6 7 8 (9 10)))
:list-empty (list)
:vector [1 2 3 4 5 [6 7 8 [9 10]]]
:vector-empty []
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
:map-empty {}
:set #{1 2 3 4 5 #{6 7 8 #{9 10}}}
:set-empty #{}
:meta (with-meta {:a :A} {:metakey :metaval})
:lazy-seq (repeatedly 1000 rand)
:lazy-seq-empty (map identity '())
:byte (byte 16)
:short (short 42)
:integer (int 3)
:long (long 3)
:bigint (bigint 31415926535897932384626433832795)
:float (float 3.14)
:double (double 3.14)
:bigdec (bigdec 3.1415926535897932384626433832795)
:ratio 22/7
:uuid (java.util.UUID/randomUUID)
:date (java.util.Date.)
:stress-record (->StressRecord "data")
;; Serializable
:throwable (Throwable. "Yolo")
:exception (try (/ 1 0) (catch Exception e e))
:ex-info (ex-info "ExInfo" {:data "data"})})
2013-06-12 18:14:46 +00:00
(def stress-data-comparable
2015-09-28 09:25:43 +00:00
"Reference data with stuff removed that breaks roundtrip equality"
(dissoc stress-data :bytes :throwable :exception :ex-info))
(def stress-data-benchable
"Reference data with stuff removed that breaks reader or other utils we'll
2015-09-28 09:25:43 +00:00
be benching against"
2014-01-21 07:44:53 +00:00
(dissoc stress-data :bytes :throwable :exception :ex-info :queue :queue-empty
2014-01-22 08:42:57 +00:00
:byte :stress-record))
2014-02-14 16:06:53 +00:00
;;;; Tools
2015-09-28 09:25:43 +00:00
(defn inspect-ba "Alpha - subject to change"
2015-09-29 16:06:33 +00:00
([ba ] (inspect-ba ba nil))
([ba thaw-opts]
(when (enc/bytes? ba)
(let [[first2bytes nextbytes] (enc/ba-split ba 2)
?known-wrapper
(cond
(enc/ba= first2bytes (.getBytes "\u0000<" "UTF8")) :carmine/bin
(enc/ba= first2bytes (.getBytes "\u0000>" "UTF8")) :carmine/clj)
unwrapped-ba (if ?known-wrapper nextbytes ba)
[data-ba ?nippy-header] (or (try-parse-header unwrapped-ba)
[unwrapped-ba :no-header])]
{:?known-wrapper ?known-wrapper
:?header ?nippy-header
:thawable? (try (thaw unwrapped-ba thaw-opts) true
(catch Exception _ false))
:unwrapped-ba unwrapped-ba
:data-ba data-ba
:unwrapped-len (alength ^bytes unwrapped-ba)
:ba-len (alength ^bytes ba)
:data-len (alength ^bytes data-ba)}))))
(comment
(inspect-ba (freeze "hello"))
(seq (:data-ba (inspect-ba (freeze "hello")))))
;;;; Deprecated
;; Deprecated by :auto compressor selection
(defrecord Compressable-LZMA2 [value]) ; Why was this `LZMA2` instead of `lzma2`?
(extend-freeze Compressable-LZMA2 128 [x out]
(let [ba (freeze (:value x) {:skip-header? true :compressor nil})
ba-len (alength ba)
compress? (> ba-len 1024)]
(.writeBoolean out compress?)
(if compress?
(write-bytes out (compress lzma2-compressor ba))
(write-bytes out ba))))
(extend-thaw 128 [in]
(let [compressed? (.readBoolean in)
ba (read-bytes in)]
(thaw ba {:compressor (when compressed? lzma2-compressor)
:encryptor nil})))
(comment
(->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze)
(thaw))
(count (->> (apply str (repeatedly 1000 rand)) (freeze)))
(count (->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze))))