nippy/src/taoensso/nippy.clj

954 lines
36 KiB
Clojure
Raw Normal View History

2012-07-06 19:12:59 +00:00
(ns taoensso.nippy
2015-04-19 03:48:01 +00:00
"High-performance JVM Clojure serialization library. Originally adapted from
2015-09-17 03:55:09 +00:00
Deep-Freeze (https://goo.gl/OePPGr)."
{:author "Peter Taoussanis (@ptaoussanis)"}
2015-09-29 07:30:25 +00:00
(:require [taoensso.encore :as enc]
[taoensso.nippy
(utils :as utils)
(compression :as compression)
(encryption :as encryption)])
2013-10-19 05:50:21 +00:00
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
DataOutputStream Serializable ObjectOutputStream ObjectInputStream
DataOutput DataInput]
2013-08-06 16:56:43 +00:00
[java.lang.reflect Method]
[java.util Date UUID]
[clojure.lang Keyword BigInt Ratio
APersistentMap APersistentVector APersistentSet
IPersistentMap ; IPersistentVector IPersistentSet IPersistentList
PersistentQueue PersistentTreeMap PersistentTreeSet PersistentList ; LazySeq
IRecord ISeq]))
2012-07-06 19:12:59 +00:00
2015-09-29 07:30:25 +00:00
(if (vector? enc/encore-version)
(enc/assert-min-encore-version [2 16 0])
(enc/assert-min-encore-version 2.16))
2015-02-18 10:22:37 +00:00
;;;; Nippy data format
2015-09-28 09:25:43 +00:00
;; * 4-byte header (Nippy v2.x+) (may be disabled but incl. by default) [1]
;; { * 1-byte type id.
;; * Arb-length payload. } ...
;;
;; [1] Inclusion of header is *strongly* recommended. Purpose:
2015-09-28 09:25:43 +00:00
;; * Sanity check (confirm that data appears to be Nippy data)
;; * Nippy version check (=> supports changes to data schema over time)
;; * Supports :auto thaw compressor, encryptor
2015-04-19 03:48:01 +00:00
;; * Supports :auto freeze compressor (since this depends on :auto thaw
2015-09-28 09:25:43 +00:00
;; compressor)
;;
(def ^:private ^:const head-version 1)
2013-06-13 15:40:44 +00:00
(def ^:private head-sig (.getBytes "NPY" "UTF-8"))
2015-09-28 09:25:43 +00:00
(def ^:private ^:const head-meta "Final byte stores version-dependent metadata"
{(byte 0) {:version 1 :compressor-id nil :encryptor-id nil}
(byte 4) {:version 1 :compressor-id nil :encryptor-id :else}
(byte 5) {:version 1 :compressor-id :else :encryptor-id nil}
(byte 6) {:version 1 :compressor-id :else :encryptor-id :else}
;;
(byte 2) {:version 1 :compressor-id nil :encryptor-id :aes128-sha512}
;;
(byte 1) {:version 1 :compressor-id :snappy :encryptor-id nil}
(byte 3) {:version 1 :compressor-id :snappy :encryptor-id :aes128-sha512}
(byte 7) {:version 1 :compressor-id :snappy :encryptor-id :else}
;;
2015-04-19 03:48:01 +00:00
;;; :lz4 used for both lz4 and lz4hc compressor (the two are compatible)
(byte 8) {:version 1 :compressor-id :lz4 :encryptor-id nil}
(byte 9) {:version 1 :compressor-id :lz4 :encryptor-id :aes128-sha512}
(byte 10) {:version 1 :compressor-id :lz4 :encryptor-id :else}
;;
(byte 11) {:version 1 :compressor-id :lzma2 :encryptor-id nil}
(byte 12) {:version 1 :compressor-id :lzma2 :encryptor-id :aes128-sha512}
(byte 13) {:version 1 :compressor-id :lzma2 :encryptor-id :else}})
2013-06-12 18:14:46 +00:00
(defmacro when-debug-mode [& body] (when #_true false `(do ~@body)))
2013-10-31 06:15:22 +00:00
2013-06-12 18:14:46 +00:00
;;;; Data type IDs
2012-07-06 19:12:59 +00:00
2014-01-22 07:37:38 +00:00
(do ; Just for easier IDE collapsing
;; ** Negative ids reserved for user-defined types **
;;
2015-09-28 09:25:43 +00:00
(def ^:const id-reserved (int 0))
;; 1 ; Deprecated
(def ^:const id-bytes (int 2))
(def ^:const id-nil (int 3))
(def ^:const id-boolean (int 4))
(def ^:const id-reader (int 5)) ; Fallback #2
(def ^:const id-serializable (int 6)) ; Fallback #1
(def ^:const id-char (int 10))
;; 11 ; Deprecated
;; 12 ; Deprecated
(def ^:const id-string (int 13))
(def ^:const id-keyword (int 14))
(def ^:const id-list (int 20))
(def ^:const id-vector (int 21))
;; 22 ; Deprecated
(def ^:const id-set (int 23))
(def ^:const id-seq (int 24))
(def ^:const id-meta (int 25))
(def ^:const id-queue (int 26))
;; 27 ; Deprecated
(def ^:const id-sorted-set (int 28))
;; 29 ; Deprecated
(def ^:const id-map (int 30))
(def ^:const id-sorted-map (int 31))
(def ^:const id-byte (int 40))
(def ^:const id-short (int 41))
(def ^:const id-integer (int 42))
(def ^:const id-long (int 43))
(def ^:const id-bigint (int 44))
(def ^:const id-biginteger (int 45))
(def ^:const id-float (int 60))
(def ^:const id-double (int 61))
(def ^:const id-bigdec (int 62))
(def ^:const id-ratio (int 70))
(def ^:const id-record (int 80))
;; (def ^:const id-type (int 81)) ; TODO?
(def ^:const id-prefixed-custom (int 82))
(def ^:const id-date (int 90))
(def ^:const id-uuid (int 91))
2014-01-22 07:37:38 +00:00
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
2015-09-28 09:25:43 +00:00
(def ^:const id-byte-as-long (int 100)) ; 1 vs 8 bytes
(def ^:const id-short-as-long (int 101)) ; 2 vs 8 bytes
(def ^:const id-int-as-long (int 102)) ; 4 vs 8 bytes
2014-01-22 08:42:57 +00:00
;;
2015-09-28 09:25:43 +00:00
(def ^:const id-sm-string (int 105)) ; 1 vs 4 byte length prefix
(def ^:const id-sm-keyword (int 106)) ; ''
2014-01-22 08:42:57 +00:00
;;
(def ^:const id-sm-vector (int 110)) ; ''
(def ^:const id-sm-set (int 111)) ; ''
(def ^:const id-sm-map (int 112)) ; ''
;;
;; TODO Additional optimizations (types) for 2-vecs and 3-vecs?
2014-01-22 08:42:57 +00:00
2014-01-22 07:37:38 +00:00
;;; DEPRECATED (old types will be supported only for thawing)
2015-09-28 09:25:43 +00:00
(def ^:const id-reader-depr1 (int 1)) ; v0.9.2+ for +64k support
(def ^:const id-string-depr1 (int 11)) ; v0.9.2+ for +64k support
(def ^:const id-map-depr1 (int 22)) ; v0.9.0+ for more efficient thaw
(def ^:const id-keyword-depr1 (int 12)) ; v2.0.0-alpha5+ for str consistecy
(def ^:const id-map-depr2 (int 27)) ; v2.11+ for count/2
(def ^:const id-sorted-map-depr1 (int 29)) ; v2.11+ for count/2
2014-01-22 07:37:38 +00:00
)
;;;; Ns imports (mostly for convenience of lib consumers)
2015-09-29 07:30:25 +00:00
(enc/defalias compress compression/compress)
(enc/defalias decompress compression/decompress)
(enc/defalias snappy-compressor compression/snappy-compressor)
(enc/defalias lzma2-compressor compression/lzma2-compressor)
(enc/defalias lz4-compressor compression/lz4-compressor)
(enc/defalias lz4hc-compressor compression/lz4hc-compressor)
2015-09-29 07:30:25 +00:00
(enc/defalias encrypt encryption/encrypt)
(enc/defalias decrypt encryption/decrypt)
(enc/defalias aes128-encryptor encryption/aes128-encryptor)
2015-09-29 07:30:25 +00:00
(enc/defalias freezable? utils/freezable?)
;;;; Freezing
(defprotocol Freezable
2015-09-28 09:25:43 +00:00
"Be careful about extending to interfaces, Ref. http://goo.gl/6gGRlU"
(-freeze-to-out [this out]))
(defn small-count? [n] (<= (long n) 127 #_Byte/MAX_VALUE))
(defmacro write-id [out id] `(.writeByte ~out ~id))
(defmacro write-bytes [out ba & [small?]]
(let [wc (if small? 'writeByte 'writeInt)
out (with-meta out {:tag 'java.io.DataOutput})
2015-09-15 17:45:33 +00:00
ba (with-meta ba {:tag 'bytes})]
`(let [out# ~out
ba# ~ba
size# (alength ba#)]
(. out# ~wc size#)
(.write out# ba# 0 size#))))
2014-01-22 07:14:26 +00:00
(defmacro write-biginteger [out x]
(let [x (with-meta x {:tag 'java.math.BigInteger})]
`(write-bytes ~out (.toByteArray ~x))))
(defmacro write-utf8 [out x & [small?]]
(let [x (with-meta x {:tag 'String})]
`(write-bytes ~out (.getBytes ~x "UTF-8") ~small?)))
(defn freeze-to-out!
"Serializes arg (any Clojure data type) to a DataOutput"
;; Basically just wraps `-freeze-to-out` with different arg order + metadata support
[^DataOutput data-output x]
(when-let [m (meta x)]
(write-id data-output id-meta)
(-freeze-to-out m data-output))
(-freeze-to-out x data-output))
2012-07-06 19:12:59 +00:00
(defmacro write-coll [out x & [small?]]
(let [wc (if small? 'writeByte 'writeInt)]
`(if (counted? ~'x)
(do
(. ~'out ~wc (count ~'x))
2015-09-29 07:30:25 +00:00
(enc/run!* (fn [i#] (freeze-to-out! ~'out i#)) ~'x))
(let [bas# (ByteArrayOutputStream. 64)
2014-01-22 07:14:26 +00:00
sout# (DataOutputStream. bas#)
2015-04-17 12:33:55 +00:00
cnt# (reduce (fn [^long cnt# i#]
(freeze-to-out! sout# i#)
2014-01-22 07:14:26 +00:00
(unchecked-inc cnt#))
0 ~'x)
ba# (.toByteArray bas#)]
(. ~'out ~wc cnt#)
2014-01-22 07:14:26 +00:00
(.write ~'out ba# 0 (alength ba#))))))
2012-07-06 19:12:59 +00:00
(defmacro write-kvs [out x & [small?]]
(let [wc (if small? 'writeByte 'writeInt)]
`(do
(. ~'out ~wc (count ~'x))
2015-09-29 07:30:25 +00:00
(enc/run-kv!
(fn [k# v#]
(freeze-to-out! ~'out k#)
(freeze-to-out! ~'out v#))
~'x))))
(defmacro ^:private freezer [type id & body]
`(extend-type ~type
Freezable
(~'-freeze-to-out [~'x ~(with-meta 'out {:tag 'DataOutput})]
(write-id ~'out ~id)
~@body)))
2012-07-06 19:12:59 +00:00
(defmacro ^:private freezer-coll [type id & [id-sm]]
(if-not id-sm
`(freezer ~type ~id (write-coll ~'out ~'x))
`(extend-type ~type
Freezable
(~'-freeze-to-out [~'x ~(with-meta 'out {:tag 'DataOutput})]
(if (small-count? (count ~'x))
(do
(write-id ~'out ~id-sm)
(write-coll ~'out ~'x :small))
(do
(write-id ~'out ~id)
(write-coll ~'out ~'x)))))))
(defmacro ^:private freezer-kvs [type id & [id-sm]]
(if-not id-sm
`(freezer ~type ~id (write-kvs ~'out ~'x))
`(extend-type ~type
Freezable
(~'-freeze-to-out [~'x ~(with-meta 'out {:tag 'DataOutput})]
(if (small-count? (count ~'x))
(do
(write-id ~'out ~id-sm)
(write-kvs ~'out ~'x :small))
(do
(write-id ~'out ~id)
(write-kvs ~'out ~'x)))))))
(freezer (Class/forName "[B") id-bytes (write-bytes out ^bytes x))
(freezer nil id-nil)
(freezer Boolean id-boolean (.writeBoolean out x))
(freezer Character id-char (.writeChar out (int x)))
2014-01-22 08:42:57 +00:00
(extend-type String
2014-01-22 08:42:57 +00:00
Freezable
(-freeze-to-out [x ^DataOutput out]
(let [ba (.getBytes x "UTF-8")]
(if (small-count? (alength ^bytes ba))
2015-09-28 09:25:43 +00:00
(do (write-id out id-sm-string)
2014-01-22 08:42:57 +00:00
(write-bytes out ba :small))
(do (write-id out id-string)
(write-bytes out ba))))))
(extend-type Keyword
2014-01-22 08:42:57 +00:00
Freezable
(-freeze-to-out [x ^DataOutput out]
(let [s (if-let [ns (namespace x)] (str ns "/" (name x)) (name x))
2014-01-22 08:42:57 +00:00
ba (.getBytes s "UTF-8")]
(if (small-count? Byte/MAX_VALUE)
2015-09-28 09:25:43 +00:00
(do (write-id out id-sm-keyword)
2014-01-22 08:42:57 +00:00
(write-bytes out ba :small))
(do (write-id out id-keyword)
(write-bytes out ba))))))
2012-07-06 19:12:59 +00:00
2013-10-23 18:25:46 +00:00
(freezer-coll PersistentQueue id-queue)
(freezer-coll PersistentTreeSet id-sorted-set)
(freezer-kvs PersistentTreeMap id-sorted-map)
(freezer-kvs APersistentMap id-map id-sm-map)
(freezer-coll APersistentVector id-vector id-sm-vector)
(freezer-coll APersistentSet id-set id-sm-set)
(freezer-coll PersistentList id-list) ; No APersistentList
(freezer-coll (type '()) id-list)
;; Nb low-level interface!! Acts as fallback for seqs that don't have a
;; concrete implementation. Will conflict with any other coll interfaces!
(freezer-coll ISeq id-seq)
(freezer IRecord id-record
2014-01-22 07:14:26 +00:00
(write-utf8 out (.getName (class x))) ; Reflect
(freeze-to-out! out (into {} x)))
2014-01-22 07:14:26 +00:00
(freezer Byte id-byte (.writeByte out x))
(freezer Short id-short (.writeShort out x))
(freezer Integer id-integer (.writeInt out x))
;;(freezer Long id-long (.writeLong out x))
(extend-type Long
2014-01-22 08:42:57 +00:00
Freezable
(-freeze-to-out [x ^DataOutput out]
(let [^long x x]
(cond
2015-09-30 04:57:00 +00:00
(and (<= x #_Byte/MAX_VALUE 127)
(<= #_Byte/MIN_VALUE -128 x))
(do (write-id out id-byte-as-long)
(.writeByte out x))
(and (<= x #_Short/MAX_VALUE 32767)
(<= #_Short/MIN_VALUE -32768 x))
(do (write-id out id-short-as-long)
(.writeShort out x))
(and (<= x #_Integer/MAX_VALUE 2147483647)
(<= #_Integer/MIN_VALUE -2147483648 x))
(do (write-id out id-int-as-long)
(.writeInt out x))
:else (do (write-id out id-long)
(.writeLong out x))))))
2014-01-22 08:42:57 +00:00
(freezer BigInt id-bigint (write-biginteger out (.toBigInteger x)))
(freezer BigInteger id-biginteger (write-biginteger out x))
2014-01-22 07:14:26 +00:00
(freezer Float id-float (.writeFloat out x))
(freezer Double id-double (.writeDouble out x))
2012-07-06 19:12:59 +00:00
(freezer BigDecimal id-bigdec
2014-01-22 07:14:26 +00:00
(write-biginteger out (.unscaledValue x))
(.writeInt out (.scale x)))
2012-07-06 19:12:59 +00:00
(freezer Ratio id-ratio
2014-01-22 07:14:26 +00:00
(write-biginteger out (.numerator x))
(write-biginteger out (.denominator x)))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(freezer Date id-date (.writeLong out (.getTime x)))
(freezer UUID id-uuid
2014-01-22 07:14:26 +00:00
(.writeLong out (.getMostSignificantBits x))
(.writeLong out (.getLeastSignificantBits x)))
2015-09-29 07:30:25 +00:00
(enc/defonce* ^:dynamic *final-freeze-fallback* nil)
(defn freeze-fallback-as-str [out x]
2015-09-29 07:30:25 +00:00
(-freeze-to-out {:nippy/unfreezable (enc/pr-edn x) :type (type x)} out))
(comment
(require '[clojure.core.async :as async])
(binding [*final-freeze-fallback* freeze-fallback-as-str]
(-> (async/chan) (freeze) (thaw))))
;; Fallbacks. Note that we'll extend *only* to (lowly) Object to prevent
;; interfering with higher-level implementations, Ref. http://goo.gl/6f7SKl
(extend-type Object
Freezable
(-freeze-to-out [x ^DataOutput out]
(cond
(utils/serializable? x) ; Fallback #1: Java's Serializable interface
(do (when-debug-mode
2013-10-31 06:15:22 +00:00
(println (format "DEBUG - Serializable fallback: %s" (type x))))
2014-01-22 07:14:26 +00:00
(write-id out id-serializable)
(write-utf8 out (.getName (class x))) ; Reflect
(.writeObject (ObjectOutputStream. out) x))
(utils/readable? x) ; Fallback #2: Clojure's Reader
(do (when-debug-mode
(println (format "DEBUG - Reader fallback: %s" (type x))))
2014-01-22 07:14:26 +00:00
(write-id out id-reader)
2015-09-29 07:30:25 +00:00
(write-utf8 out (enc/pr-edn x)))
:else ; Fallback #3: *final-freeze-fallback*
(if-let [ffb *final-freeze-fallback*]
(ffb x out)
(throw (ex-info (format "Unfreezable type: %s %s" (type x) (str x))
{:type (type x)
2015-09-29 07:30:25 +00:00
:as-str (enc/pr-edn x)}))))))
2013-08-07 09:19:11 +00:00
2013-06-13 15:40:44 +00:00
(def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))
(def ^:private get-head-ba
(memoize
(fn [head-meta]
(when-let [meta-id (get head-meta-id (assoc head-meta :version head-version))]
2015-09-29 07:30:25 +00:00
(enc/ba-concat head-sig (byte-array [meta-id]))))))
2013-06-13 15:40:44 +00:00
(defn- wrap-header [data-ba head-meta]
(if-let [head-ba (get-head-ba head-meta)]
2015-09-29 07:30:25 +00:00
(enc/ba-concat head-ba data-ba)
(throw (ex-info (format "Unrecognized header meta: %s" head-meta)
{:head-meta head-meta}))))
2013-06-13 15:40:44 +00:00
(comment (wrap-header (.getBytes "foo") {:compressor-id :lz4
:encryptor-id nil}))
2015-04-19 03:48:01 +00:00
(defn default-freeze-compressor-selector
"Strategy:
* Prioritize speed, but allow lz4.
* Skip lz4 unless it's likely that lz4's space benefit will outweigh its
space overhead."
[^bytes ba]
(let [ba-len (alength ba)]
(cond
2015-09-28 09:25:43 +00:00
;; (> ba-len 8192) lzma2-compressor
;; (> ba-len 4098) lz4hc-compressor
(> ba-len 1024) lz4-compressor
2015-04-19 03:48:01 +00:00
:else nil)))
2015-09-29 07:30:25 +00:00
(enc/defonce* ^:dynamic *default-freeze-compressor-selector*
"(fn selector [^bytes ba])->compressor used by `(freeze <x> {:compressor :auto})."
default-freeze-compressor-selector)
(defn set-default-freeze-compressor-selector!
2015-09-28 09:25:43 +00:00
"Sets root binding of `*default-freeze-compressor-selector*`"
[selector]
(alter-var-root #'*default-freeze-compressor-selector* (constantly selector)))
(defn freeze
"Serializes arg (any Clojure data type) to a byte array. To freeze custom
types, extend the Clojure reader or see `extend-freeze`."
2015-09-26 04:31:49 +00:00
(^bytes [x] (freeze x nil))
(^bytes [x {:keys [compressor encryptor password skip-header?]
:or {compressor :auto
encryptor aes128-encryptor}
:as opts}]
(let [encryptor (when password encryptor)
zero-copy-mode? (and (nil? compressor) (nil? encryptor))
baos (ByteArrayOutputStream. 64)
2015-09-26 04:31:49 +00:00
dos (DataOutputStream. baos)]
(if zero-copy-mode?
(do ; Optimized case
(when-not skip-header? ; Avoid `wrap-header`'s array copy:
(let [head-ba (get-head-ba {:compressor-id nil :encryptor-id nil})]
(.write dos head-ba 0 4)))
(freeze-to-out! dos x)
(.toByteArray baos))
(do
(freeze-to-out! dos x)
(let [ba (.toByteArray baos)
compressor
(if (identical? compressor :auto)
(if skip-header?
lz4-compressor
(*default-freeze-compressor-selector* ba))
(if (fn? compressor)
(compressor ba) ; Assume compressor selector fn
compressor ; Assume compressor
))
ba (if compressor (compress compressor ba) ba)
ba (if encryptor (encrypt encryptor password ba) ba)]
(if skip-header?
ba
(wrap-header ba
{:compressor-id (when-let [c compressor]
(or (compression/standard-header-ids
(compression/header-id c)) :else))
:encryptor-id (when-let [e encryptor]
(or (encryption/standard-header-ids
(encryption/header-id e)) :else))}))))))))
2012-07-06 19:12:59 +00:00
;;;; Thawing
2014-01-22 07:14:26 +00:00
(declare thaw-from-in)
2012-07-06 19:12:59 +00:00
(defmacro read-bytes [in & [small?]]
(let [rc (if small? 'readByte 'readInt)]
`(let [in# ~in
size# (. in# ~rc)
ba# (byte-array size#)]
(.readFully in# ba# 0 size#)
ba#)))
(defmacro read-biginteger [in] `(BigInteger. (read-bytes ~in)))
(defmacro read-utf8 [in & [small?]]
`(String. (read-bytes ~in ~small?) "UTF-8"))
(defmacro ^:private read-coll [in coll & [small?]]
(let [rc (if small? 'readByte 'readInt)]
`(let [in# ~in]
2015-09-29 07:30:25 +00:00
(enc/repeatedly-into ~coll (. in# ~rc)
(fn [] (thaw-from-in in#))))))
2012-07-06 19:12:59 +00:00
(defmacro ^:private read-kvs [in coll & [small?]]
(let [rc (if small? 'readByte 'readInt)]
`(let [in# ~in]
2015-09-29 07:30:25 +00:00
(enc/repeatedly-into ~coll (. in# ~rc)
(fn [] [(thaw-from-in in#) (thaw-from-in in#)])))))
(defmacro ^:private read-kvs-depr1 [in coll]
`(let [in# ~in]
2015-09-29 07:30:25 +00:00
(enc/repeatedly-into ~coll (quot (.readInt in#) 2)
2015-09-19 04:04:33 +00:00
(fn [] [(thaw-from-in in#) (thaw-from-in in#)]))))
2015-04-17 12:33:55 +00:00
2015-05-29 07:13:35 +00:00
(def ^:private class-method-sig (into-array Class [IPersistentMap]))
2015-09-29 07:30:25 +00:00
(enc/defonce* ^:dynamic *custom-readers* "{<hash-or-byte-id> (fn [data-input])}" nil)
2015-06-01 04:53:55 +00:00
(defn swap-custom-readers! [f] (alter-var-root #'*custom-readers* f))
(defn- read-custom! [type-id in]
2015-06-01 04:07:50 +00:00
(if-let [custom-reader (get *custom-readers* type-id)]
(try
(custom-reader in)
(catch Exception e
(throw
(ex-info
(format "Reader exception for custom type with internal id: %s"
type-id) {:internal-type-id type-id} e))))
(throw
(ex-info
(format "No reader provided for custom type with internal id: %s"
type-id)
{:internal-type-id type-id}))))
2014-01-22 07:14:26 +00:00
(defn- thaw-from-in
[^DataInput in]
(let [type-id (.readByte in)]
(try
(when-debug-mode
(println (format "DEBUG - thawing type-id: %s" type-id)))
2015-09-29 07:30:25 +00:00
(enc/case-eval type-id
id-reader
2014-01-22 07:14:26 +00:00
(let [edn (read-utf8 in)]
(try
2015-09-29 07:30:25 +00:00
(enc/read-edn {:readers *data-readers*} edn)
(catch Exception e
{:type :reader
:throwable e
:nippy/unthawable edn})))
id-serializable
2014-01-22 07:14:26 +00:00
(let [class-name (read-utf8 in)]
(try
(let [content (.readObject (ObjectInputStream. in))]
(try
(let [class (Class/forName class-name)]
(cast class content))
(catch Exception e
{:type :serializable
:throwable e
:nippy/unthawable {:class-name class-name :content content}})))
(catch Exception e
{:type :serializable
:throwable e
:nippy/unthawable {:class-name class-name :content nil}})))
id-record
(let [class-name (read-utf8 in)
content (thaw-from-in in)]
(try
(let [class (Class/forName class-name)
method (.getMethod class "create" class-method-sig)]
(.invoke method class (into-array Object [content])))
(catch Exception e
{:type :record
:throwable e
:nippy/unthawable {:class-name class-name :content content}})))
2014-01-22 07:14:26 +00:00
id-bytes (read-bytes in)
id-nil nil
2014-01-22 07:14:26 +00:00
id-boolean (.readBoolean in)
2014-01-22 07:14:26 +00:00
id-char (.readChar in)
2015-09-28 03:17:07 +00:00
id-string (read-utf8 in)
2014-01-22 07:14:26 +00:00
id-keyword (keyword (read-utf8 in))
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
2015-09-28 09:25:43 +00:00
id-sm-string (read-utf8 in :small)
id-sm-keyword (keyword (read-utf8 in :small))
2014-01-22 08:42:57 +00:00
2014-01-22 07:14:26 +00:00
id-queue (read-coll in (PersistentQueue/EMPTY))
id-sorted-set (read-coll in (sorted-set))
id-sorted-map (read-kvs in (sorted-map))
id-vector (read-coll in [])
id-sm-vector (read-coll in [] :small)
id-set (read-coll in #{})
id-sm-set (read-coll in #{} :small)
id-map (read-kvs in {})
id-sm-map (read-kvs in {} :small)
id-list (into '() (rseq (read-coll in [])))
id-seq (or (seq (read-coll in []))
(lazy-seq nil) ; Empty coll
)
2014-01-22 07:14:26 +00:00
id-meta (let [m (thaw-from-in in)] (with-meta (thaw-from-in in) m))
2014-01-22 07:14:26 +00:00
id-byte (.readByte in)
id-short (.readShort in)
id-integer (.readInt in)
id-long (.readLong in)
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
id-byte-as-long (long (.readByte in))
id-short-as-long (long (.readShort in))
id-int-as-long (long (.readInt in))
id-bigint (bigint (read-biginteger in))
id-biginteger (read-biginteger in)
2014-01-22 07:14:26 +00:00
id-float (.readFloat in)
id-double (.readDouble in)
id-bigdec (BigDecimal. (read-biginteger in) (.readInt in))
2015-09-17 07:05:00 +00:00
;; id-ratio (/ (bigint (read-biginteger in))
;; (bigint (read-biginteger in)))
id-ratio (clojure.lang.Ratio.
(read-biginteger in)
(read-biginteger in))
2014-01-22 07:14:26 +00:00
id-date (Date. (.readLong in))
id-uuid (UUID. (.readLong in) (.readLong in))
;;; DEPRECATED
id-sorted-map-depr1 (read-kvs-depr1 in (sorted-map))
id-map-depr2 (read-kvs-depr1 in {})
2015-09-29 07:30:25 +00:00
id-reader-depr1 (enc/read-edn (.readUTF in))
id-string-depr1 (.readUTF in)
2015-09-29 07:30:25 +00:00
id-map-depr1 (apply hash-map (enc/repeatedly-into [] (* 2 (.readInt in))
2015-09-19 04:04:33 +00:00
(fn [] (thaw-from-in in))))
id-keyword-depr1 (keyword (.readUTF in))
id-prefixed-custom ; Prefixed custom type
(let [hash-id (.readShort in)]
(read-custom! hash-id in))
(read-custom! type-id in) ; Unprefixed custom type (catchall)
)
(catch Exception e
(throw (ex-info (format "Thaw failed against type-id: %s" type-id)
{:type-id type-id} e))))))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(defn thaw-from-in!
"Low-level API. Deserializes a frozen object from given DataInput to its
original Clojure data type."
2015-09-26 04:31:49 +00:00
[data-input]
2014-01-22 07:14:26 +00:00
(thaw-from-in data-input))
2013-06-13 15:40:44 +00:00
(defn- try-parse-header [ba]
2015-09-29 07:30:25 +00:00
(when-let [[head-ba data-ba] (enc/ba-split ba 4)]
(let [[head-sig* [meta-id]] (enc/ba-split head-ba 3)]
(when (enc/ba= head-sig* head-sig) ; Header appears to be well-formed
[data-ba (get head-meta meta-id {:unrecognized-meta? true})]))))
(defn- get-auto-compressor [compressor-id]
(case compressor-id
nil nil
:snappy snappy-compressor
:lzma2 lzma2-compressor
:lz4 lz4-compressor
:no-header (throw (ex-info ":auto not supported on headerless data." {}))
:else (throw (ex-info ":auto not supported for non-standard compressors." {}))
(throw (ex-info (format "Unrecognized :auto compressor id: %s" compressor-id)
{:compressor-id compressor-id}))))
(defn- get-auto-encryptor [encryptor-id]
(case encryptor-id
nil nil
:aes128-sha512 aes128-encryptor
:no-header (throw (ex-info ":auto not supported on headerless data." {}))
2014-11-03 09:43:41 +00:00
:else (throw (ex-info ":auto not supported for non-standard encryptors." {}))
(throw (ex-info (format "Unrecognized :auto encryptor id: %s" encryptor-id)
{:encryptor-id encryptor-id}))))
2013-06-13 15:40:44 +00:00
(defn thaw
"Deserializes a frozen object from given byte array to its original Clojure
data type. Supports data frozen with current and all previous versions of
Nippy. To thaw custom types, extend the Clojure reader or see `extend-thaw`.
Options include:
2015-09-28 09:25:43 +00:00
:compressor - An ICompressor, :auto (requires Nippy header), or nil
:encryptor - An IEncryptor, :auto (requires Nippy header), or nil"
2015-09-26 04:31:49 +00:00
([ba] (thaw ba nil))
([^bytes ba
{:keys [v1-compatibility? compressor encryptor password]
:or {v1-compatibility? true ; Recommend disabling when possible
compressor :auto
encryptor :auto}
:as opts}]
(assert (not (:headerless-meta opts))
":headerless-meta `thaw` opt removed in Nippy v2.7+")
(let [ex (fn [msg & [e]] (throw (ex-info (format "Thaw failed: %s" msg)
{:opts (merge opts
{:compressor compressor
:encryptor encryptor})}
e)))
thaw-data
(fn [data-ba compressor-id encryptor-id]
(let [compressor (if (identical? compressor :auto)
(get-auto-compressor compressor-id)
compressor)
encryptor (if (identical? encryptor :auto)
(get-auto-encryptor encryptor-id)
encryptor)]
(when (and encryptor (not password))
(ex "Password required for decryption."))
(try
(let [ba data-ba
ba (if encryptor (decrypt encryptor password ba) ba)
ba (if compressor (decompress compressor ba) ba)
dis (DataInputStream. (ByteArrayInputStream. ba))]
(thaw-from-in! dis))
(catch Exception e
(ex "Decryption/decompression failure, or data unfrozen/damaged."
e)))))
;; This is hackish and can actually currently result in JVM core dumps
;; due to buggy Snappy behaviour, Ref. http://goo.gl/mh7Rpy.
thaw-nippy-v1-data
(fn [data-ba]
(if-not v1-compatibility?
(throw (Exception. "v1 compatibility disabled"))
(try (thaw-data data-ba :snappy nil)
(catch Exception _
2015-09-26 04:31:49 +00:00
(thaw-data data-ba nil nil)))))]
(if-let [[data-ba {:keys [compressor-id encryptor-id unrecognized-meta?]
:as head-meta}] (try-parse-header ba)]
2015-09-26 04:31:49 +00:00
;; A well-formed header _appears_ to be present (it's possible though
;; unlikely that this is a fluke and data is actually headerless):
(try (thaw-data data-ba compressor-id encryptor-id)
(catch Exception e
(try (thaw-nippy-v1-data data-ba)
(catch Exception _
(if unrecognized-meta?
(ex "Unrecognized (but apparently well-formed) header. Data frozen with newer Nippy version?"
e)
(throw e))))))
;; Well-formed header definitely not present
(try (thaw-nippy-v1-data ba)
(catch Exception _
(thaw-data ba :no-header :no-header)))))))
(comment (thaw (freeze "hello"))
(thaw (freeze "hello" {:compressor nil}))
(thaw (freeze "hello" {:password [:salted "p"]})) ; ex: no pwd
(thaw (freeze "hello") {:password [:salted "p"]}))
2012-07-06 19:12:59 +00:00
;;;; Custom types
(defn- assert-custom-type-id [custom-type-id]
(assert (or (keyword? custom-type-id)
(and (integer? custom-type-id) (<= 1 custom-type-id 128)))))
2014-07-06 06:47:38 +00:00
(defn- coerce-custom-type-id
2015-09-28 09:25:43 +00:00
"* +ive byte id -> -ive byte id (for unprefixed custom types)
* Keyword id -> Short hash id (for prefixed custom types)"
2015-04-19 03:48:01 +00:00
[custom-type-id]
(assert-custom-type-id custom-type-id)
(if-not (keyword? custom-type-id)
2015-04-19 03:48:01 +00:00
(int (- ^long custom-type-id))
2015-04-17 12:33:55 +00:00
(let [^long hash-id (hash custom-type-id)
short-hash-id (if (pos? hash-id)
(mod hash-id Short/MAX_VALUE)
(mod hash-id Short/MIN_VALUE))]
;; Make sure hash ids can't collide with byte ids (unlikely anyway):
(assert (not (<= -128 short-hash-id -1))
"Custom type id hash collision; please choose a different id")
(int short-hash-id))))
(comment (coerce-custom-type-id 77)
(coerce-custom-type-id :foo/bar))
(defmacro extend-freeze
"Extends Nippy to support freezing of a custom type (ideally concrete) with
given id of form:
2015-09-28 09:25:43 +00:00
* Keyword - 2 byte overhead, resistent to id collisions
* Integer [1, 128] - no overhead, subject to id collisions
(defrecord MyType [data])
(extend-freeze MyType :foo/my-type [x data-output] ; Keyword id
(.writeUTF [data-output] (:data x)))
;; or
(extend-freeze MyType 1 [x data-output] ; Byte id
2014-01-22 07:14:26 +00:00
(.writeUTF [data-output] (:data x)))"
[type custom-type-id [x out] & body]
(assert-custom-type-id custom-type-id)
`(extend-type ~type Freezable
(~'-freeze-to-out [~x ~(with-meta out {:tag 'java.io.DataOutput})]
(if-not ~(keyword? custom-type-id)
;; Unprefixed [cust byte id][payload]:
(write-id ~out ~(coerce-custom-type-id custom-type-id))
;; Prefixed [const byte id][cust hash id][payload]:
(do (write-id ~out id-prefixed-custom)
(.writeShort ~out ~(coerce-custom-type-id custom-type-id))))
~@body)))
(defmacro extend-thaw
"Extends Nippy to support thawing of a custom type with given id:
(extend-thaw :foo/my-type [data-input] ; Keyword id
(->MyType (.readUTF data-input)))
;; or
(extend-thaw 1 [data-input] ; Byte id
2014-01-22 07:14:26 +00:00
(->MyType (.readUTF data-input)))"
[custom-type-id [in] & body]
(assert-custom-type-id custom-type-id)
`(do
(when (contains? *custom-readers* ~(coerce-custom-type-id custom-type-id))
(println (format "Warning: resetting Nippy thaw for custom type with id: %s"
~custom-type-id)))
(swap-custom-readers!
(fn [m#]
(assoc m#
~(coerce-custom-type-id custom-type-id)
(fn [~(with-meta in {:tag 'java.io.DataInput})]
~@body))))))
(comment
2015-06-01 04:53:55 +00:00
*custom-readers*
(defrecord MyType [data])
(extend-freeze MyType 1 [x out] (.writeUTF out (:data x)))
(extend-thaw 1 [in] (->MyType (.readUTF in)))
(thaw (freeze (->MyType "Joe"))))
;;; Some useful custom types - EXPERIMENTAL
2015-04-19 03:48:01 +00:00
;; Mostly deprecated by :auto compressor selection
2015-09-28 09:25:43 +00:00
(defrecord Compressable-LZMA2 [value]) ; Why was this `LZMA2` instead of `lzma2`?
2014-01-22 07:14:26 +00:00
(extend-freeze Compressable-LZMA2 128 [x out]
(let [ba (freeze (:value x) {:skip-header? true :compressor nil})
ba-len (alength ba)
compress? (> ba-len 1024)]
2014-01-22 07:14:26 +00:00
(.writeBoolean out compress?)
(if compress?
(write-bytes out (compress lzma2-compressor ba))
(write-bytes out ba))))
2014-01-22 07:14:26 +00:00
(extend-thaw 128 [in]
(let [compressed? (.readBoolean in)
ba (read-bytes in)]
(thaw ba {:compressor (when compressed? lzma2-compressor)
:encryptor nil})))
(comment
(->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze)
(thaw))
(count (->> (apply str (repeatedly 1000 rand)) (freeze)))
(count (->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze))))
2013-06-12 18:14:46 +00:00
;;;; Stress data
2012-07-06 19:12:59 +00:00
2013-10-24 06:33:54 +00:00
(defrecord StressRecord [data])
2015-09-28 09:25:43 +00:00
(def stress-data "Reference data used for tests & benchmarks"
2013-06-12 17:15:16 +00:00
(let []
{:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
:nil nil
:boolean true
:char-utf8 \ಬ
:string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
:string-long (apply str (range 1000))
:keyword :keyword
:keyword-ns ::keyword
2014-01-22 08:42:57 +00:00
;;; Try reflect real-world data:
:lotsa-small-numbers (vec (range 200))
:lotsa-small-keywords (->> (java.util.Locale/getISOLanguages)
(mapv keyword))
:lotsa-small-strings (->> (java.util.Locale/getISOCountries)
(mapv #(.getDisplayCountry
(java.util.Locale. "en" %))))
:queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
:queue-empty (PersistentQueue/EMPTY)
:sorted-set (sorted-set 1 2 3 4 5)
:sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3)
:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
:list-quoted '(1 2 3 4 5 (6 7 8 (9 10)))
:list-empty (list)
:vector [1 2 3 4 5 [6 7 8 [9 10]]]
:vector-empty []
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
:map-empty {}
:set #{1 2 3 4 5 #{6 7 8 #{9 10}}}
:set-empty #{}
:meta (with-meta {:a :A} {:metakey :metaval})
2013-10-31 06:16:26 +00:00
:lazy-seq (repeatedly 1000 rand)
:lazy-seq-empty (map identity '())
:byte (byte 16)
:short (short 42)
:integer (int 3)
:long (long 3)
:bigint (bigint 31415926535897932384626433832795)
:float (float 3.14)
:double (double 3.14)
:bigdec (bigdec 3.1415926535897932384626433832795)
:ratio 22/7
:uuid (java.util.UUID/randomUUID)
2013-10-24 06:33:54 +00:00
:date (java.util.Date.)
:stress-record (->StressRecord "data")
;; Serializable
:throwable (Throwable. "Yolo")
:exception (try (/ 1 0) (catch Exception e e))
:ex-info (ex-info "ExInfo" {:data "data"})}))
2013-06-12 18:14:46 +00:00
(def stress-data-comparable
2015-09-28 09:25:43 +00:00
"Reference data with stuff removed that breaks roundtrip equality"
(dissoc stress-data :bytes :throwable :exception :ex-info))
(def stress-data-benchable
"Reference data with stuff removed that breaks reader or other utils we'll
2015-09-28 09:25:43 +00:00
be benching against"
2014-01-21 07:44:53 +00:00
(dissoc stress-data :bytes :throwable :exception :ex-info :queue :queue-empty
2014-01-22 08:42:57 +00:00
:byte :stress-record))
2014-02-14 16:06:53 +00:00
;;;; Tools
2015-09-28 09:25:43 +00:00
(defn inspect-ba "Alpha - subject to change"
[ba & [thaw-opts]]
2015-09-29 07:30:25 +00:00
(if-not (enc/bytes? ba) :not-ba
(let [[first2bytes nextbytes] (enc/ba-split ba 2)
known-wrapper
(cond
2015-09-29 07:30:25 +00:00
(enc/ba= first2bytes (.getBytes "\u0000<" "UTF8")) :carmine/bin
(enc/ba= first2bytes (.getBytes "\u0000>" "UTF8")) :carmine/clj)
unwrapped-ba (if known-wrapper nextbytes ba)
[data-ba nippy-header] (or (try-parse-header unwrapped-ba)
[unwrapped-ba :no-header])]
{:known-wrapper known-wrapper
:nippy-v2-header nippy-header ; Nippy v1.x didn't have a header
:thawable? (try (thaw unwrapped-ba thaw-opts) true
(catch Exception _ false))
:unwrapped-ba unwrapped-ba
:data-ba data-ba
:unwrapped-size (alength ^bytes unwrapped-ba)
:ba-size (alength ^bytes ba)
:data-size (alength ^bytes data-ba)})))
(comment (inspect-ba (freeze "hello"))
(seq (:data-ba (inspect-ba (freeze "hello")))))