nippy/src/taoensso/nippy.clj

839 lines
32 KiB
Clojure
Raw Normal View History

2012-07-06 19:12:59 +00:00
(ns taoensso.nippy
"Simple, high-performance Clojure serialization library. Originally adapted
from Deep-Freeze."
2012-07-06 19:12:59 +00:00
{:author "Peter Taoussanis"}
(:require [clojure.tools.reader.edn :as edn]
2014-02-23 11:52:50 +00:00
[taoensso.encore :as encore]
[taoensso.nippy
(utils :as utils)
(compression :as compression)
(encryption :as encryption)])
2013-10-19 05:50:21 +00:00
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
DataOutputStream Serializable ObjectOutputStream ObjectInputStream
DataOutput DataInput]
2013-08-06 16:56:43 +00:00
[java.lang.reflect Method]
[java.util Date UUID]
[clojure.lang Keyword BigInt Ratio
APersistentMap APersistentVector APersistentSet
IPersistentMap ; IPersistentVector IPersistentSet IPersistentList
PersistentQueue PersistentTreeMap PersistentTreeSet PersistentList ; LazySeq
IRecord ISeq]))
2012-07-06 19:12:59 +00:00
;;;; Nippy data format
;; * 4-byte header (Nippy v2.x+) (may be disabled but incl. by default) [1].
;; { * 1-byte type id.
;; * Arb-length payload. } ...
;;
;; [1] Inclusion of header is strongly recommended. Purpose:
;; * Sanity check (confirm that data appears to be Nippy data).
;; * Nippy version check (=> supports changes to data schema over time).
;; * Supports :auto thaw compressor, encryptor.
;;
(def ^:private ^:const head-version 1)
2013-06-13 15:40:44 +00:00
(def ^:private head-sig (.getBytes "NPY" "UTF-8"))
2013-07-25 08:41:13 +00:00
(def ^:private ^:const head-meta "Final byte stores version-dependent metadata."
{(byte 0) {:version 1 :compressor-id nil :encryptor-id nil}
(byte 4) {:version 1 :compressor-id nil :encryptor-id :else}
(byte 5) {:version 1 :compressor-id :else :encryptor-id nil}
(byte 6) {:version 1 :compressor-id :else :encryptor-id :else}
;;
(byte 2) {:version 1 :compressor-id nil :encryptor-id :aes128-sha512}
;;
(byte 1) {:version 1 :compressor-id :snappy :encryptor-id nil}
(byte 3) {:version 1 :compressor-id :snappy :encryptor-id :aes128-sha512}
(byte 7) {:version 1 :compressor-id :snappy :encryptor-id :else}
;;
(byte 8) {:version 1 :compressor-id :lz4 :encryptor-id nil}
(byte 9) {:version 1 :compressor-id :lz4 :encryptor-id :aes128-sha512}
(byte 10) {:version 1 :compressor-id :lz4 :encryptor-id :else}
;;
(byte 11) {:version 1 :compressor-id :lzma2 :encryptor-id nil}
(byte 12) {:version 1 :compressor-id :lzma2 :encryptor-id :aes128-sha512}
(byte 13) {:version 1 :compressor-id :lzma2 :encryptor-id :else}})
2013-06-12 18:14:46 +00:00
(defmacro when-debug-mode [& body] (when #_true false `(do ~@body)))
2013-10-31 06:15:22 +00:00
2013-06-12 18:14:46 +00:00
;;;; Data type IDs
2012-07-06 19:12:59 +00:00
2014-01-22 07:37:38 +00:00
(do ; Just for easier IDE collapsing
;; ** Negative ids reserved for user-defined types **
;;
2014-01-22 07:37:38 +00:00
(def ^:const id-reserved (int 0))
;; 1
(def ^:const id-bytes (int 2))
(def ^:const id-nil (int 3))
(def ^:const id-boolean (int 4))
(def ^:const id-reader (int 5)) ; Fallback #2: pr-str output
(def ^:const id-serializable (int 6)) ; Fallback #1
(def ^:const id-char (int 10))
;; 11
;; 12
(def ^:const id-string (int 13))
(def ^:const id-keyword (int 14))
(def ^:const id-list (int 20))
(def ^:const id-vector (int 21))
;; 22
(def ^:const id-set (int 23))
(def ^:const id-seq (int 24))
(def ^:const id-meta (int 25))
(def ^:const id-queue (int 26))
(def ^:const id-map (int 27))
(def ^:const id-sorted-set (int 28))
(def ^:const id-sorted-map (int 29))
(def ^:const id-byte (int 40))
(def ^:const id-short (int 41))
(def ^:const id-integer (int 42))
(def ^:const id-long (int 43))
(def ^:const id-bigint (int 44))
(def ^:const id-biginteger (int 45))
2014-01-22 07:37:38 +00:00
(def ^:const id-float (int 60))
(def ^:const id-double (int 61))
(def ^:const id-bigdec (int 62))
(def ^:const id-ratio (int 70))
(def ^:const id-record (int 80))
;; (def ^:const id-type (int 81)) ; TODO?
(def ^:const id-prefixed-custom (int 82))
2014-01-22 07:37:38 +00:00
(def ^:const id-date (int 90))
(def ^:const id-uuid (int 91))
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
(def ^:const id-byte-as-long (int 100)) ; 1 vs 8 bytes
(def ^:const id-short-as-long (int 101)) ; 2 vs 8 bytes
(def ^:const id-int-as-long (int 102)) ; 4 vs 8 bytes
;; (def ^:const id-compact-long (int 103)) ; 6->7 vs 8 bytes
2014-01-22 08:42:57 +00:00
;;
(def ^:const id-string-small (int 105)) ; 1 vs 4 byte length prefix
(def ^:const id-keyword-small (int 106)) ; ''
2014-01-22 08:42:57 +00:00
;;
;; (def ^:const id-vector-small (int 110)) ; ''
;; (def ^:const id-set-small (int 111)) ; ''
;; (def ^:const id-map-small (int 112)) ; ''
2014-01-22 08:42:57 +00:00
2014-01-22 07:37:38 +00:00
;;; DEPRECATED (old types will be supported only for thawing)
(def ^:const id-old-reader (int 1)) ; as of 0.9.2, for +64k support
(def ^:const id-old-string (int 11)) ; as of 0.9.2, for +64k support
(def ^:const id-old-map (int 22)) ; as of 0.9.0, for more efficient thaw
(def ^:const id-old-keyword (int 12)) ; as of 2.0.0-alpha5, for str consistecy
)
;;;; Ns imports (mostly for convenience of lib consumers)
(encore/defalias compress compression/compress)
(encore/defalias decompress compression/decompress)
(encore/defalias snappy-compressor compression/snappy-compressor)
(encore/defalias lzma2-compressor compression/lzma2-compressor)
(encore/defalias lz4-compressor compression/lz4-compressor)
(encore/defalias lz4hc-compressor compression/lz4hc-compressor)
(encore/defalias encrypt encryption/encrypt)
(encore/defalias decrypt encryption/decrypt)
(encore/defalias aes128-encryptor encryption/aes128-encryptor)
(encore/defalias freezable? utils/freezable?)
;;;; Freezing
(defprotocol Freezable
"Be careful about extending to interfaces, Ref. http://goo.gl/6gGRlU."
(freeze-to-out* [this out]))
(defmacro write-id [out id] `(.writeByte ~out ~id))
(defmacro write-bytes [out ba & [small?]]
(let [out (with-meta out {:tag 'java.io.DataOutput})
ba (with-meta ba {:tag 'bytes})]
`(let [out# ~out, ba# ~ba
size# (alength ba#)]
(if ~small? ; Optimization, must be known before id's written
(.writeByte out# (byte size#)) ; `byte` to throw on range error
(.writeInt out# (int size#)) ; `int` ''
)
(.write out# ba# 0 size#))))
2014-01-22 07:14:26 +00:00
(defmacro write-biginteger [out x]
(let [x (with-meta x {:tag 'java.math.BigInteger})]
`(write-bytes ~out (.toByteArray ~x))))
(defmacro write-utf8 [out x & [small?]]
(let [x (with-meta x {:tag 'String})]
`(write-bytes ~out (.getBytes ~x "UTF-8") ~small?)))
(defmacro write-compact-long "Uses 2->9 bytes." [out x]
`(write-bytes ~out (.toByteArray (java.math.BigInteger/valueOf (long ~x)))
:small))
(comment (alength (.toByteArray (java.math.BigInteger/valueOf Long/MAX_VALUE))))
2014-01-22 08:42:57 +00:00
2014-01-22 07:14:26 +00:00
(defmacro ^:private freeze-to-out
"Like `freeze-to-out*` but with metadata support."
[out x]
`(let [out# ~out, x# ~x]
(when-let [m# (meta x#)]
2014-01-22 07:14:26 +00:00
(write-id out# ~id-meta)
(freeze-to-out* m# out#))
(freeze-to-out* x# out#)))
2012-07-06 19:12:59 +00:00
2013-10-23 18:25:46 +00:00
(defmacro ^:private freezer [type id & body]
2012-07-06 19:12:59 +00:00
`(extend-type ~type
Freezable
2014-01-22 07:14:26 +00:00
(~'freeze-to-out* [~'x ~(with-meta 'out {:tag 'DataOutput})]
(write-id ~'out ~id)
2012-07-06 19:12:59 +00:00
~@body)))
2013-10-23 18:25:46 +00:00
(defmacro ^:private freezer-coll [type id & body]
`(freezer ~type ~id
(when-debug-mode
(when (instance? ISeq ~type)
(println (format "DEBUG - freezer-coll: %s for %s" ~type (type ~'x)))))
(if (counted? ~'x)
2014-01-22 07:14:26 +00:00
(do (.writeInt ~'out (count ~'x))
(doseq [i# ~'x] (freeze-to-out ~'out i#)))
(let [bas# (ByteArrayOutputStream.)
sout# (DataOutputStream. bas#)
cnt# (reduce (fn [cnt# i#]
(freeze-to-out sout# i#)
(unchecked-inc cnt#))
0 ~'x)
ba# (.toByteArray bas#)]
2014-01-22 07:14:26 +00:00
(.writeInt ~'out cnt#)
(.write ~'out ba# 0 (alength ba#))))))
2012-07-06 19:12:59 +00:00
2013-10-23 18:25:46 +00:00
(defmacro ^:private freezer-kvs [type id & body]
`(freezer ~type ~id
2014-01-22 07:14:26 +00:00
(.writeInt ~'out (* 2 (count ~'x)))
(doseq [kv# ~'x]
2014-01-22 07:14:26 +00:00
(freeze-to-out ~'out (key kv#))
(freeze-to-out ~'out (val kv#)))))
2014-01-22 07:14:26 +00:00
(freezer (Class/forName "[B") id-bytes (write-bytes out ^bytes x))
2012-07-06 19:12:59 +00:00
(freezer nil id-nil)
2014-01-22 07:14:26 +00:00
(freezer Boolean id-boolean (.writeBoolean out x))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(freezer Character id-char (.writeChar out (int x)))
2014-01-22 08:42:57 +00:00
;; (freezer String id-string (write-utf8 out x))
(extend-type String ; Optimized common-case type
Freezable
(freeze-to-out* [x ^DataOutput out]
(let [ba (.getBytes x "UTF-8")]
(if (<= (alength ^bytes ba) Byte/MAX_VALUE)
2014-01-22 08:42:57 +00:00
(do (write-id out id-string-small)
(write-bytes out ba :small))
(do (write-id out id-string)
(write-bytes out ba))))))
(extend-type Keyword ; Optimized common-case type
Freezable
(freeze-to-out* [x ^DataOutput out]
(let [s (if-let [ns (namespace x)]
(str ns "/" (name x))
(name x))
ba (.getBytes s "UTF-8")]
(if (<= (alength ^bytes ba) Byte/MAX_VALUE)
2014-01-22 08:42:57 +00:00
(do (write-id out id-keyword-small)
(write-bytes out ba :small))
(do (write-id out id-keyword)
(write-bytes out ba))))))
2012-07-06 19:12:59 +00:00
2013-10-23 18:25:46 +00:00
(freezer-coll PersistentQueue id-queue)
(freezer-coll PersistentTreeSet id-sorted-set)
(freezer-kvs PersistentTreeMap id-sorted-map)
2013-10-23 18:25:46 +00:00
(freezer-kvs APersistentMap id-map)
(freezer-coll APersistentVector id-vector)
(freezer-coll APersistentSet id-set)
(freezer-coll PersistentList id-list) ; No APersistentList
(freezer-coll (type '()) id-list)
;; Nb low-level interface!! Acts as fallback for seqs that don't have a
;; concrete implementation. Will conflict with any other coll interfaces!
(freezer-coll ISeq id-seq)
(freezer IRecord id-record
2014-01-22 07:14:26 +00:00
(write-utf8 out (.getName (class x))) ; Reflect
(freeze-to-out out (into {} x)))
(freezer Byte id-byte (.writeByte out x))
(freezer Short id-short (.writeShort out x))
(freezer Integer id-integer (.writeInt out x))
2014-01-22 08:42:57 +00:00
;;(freezer Long id-long (.writeLong out x))
(extend-type Long ; Optimized common-case type
Freezable
(freeze-to-out* [x ^DataOutput out]
(cond
(<= Byte/MIN_VALUE x Byte/MAX_VALUE)
2014-01-22 08:42:57 +00:00
(do (write-id out id-byte-as-long) (.writeByte out x))
(<= Short/MIN_VALUE x Short/MAX_VALUE)
2014-01-22 08:42:57 +00:00
(do (write-id out id-short-as-long) (.writeShort out x))
(<= Integer/MIN_VALUE x Integer/MAX_VALUE)
2014-01-22 08:42:57 +00:00
(do (write-id out id-int-as-long) (.writeInt out x))
:else (do (write-id out id-long) (.writeLong out x)))))
;;
2014-01-22 08:42:57 +00:00
(freezer BigInt id-bigint (write-biginteger out (.toBigInteger x)))
(freezer BigInteger id-biginteger (write-biginteger out x))
2014-01-22 07:14:26 +00:00
(freezer Float id-float (.writeFloat out x))
(freezer Double id-double (.writeDouble out x))
2012-07-06 19:12:59 +00:00
(freezer BigDecimal id-bigdec
2014-01-22 07:14:26 +00:00
(write-biginteger out (.unscaledValue x))
(.writeInt out (.scale x)))
2012-07-06 19:12:59 +00:00
(freezer Ratio id-ratio
2014-01-22 07:14:26 +00:00
(write-biginteger out (.numerator x))
(write-biginteger out (.denominator x)))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(freezer Date id-date (.writeLong out (.getTime x)))
(freezer UUID id-uuid
2014-01-22 07:14:26 +00:00
(.writeLong out (.getMostSignificantBits x))
(.writeLong out (.getLeastSignificantBits x)))
(def ^:dynamic *final-freeze-fallback* "Alpha - subject to change." nil)
2014-01-22 07:14:26 +00:00
(defn freeze-fallback-as-str "Alpha-subject to change." [x out]
(freeze-to-out* {:nippy/unfreezable (pr-str x) :type (type x)} out))
(comment
(require '[clojure.core.async :as async])
(binding [*final-freeze-fallback* freeze-fallback-as-str]
(-> (async/chan) (freeze) (thaw))))
;; Fallbacks. Note that we'll extend *only* to (lowly) Object to prevent
;; interfering with higher-level implementations, Ref. http://goo.gl/6f7SKl
(extend-type Object
Freezable
2014-01-22 07:14:26 +00:00
(freeze-to-out* [x ^DataOutput out]
(cond
(utils/serializable? x) ; Fallback #1: Java's Serializable interface
(do (when-debug-mode
2013-10-31 06:15:22 +00:00
(println (format "DEBUG - Serializable fallback: %s" (type x))))
2014-01-22 07:14:26 +00:00
(write-id out id-serializable)
(write-utf8 out (.getName (class x))) ; Reflect
(.writeObject (ObjectOutputStream. out) x))
(utils/readable? x) ; Fallback #2: Clojure's Reader
(do (when-debug-mode
(println (format "DEBUG - Reader fallback: %s" (type x))))
2014-01-22 07:14:26 +00:00
(write-id out id-reader)
(write-utf8 out (pr-str x)))
:else ; Fallback #3: *final-freeze-fallback*
2014-01-22 07:14:26 +00:00
(if-let [ffb *final-freeze-fallback*] (ffb x out)
(throw (ex-info (format "Unfreezable type: %s %s" (type x) (str x))
{:type (type x)
:as-str (pr-str x)}))))))
2013-08-07 09:19:11 +00:00
2013-06-13 15:40:44 +00:00
(def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))
(def ^:private get-head-ba
(memoize
(fn [head-meta]
(when-let [meta-id (get head-meta-id (assoc head-meta :version head-version))]
(encore/ba-concat head-sig (byte-array [meta-id]))))))
2013-06-13 15:40:44 +00:00
(defn- wrap-header [data-ba head-meta]
(if-let [head-ba (get-head-ba head-meta)]
(encore/ba-concat head-ba data-ba)
(throw (ex-info (format "Unrecognized header meta: %s" head-meta)
{:head-meta head-meta}))))
2013-06-13 15:40:44 +00:00
(comment (wrap-header (.getBytes "foo") {:compressor-id :lz4
:encryptor-id nil}))
2014-01-22 07:14:26 +00:00
(defn freeze-to-out!
"Low-level API. Serializes arg (any Clojure data type) to a DataOutput."
[^DataOutput data-output x & _]
2014-01-22 07:14:26 +00:00
(freeze-to-out data-output x))
(defn freeze
"Serializes arg (any Clojure data type) to a byte array. To freeze custom
types, extend the Clojure reader or see `extend-freeze`."
^bytes [x & [{:keys [compressor encryptor password skip-header?]
:or {compressor lz4-compressor
encryptor aes128-encryptor}
:as opts}]]
(let [legacy-mode? (:legacy-mode opts) ; DEPRECATED Nippy v1-compatible freeze
compressor (if-not legacy-mode? compressor snappy-compressor)
encryptor (when password (if-not legacy-mode? encryptor nil))
skip-header? (or skip-header? legacy-mode?)
baos (ByteArrayOutputStream.)
dos (DataOutputStream. baos)]
(freeze-to-out! dos x)
(let [ba (.toByteArray baos)
ba (if-not compressor ba (compress compressor ba))
ba (if-not encryptor ba (encrypt encryptor password ba))]
(if skip-header? ba
(wrap-header ba
{:compressor-id (when-let [c compressor]
(or (compression/standard-header-ids
(compression/header-id c)) :else))
:encryptor-id (when-let [e encryptor]
(or (encryption/standard-header-ids
(encryption/header-id e)) :else))})))))
2012-07-06 19:12:59 +00:00
;;;; Thawing
2014-01-22 07:14:26 +00:00
(declare thaw-from-in)
2012-07-06 19:12:59 +00:00
(defmacro read-bytes [in & [small?]]
`(let [in# ~in
2014-01-22 08:42:57 +00:00
size# (if ~small? ; Optimization, must be known before id's written
(.readByte in#)
(.readInt in#))
ba# (byte-array size#)]
(.readFully in# ba# 0 size#) ba#))
(defmacro read-biginteger [in] `(BigInteger. (read-bytes ~in)))
(defmacro read-utf8 [in & [small?]]
`(String. (read-bytes ~in ~small?) "UTF-8"))
(defmacro read-compact-long [in] `(long (BigInteger. (read-bytes ~in :small))))
2014-01-22 07:14:26 +00:00
(defmacro ^:private read-coll [in coll]
2014-02-23 11:52:50 +00:00
`(let [in# ~in] (encore/repeatedly-into* ~coll (.readInt in#) (thaw-from-in in#))))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(defmacro ^:private read-kvs [in coll]
2014-02-23 11:52:50 +00:00
`(let [in# ~in] (encore/repeatedly-into* ~coll (/ (.readInt in#) 2)
2014-01-22 07:14:26 +00:00
[(thaw-from-in in#) (thaw-from-in in#)])))
(declare ^:private custom-readers)
(defn- read-custom! [type-id in]
(if-let [custom-reader (get @custom-readers type-id)]
(try
(custom-reader in)
(catch Exception e
(throw
(ex-info
(format "Reader exception for custom type with internal id: %s"
type-id) {:internal-type-id type-id} e))))
(throw
(ex-info
(format "No reader provided for custom type with internal id: %s"
type-id)
{:internal-type-id type-id}))))
2014-01-22 07:14:26 +00:00
(defn- thaw-from-in
[^DataInput in]
(let [type-id (.readByte in)]
(try
(when-debug-mode
(println (format "DEBUG - thawing type-id: %s" type-id)))
2014-02-23 11:52:50 +00:00
(encore/case-eval type-id
id-reader
2014-01-22 07:14:26 +00:00
(let [edn (read-utf8 in)]
(try (edn/read-string {:readers *data-readers*} edn)
(catch Exception e {:nippy/unthawable edn
:type :reader
:throwable e})))
id-serializable
2014-01-22 07:14:26 +00:00
(let [class-name (read-utf8 in)]
2013-12-06 18:55:00 +00:00
(try (let [;; .readObject _before_ Class/forName: it'll always read
;; all data before throwing
2014-01-22 07:14:26 +00:00
object (.readObject (ObjectInputStream. in))
2014-10-06 09:12:16 +00:00
^Class class (Class/forName class-name)]
(cast class object))
(catch Exception e {:nippy/unthawable class-name
:type :serializable
:throwable e})))
2014-01-22 07:14:26 +00:00
id-bytes (read-bytes in)
id-nil nil
2014-01-22 07:14:26 +00:00
id-boolean (.readBoolean in)
2014-01-22 07:14:26 +00:00
id-char (.readChar in)
id-string (read-utf8 in)
id-keyword (keyword (read-utf8 in))
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
id-string-small (String. (read-bytes in :small) "UTF-8")
id-keyword-small (keyword (String. (read-bytes in :small) "UTF-8"))
2014-01-22 07:14:26 +00:00
id-queue (read-coll in (PersistentQueue/EMPTY))
id-sorted-set (read-coll in (sorted-set))
id-sorted-map (read-kvs in (sorted-map))
2014-01-22 07:14:26 +00:00
id-list (into '() (rseq (read-coll in [])))
id-vector (read-coll in [])
id-set (read-coll in #{})
id-map (read-kvs in {})
id-seq (or (seq (read-coll in []))
(lazy-seq nil) ; Empty coll
)
2014-01-22 07:14:26 +00:00
id-meta (let [m (thaw-from-in in)] (with-meta (thaw-from-in in) m))
2014-01-22 07:14:26 +00:00
id-byte (.readByte in)
id-short (.readShort in)
id-integer (.readInt in)
id-long (.readLong in)
2014-01-22 08:42:57 +00:00
;;; Optimized, common-case types (v2.6+)
id-byte-as-long (long (.readByte in))
id-short-as-long (long (.readShort in))
id-int-as-long (long (.readInt in))
;; id-compact-long (read-compact-long in)
2014-01-22 08:42:57 +00:00
id-bigint (bigint (read-biginteger in))
id-biginteger (read-biginteger in)
2014-01-22 07:14:26 +00:00
id-float (.readFloat in)
id-double (.readDouble in)
id-bigdec (BigDecimal. (read-biginteger in) (.readInt in))
2014-01-22 07:14:26 +00:00
id-ratio (/ (bigint (read-biginteger in))
(bigint (read-biginteger in)))
id-record
2014-01-22 07:14:26 +00:00
(let [class ^Class (Class/forName (read-utf8 in))
meth-sig (into-array Class [IPersistentMap])
method ^Method (.getMethod class "create" meth-sig)]
2014-01-22 07:14:26 +00:00
(.invoke method class (into-array Object [(thaw-from-in in)])))
2014-01-22 07:14:26 +00:00
id-date (Date. (.readLong in))
id-uuid (UUID. (.readLong in) (.readLong in))
;;; DEPRECATED
2014-01-22 07:14:26 +00:00
id-old-reader (edn/read-string (.readUTF in))
id-old-string (.readUTF in)
2014-02-23 11:52:50 +00:00
id-old-map (apply hash-map (encore/repeatedly-into* []
2014-01-22 07:14:26 +00:00
(* 2 (.readInt in)) (thaw-from-in in)))
id-old-keyword (keyword (.readUTF in))
id-prefixed-custom ; Prefixed custom type
(let [hash-id (.readShort in)]
(read-custom! hash-id in))
(read-custom! type-id in) ; Unprefixed custom type (catchall)
)
(catch Exception e
(throw (ex-info (format "Thaw failed against type-id: %s" type-id)
{:type-id type-id} e))))))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(defn thaw-from-in!
"Low-level API. Deserializes a frozen object from given DataInput to its
original Clojure data type."
[data-input & _]
2014-01-22 07:14:26 +00:00
(thaw-from-in data-input))
2013-06-13 15:40:44 +00:00
(defn- try-parse-header [ba]
2014-02-23 11:52:50 +00:00
(when-let [[head-ba data-ba] (encore/ba-split ba 4)]
(let [[head-sig* [meta-id]] (encore/ba-split head-ba 3)]
(when (encore/ba= head-sig* head-sig) ; Header appears to be well-formed
[data-ba (get head-meta meta-id {:unrecognized-meta? true})]))))
(defn- get-auto-compressor [compressor-id]
(case compressor-id
nil nil
:snappy snappy-compressor
:lzma2 lzma2-compressor
:lz4 lz4-compressor
:no-header (throw (ex-info ":auto not supported on headerless data." {}))
:else (throw (ex-info ":auto not supported for non-standard compressors." {}))
(throw (ex-info (format "Unrecognized :auto compressor id: %s" compressor-id)
{:compressor-id compressor-id}))))
(defn- get-auto-encryptor [encryptor-id]
(case encryptor-id
nil nil
:aes128-sha512 aes128-encryptor
:no-header (throw (ex-info ":auto not supported on headerless data." {}))
2014-11-03 09:43:41 +00:00
:else (throw (ex-info ":auto not supported for non-standard encryptors." {}))
(throw (ex-info (format "Unrecognized :auto encryptor id: %s" encryptor-id)
{:encryptor-id encryptor-id}))))
2013-06-13 15:40:44 +00:00
(defn thaw
"Deserializes a frozen object from given byte array to its original Clojure
data type. Supports data frozen with current and all previous versions of
Nippy. To thaw custom types, extend the Clojure reader or see `extend-thaw`.
Options include:
:compressor - An ICompressor, :auto (requires Nippy header), or nil.
:encryptor - An IEncryptor, :auto (requires Nippy header), or nil."
[^bytes ba & [{:keys [compressor encryptor password]
:or {compressor :auto
encryptor :auto}
2013-06-17 14:59:52 +00:00
:as opts}]]
(assert (not (contains? opts :headerless-meta))
":headerless-meta `thaw` option removed as of Nippy v2.7.")
(let [ex (fn [msg & [e]] (throw (ex-info (format "Thaw failed: %s" msg)
{:opts (merge opts
{:compressor compressor
:encryptor encryptor})}
e)))
thaw-data
(fn [data-ba compressor-id encryptor-id]
(let [compressor (if-not (identical? compressor :auto) compressor
(get-auto-compressor compressor-id))
encryptor (if-not (identical? encryptor :auto) encryptor
(get-auto-encryptor encryptor-id))]
(when (and encryptor (not password))
(ex "Password required for decryption."))
2013-06-13 15:40:44 +00:00
(try
(let [ba data-ba
ba (if-not encryptor ba (decrypt encryptor password ba))
ba (if-not compressor ba (decompress compressor ba))
dis (DataInputStream. (ByteArrayInputStream. ba))]
(thaw-from-in! dis))
2013-07-29 08:22:31 +00:00
2013-06-13 15:40:44 +00:00
(catch Exception e
(ex "Decryption/decompression failure, or data unfrozen/damaged.")))))
thaw-nippy-v1-data ; A little hackish, but necessary
(fn [data-ba]
(try (thaw-data data-ba :snappy nil)
(catch Exception _
(thaw-data data-ba nil nil))))]
(if-let [[data-ba {:keys [compressor-id encryptor-id unrecognized-meta?]
:as head-meta}] (try-parse-header ba)]
;; A well-formed header _appears_ to be present (it's possible though
;; unlikely that this is a fluke and data is actually headerless):
(try (thaw-data data-ba compressor-id encryptor-id)
(catch Exception e
(try (thaw-nippy-v1-data)
(catch Exception _
(if unrecognized-meta?
(ex "Unrecognized (but apparently well-formed) header. Data frozen with newer Nippy version?"
e)
(throw e))))))
;; Well-formed header definitely not present
(try (thaw-nippy-v1-data ba)
(catch Exception _
(thaw-data ba :no-header :no-header))))))
(comment (thaw (freeze "hello"))
(thaw (freeze "hello" {:compressor nil}))
(thaw (freeze "hello" {:password [:salted "p"]})) ; ex: no pwd
(thaw (freeze "hello") {:password [:salted "p"]}))
2012-07-06 19:12:59 +00:00
;;;; Custom types
(defn- assert-custom-type-id [custom-type-id]
(assert (or (keyword? custom-type-id)
(and (integer? custom-type-id) (<= 1 custom-type-id 128)))))
2014-07-06 06:47:38 +00:00
(defn- coerce-custom-type-id
"* +ive byte id -> -ive byte id (for unprefixed custom types).
* Keyword id -> Short hash id (for prefixed custom types)."
[custom-type-id]
(assert-custom-type-id custom-type-id)
(if-not (keyword? custom-type-id)
(int (- custom-type-id))
(let [hash-id (hash custom-type-id)
short-hash-id (if (pos? hash-id)
(mod hash-id Short/MAX_VALUE)
(mod hash-id Short/MIN_VALUE))]
;; Make sure hash ids can't collide with byte ids (unlikely anyway):
(assert (not (<= -128 short-hash-id -1))
"Custom type id hash collision; please choose a different id")
(int short-hash-id))))
(comment (coerce-custom-type-id 77)
(coerce-custom-type-id :foo/bar))
(defmacro extend-freeze
"Extends Nippy to support freezing of a custom type (ideally concrete) with
given id of form:
* Keyword - 2 byte overhead, resistent to id collisions.
* Byte [1, 128] - no overhead, subject to id collisions.
(defrecord MyType [data])
(extend-freeze MyType :foo/my-type [x data-output] ; Keyword id
(.writeUTF [data-output] (:data x)))
;; or
(extend-freeze MyType 1 [x data-output] ; Byte id
2014-01-22 07:14:26 +00:00
(.writeUTF [data-output] (:data x)))"
[type custom-type-id [x out] & body]
(assert-custom-type-id custom-type-id)
`(extend-type ~type Freezable
2014-01-22 07:14:26 +00:00
(~'freeze-to-out* [~x ~(with-meta out {:tag 'java.io.DataOutput})]
(if-not ~(keyword? custom-type-id)
;; Unprefixed [cust byte id][payload]:
(write-id ~out ~(coerce-custom-type-id custom-type-id))
;; Prefixed [const byte id][cust hash id][payload]:
(do (write-id ~out id-prefixed-custom)
(.writeShort ~out ~(coerce-custom-type-id custom-type-id))))
~@body)))
(defonce custom-readers (atom {})) ; {<hash-or-byte-id> (fn [data-input]) ...}
(defmacro extend-thaw
"Extends Nippy to support thawing of a custom type with given id:
(extend-thaw :foo/my-type [data-input] ; Keyword id
(->MyType (.readUTF data-input)))
;; or
(extend-thaw 1 [data-input] ; Byte id
2014-01-22 07:14:26 +00:00
(->MyType (.readUTF data-input)))"
[custom-type-id [in] & body]
(assert-custom-type-id custom-type-id)
(when (contains? @custom-readers (coerce-custom-type-id custom-type-id))
(println (format "Warning: resetting Nippy thaw for custom type with id: %s"
custom-type-id)))
`(swap! custom-readers assoc
~(coerce-custom-type-id custom-type-id)
(fn [~(with-meta in {:tag 'java.io.DataInput})]
~@body)))
(comment (defrecord MyType [data])
2014-01-22 07:14:26 +00:00
(extend-freeze MyType 1 [x out] (.writeUTF out (:data x)))
(extend-thaw 1 [in] (->MyType (.readUTF in)))
(thaw (freeze (->MyType "Joe"))))
;;; Some useful custom types - EXPERIMENTAL
(defrecord Compressable-LZMA2 [value])
2014-01-22 07:14:26 +00:00
(extend-freeze Compressable-LZMA2 128 [x out]
(let [ba (freeze (:value x) {:skip-header? true :compressor nil})
ba-len (alength ba)
compress? (> ba-len 1024)]
2014-01-22 07:14:26 +00:00
(.writeBoolean out compress?)
(if-not compress? (write-bytes out ba)
(let [ba* (compress lzma2-compressor ba)]
2014-01-22 07:14:26 +00:00
(write-bytes out ba*)))))
2014-01-22 07:14:26 +00:00
(extend-thaw 128 [in]
(let [compressed? (.readBoolean in)
ba (read-bytes in)]
(thaw ba {:compressor (when compressed? lzma2-compressor)
:encryptor nil})))
(comment
(->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze)
(thaw))
(count (->> (apply str (repeatedly 1000 rand)) (freeze)))
(count (->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze))))
2013-06-12 18:14:46 +00:00
;;;; Stress data
2012-07-06 19:12:59 +00:00
2013-10-24 06:33:54 +00:00
(defrecord StressRecord [data])
2013-06-12 17:15:16 +00:00
(def stress-data "Reference data used for tests & benchmarks."
(let []
{:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
:nil nil
:boolean true
:char-utf8 \ಬ
:string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
:string-long (apply str (range 1000))
:keyword :keyword
:keyword-ns ::keyword
2014-01-22 08:42:57 +00:00
;;; Try reflect real-world data:
:lotsa-small-numbers (vec (range 200))
:lotsa-small-keywords (->> (java.util.Locale/getISOLanguages)
(mapv keyword))
:lotsa-small-strings (->> (java.util.Locale/getISOCountries)
(mapv #(.getDisplayCountry
(java.util.Locale. "en" %))))
:queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
:queue-empty (PersistentQueue/EMPTY)
:sorted-set (sorted-set 1 2 3 4 5)
:sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3)
:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
:list-quoted '(1 2 3 4 5 (6 7 8 (9 10)))
:list-empty (list)
:vector [1 2 3 4 5 [6 7 8 [9 10]]]
:vector-empty []
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
:map-empty {}
:set #{1 2 3 4 5 #{6 7 8 #{9 10}}}
:set-empty #{}
:meta (with-meta {:a :A} {:metakey :metaval})
2013-10-31 06:16:26 +00:00
:lazy-seq (repeatedly 1000 rand)
:lazy-seq-empty (map identity '())
:byte (byte 16)
:short (short 42)
:integer (int 3)
:long (long 3)
:bigint (bigint 31415926535897932384626433832795)
:float (float 3.14)
:double (double 3.14)
:bigdec (bigdec 3.1415926535897932384626433832795)
:ratio 22/7
:uuid (java.util.UUID/randomUUID)
2013-10-24 06:33:54 +00:00
:date (java.util.Date.)
:stress-record (->StressRecord "data")
;; Serializable
:throwable (Throwable. "Yolo")
:exception (try (/ 1 0) (catch Exception e e))
:ex-info (ex-info "ExInfo" {:data "data"})}))
2013-06-12 18:14:46 +00:00
(def stress-data-comparable
"Reference data with stuff removed that breaks roundtrip equality."
(dissoc stress-data :bytes :throwable :exception :ex-info))
(def stress-data-benchable
"Reference data with stuff removed that breaks reader or other utils we'll
be benching against."
2014-01-21 07:44:53 +00:00
(dissoc stress-data :bytes :throwable :exception :ex-info :queue :queue-empty
2014-01-22 08:42:57 +00:00
:byte :stress-record))
2014-02-14 16:06:53 +00:00
;;;; Tools
(defn inspect-ba "Alpha - subject to change."
[ba & [thaw-opts]]
2014-02-23 11:52:50 +00:00
(if-not (encore/bytes? ba) :not-ba
(let [[first2bytes nextbytes] (encore/ba-split ba 2)
known-wrapper
(cond
2014-02-23 11:52:50 +00:00
(encore/ba= first2bytes (.getBytes "\u0000<" "UTF8")) :carmine/bin
(encore/ba= first2bytes (.getBytes "\u0000>" "UTF8")) :carmine/clj)
unwrapped-ba (if known-wrapper nextbytes ba)
[data-ba nippy-header] (or (try-parse-header unwrapped-ba)
[unwrapped-ba :no-header])]
{:known-wrapper known-wrapper
:nippy-v2-header nippy-header ; Nippy v1.x didn't have a header
:thawable? (try (thaw unwrapped-ba thaw-opts) true
(catch Exception _ false))
:unwrapped-ba unwrapped-ba
:data-ba data-ba
:unwrapped-size (alength ^bytes unwrapped-ba)
:ba-size (alength ^bytes ba)
:data-size (alength ^bytes data-ba)})))
(comment (inspect-ba (freeze "hello"))
(seq (:data-ba (inspect-ba (freeze "hello")))))
2013-06-12 18:14:46 +00:00
;;;; Deprecated API
2014-01-22 07:14:26 +00:00
(def freeze-to-stream! "DEPRECATED: Use `freeze-to-out!` instead."
freeze-to-out!)
(def thaw-from-stream! "DEPRECATED: Use `thaw-from-in!` instead."
thaw-from-in!)