nippy/src/taoensso/nippy.clj

727 lines
28 KiB
Clojure
Raw Normal View History

2012-07-06 19:12:59 +00:00
(ns taoensso.nippy
  "Simple, high-performance Clojure serialization library. Originally adapted
  from Deep-Freeze."
  {:author "Peter Taoussanis"}
  (:require [clojure.tools.reader.edn :as edn]
            [taoensso.nippy
             (utils       :as utils)
             (compression :as compression :refer (snappy-compressor))
             (encryption  :as encryption  :refer (aes128-encryptor))])
  (:import  [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
             DataOutputStream Serializable ObjectOutputStream ObjectInputStream
             DataOutput DataInput]
            [java.lang.reflect Method]
            [java.util Date UUID]
            [clojure.lang Keyword BigInt Ratio
             APersistentMap APersistentVector APersistentSet
             IPersistentMap ; IPersistentVector IPersistentSet IPersistentList
             PersistentQueue PersistentTreeMap PersistentTreeSet PersistentList ; LazySeq
             IRecord ISeq]))
2012-07-06 19:12:59 +00:00
2013-06-13 15:40:44 +00:00
;;;; Nippy 2.x+ header spec (4 bytes)
;; Header is optional but recommended + enabled by default. Uses:
;; * Sanity check (data appears to be Nippy data).
;; * Nippy version check (=> supports changes to data schema over time).
;; * Encrypted &/or compressed data identification.
;;
;; Version number stored in every header metadata map written by this code.
(def ^:private ^:const head-version 1)

;; 3-byte signature that starts every Nippy header.
(def ^:private head-sig (.getBytes "NPY" "UTF-8"))

;; Maps the 4th header byte to the metadata it encodes.
(def ^:private ^:const head-meta "Final byte stores version-dependent metadata."
  {(byte 0) {:version 1 :compressed? false :encrypted? false}
   (byte 1) {:version 1 :compressed? true  :encrypted? false}
   (byte 2) {:version 1 :compressed? false :encrypted? true}
   (byte 3) {:version 1 :compressed? true  :encrypted? true}})
2013-06-12 18:14:46 +00:00
(defmacro when-debug-mode
  "Compile-time debug switch: expands to `(do body...)` when the literal flag
  below is true, and to nil otherwise. Swap `#_true false` for `true` to turn
  debug output on."
  [& body]
  (if #_true false
    (list* 'do body)
    nil))
2013-10-31 06:15:22 +00:00
2013-06-12 18:14:46 +00:00
;;;; Data type IDs
2012-07-06 19:12:59 +00:00
2013-07-29 08:22:31 +00:00
;; **Negative ids reserved for user-defined types**
2014-01-22 07:37:38 +00:00
;; One-byte type tags written ahead of each serialized value.
(do ; Just for easier IDE collapsing
  (def ^:const id-reserved   (int 0))
  ;;                              1
  (def ^:const id-bytes      (int 2))
  (def ^:const id-nil        (int 3))
  (def ^:const id-boolean    (int 4))
  (def ^:const id-reader     (int 5)) ; Fallback #2: pr-str output
  (def ^:const id-serializable (int 6)) ; Fallback #1

  (def ^:const id-char       (int 10))
  ;;                              11
  ;;                              12
  (def ^:const id-string     (int 13))
  (def ^:const id-keyword    (int 14))

  (def ^:const id-list       (int 20))
  (def ^:const id-vector     (int 21))
  ;;                              22
  (def ^:const id-set        (int 23))
  (def ^:const id-seq        (int 24))
  (def ^:const id-meta       (int 25))
  (def ^:const id-queue      (int 26))
  (def ^:const id-map        (int 27))
  (def ^:const id-sorted-set (int 28))
  (def ^:const id-sorted-map (int 29))

  (def ^:const id-byte       (int 40))
  (def ^:const id-short      (int 41))
  (def ^:const id-integer    (int 42))
  (def ^:const id-long       (int 43))
  (def ^:const id-bigint     (int 44))

  (def ^:const id-float      (int 60))
  (def ^:const id-double     (int 61))
  (def ^:const id-bigdec     (int 62))

  (def ^:const id-ratio      (int 70))

  (def ^:const id-record     (int 80))
  ;; (def ^:const id-type    (int 81)) ; TODO

  (def ^:const id-date       (int 90))
  (def ^:const id-uuid       (int 91))

  ;;; Optimized, common-case types (v2.6+)
  (def ^:const id-byte-as-long  (int 100)) ; 1 vs 8 byte storage
  (def ^:const id-short-as-long (int 101)) ; 2 vs 8 byte storage
  (def ^:const id-int-as-long   (int 102)) ; 4 vs 8 byte storage
  ;;
  (def ^:const id-string-small  (int 103)) ; 1 vs 4 byte overhead
  (def ^:const id-keyword-small (int 104)) ; ''
  ;;
  ;; (def ^:const id-vector-small  (int 105)) ; ''
  ;; (def ^:const id-set-small     (int 106)) ; ''
  ;; (def ^:const id-map-small     (int 107)) ; ''

  ;;; DEPRECATED (old types will be supported only for thawing)
  (def ^:const id-old-reader  (int 1))  ; as of 0.9.2, for +64k support
  (def ^:const id-old-string  (int 11)) ; as of 0.9.2, for +64k support
  (def ^:const id-old-map     (int 22)) ; as of 0.9.0, for more efficient thaw
  (def ^:const id-old-keyword (int 12)) ; as of 2.0.0-alpha5, for str consistency
  )
;;;; Freezing
(defprotocol Freezable
  "Be careful about extending to interfaces, Ref. http://goo.gl/6gGRlU."
  ;; Implementations write `this` (type id first, then payload) to `out`.
  (freeze-to-out* [this out]))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(defmacro write-id
  "Expands to a `.writeByte` call that writes the type `id` to `out`."
  [out id]
  (list '.writeByte out id))
2014-01-22 08:42:57 +00:00
(defmacro ^:private write-bytes
  "Writes byte array `ba` to `out` as a length prefix followed by the raw
  bytes. When `small?` is truthy the length is written as a single byte,
  otherwise as a 4-byte int."
  [out ba & [small?]]
  `(let [out# ~out, ba# ~ba]
     (let [size# (alength ba#)]
       (if ~small? ; Optimization, must be known before id's written
         (.writeByte out# size#)
         (.writeInt  out# size#))
       (.write out# ba# 0 size#))))
2014-01-22 07:14:26 +00:00
(defmacro ^:private write-biginteger
  "Writes `x` via `write-bytes`, using the result of `(.toByteArray x)`."
  [out x]
  `(write-bytes ~out (.toByteArray ~x)))

(defmacro ^:private write-utf8
  "Writes `x` via `write-bytes`, using its UTF-8 encoded bytes."
  [out x]
  `(write-bytes ~out (.getBytes ~x "UTF-8")))
2014-01-22 08:42:57 +00:00
2014-01-22 07:14:26 +00:00
(defmacro ^:private freeze-to-out
  "Like `freeze-to-out*` but with metadata support."
  [out x]
  `(let [out# ~out, x# ~x]
     ;; Non-nil metadata is emitted first as: id-meta, then the frozen meta map.
     (when-let [m# (meta x#)]
       (write-id out# ~id-meta)
       (freeze-to-out* m# out#))
     (freeze-to-out* x# out#)))
2012-07-06 19:12:59 +00:00
2013-10-23 18:25:46 +00:00
(defmacro ^:private freezer
  "Extends `Freezable` to `type`. The generated `freeze-to-out*` first writes
  `id`, then runs `body` with the anaphoric locals `x` (the value) and `out`
  (hinted as DataOutput)."
  [type id & body]
  `(extend-type ~type
     Freezable
     (~'freeze-to-out* [~'x ~(with-meta 'out {:tag 'DataOutput})]
       (write-id ~'out ~id)
       ~@body)))
2013-10-23 18:25:46 +00:00
(defmacro ^:private freezer-coll
  "Defines a `freezer` for a sequential/set-like collection type: writes an
  int element count followed by each element (with metadata support).
  `body` is accepted but not used."
  [type id & body]
  `(freezer ~type ~id
     (when-debug-mode
      (when (instance? ISeq ~type)
        (println (format "DEBUG - freezer-coll: %s for %s" ~type (type ~'x)))))
     (if (counted? ~'x)
       (do (.writeInt ~'out (count ~'x))
           (doseq [i# ~'x] (freeze-to-out ~'out i#)))
       ;; Not counted: the count must precede the elements, so freeze the
       ;; elements into a temporary buffer while counting them, then emit
       ;; count + buffer.
       (let [bas#  (ByteArrayOutputStream.)
             sout# (DataOutputStream. bas#)
             cnt#  (reduce (fn [cnt# i#]
                             (freeze-to-out sout# i#)
                             (unchecked-inc cnt#))
                           0 ~'x)
             ba#   (.toByteArray bas#)]
         (.writeInt ~'out cnt#)
         (.write ~'out ba# 0 (alength ba#))))))
2012-07-06 19:12:59 +00:00
2013-10-23 18:25:46 +00:00
(defmacro ^:private freezer-kvs
  "Defines a `freezer` for a map-like type: writes an int equal to twice the
  entry count, then each key followed by its value. `body` is accepted but
  not used."
  [type id & body]
  `(freezer ~type ~id
     (.writeInt ~'out (* 2 (count ~'x)))
     (doseq [kv# ~'x]
       (freeze-to-out ~'out (key kv#))
       (freeze-to-out ~'out (val kv#)))))
2014-01-22 07:14:26 +00:00
;; Byte arrays, nil, booleans and chars.
(freezer (Class/forName "[B") id-bytes   (write-bytes out ^bytes x))
(freezer nil                  id-nil)    ; Type id only, no payload
(freezer Boolean              id-boolean (.writeBoolean out x))

(freezer Character            id-char    (.writeChar out (int x)))
2014-01-22 08:42:57 +00:00
;; (freezer String id-string (write-utf8 out x))
(extend-type String ; Optimized common-case type
  Freezable
  (freeze-to-out* [x ^DataOutput out]
    ;; Must encode explicitly as UTF-8: thawing always decodes as UTF-8, so
    ;; the platform-default charset would corrupt non-ASCII strings on JVMs
    ;; whose default encoding isn't UTF-8.
    (let [ba (.getBytes x "UTF-8")]
      ;; The small variant stores the length in 1 byte instead of 4.
      (if (<= (alength ^bytes ba) java.lang.Byte/MAX_VALUE)
        (do (write-id    out id-string-small)
            (write-bytes out ba :small))
        (do (write-id    out id-string)
            (write-bytes out ba))))))
(extend-type Keyword ; Optimized common-case type
  Freezable
  (freeze-to-out* [x ^DataOutput out]
    ;; Serialized as the UTF-8 bytes of "ns/name", or just "name" when the
    ;; keyword has no namespace.
    (let [kw-ns  (namespace x)
          kw-str (if kw-ns
                   (str kw-ns "/" (name x))
                   (name x))
          ba     (.getBytes kw-str "UTF-8")
          small? (<= (alength ^bytes ba) java.lang.Byte/MAX_VALUE)]
      (if small?
        (do (write-id    out id-keyword-small)
            (write-bytes out ba :small))
        (do (write-id    out id-keyword)
            (write-bytes out ba))))))
2012-07-06 19:12:59 +00:00
2013-10-23 18:25:46 +00:00
(freezer-coll PersistentQueue       id-queue)
(freezer-coll PersistentTreeSet     id-sorted-set)
(freezer-kvs  PersistentTreeMap     id-sorted-map)

(freezer-kvs  APersistentMap        id-map)
(freezer-coll APersistentVector     id-vector)
(freezer-coll APersistentSet        id-set)
(freezer-coll PersistentList        id-list) ; No APersistentList
(freezer-coll (type '())            id-list)

;; Nb low-level interface!! Acts as fallback for seqs that don't have a
;; concrete implementation. Will conflict with any other coll interfaces!
(freezer-coll ISeq                  id-seq)

;; Records: class name, then the record's fields frozen as a plain map.
(freezer IRecord id-record
  (write-utf8 out (.getName (class x))) ; Reflect
  (freeze-to-out out (into {} x)))

(freezer Byte    id-byte    (.writeByte  out x))
(freezer Short   id-short   (.writeShort out x))
(freezer Integer id-integer (.writeInt   out x))
2014-01-22 08:42:57 +00:00
;;(freezer Long id-long (.writeLong out x))
(extend-type Long ; Optimized common-case type
  Freezable
  ;; Picks the narrowest of the byte/short/int/long encodings that can hold
  ;; the value; each has its own type id.
  (freeze-to-out* [n ^DataOutput out]
    (cond
      (<= java.lang.Byte/MIN_VALUE n java.lang.Byte/MAX_VALUE)
      (do (write-id out id-byte-as-long)
          (.writeByte out n))

      (<= java.lang.Short/MIN_VALUE n java.lang.Short/MAX_VALUE)
      (do (write-id out id-short-as-long)
          (.writeShort out n))

      (<= java.lang.Integer/MIN_VALUE n java.lang.Integer/MAX_VALUE)
      (do (write-id out id-int-as-long)
          (.writeInt out n))

      :else
      (do (write-id out id-long)
          (.writeLong out n)))))
2014-01-22 07:14:26 +00:00
;; BigInt and BigInteger share one type id and one wire format.
(freezer BigInt     id-bigint (write-biginteger out (.toBigInteger x)))
(freezer BigInteger id-bigint (write-biginteger out x))

(freezer Float      id-float  (.writeFloat  out x))
(freezer Double     id-double (.writeDouble out x))

;; BigDecimal: unscaled value, then int scale.
(freezer BigDecimal id-bigdec
  (write-biginteger out (.unscaledValue x))
  (.writeInt out (.scale x)))

;; Ratio: numerator, then denominator.
(freezer Ratio id-ratio
  (write-biginteger out (.numerator   x))
  (write-biginteger out (.denominator x)))

(freezer Date id-date (.writeLong out (.getTime x)))

;; UUID: most-significant long, then least-significant long.
(freezer UUID id-uuid
  (.writeLong out (.getMostSignificantBits  x))
  (.writeLong out (.getLeastSignificantBits x)))
;; When bound to a (fn [x out]), it is called by the Object freezer as a last
;; resort, after the Serializable and reader fallbacks don't apply.
(def ^:dynamic *final-freeze-fallback* "Alpha - subject to change." nil)

(defn freeze-fallback-as-str
  "Alpha - subject to change.
  Freezes, in place of `x`, a map holding `(pr-str x)` under
  :nippy/unfreezable and `(type x)` under :type."
  [x out]
  (freeze-to-out* {:nippy/unfreezable (pr-str x) :type (type x)} out))
(comment
(require '[clojure.core.async :as async])
(binding [*final-freeze-fallback* freeze-fallback-as-str]
(-> (async/chan) (freeze) (thaw))))
;; Fallbacks. Note that we'll extend *only* to (lowly) Object to prevent
;; interfering with higher-level implementations, Ref. http://goo.gl/6f7SKl
(extend-type Object
  Freezable
  (freeze-to-out* [x ^DataOutput out]
    (cond
      (utils/serializable? x) ; Fallback #1: Java's Serializable interface
      (do (when-debug-mode
           (println (format "DEBUG - Serializable fallback: %s" (type x))))
          (write-id out id-serializable)
          (write-utf8 out (.getName (class x))) ; Reflect
          (.writeObject (ObjectOutputStream. out) x))

      (utils/readable? x) ; Fallback #2: Clojure's Reader
      (do (when-debug-mode
           (println (format "DEBUG - Reader fallback: %s" (type x))))
          (write-id out id-reader)
          (write-utf8 out (pr-str x)))

      :else ; Fallback #3: *final-freeze-fallback*
      (if-let [ffb *final-freeze-fallback*] (ffb x out)
        (throw (Exception. (format "Unfreezable type: %s %s"
                                   (type x) (str x))))))))
2013-08-07 09:19:11 +00:00
2013-06-13 15:40:44 +00:00
;; Inverse of `head-meta`: metadata map -> header byte id.
(def ^:private head-meta-id
  (into {} (map (fn [[id m]] [m id]) head-meta)))

(defn- wrap-header
  "Returns `data-ba` prefixed with the header signature and the byte id for
  `metadata` (at the current head version). Throws if that metadata has no
  known id."
  [data-ba metadata]
  (let [versioned (assoc metadata :version head-version)
        meta-id   (head-meta-id versioned)]
    (when-not meta-id
      (throw (Exception. (str "Unrecognized header metadata: " metadata))))
    (utils/ba-concat (utils/ba-concat head-sig (byte-array [meta-id]))
                     data-ba)))
(comment (wrap-header (.getBytes "foo") {:compressed? true
:encrypted? false}))
(declare assert-legacy-args) ; Deprecated
2014-01-22 07:14:26 +00:00
(defn freeze-to-out!
  "Low-level API. Serializes arg (any Clojure data type) to a DataOutput."
  [^DataOutput data-output x & _] ; Extra args are accepted and ignored
  (freeze-to-out data-output x))
(defn freeze
  "Serializes arg (any Clojure data type) to a byte array. For custom types
  extend the Clojure reader or see `extend-freeze`."
  ^bytes [x & [{:keys [password compressor encryptor skip-header?]
                :or   {compressor snappy-compressor
                       encryptor  aes128-encryptor}
                :as   opts}]]
  (when (:legacy-mode opts) ; Deprecated
    (assert-legacy-args compressor password))
  (let [skip-header? (or skip-header? (:legacy-mode opts)) ; Deprecated
        bas  (ByteArrayOutputStream.)
        sout (DataOutputStream. bas)]
    (freeze-to-out! sout x)
    ;; Pipeline: serialize -> compress (if compressor) -> encrypt (if password)
    (let [ba (.toByteArray bas)
          ba (if compressor (compression/compress compressor ba) ba)
          ba (if password   (encryption/encrypt encryptor password ba) ba)]
      (if skip-header? ba
        (wrap-header ba {:compressed? (boolean compressor)
                         :encrypted?  (boolean password)})))))
2012-07-06 19:12:59 +00:00
;;;; Thawing
2014-01-22 07:14:26 +00:00
(declare thaw-from-in)
2012-07-06 19:12:59 +00:00
2014-01-22 08:42:57 +00:00
(defmacro ^:private read-bytes
  "Reads a length-prefixed byte array from `in`. When `small?` is truthy the
  length prefix is read as a single byte, otherwise as a 4-byte int."
  [in & [small?]]
  `(let [in#   ~in
         size# (if ~small? ; Must match the prefix width used when writing
                 (.readByte in#)
                 (.readInt  in#))
         ba#   (byte-array size#)]
     (.readFully in# ba# 0 size#) ba#))
2014-01-22 07:14:26 +00:00
(defmacro ^:private read-biginteger
  "Reads an int-length-prefixed byte array from `in` and builds a BigInteger
  from it."
  [in]
  `(BigInteger. (read-bytes ~in)))

(defmacro ^:private read-utf8
  "Reads an int-length-prefixed byte array from `in` and decodes it as a
  UTF-8 String."
  [in]
  `(String. (read-bytes ~in) "UTF-8"))
2014-01-22 07:14:26 +00:00
(defmacro ^:private read-coll
  "Reads an int count from `in`, then passes `coll`, that count and a
  `thaw-from-in` form to `utils/repeatedly-into`."
  [in coll]
  `(let [src# ~in]
     (utils/repeatedly-into ~coll
                            (.readInt src#)
                            (thaw-from-in src#))))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(defmacro ^:private read-kvs
  "Reads an int from `in` (twice the entry count, as written by
  `freezer-kvs`), then passes `coll`, half that int and a [key value] thaw
  form to `utils/repeatedly-into`."
  [in coll]
  `(let [src# ~in]
     (utils/repeatedly-into ~coll
                            (/ (.readInt src#) 2)
                            [(thaw-from-in src#) (thaw-from-in src#)])))
(declare ^:private custom-readers)
2014-01-22 07:14:26 +00:00
(defn- thaw-from-in
  "Reads one type-id byte from `in` and deserializes the value that follows
  according to that id. Negative ids dispatch to `custom-readers`. Any
  exception is rethrown wrapped in an Exception naming the type id."
  [^DataInput in]
  (let [type-id (.readByte in)]
    (try
      (when-debug-mode
       (println (format "DEBUG - thawing type-id: %s" type-id)))

      (utils/case-eval type-id

        id-reader
        (let [edn (read-utf8 in)]
          (try (edn/read-string {:readers *data-readers*} edn)
               (catch Exception _ {:nippy/unthawable edn
                                   :type :reader})))

        ;; NOTE(security): Java deserialization of whatever the input names.
        ;; Unsafe if the bytes being thawed can come from an untrusted source.
        id-serializable
        (let [class-name (read-utf8 in)]
          (try (let [;; .readObject _before_ Class/forName: it'll always read
                     ;; all data before throwing
                     object (.readObject (ObjectInputStream. in))
                     class ^Class (Class/forName class-name)]
                 (cast class object))
               (catch Exception _ {:nippy/unthawable class-name
                                   :type :serializable})))

        id-bytes   (read-bytes in)
        id-nil     nil
        id-boolean (.readBoolean in)

        id-char    (.readChar in)
        id-string  (read-utf8 in)
        id-keyword (keyword (read-utf8 in))

        ;;; Optimized, common-case types (v2.6+)
        id-string-small  (String. (read-bytes in :small) "UTF-8")
        id-keyword-small (keyword (String. (read-bytes in :small) "UTF-8"))

        id-queue      (read-coll in (PersistentQueue/EMPTY))
        id-sorted-set (read-coll in (sorted-set))
        id-sorted-map (read-kvs  in (sorted-map))

        id-list    (into '() (rseq (read-coll in [])))
        id-vector  (read-coll in [])
        id-set     (read-coll in #{})
        id-map     (read-kvs  in {})
        id-seq     (seq (read-coll in []))

        ;; Metadata map comes first, then the value it is attached to.
        id-meta (let [m (thaw-from-in in)] (with-meta (thaw-from-in in) m))

        id-byte    (.readByte  in)
        id-short   (.readShort in)
        id-integer (.readInt   in)
        id-long    (.readLong  in)

        ;;; Optimized, common-case types (v2.6+)
        id-byte-as-long  (long (.readByte  in))
        id-short-as-long (long (.readShort in))
        id-int-as-long   (long (.readInt   in))

        id-bigint (bigint (read-biginteger in))

        id-float  (.readFloat  in)
        id-double (.readDouble in)
        id-bigdec (BigDecimal. (read-biginteger in) (.readInt in))

        id-ratio (/ (bigint (read-biginteger in))
                    (bigint (read-biginteger in)))

        ;; NOTE(security): loads the class named in the input and invokes its
        ;; static `create` method; review before thawing untrusted data.
        id-record
        (let [class    ^Class (Class/forName (read-utf8 in))
              meth-sig (into-array Class [IPersistentMap])
              method   ^Method (.getMethod class "create" meth-sig)]
          (.invoke method class (into-array Object [(thaw-from-in in)])))

        id-date (Date. (.readLong in))
        id-uuid (UUID. (.readLong in) (.readLong in))

        ;;; DEPRECATED
        id-old-reader (edn/read-string (.readUTF in))
        id-old-string (.readUTF in)
        id-old-map    (apply hash-map (utils/repeatedly-into []
                        (* 2 (.readInt in)) (thaw-from-in in)))
        id-old-keyword (keyword (.readUTF in))

        (if-not (neg? type-id)
          (throw (Exception. (str "Unknown type ID: " type-id)))

          ;; Custom types
          (if-let [reader (get @custom-readers type-id)]
            (try (reader in)
                 (catch Exception e
                   (throw (Exception. (str "Reader exception for custom type ID: "
                                           (- type-id)) e))))
            (throw (Exception. (str "No reader provided for custom type ID: "
                                    (- type-id)))))))

      (catch Exception e
        (throw (Exception. (format "Thaw failed against type-id: %s" type-id) e))))))
2012-07-06 19:12:59 +00:00
2014-01-22 07:14:26 +00:00
(defn thaw-from-in!
  "Low-level API. Deserializes a frozen object from given DataInput to its
  original Clojure data type."
  [data-input & _] ; Extra args are accepted and ignored
  (thaw-from-in data-input))
2013-06-13 15:40:44 +00:00
(defn- try-parse-header
  "Splits `ba` into a 4-byte header and the remaining data. When the first 3
  header bytes equal the Nippy signature, returns [data-ba meta], where meta
  is the `head-meta` entry for the 4th byte or {:unrecognized-meta? true}
  when that byte is unknown. Returns nil otherwise."
  [ba]
  (when-let [[header-ba payload-ba] (utils/ba-split ba 4)]
    (let [[sig-ba [meta-byte]] (utils/ba-split header-ba 3)
          sig-ok?              (utils/ba= sig-ba head-sig)]
      (when sig-ok? ; Appears to be well-formed
        [payload-ba (get head-meta meta-byte {:unrecognized-meta? true})]))))
2013-06-13 15:40:44 +00:00
(defn thaw
  "Deserializes a frozen object from given byte array to its original Clojure
  data type. By default[1] supports data frozen with current and all previous
  versions of Nippy. For custom types extend the Clojure reader or see
  `extend-thaw`.

  [1] :headerless-meta provides a fallback facility for data frozen without a
  standard Nippy header (notably all Nippy v1 data). A default is provided for
  Nippy v1 thaw compatibility, but it's recommended that you _disable_ this
  fallback (`{:headerless-meta nil}`) if you're certain you won't be thawing
  headerless data."
  [^bytes ba & [{:keys [password compressor encryptor headerless-meta]
                 :or   {compressor snappy-compressor
                        encryptor  aes128-encryptor
                        headerless-meta ; Recommend set to nil when possible
                        {:version 1
                         :compressed? true
                         :encrypted?  false}}
                 :as   opts}]]

  (let [headerless-meta (merge headerless-meta (:legacy-opts opts)) ; Deprecated
        ex (fn [msg & [e]] (throw (Exception. (str "Thaw failed: " msg) e)))

        ;; `header?` says whether the given meta came from a parsed header
        ;; (true) or from the headerless fallback (false). It must be passed
        ;; explicitly: the header meta is only bound further down, so a bare
        ;; `head-meta` in here would resolve to the (always truthy) global
        ;; constant and the headerless messages could never be selected.
        try-thaw-data
        (fn [data-ba {:keys [compressed? encrypted?]} header?]
          (let [password   (when encrypted?  password)
                compressor (when compressed? compressor)]
            (try
              ;; Reverse of freeze: decrypt -> decompress -> deserialize
              (let [ba data-ba
                    ba (if password   (encryption/decrypt encryptor password ba) ba)
                    ba (if compressor (compression/decompress compressor ba) ba)
                    sin (DataInputStream. (ByteArrayInputStream. ba))]
                (thaw-from-in! sin))

              (catch Exception e
                (cond
                  password   (if header? (ex "Wrong password/encryptor?" e)
                                         (ex "Unencrypted data?" e))
                  compressor (if header? (ex "Encrypted data or wrong compressor?" e)
                                         (ex "Uncompressed data?" e))
                  :else      (if header? (ex "Corrupt data?" e)
                                         (ex "Data may be unfrozen, corrupt, compressed &/or encrypted." e)))))))]

    (if-let [[data-ba {:keys [unrecognized-meta? compressed? encrypted?]
                       :as   head-meta}] (try-parse-header ba)]

      (cond ; A well-formed header _appears_ to be present
        (and (not headerless-meta) ; Cautious. It's unlikely but possible the
                                   ; header sig match was a fluke and not an
                                   ; indication of a real, well-formed header.
                                   ; May really be headerless.
             unrecognized-meta?)
        (ex "Unrecognized (but apparently well-formed) header. Data frozen with newer Nippy version?")

        ;;; It's still possible below that the header match was a fluke, but it's
        ;;; _very_ unlikely. Therefore _not_ going to incl.
        ;;; `(not headerless-meta)` conditions below.

        (and compressed? (not compressor))
        (ex "Compressed data? Try again with compressor.")

        (and encrypted? (not password))
        (if (::tools-thaw? opts) ::need-password
          (ex "Encrypted data? Try again with password."))

        ;; Try as headered data first. On failure, retry the whole array as
        ;; headerless data; if that fails too, report the original error.
        :else (try (try-thaw-data data-ba head-meta true)
                   (catch Exception e
                     (if headerless-meta
                       (try (try-thaw-data ba headerless-meta false)
                            (catch Exception _
                              (throw e)))
                       (throw e)))))

      ;; Well-formed header definitely not present
      (if headerless-meta
        (try-thaw-data ba headerless-meta false)
        (ex "Data may be unfrozen, corrupt, compressed &/or encrypted.")))))
(comment (thaw (freeze "hello"))
(thaw (freeze "hello" {:compressor nil}))
(thaw (freeze "hello" {:password [:salted "p"]})) ; ex
(thaw (freeze "hello") {:password [:salted "p"]}))
2012-07-06 19:12:59 +00:00
;;;; Custom types
(defmacro extend-freeze
  "Alpha - subject to change.
  Extends Nippy to support freezing of a custom type (ideally concrete) with
  id [1, 128]:
  (defrecord MyType [data])
  (extend-freeze MyType 1 [x data-output]
    (.writeUTF data-output (:data x)))"
  [type custom-type-id [x out] & body]
  (assert (and (>= custom-type-id 1) (<= custom-type-id 128)))
  `(extend-type ~type
     Freezable
     (~'freeze-to-out* [~x ~(with-meta out {:tag 'java.io.DataOutput})]
       ;; Custom ids are stored negated, so they occupy the negative byte
       ;; range on the wire.
       (write-id ~out ~(int (- custom-type-id)))
       ~@body)))
2014-01-22 07:14:26 +00:00
(defonce custom-readers (atom {})) ; {<custom-type-id> (fn [data-input]) ...}

(defmacro extend-thaw
  "Alpha - subject to change.
  Extends Nippy to support thawing of a custom type with id [1, 128]:
  (extend-thaw 1 [data-input]
    (->MyType (.readUTF data-input)))"
  [custom-type-id [in] & body]
  (assert (and (>= custom-type-id 1) (<= custom-type-id 128)))
  ;; Registered under the negated id, matching what `extend-freeze` writes.
  `(swap! custom-readers assoc ~(int (- custom-type-id))
          (fn [~(with-meta in {:tag 'java.io.DataInput})]
            ~@body)))
(comment (defrecord MyType [data])
2014-01-22 07:14:26 +00:00
(extend-freeze MyType 1 [x out] (.writeUTF out (:data x)))
(extend-thaw 1 [in] (->MyType (.readUTF in)))
(thaw (freeze (->MyType "Joe"))))
;;; Some useful custom types - EXPERIMENTAL
;; Wrapper record: freezing it stores `value` LZMA2-compressed when its
;; uncompressed frozen form is larger than 1024 bytes.
(defrecord Compressable-LZMA2 [value])
(extend-freeze Compressable-LZMA2 128 [x out]
  (let [;; Freeze uncompressed, then drop the 4-byte header
        [_ ^bytes ba] (-> (freeze (:value x) {:compressor nil})
                          (utils/ba-split 4))
        ba-len    (alength ba)
        compress? (> ba-len 1024)]
    (.writeBoolean out compress?)
    (if-not compress? (write-bytes out ba)
      (let [ba* (compression/compress compression/lzma2-compressor ba)]
        (write-bytes out ba*)))))
2014-01-22 07:14:26 +00:00
;; Reader for custom type 128: re-attaches a header to the stored payload and
;; thaws it with the LZMA2 compressor.
(extend-thaw 128 [in]
  (let [lzma-compressed? (.readBoolean in)
        payload-ba       (read-bytes in)
        framed-ba        (wrap-header payload-ba {:compressed? lzma-compressed?
                                                  :encrypted?  false})]
    (thaw framed-ba {:compressor compression/lzma2-compressor})))
(comment
(->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze)
(thaw))
(count (->> (apply str (repeatedly 1000 rand)) (freeze)))
(count (->> (apply str (repeatedly 1000 rand))
(->Compressable-LZMA2)
(freeze))))
2013-06-12 18:14:46 +00:00
;;;; Stress data
2012-07-06 19:12:59 +00:00
2013-10-24 06:33:54 +00:00
;; Minimal record type; an instance is included in `stress-data` under
;; :stress-record.
(defrecord StressRecord [data])
2013-06-12 17:15:16 +00:00
(def stress-data "Reference data used for tests & benchmarks."
  (let []
    {:bytes        (byte-array [(byte 1) (byte 2) (byte 3)])
     :nil          nil
     :boolean      true

     :char-utf8    \ಬ
     :string-utf8  "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
     :string-long  (apply str (range 1000))
     :keyword      :keyword
     :keyword-ns   ::keyword

     ;;; Try reflect real-world data:
     :lotsa-small-numbers  (vec (range 200))
     :lotsa-small-keywords (->> (java.util.Locale/getISOLanguages)
                                (mapv keyword))
     :lotsa-small-strings  (->> (java.util.Locale/getISOCountries)
                                (mapv #(.getDisplayCountry
                                        (java.util.Locale. "en" %))))

     :queue        (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
     :queue-empty  (PersistentQueue/EMPTY)
     :sorted-set   (sorted-set 1 2 3 4 5)
     :sorted-map   (sorted-map :b 2 :a 1 :d 4 :c 3)

     :list         (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
     :list-quoted  '(1 2 3 4 5 (6 7 8 (9 10)))
     :list-empty   (list)
     :vector       [1 2 3 4 5 [6 7 8 [9 10]]]
     :vector-empty []
     :map          {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
     :map-empty    {}
     :set          #{1 2 3 4 5 #{6 7 8 #{9 10}}}
     :set-empty    #{}
     :meta         (with-meta {:a :A} {:metakey :metaval})

     :lazy-seq     (repeatedly 1000 rand)

     :byte         (byte 16)
     :short        (short 42)
     :integer      (int 3)
     :long         (long 3)
     :bigint       (bigint 31415926535897932384626433832795)

     :float        (float 3.14)
     :double       (double 3.14)
     :bigdec       (bigdec 3.1415926535897932384626433832795)

     :ratio        22/7
     :uuid         (java.util.UUID/randomUUID)
     :date         (java.util.Date.)

     :stress-record (->StressRecord "data")

     ;; Serializable
     :throwable    (Throwable. "Yolo")
     :exception    (try (/ 1 0) (catch Exception e e))
     :ex-info      (ex-info "ExInfo" {:data "data"})}))
2013-06-12 18:14:46 +00:00
(def stress-data-comparable
  "Reference data with stuff removed that breaks roundtrip equality."
  (dissoc stress-data :bytes :throwable :exception :ex-info))

(def stress-data-benchable
  "Reference data with stuff removed that breaks reader or other utils we'll
  be benching against."
  (dissoc stress-data :bytes :throwable :exception :ex-info :queue :queue-empty
          :byte :stress-record))
;;;; Data recovery/analysis
(defn inspect-ba "Alpha - subject to change."
  [ba & [thaw-opts]]
  (if (utils/bytes? ba)
    (let [[prefix-ba rest-ba] (utils/ba-split ba 2)
          ;; A recognized 2-byte wrapper prefix is stripped before inspection
          wrapper    (cond
                       (utils/ba= prefix-ba (.getBytes "\u0000<" "UTF8")) :carmine/bin
                       (utils/ba= prefix-ba (.getBytes "\u0000>" "UTF8")) :carmine/clj)
          payload-ba (if wrapper rest-ba ba)
          [data-ba header] (or (try-parse-header payload-ba)
                               [payload-ba :no-header])
          ;; True iff a thaw attempt with the given opts doesn't throw
          thawable?  (try (thaw payload-ba thaw-opts) true
                          (catch Exception _ false))]
      {:known-wrapper  wrapper
       :nippy2-header  header ; Nippy v1.x didn't have a header
       :thawable?      thawable?
       :unwrapped-ba   payload-ba
       :data-ba        data-ba
       :unwrapped-size (alength ^bytes payload-ba)
       :ba-size        (alength ^bytes ba)
       :data-size      (alength ^bytes data-ba)})
    :not-ba))
(comment (inspect-ba (freeze "hello"))
(seq (:data-ba (inspect-ba (freeze "hello")))))
2013-06-12 18:14:46 +00:00
;;;; Deprecated API
2014-01-22 07:14:26 +00:00
;; Old names kept as plain aliases of the current low-level fns.
(def freeze-to-stream!
  "DEPRECATED: Use `freeze-to-out!` instead."
  freeze-to-out!)

(def thaw-from-stream!
  "DEPRECATED: Use `thaw-from-in!` instead."
  thaw-from-in!)
(defn- assert-legacy-args
  "Throws AssertionError if the given options are unsupported in legacy mode:
  any password, or any compressor other than Snappy. Returns nil otherwise."
  [compressor password]
  (cond
    password
    (throw (AssertionError. "Encryption not supported in legacy mode."))

    (and compressor (not= compressor snappy-compressor))
    (throw (AssertionError. "Only Snappy compressor supported in legacy mode."))))
(defn freeze-to-bytes "DEPRECATED: Use `freeze` instead."
  ^bytes [x & {:keys [compress?]
               :or   {compress? true}}]
  ;; Legacy output format: no header, optional Snappy compression, never
  ;; encrypted.
  (freeze x {:skip-header? true
             :compressor   (when compress? snappy-compressor)
             :password     nil}))
2013-06-12 18:14:46 +00:00
(defn thaw-from-bytes "DEPRECATED: Use `thaw` instead."
  [ba & {:keys [compressed?]
         :or   {compressed? true}}]
  ;; `thaw` reads the headerless fallback description from :headerless-meta.
  ;; The previously used :headerless-opts key is not read by `thaw`, so
  ;; `:compressed? false` was silently ignored.
  (thaw ba {:headerless-meta {:version     1
                              :compressed? compressed?
                              :encrypted?  false}
            :compressor      snappy-compressor
            :password        nil}))