nippy/src/taoensso/nippy.clj

401 lines
14 KiB
Clojure
Raw Normal View History

2012-07-06 19:12:59 +00:00
(ns taoensso.nippy
"Simple, high-performance Clojure serialization library. Originally adapted
from Deep-Freeze."
2012-07-06 19:12:59 +00:00
{:author "Peter Taoussanis"}
(:require [taoensso.nippy
(utils :as utils)
(compression :as compression)
(encryption :as encryption)])
2012-07-06 19:12:59 +00:00
(:import [java.io DataInputStream DataOutputStream ByteArrayOutputStream
ByteArrayInputStream]
[clojure.lang Keyword BigInt Ratio PersistentQueue PersistentTreeMap
PersistentTreeSet IPersistentList IPersistentVector IPersistentMap
IPersistentSet IPersistentCollection]))
2012-07-06 19:12:59 +00:00
;; TODO Allow ba or wrapped-ba input?
;; TODO Provide ToFreeze, Frozen, Encrypted, etc. tooling helpers
;;;; Header IDs
;; Nippy 2.x+ prefixes frozen data with a 5-byte header:
;; * Magic prefix byte; `thaw` compares the first byte of its input to this
(def ^:const id-nippy-magic-prefix (byte 17))
;; * Header version byte; `thaw` rejects data whose version is greater
(def ^:const id-nippy-header-ver (byte 0))
;; * Compressor id (0 if no compressor)
;; * Encryptor id (0 if no encryptor)
;; * Reserved byte; written by `wrap-nippy-header`, ignored by `thaw`
(def ^:const id-nippy-reserved (byte 0))
2013-06-12 18:14:46 +00:00
;;;; Data type IDs
2012-07-06 19:12:59 +00:00
;; Each id is written as a single byte by `write-id` and read back with
;; `.readByte` in `thaw-from-stream`. Numbers 1, 11 and 22 are skipped here
;; because they belong to the deprecated `id-old-*` constants.
;; 1 (deprecated `id-old-reader`)
(def ^:const id-bytes (int 2))
(def ^:const id-nil (int 3))
(def ^:const id-boolean (int 4))
(def ^:const id-reader (int 5)) ; Fallback: *print-dup* pr-str output
(def ^:const id-char (int 10))
;; 11 (deprecated `id-old-string`)
(def ^:const id-keyword (int 12))
(def ^:const id-string (int 13))
;; Collections
(def ^:const id-list (int 20))
(def ^:const id-vector (int 21))
;; 22 (deprecated `id-old-map`)
(def ^:const id-set (int 23))
(def ^:const id-coll (int 24)) ; Fallback: non-specific collection
(def ^:const id-meta (int 25)) ; Marker: a metadata map precedes the value
(def ^:const id-queue (int 26))
(def ^:const id-map (int 27))
(def ^:const id-sorted-set (int 28))
(def ^:const id-sorted-map (int 29))
;; Integral numbers
(def ^:const id-byte (int 40))
(def ^:const id-short (int 41))
(def ^:const id-integer (int 42))
(def ^:const id-long (int 43))
(def ^:const id-bigint (int 44)) ; Used for both BigInt and BigInteger
;; Floating point / decimal numbers
(def ^:const id-float (int 60))
(def ^:const id-double (int 61))
(def ^:const id-bigdec (int 62))
(def ^:const id-ratio (int 70))
2012-07-06 19:12:59 +00:00
2012-07-20 18:56:30 +00:00
;;; DEPRECATED (old types will be supported only for thawing)
;; `thaw-from-stream` still decodes these ids; no freezer in this file
;; writes them.
(def ^:const id-old-reader (int 1)) ; as of 0.9.2, for +64k support
(def ^:const id-old-string (int 11)) ; as of 0.9.2, for +64k support
(def ^:const id-old-map (int 22)) ; as of 0.9.0, for more efficient thaw
2012-07-06 19:12:59 +00:00
;;;; Shared low-level stream stuff
2013-06-12 18:14:46 +00:00
(defn- write-id
  "Writes the type id `id` to `stream` as a single byte."
  [^DataOutputStream stream ^Integer id]
  (.writeByte stream id))
2012-07-06 19:12:59 +00:00
2013-06-12 18:14:46 +00:00
(defn- write-bytes
  "Emits `ba` to `stream` as a 4-byte length prefix (via `.writeInt`)
  followed by the raw bytes themselves."
  [^DataOutputStream stream ^bytes ba]
  (let [len (alength ba)]
    ;; Length prefix first so the reader knows how much to consume.
    (.writeInt stream len)
    ;; Then the payload.
    (.write stream ba 0 len)))
2013-06-12 18:14:46 +00:00
(defn- write-biginteger
  "Writes the BigInteger `x` to `stream` by handing the result of its
  `.toByteArray` to `write-bytes` (so it is length-prefixed)."
  [^DataOutputStream stream ^BigInteger x]
  (->> x
       .toByteArray
       (write-bytes stream)))
2013-06-12 18:14:46 +00:00
(defn- read-bytes
  "Reads arbitrary byte data, preceded by its length.

  Expects a 4-byte length (as written by `.writeInt`) followed by exactly
  that many bytes, and returns them as a fresh byte array.

  Throws java.io.EOFException if the stream ends before the announced number
  of bytes has been read."
  ^bytes [^DataInputStream stream]
  (let [size (.readInt stream)
        ba   (byte-array size)]
    ;; `.read` may legally return after filling only part of the buffer and
    ;; its return value was previously ignored; `.readFully` either fills the
    ;; whole array or throws, so truncated input can no longer come back as
    ;; silently zero-padded data.
    (.readFully stream ba 0 size)
    ba))
2013-06-12 18:14:46 +00:00
(defn- read-biginteger
  "Reads a length-prefixed byte array with `read-bytes` and constructs a
  java.math.BigInteger from it.

  Note that as of Clojure 1.3, java.math.BigInteger is not the same type as
  clojure.lang.BigInt."
  ^BigInteger [^DataInputStream stream]
  (-> stream
      read-bytes
      (BigInteger.)))
2012-07-06 19:12:59 +00:00
;;;; Freezing
2013-06-12 18:14:46 +00:00
(defprotocol Freezable
  "Implemented by every type that can be written to a frozen stream."
  (freeze-to-stream* [this stream]
    "Writes `this` (type id plus payload) to `stream`. Does not handle
    metadata."))
(defn- freeze-to-stream
  "Like `freeze-to-stream*` but with metadata support: when `x` carries
  metadata, an `id-meta` marker and the frozen metadata map are written
  ahead of `x` itself."
  [x ^DataOutputStream s]
  (when-let [m (meta x)]
    (write-id s id-meta)
    (freeze-to-stream m s))
  (freeze-to-stream* x s))
2012-07-06 19:12:59 +00:00
2013-06-12 18:14:46 +00:00
(defmacro ^:private freezer
  "Helper to extend Freezable protocol.

  Expands to an `extend-type` of `type` whose `freeze-to-stream*` first
  writes `id` and then runs `body`. Inside `body` the value being frozen is
  available as `x` and the (DataOutputStream-hinted) stream as `s`."
  [type id & body]
  (let [stream-sym (with-meta 's {:tag 'DataOutputStream})]
    `(extend-type ~type
       ~'Freezable
       (~'freeze-to-stream* [~'x ~stream-sym]
         (write-id ~'s ~id)
         ~@body))))
2013-06-12 18:14:46 +00:00
(defmacro ^:private coll-freezer
  "Extends Freezable to simple collection types.

  The generated freezer writes the element count as an int and then freezes
  every element in iteration order. `body` is accepted but not used."
  [type id & body]
  `(freezer ~type ~id
     (.writeInt ~'s (count ~'x))
     (doseq [item# ~'x]
       (freeze-to-stream item# ~'s))))
2012-07-06 19:12:59 +00:00
2013-06-12 18:14:46 +00:00
(defmacro ^:private kv-freezer
  "Extends Freezable to key-value collection types.

  The generated freezer writes twice the entry count as an int (one slot per
  key and per value) and then freezes each key followed by its value.
  `body` is accepted but not used."
  [type id & body]
  `(freezer ~type ~id
     (.writeInt ~'s (* 2 (count ~'x)))
     (doseq [[key# val#] ~'x]
       (freeze-to-stream key# ~'s)
       (freeze-to-stream val# ~'s))))
2013-06-12 18:14:46 +00:00
;; Byte arrays, nil and simple scalars
(freezer (Class/forName "[B") id-bytes (write-bytes s x))
(freezer nil       id-nil)
(freezer Boolean   id-boolean (.writeBoolean s x))
(freezer Character id-char    (.writeChar s (int x)))
(freezer String    id-string  (write-bytes s (.getBytes x "UTF-8")))
;; Keywords are written as "ns/name", or just "name" when unqualified
(freezer Keyword   id-keyword
  (.writeUTF s (if-let [kw-ns (namespace x)]
                 (str kw-ns "/" (name x))
                 (name x))))

;; Concrete collection classes
(coll-freezer PersistentQueue   id-queue)
(coll-freezer PersistentTreeSet id-sorted-set)
(kv-freezer   PersistentTreeMap id-sorted-map)

;; Collection interfaces
(coll-freezer IPersistentList   id-list)
(coll-freezer IPersistentVector id-vector)
(coll-freezer IPersistentSet    id-set)
(kv-freezer   IPersistentMap    id-map)

(coll-freezer IPersistentCollection id-coll) ; Must be LAST collection freezer!

;; Integral numbers (BigInt and BigInteger share one id)
(freezer Byte       id-byte    (.writeByte s x))
(freezer Short      id-short   (.writeShort s x))
(freezer Integer    id-integer (.writeInt s x))
(freezer Long       id-long    (.writeLong s x))
(freezer BigInt     id-bigint  (write-biginteger s (.toBigInteger x)))
(freezer BigInteger id-bigint  (write-biginteger s x))

;; Floating point, decimal and ratio numbers
(freezer Float  id-float  (.writeFloat s x))
(freezer Double id-double (.writeDouble s x))
(freezer BigDecimal id-bigdec
  (write-biginteger s (.unscaledValue x))
  (.writeInt s (.scale x)))
(freezer Ratio id-ratio
  (write-biginteger s (.numerator x))
  (write-biginteger s (.denominator x)))

;; Use Clojure's own reader as final fallback
(freezer Object id-reader (write-bytes s (.getBytes (pr-str x) "UTF-8")))
2012-07-06 19:12:59 +00:00
(defn- wrap-nippy-header
  "Returns `data-ba` prefixed with the 5-byte Nippy header: magic prefix,
  header version, compressor id (0 when `compressor` is falsey), encryptor
  id (0 when `password` is falsey) and the reserved byte."
  [data-ba compressor encryptor password]
  (let [compressor-id (byte (if compressor (compression/header-id compressor) 0))
        ;; The encryptor only counts when a password was actually supplied.
        encryptor-id  (byte (if password (encryption/header-id encryptor) 0))
        header-ba     (byte-array [id-nippy-magic-prefix
                                   id-nippy-header-ver
                                   compressor-id
                                   encryptor-id
                                   id-nippy-reserved])]
    (utils/ba-concat header-ba data-ba)))
(defn freeze
  "Serializes arg (any Clojure data type) to a byte array. Enable
  `:legacy-mode?` flag to produce bytes readable by Nippy < 2.x.

  Options (all optional, passed as a single map):
    :print-dup?   - value bound to *print-dup* while freezing (default true).
    :compressor   - compressor to apply; defaults to the Snappy compressor,
                    pass nil to skip compression.
    :password     - when given, the (possibly compressed) bytes are encrypted.
    :encryptor    - encryptor used when a password is given; defaults to the
                    AES128 encryptor.
    :legacy-mode? - when truthy, no Nippy header is prepended."
  ^bytes [x & [{:keys [print-dup? password compressor encryptor legacy-mode?]
                :or   {print-dup? true
                       compressor compression/default-snappy-compressor
                       encryptor  encryption/default-aes128-encryptor}}]]
  (let [baos          (ByteArrayOutputStream.)
        dos           (DataOutputStream. baos)
        _             (binding [*print-dup* print-dup?]
                        (freeze-to-stream x dos))
        frozen-ba     (.toByteArray baos)
        ;; Compress first, then encrypt the compressed bytes.
        compressed-ba (if compressor
                        (compression/compress compressor frozen-ba)
                        frozen-ba)
        final-ba      (if password
                        (encryption/encrypt encryptor password compressed-ba)
                        compressed-ba)]
    (if legacy-mode?
      final-ba
      (wrap-nippy-header final-ba compressor encryptor password))))
2012-07-06 19:12:59 +00:00
;;;; Thawing
2013-06-12 18:14:46 +00:00
(declare thaw-from-stream)

(defn coll-thaw
  "Thaws simple collection types: reads an int element count from `s`, then
  thaws that many values into `coll`."
  [coll ^DataInputStream s]
  (utils/repeatedly-into coll (.readInt s)
                         (fn [] (thaw-from-stream s))))
2012-07-06 19:12:59 +00:00
2013-06-12 18:14:46 +00:00
(defn coll-thaw-kvs
  "Thaws key-value collection types. The int read from `s` is halved to get
  the number of entries; each entry is then thawed as a key followed by its
  value and added to `coll` as a [key value] pair."
  [coll ^DataInputStream s]
  (utils/repeatedly-into coll (/ (.readInt s) 2)
                         #(vector (thaw-from-stream s) (thaw-from-stream s))))
2013-06-12 18:14:46 +00:00
(defn- thaw-from-stream
  "Reads one frozen value from `s`: a single type-id byte followed by the
  payload for that type. Recurses for collection elements and metadata.
  Throws an Exception when the type id is not recognised."
  [^DataInputStream s]
  (let [type-id (.readByte s)]
    (utils/case-eval
     type-id

     ;; Reader fallback; evaluation is governed by the caller's *read-eval*
     id-reader  (read-string (String. (read-bytes s) "UTF-8"))
     id-bytes   (read-bytes s)
     id-nil     nil
     id-boolean (.readBoolean s)

     id-char    (.readChar s)
     id-string  (String. (read-bytes s) "UTF-8")
     id-keyword (keyword (.readUTF s))

     ;; Collections
     id-queue      (coll-thaw (PersistentQueue/EMPTY) s)
     id-sorted-set (coll-thaw (sorted-set) s)
     id-sorted-map (coll-thaw-kvs (sorted-map) s)

     id-list   (into '() (rseq (coll-thaw [] s)))
     id-vector (coll-thaw [] s)
     id-set    (coll-thaw #{} s)
     id-map    (coll-thaw-kvs {} s)
     id-coll   (seq (coll-thaw [] s))

     ;; Metadata map comes first, then the value it belongs to
     id-meta (let [m     (thaw-from-stream s)
                   value (thaw-from-stream s)]
               (with-meta value m))

     ;; Numbers
     id-byte    (.readByte s)
     id-short   (.readShort s)
     id-integer (.readInt s)
     id-long    (.readLong s)
     id-bigint  (bigint (read-biginteger s))

     id-float  (.readFloat s)
     id-double (.readDouble s)
     id-bigdec (BigDecimal. (read-biginteger s) (.readInt s))

     id-ratio (let [numer (bigint (read-biginteger s))
                    denom (bigint (read-biginteger s))]
                (/ numer denom))

     ;;; DEPRECATED
     id-old-reader (read-string (.readUTF s))
     id-old-string (.readUTF s)
     id-old-map    (apply hash-map
                          (utils/repeatedly-into [] (* 2 (.readInt s))
                                                 (fn [] (thaw-from-stream s))))

     (throw (Exception. (str "Failed to thaw unknown type ID: " type-id))))))
(defn thaw
  "Deserializes frozen bytes to their original Clojure data type. Enable
  `:legacy-mode?` to read bytes written by Nippy < 2.x.

  Options (all optional, passed as a single map):
    :read-eval?   - value bound to *read-eval* while thawing.
    :password     - password used to decrypt encrypted data.
    :compressor   - compressor used to decompress; defaults to the Snappy
                    compressor.
    :encryptor    - encryptor used to decrypt; defaults to the AES128
                    encryptor.
    :legacy-mode? - when truthy, the input is treated as header-less data.
    :strict?      - when truthy, supplying a password or compressor for data
                    whose header says it does not need one is an error.

  WARNING: Enabling `:read-eval?` can lead to security vulnerabilities unless
  you are sure you know what you're doing."
  [^bytes ba & [{:keys [read-eval? password compressor encryptor legacy-mode?
                        strict?]
                 :or   {compressor compression/default-snappy-compressor
                        encryptor  encryption/default-aes128-encryptor}}]]
  (let [fail   (fn [msg & [cause]]
                 (throw (Exception. (str "Thaw failed. " msg) cause)))
        ;; Undo freeze's pipeline in reverse: decrypt, decompress, then read.
        decode (fn [data-ba compressor password]
                 (let [decrypted    (if password
                                      (encryption/decrypt encryptor password data-ba)
                                      data-ba)
                       decompressed (if compressor
                                      (compression/decompress compressor decrypted)
                                      decrypted)
                       in           (DataInputStream.
                                     (ByteArrayInputStream. decompressed))]
                   (binding [*read-eval* read-eval?]
                     (thaw-from-stream in))))]

    (if legacy-mode?
      ;; Nippy < 2.x: no header, so rely entirely on the caller's options.
      (try
        (decode ba compressor password)
        (catch Exception e
          (cond
            password   (fail "Unencrypted data or wrong password?" e)
            compressor (fail "Encrypted or uncompressed data?" e)
            :else      (fail "Encrypted and/or compressed data?" e))))

      ;; Nippy >= 2.x, we have a header!
      (let [[[id-magic* id-header* id-comp* id-enc* _] data-ba]
            (utils/ba-split ba 5)
            compressed? (not (zero? id-comp*))
            encrypted?  (not (zero? id-enc*))]
        (cond
          (not= id-magic* id-nippy-magic-prefix)
          (fail (str "Not Nippy data, data frozen with Nippy < 2.x, "
                     "or data may be corrupt?\n"
                     "Enable `:legacy-mode?` option for data frozen with Nippy < 2.x."))

          (> id-header* id-nippy-header-ver)
          (fail "Data frozen with newer Nippy version. Please upgrade.")

          (and strict? (not encrypted?) password)
          (fail (str "Data is not encrypted. Try again w/o password.\n"
                     "Disable `:strict?` option to ignore this error. "))

          (and strict? (not compressed?) compressor)
          (fail (str "Data is not compressed. Try again w/o compressor.\n"
                     "Disable `:strict?` option to ignore this error."))

          (and encrypted? (not password))
          (fail "Data is encrypted. Please try again with a password.")

          (and encrypted? password
               (not= id-enc* (encryption/header-id encryptor)))
          (fail "Data encrypted with a different Encrypter.")

          (and compressed? compressor
               (not= id-comp* (compression/header-id compressor)))
          (fail "Data compressed with a different Compressor.")

          :else
          ;; Only apply the steps the header says were used when freezing.
          (try
            (decode data-ba
                    (when compressed? compressor)
                    (when encrypted? password))
            (catch Exception e
              (if (and encrypted? password)
                (fail "Wrong password, or data may be corrupt?" e)
                (fail "Data may be corrupt?" e)))))))))
;; REPL examples for `freeze`/`thaw` round trips; lines marked "ex" are
;; expected to throw.
(comment (thaw (freeze "hello"))
(thaw (freeze "hello" {:compressor nil}))
(thaw (freeze "hello" {:compressor nil}) {:strict? true}) ; ex: strict, data is not compressed but a compressor is in effect
(thaw (freeze "hello" {:password [:salted "p"]})) ; ex: data is encrypted but no password given
(thaw (freeze "hello") {:password [:salted "p"]})) ; ok: without :strict? the unneeded password is ignored
2012-07-06 19:12:59 +00:00
2013-06-12 18:14:46 +00:00
;;;; Stress data
2012-07-06 19:12:59 +00:00
2013-06-12 17:15:16 +00:00
(def stress-data
  "Reference data used for tests & benchmarks."
  {;; Scalars and strings
   :bytes        (byte-array [(byte 1) (byte 2) (byte 3)])
   :nil          nil
   :boolean      true

   :char-utf8    \ಬ
   :string-utf8  "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
   :string-long  (apply str (range 1000))
   :keyword      :keyword
   :keyword-ns   ::keyword

   ;; Collections
   :queue        (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
   :queue-empty  (PersistentQueue/EMPTY)
   :sorted-set   (sorted-set 1 2 3 4 5)
   :sorted-map   (sorted-map :b 2 :a 1 :d 4 :c 3)

   :list         (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
   :list-quoted  '(1 2 3 4 5 (6 7 8 (9 10)))
   :list-empty   (list)
   :vector       [1 2 3 4 5 [6 7 8 [9 10]]]
   :vector-empty []
   :map          {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
   :map-empty    {}
   :set          #{1 2 3 4 5 #{6 7 8 #{9 10}}}
   :set-empty    #{}
   :meta         (with-meta {:a :A} {:metakey :metaval})

   :coll         (repeatedly 1000 rand)

   ;; Numbers
   :byte         (byte 16)
   :short        (short 42)
   :integer      (int 3)
   :long         (long 3)
   :bigint       (bigint 31415926535897932384626433832795)

   :float        (float 3.14)
   :double       (double 3.14)
   :bigdec       (bigdec 3.1415926535897932384626433832795)

   :ratio        22/7

   ;; Clojure 1.4+ tagged literals
   :tagged-uuid  (java.util.UUID/randomUUID)
   :tagged-date  (java.util.Date.)})
;;;; Deprecated API
(defn freeze-to-bytes
  "DEPRECATED: Use `freeze` instead.

  Keyword-argument wrapper that always calls `freeze` in legacy mode, using
  the Snappy compressor unless `:compress?` is falsey."
  ^bytes [x & {:keys [print-dup? compress? password]
               :or   {print-dup? true
                      compress?  true}}]
  (let [compressor (when compress? compression/default-snappy-compressor)]
    (freeze x {:print-dup?   print-dup?
               :compressor   compressor
               :password     password
               :legacy-mode? true})))
2013-06-12 18:14:46 +00:00
(defn thaw-from-bytes
  "DEPRECATED: Use `thaw` instead.

  Keyword-argument wrapper that always calls `thaw` in legacy mode, using
  the Snappy compressor unless `:compressed?` is falsey."
  [ba & {:keys [read-eval? compressed? password]
         :or   {compressed? true}}]
  (let [compressor (when compressed? compression/default-snappy-compressor)]
    (thaw ba {:read-eval?   read-eval?
              :compressor   compressor
              :password     password
              :legacy-mode? true})))