(ns taoensso.nippy
  "Simple, high-performance Clojure serialization library. Originally adapted from Deep-Freeze."
  {:author "Peter Taoussanis"}
  (:require [taoensso.nippy
             (utils       :as utils)
             (compression :as compression :refer (snappy-compressor))
             (encryption  :as encryption  :refer (aes128-encryptor))])
  (:import  [java.io DataInputStream DataOutputStream ByteArrayOutputStream
             ByteArrayInputStream]
            [clojure.lang Keyword BigInt Ratio PersistentQueue PersistentTreeMap
             PersistentTreeSet IPersistentList IPersistentVector IPersistentMap
             IPersistentSet IPersistentCollection]))

;;;; Nippy 2.x+ header spec (4 bytes)
;; Layout: 3 signature bytes ("NPY") followed by 1 metadata byte.

(def ^:private ^:const head-version 1)
(def ^:private head-sig (.getBytes "NPY" "UTF-8"))
(def ^:private ^:const head-meta "Final byte stores version-dependent metadata."
  {(byte 0) {:version 1 :compressed? false :encrypted? false}
   (byte 1) {:version 1 :compressed? true  :encrypted? false}
   (byte 2) {:version 1 :compressed? false :encrypted? true}
   (byte 3) {:version 1 :compressed? true  :encrypted? true}})

;;;; Data type IDs

;; **Negative ids reserved for user-defined types**

(def ^:const id-reserved   (int 0))
;;                              1
(def ^:const id-bytes      (int 2))
(def ^:const id-nil        (int 3))
(def ^:const id-boolean    (int 4))
(def ^:const id-reader     (int 5)) ; Fallback: *print-dup* pr-str output

(def ^:const id-char       (int 10))
;;                              11
;;                              12
(def ^:const id-string     (int 13))
(def ^:const id-keyword    (int 14))

(def ^:const id-list       (int 20))
(def ^:const id-vector     (int 21))
;;                              22
(def ^:const id-set        (int 23))
(def ^:const id-coll       (int 24)) ; Fallback: non-specific collection
(def ^:const id-meta       (int 25))
(def ^:const id-queue      (int 26))
(def ^:const id-map        (int 27))
(def ^:const id-sorted-set (int 28))
(def ^:const id-sorted-map (int 29))

(def ^:const id-byte       (int 40))
(def ^:const id-short      (int 41))
(def ^:const id-integer    (int 42))
(def ^:const id-long       (int 43))
(def ^:const id-bigint     (int 44))

(def ^:const id-float      (int 60))
(def ^:const id-double     (int 61))
(def ^:const id-bigdec     (int 62))

(def ^:const id-ratio      (int 70))

;;; DEPRECATED (old types will be supported only for thawing)
(def ^:const id-old-reader  (int 1))  ; as of 0.9.2, for +64k support
(def ^:const id-old-string  (int 11)) ; as of 0.9.2, for +64k support
(def ^:const id-old-map     (int 22)) ; as of 0.9.0, for more efficient thaw
(def ^:const id-old-keyword (int 12)) ; as of 2.0.0-alpha5, for str consistency

;;;; Freezing

(defprotocol Freezable (freeze-to-stream* [this stream]))

(defmacro write-id
  "Writes the type id `id` to stream `s` as a single byte."
  [s id]
  `(.writeByte ~s ~id))

(defmacro ^:private write-bytes
  "Writes byte array `ba` to stream `s` as an int length prefix followed by
  the array contents."
  [s ba]
  `(let [s# ~s ba# ~ba]
     (let [size# (alength ba#)]
       (.writeInt s# size#)
       (.write s# ba# 0 size#))))

(defmacro ^:private write-biginteger
  "Writes BigInteger `x` to stream `s` via `write-bytes` of its `.toByteArray`."
  [s x]
  `(write-bytes ~s (.toByteArray ~x)))

(defmacro ^:private write-utf8
  "Writes string `x` to stream `s` via `write-bytes` of its UTF-8 encoding."
  [s x]
  `(write-bytes ~s (.getBytes ~x "UTF-8")))

(defmacro ^:private freeze-to-stream
  "Like `freeze-to-stream*` but with metadata support."
  [s x]
  `(let [x# ~x s# ~s]
     ;; Metadata (when present) is written first, tagged with id-meta, so that
     ;; the thaw side can re-attach it to the value that follows.
     (when-let [m# (meta x#)]
       (write-id s# ~id-meta)
       (freeze-to-stream* m# s#))
     (try (freeze-to-stream* x# s#)
          (catch java.lang.IllegalArgumentException _#
            ;; Use Clojure reader as final fallback (after custom extensions)
            (write-id s# id-reader)
            (write-bytes s# (.getBytes (pr-str x#) "UTF-8"))))))

(defn freeze-to-stream!
  "Low-level API. Serializes arg (any Clojure data type) to a DataOutputStream."
  [^DataOutputStream data-output-stream x & [{:keys [print-dup?]
                                              :or   {print-dup? true}}]]
  ;; *print-dup* affects the pr-str output used by the reader fallback.
  (binding [*print-dup* print-dup?]
    (freeze-to-stream data-output-stream x)))

(defmacro ^:private freezer
  "Helper to extend Freezable protocol."
  [type id & body]
  ;; `x` (the value) and `s` (the stream) are deliberately unhygienic so that
  ;; `body` forms can refer to them.
  `(extend-type ~type
     Freezable
     (~'freeze-to-stream* [~'x ~(with-meta 's {:tag 'DataOutputStream})]
       (write-id ~'s ~id)
       ~@body)))

(defmacro custom-freezer
  "Helper to extend Freezable protocol to custom types with id ∈[1, 128]:
  (defrecord MyType [data])
  (custom-freezer MyType 1 x s (.writeUTF s (:data x)))"
  [type id x data-output-stream & body]
  (assert (and (>= id 1) (<= id 128)))
  ;; Custom ids are written negated; negative ids are reserved for user types.
  `(extend-type ~type
     Freezable
     (~'freeze-to-stream* [~x ~(with-meta data-output-stream
                                 {:tag 'DataOutputStream})]
       (write-id ~data-output-stream ~(int (- id)))
       ~@body)))

(comment (defrecord MyType [data])
         (custom-freezer MyType 1 x s (.writeUTF s (:data x))))

(defmacro ^:private coll-freezer
  "Extends Freezable to simple collection types."
  [type id & body] ; NOTE: `body` is accepted but not used
  `(freezer ~type ~id
     (.writeInt ~'s (count ~'x))
     (doseq [i# ~'x] (freeze-to-stream ~'s i#))))

(defmacro ^:private kv-freezer
  "Extends Freezable to key-value collection types."
  [type id & body] ; NOTE: `body` is accepted but not used
  `(freezer ~type ~id
     ;; The written count is the number of frozen items (keys + vals), i.e.
     ;; twice the entry count; the thaw side halves it again.
     (.writeInt ~'s (* 2 (count ~'x)))
     (doseq [[k# v#] ~'x]
       (freeze-to-stream ~'s k#)
       (freeze-to-stream ~'s v#))))

(freezer (Class/forName "[B") id-bytes   (write-bytes s ^bytes x))
(freezer nil                  id-nil)
(freezer Boolean              id-boolean (.writeBoolean s x))

(freezer Character id-char    (.writeChar s (int x)))
(freezer String    id-string  (write-utf8 s x))
(freezer Keyword   id-keyword (write-utf8 s (if-let [ns (namespace x)]
                                              (str ns "/" (name x))
                                              (name x))))

(coll-freezer PersistentQueue       id-queue)
(coll-freezer PersistentTreeSet     id-sorted-set)
(kv-freezer   PersistentTreeMap     id-sorted-map)

(coll-freezer IPersistentList       id-list)
(coll-freezer IPersistentVector     id-vector)
(coll-freezer IPersistentSet        id-set)
(kv-freezer   IPersistentMap        id-map)
(coll-freezer IPersistentCollection id-coll) ; Must be LAST collection freezer!

(freezer Byte       id-byte    (.writeByte s x))
(freezer Short      id-short   (.writeShort s x))
(freezer Integer    id-integer (.writeInt s x))
(freezer Long       id-long    (.writeLong s x))
(freezer BigInt     id-bigint  (write-biginteger s (.toBigInteger x)))
(freezer BigInteger id-bigint  (write-biginteger s x))

(freezer Float      id-float   (.writeFloat s x))
(freezer Double     id-double  (.writeDouble s x))
(freezer BigDecimal id-bigdec
         (write-biginteger s (.unscaledValue x))
         (.writeInt s (.scale x)))

(freezer Ratio id-ratio
         (write-biginteger s (.numerator   x))
         (write-biginteger s (.denominator x)))

;; Inverse of `head-meta`: metadata map -> header metadata byte.
(def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))

(defn- wrap-header
  "Returns `data-ba` prefixed with the 4-byte header (signature + metadata
  byte) matching `metadata`. Throws if `metadata` (with the current
  :version added) has no known metadata byte."
  [data-ba metadata]
  (if-let [meta-id (head-meta-id (assoc metadata :version head-version))]
    (let [head-ba (utils/ba-concat head-sig (byte-array [meta-id]))]
      (utils/ba-concat head-ba data-ba))
    (throw (Exception. (str "Unrecognized header metadata: " metadata)))))

(comment (wrap-header (.getBytes "foo") {:compressed? true
                                         :encrypted?  false}))

(declare assert-legacy-args)

(defn freeze
  "Serializes arg (any Clojure data type) to a byte array. Set :legacy-mode to
  true to produce bytes readable by Nippy < 2.x. For custom types extend the
  Clojure reader or see `custom-freezer`."
  ^bytes [x & [{:keys [print-dup? password compressor encryptor legacy-mode]
                :or   {print-dup? true
                       compressor snappy-compressor
                       encryptor  aes128-encryptor}}]]
  (when legacy-mode (assert-legacy-args compressor password))
  (let [ba     (ByteArrayOutputStream.)
        stream (DataOutputStream. ba)]
    (freeze-to-stream! stream x {:print-dup? print-dup?})
    ;; Order matters: compress first, then encrypt (thaw reverses this).
    (let [ba (.toByteArray ba)
          ba (if compressor (compression/compress compressor ba) ba)
          ba (if password   (encryption/encrypt encryptor password ba) ba)]
      ;; Legacy output carries no header.
      (if legacy-mode
        ba
        (wrap-header ba {:compressed? (boolean compressor)
                         :encrypted?  (boolean password)})))))

;;;; Thawing

(declare thaw-from-stream)

(defmacro ^:private read-bytes
  "Reads an int length prefix from stream `s`, then exactly that many bytes,
  returning them as a byte array."
  [s]
  `(let [s#    ~s
         size# (.readInt s#)
         ba#   (byte-array size#)]
     ;; `.readFully` rather than `.read`: a plain `.read` may return fewer
     ;; bytes than requested, which would silently truncate the payload and
     ;; leave the stream misaligned. `.readFully` throws EOFException instead.
     (.readFully s# ba# 0 size#)
     ba#))

(defmacro ^:private read-biginteger
  "Reads a BigInteger written by `write-biginteger`."
  [s]
  `(BigInteger. (read-bytes ~s)))

(defmacro ^:private read-utf8
  "Reads a string written by `write-utf8`."
  [s]
  `(String. (read-bytes ~s) "UTF-8"))

(defmacro ^:private coll-thaw
  "Thaws simple collection types."
  [s coll]
  `(let [s# ~s]
     (utils/repeatedly-into ~coll (.readInt s#) (thaw-from-stream s#))))

(defmacro ^:private coll-thaw-kvs
  "Thaws key-value collection types."
  [s coll]
  `(let [s# ~s]
     ;; The stored count covers keys + vals, so halve it to get entries.
     (utils/repeatedly-into ~coll (/ (.readInt s#) 2)
                            [(thaw-from-stream s#) (thaw-from-stream s#)])))

(defn- thaw-from-stream
  "Reads one type-id byte from `s` and thaws the value that follows it.
  `readers` is an optional map of custom type id -> (fn [stream]) used for
  ids not handled here."
  [^DataInputStream s & [readers]]
  (let [type-id (.readByte s)]
    (utils/case-eval
     type-id

     ;; SECURITY NOTE: `read-string` runs on serialized input. Whether
     ;; eval-reader forms are honoured depends on the `*read-eval*` binding
     ;; established by `thaw-from-stream!`.
     id-reader  (read-string (read-utf8 s))
     id-bytes   (read-bytes s)
     id-nil     nil
     id-boolean (.readBoolean s)

     id-char    (.readChar s)
     id-string  (read-utf8 s)
     id-keyword (keyword (read-utf8 s))

     id-queue      (coll-thaw s (PersistentQueue/EMPTY))
     id-sorted-set (coll-thaw s (sorted-set))
     id-sorted-map (coll-thaw-kvs s (sorted-map))

     id-list    (into '() (rseq (coll-thaw s [])))
     id-vector  (coll-thaw s [])
     id-set     (coll-thaw s #{})
     id-map     (coll-thaw-kvs s {})
     id-coll    (seq (coll-thaw s []))

     id-meta (let [m (thaw-from-stream s)] (with-meta (thaw-from-stream s) m))

     id-byte    (.readByte s)
     id-short   (.readShort s)
     id-integer (.readInt s)
     id-long    (.readLong s)
     id-bigint  (bigint (read-biginteger s))

     id-float  (.readFloat s)
     id-double (.readDouble s)
     id-bigdec (BigDecimal. (read-biginteger s) (.readInt s))

     id-ratio (/ (bigint (read-biginteger s))
                 (bigint (read-biginteger s)))

     ;;; DEPRECATED
     id-old-reader (read-string (.readUTF s))
     id-old-string (.readUTF s)
     id-old-map    (apply hash-map (utils/repeatedly-into [] (* 2 (.readInt s))
                                                          (thaw-from-stream s)))
     id-old-keyword (keyword (.readUTF s))

     ;;; Custom types
     ;; `if-let` on the reader itself (not on its result), so that a custom
     ;; reader may legitimately return nil or false.
     (if-let [reader (get readers (- type-id))]
       (try (reader s)
            (catch Exception e
              (throw (Exception. (str "Reader exception for custom type ID: "
                                      (- type-id)) e))))
       (if (neg? type-id)
         (throw (Exception. (str "No reader provided for custom type ID: "
                                 (- type-id))))
         (throw (Exception. (str "Unknown type ID: " type-id))))))))

(defn thaw-from-stream!
  "Low-level API. Deserializes a frozen object from given DataInputStream to its
  original Clojure data type."
  [data-input-stream & [{:keys [read-eval? readers]}]]
  (binding [*read-eval* read-eval?]
    (thaw-from-stream data-input-stream readers)))

(defn- try-parse-header
  "Attempts to split `ba` into a 4-byte header and the remaining data. Returns
  [data-ba head-meta-map] when the first 3 bytes equal the header signature,
  with {:unrecognized-header? true} standing in for an unknown metadata
  byte; otherwise returns nil."
  [ba]
  (when-let [[head-ba data-ba] (utils/ba-split ba 4)]
    (let [[head-sig* [meta-id]] (utils/ba-split head-ba 3)]
      (when (utils/ba= head-sig* head-sig)
        [data-ba (head-meta meta-id {:unrecognized-header? true})]))))

(defn thaw
  "Deserializes a frozen object from given byte array to its original Clojure
  data type. Supports data frozen with current and all previous versions of
  Nippy. For custom `Freezable` types provide a `:readers` arg:
  (thaw (freeze (MyType. \"Joe\")) {:readers {1 (fn [^DataInputStream stream] (.readUTF stream))}})

  WARNING: Enabling `:read-eval?` can lead to security vulnerabilities unless
  you are sure you know what you're doing."
  [^bytes ba & [{:keys [read-eval? password compressor encryptor legacy-opts
                        readers]
                 :or   {legacy-opts {:compressed? true}
                        compressor  snappy-compressor
                        encryptor   aes128-encryptor}
                 :as   opts}]]

  (let [ex (fn [msg & [e]] (throw (Exception. (str "Thaw failed: " msg) e)))

        ;; Thaws `data-ba` given parsed header metadata, or as headerless
        ;; legacy data when `head-meta` is nil.
        try-thaw-data
        (fn [data-ba {:keys [compressed? encrypted?] :as head-meta}]
          (let [password   (when encrypted? password) ; => also head-meta
                compressor (if head-meta
                             (when compressed? compressor)
                             (when (:compressed? legacy-opts)
                               snappy-compressor))]
            (try
              ;; Reverse of freeze: decrypt first, then decompress.
              (let [ba data-ba
                    ba (if password
                         (encryption/decrypt encryptor password ba)
                         ba)
                    ba (if compressor
                         (compression/decompress compressor ba)
                         ba)
                    stream (DataInputStream. (ByteArrayInputStream. ba))]
                (thaw-from-stream! stream {:read-eval? read-eval?
                                           :readers    readers}))
              (catch Exception e
                ;; Pick the most likely explanation for the failure.
                (cond
                 password   (ex "Wrong password/encryptor?" e)
                 compressor (if head-meta
                              (ex "Encrypted data or wrong compressor?" e)
                              (ex "Uncompressed data?" e))
                 :else      (if head-meta
                              (ex "Corrupt data?" e)
                              (ex "Compressed data?" e)))))))]

    (if-let [[data-ba {:keys [unrecognized-header? compressed? encrypted?]
                       :as   head-meta}] (try-parse-header ba)]

      (cond ; Header _appears_ okay
       (and (not legacy-opts) unrecognized-header?) ; Conservative
       (ex "Unrecognized header. Data frozen with newer Nippy version?")

       (and compressed? (not compressor))
       (ex "Compressed data. Try again with compressor.")

       (and encrypted? (not password))
       (if (::tools-thaw? opts)
         ::need-password
         (ex "Encrypted data. Try again with password."))

       ;; The apparent header may just be legacy data that happens to start
       ;; with the signature bytes: retry as legacy, but report the original
       ;; error if that fails too.
       :else (try (try-thaw-data data-ba head-meta)
                  (catch Exception e
                    (if legacy-opts
                      (try (try-thaw-data ba nil)
                           (catch Exception _
                             (throw e)))
                      (throw e)))))

      ;; Header definitely not okay
      (if legacy-opts
        (try-thaw-data ba nil)
        (ex "Unfrozen or corrupt data?")))))

(comment (thaw (freeze "hello"))
         (thaw (freeze "hello" {:compressor nil}))
         (thaw (freeze "hello" {:password [:salted "p"]})) ; ex
         (thaw (freeze "hello") {:password [:salted "p"]}))

;;;; Stress data

(def stress-data "Reference data used for tests & benchmarks."
  {:bytes        (byte-array [(byte 1) (byte 2) (byte 3)])
   :nil          nil
   :boolean      true

   :char-utf8    \ಬ
   :string-utf8  "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
   :string-long  (apply str (range 1000))
   :keyword      :keyword
   :keyword-ns   ::keyword

   :queue        (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g))
   :queue-empty  (PersistentQueue/EMPTY)
   :sorted-set   (sorted-set 1 2 3 4 5)
   :sorted-map   (sorted-map :b 2 :a 1 :d 4 :c 3)

   :list         (list 1 2 3 4 5 (list 6 7 8 (list 9 10)))
   :list-quoted  '(1 2 3 4 5 (6 7 8 (9 10)))
   :list-empty   (list)
   :vector       [1 2 3 4 5 [6 7 8 [9 10]]]
   :vector-empty []
   :map          {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}}
   :map-empty    {}
   :set          #{1 2 3 4 5 #{6 7 8 #{9 10}}}
   :set-empty    #{}
   :meta         (with-meta {:a :A} {:metakey :metaval})

   :coll         (repeatedly 1000 rand)

   :byte         (byte 16)
   :short        (short 42)
   :integer      (int 3)
   :long         (long 3)
   :bigint       (bigint 31415926535897932384626433832795)

   :float        (float 3.14)
   :double       (double 3.14)
   :bigdec       (bigdec 3.1415926535897932384626433832795)

   :ratio        22/7

   ;; Clojure 1.4+ tagged literals
   :tagged-uuid  (java.util.UUID/randomUUID)
   :tagged-date  (java.util.Date.)})

;;;; Deprecated API

(defn- assert-legacy-args
  "Throws AssertionError when the given freeze options cannot be honoured in
  legacy mode: any password, or a compressor other than `snappy-compressor`."
  [compressor password]
  (when password
    (throw (AssertionError. "Encryption not supported in legacy mode.")))
  (when (and compressor (not= compressor snappy-compressor))
    (throw (AssertionError. "Only Snappy compressor supported in legacy mode."))))

(defn freeze-to-bytes "DEPRECATED: Use `freeze` instead."
  ^bytes [x & {:keys [print-dup? compress?]
               :or   {print-dup? true
                      compress?  true}}]
  (freeze x {:legacy-mode true
             :print-dup?  print-dup?
             :compressor  (when compress? snappy-compressor)
             :password    nil}))

(defn thaw-from-bytes "DEPRECATED: Use `thaw` instead."
  [ba & {:keys [read-eval? compressed?]
         :or   {compressed? true}}]
  (thaw ba {:legacy-opts {:compressed? compressed?}
            :read-eval?  read-eval?
            :password    nil}))