Rework v2 header format

This commit is contained in:
Peter Taoussanis 2013-06-13 22:40:44 +07:00
parent ae491e7767
commit f706a51a4d
6 changed files with 85 additions and 97 deletions

View file

@ -2,10 +2,10 @@ Current [semantic](http://semver.org/) version:
```clojure ```clojure
[com.taoensso/nippy "1.2.1"] ; Stable [com.taoensso/nippy "1.2.1"] ; Stable
[com.taoensso/nippy "2.0.0-alpha4"] ; EXPERIMENTAL! (see notes below) [com.taoensso/nippy "2.0.0-alpha5"] ; Development (testers only!)
``` ```
2.x adds pluggable compression, crypto support (also pluggable), an improved API (including much better error messages), and hugely improved performance. It **is backwards compatible**, but please note that the `freeze-to-bytes`/`thaw-from-bytes` API has been **deprecated** in favor of `freeze`/`thaw`. **CURRENTLY EXPERIMENTAL** - don't use 2.x yet for anything besides testing. 2.x adds pluggable compression, crypto support (also pluggable), an improved API (including much better error messages), and hugely improved performance. It **is backwards compatible**, but please note that the old `freeze-to-bytes`/`thaw-from-bytes` API has been **deprecated** in favor of `freeze`/`thaw`.
# Nippy, a Clojure serialization library # Nippy, a Clojure serialization library

View file

@ -1,4 +1,4 @@
(defproject com.taoensso/nippy "2.0.0-alpha4" (defproject com.taoensso/nippy "2.0.0-alpha5"
:description "Clojure serialization library" :description "Clojure serialization library"
:url "https://github.com/ptaoussanis/nippy" :url "https://github.com/ptaoussanis/nippy"
:license {:name "Eclipse Public License" :license {:name "Eclipse Public License"

View file

@ -15,14 +15,15 @@
;; TODO Allow ba or wrapped-ba input? ;; TODO Allow ba or wrapped-ba input?
;; TODO Provide ToFreeze, Frozen, Encrypted, etc. tooling helpers ;; TODO Provide ToFreeze, Frozen, Encrypted, etc. tooling helpers
;;;; Header IDs ;;;; Nippy 2.x+ header spec (4 bytes)
;; Nippy 2.x+ prefixes frozen data with a 5-byte header:
(def ^:const id-nippy-magic-prefix (byte 17)) (def ^:private ^:const head-version 1)
(def ^:const id-nippy-header-ver (byte 0)) (def ^:private head-sig (.getBytes "NPY" "UTF-8"))
;; * Compressor id (0 if no compressor) (def ^:private head-meta "Final byte stores version-dependent metadata."
;; * Encryptor id (0 if no encryptor) {(byte 0) {:version 1 :compressed? false :encrypted? false}
(def ^:const id-nippy-reserved (byte 0)) (byte 1) {:version 1 :compressed? true :encrypted? false}
(byte 2) {:version 1 :compressed? false :encrypted? true}
(byte 3) {:version 1 :compressed? true :encrypted? true}})
;;;; Data type IDs ;;;; Data type IDs
@ -171,14 +172,16 @@
;; Use Clojure's own reader as final fallback ;; Use Clojure's own reader as final fallback
(freezer Object id-reader (write-bytes s (.getBytes (pr-str x) "UTF-8"))) (freezer Object id-reader (write-bytes s (.getBytes (pr-str x) "UTF-8")))
(defn- wrap-nippy-header [data-ba compressor encryptor password] (def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))
(let [header-ba (byte-array
[id-nippy-magic-prefix (defn- wrap-header [data-ba metadata]
id-nippy-header-ver (if-let [meta-id (head-meta-id (assoc metadata :version head-version))]
(byte (if compressor (compression/header-id compressor) 0)) (let [head-ba (utils/ba-concat head-sig (byte-array [meta-id]))]
(byte (if password (encryption/header-id encryptor) 0)) (utils/ba-concat head-ba data-ba))
id-nippy-reserved])] (throw (Exception. (str "Unrecognized header metadata: " metadata)))))
(utils/ba-concat header-ba data-ba)))
(comment (wrap-header (.getBytes "foo") {:compressed? true
:encrypted? false}))
(defn freeze (defn freeze
"Serializes arg (any Clojure data type) to a byte array. Set :legacy-mode to "Serializes arg (any Clojure data type) to a byte array. Set :legacy-mode to
@ -193,7 +196,9 @@
(let [ba (.toByteArray ba) (let [ba (.toByteArray ba)
ba (if compressor (compression/compress compressor ba) ba) ba (if compressor (compression/compress compressor ba) ba)
ba (if password (encryption/encrypt encryptor password ba) ba)] ba (if password (encryption/encrypt encryptor password ba) ba)]
(if legacy-mode ba (wrap-nippy-header ba compressor encryptor password))))) (if legacy-mode ba
(wrap-header ba {:compressed? (boolean compressor)
:encrypted? (boolean password)})))))
;;;; Thawing ;;;; Thawing
@ -258,18 +263,22 @@
(throw (Exception. (str "Failed to thaw unknown type ID: " type-id)))))) (throw (Exception. (str "Failed to thaw unknown type ID: " type-id))))))
(defn- try-parse-header
  "Tries to interpret the first 4 bytes of `ba` as a Nippy 2.x+ header
  (3 signature bytes followed by 1 metadata-id byte).

  Returns `[data-ba metadata]` when the signature bytes equal `head-sig`,
  where `metadata` is the `head-meta` entry for the metadata-id byte, or
  `{:unrecognized-header? true}` when that id has no entry. Returns nil when
  the signature doesn't match, or when `utils/ba-split` yields nothing for
  `ba` (presumably when `ba` is too short to split -- confirm against
  `utils/ba-split`)."
  [ba]
  (when-let [[header-bytes payload-bytes] (utils/ba-split ba 4)]
    ;; Header layout: [sig sig sig meta-id]
    (let [[sig-bytes [id-byte]] (utils/ba-split header-bytes 3)
          sig-ok?               (utils/ba= sig-bytes head-sig)]
      (if sig-ok?
        ;; Unknown ids are flagged rather than rejected, so the caller can
        ;; decide how to report them.
        [payload-bytes (get head-meta id-byte {:unrecognized-header? true})]
        nil))))
(defn thaw (defn thaw
"Deserializes frozen bytes to their original Clojure data type. "Deserializes frozen bytes to their original Clojure data type.
:legacy-mode can be set to one of the following values: :legacy-mode options:
true - Read bytes as if written by Nippy < 2.x. false - Nippy >= 2.x data only (best).
false - Read bytes as if written by Nippy >= 2.x. true - Nippy < 2.x data only (deprecated).
:auto (default) - Try read bytes as if written by Nippy >= 2.x, :auto - Mixed data (default, migrating).
fall back to reading bytes as if written by Nippy < 2.x.
In most cases you'll want :auto if you're using a preexisting data set, and In most cases you'll want :auto if you're using a preexisting data set, and
`false` otherwise. Note that error message detail will be limited under the `false` otherwise.
:auto (default) mode.
WARNING: Enabling `:read-eval?` can lead to security vulnerabilities unless WARNING: Enabling `:read-eval?` can lead to security vulnerabilities unless
you are sure you know what you're doing." you are sure you know what you're doing."
@ -279,83 +288,65 @@
compressor compression/default-snappy-compressor compressor compression/default-snappy-compressor
encryptor encryption/default-aes128-encryptor}}]] encryptor encryption/default-aes128-encryptor}}]]
(let [ex (fn [msg & [e]] (throw (Exception. (str "Thaw failed. " msg) e))) (let [ex (fn [msg & [e]] (throw (Exception. (str "Thaw failed: " msg) e)))
thaw-data (fn [data-ba compressor password] try-thaw-data
(let [ba data-ba (fn [data-ba {decompress? :compressed? decrypt? :encrypted?
ba (if password (encryption/decrypt encryptor password ba) ba) :or {decompress? compressor
ba (if compressor (compression/decompress compressor ba) ba) decrypt? password}
stream (DataInputStream. (ByteArrayInputStream. ba))] :as head-meta}]
(binding [*read-eval* read-eval?] (thaw-from-stream stream)))) (let [apparent-header? (not (empty? head-meta))]
(try
maybe-headers (let [ba data-ba
(fn [] ba (if decrypt? (encryption/decrypt encryptor password ba) ba)
(when-let [[[id-magic* & _ :as headers] data-ba] (utils/ba-split ba 5)] ba (if decompress? (compression/decompress compressor ba) ba)
(when (= id-magic* id-nippy-magic-prefix) ; Not a guarantee of correctness! stream (DataInputStream. (ByteArrayInputStream. ba))]
[headers data-ba]))) (binding [*read-eval* read-eval?] (thaw-from-stream stream)))
(catch Exception e
legacy-thaw (cond decrypt? (ex "Wrong password/encryptor?" e)
(fn [data-ba] decompress? (ex "Encrypted data or wrong compressor?" e)
(try (thaw-data data-ba compressor password) :else (if apparent-header?
(catch Exception e (ex "Corrupt data?" e)
(cond password (ex "Unencrypted data or wrong password?" e) (ex "Encrypted and/or compressed data?" e)))))))]
compressor (ex "Encrypted or uncompressed data?" e)
:else (ex "Encrypted and/or compressed data?" e)))))
modern-thaw
(fn [data-ba compressed? encrypted?]
(try (thaw-data data-ba (when compressed? compressor)
(when encrypted? password))
(catch Exception e
(if (and encrypted? password)
(ex "Wrong password, or data may be corrupt?" e)
(ex "Data may be corrupt?" e)))))]
(if (= legacy-mode true) (if (= legacy-mode true)
(legacy-thaw ba) ; Read as legacy, and only as legacy (try-thaw-data ba nil)
(if-let [[[_ id-header* id-comp* id-enc* _] data-ba] (maybe-headers)]
(let [compressed? (not (zero? id-comp*))
encrypted? (not (zero? id-enc*))]
(if (= legacy-mode :auto)
(try ; Header looks okay: try read as modern, fall back to legacy
(modern-thaw data-ba compressed? encrypted?)
(catch Exception _ (legacy-thaw ba)))
(cond ; Read as modern, and only as modern (if-let [[data-ba {:keys [unrecognized-header? compressed? encrypted?]
(> id-header* id-nippy-header-ver) :as head-meta}] (try-parse-header ba)]
(ex "Data frozen with newer Nippy version. Please upgrade.") (if (= legacy-mode :auto)
(try
;; Header seems okay, but we won't trust its metadata for
;; error-reporting purposes
(try-thaw-data data-ba head-meta)
(catch Exception _ (try-thaw-data ba nil)))
(and strict? (not encrypted?) password) (cond ; Trust metadata, give fancy error messages
(ex (str "Data is not encrypted. Try again w/o password.\n" unrecognized-header?
"Disable `:strict?` option to ignore this error. ")) (ex "Unrecognized header. Data frozen with newer Nippy version?")
(and strict? (not encrypted?) password)
(and strict? (not compressed?) compressor) (ex (str "Unencrypted data. Try again w/o password.\n"
(ex (str "Data is not compressed. Try again w/o compressor.\n" "Disable `:strict?` option to ignore this error. "))
"Disable `:strict?` option to ignore this error.")) (and strict? (not compressed?) compressor)
(ex (str "Uncompressed data. Try again w/o compressor.\n"
(and encrypted? (not password)) "Disable `:strict?` option to ignore this error."))
(ex "Data is encrypted. Please try again with a password.") (and compressed? (not compressor))
(ex "Compressed data. Try again with compressor.")
(and encrypted? password (and encrypted? (not password))
(not= id-enc* (encryption/header-id encryptor))) (ex "Encrypted data. Try again with password.")
(ex "Data encrypted with a different Encrypter.") :else (try-thaw-data data-ba head-meta)))
(and compressed? compressor
(not= id-comp* (compression/header-id compressor)))
(ex "Data compressed with a different Compressor.")
:else (modern-thaw data-ba compressed? encrypted?))))
;; Header definitely not okay ;; Header definitely not okay
(if (= legacy-mode :auto) (if (= legacy-mode :auto)
(legacy-thaw ba) (try-thaw-data ba nil) ; Legacy thaw
(ex (str "Not Nippy data, data frozen with Nippy < 2.x, " (ex (str "Not Nippy data, data frozen with Nippy < 2.x, "
"or data may be corrupt?\n" "or corrupt data?\n"
"See `:legacy-mode` option for data frozen with Nippy < 2.x."))))))) "See `:legacy-mode` option for data frozen with Nippy < 2.x.")))))))
(comment (thaw (freeze "hello")) (comment (thaw (freeze "hello"))
(thaw (freeze "hello" {:compressor nil})) (thaw (freeze "hello" {:compressor nil}))
(thaw (freeze "hello" {:compressor nil}) {:strict? true}) ; ex (thaw (freeze "hello" {:compressor nil}) {:legacy-mode false
:strict? true}) ; ex
(thaw (freeze "hello" {:password [:salted "p"]})) ; ex (thaw (freeze "hello" {:password [:salted "p"]})) ; ex
(thaw (freeze "hello") {:password [:salted "p"]})) (thaw (freeze "hello") {:password [:salted "p"]}))

View file

@ -6,7 +6,6 @@
;;;; Interface ;;;; Interface
(defprotocol ICompressor (defprotocol ICompressor
(header-id [compressor]) ; Unique, >0, <= 128
(compress ^bytes [compressor ba]) (compress ^bytes [compressor ba])
(decompress ^bytes [compressor ba])) (decompress ^bytes [compressor ba]))
@ -14,7 +13,6 @@
(deftype DefaultSnappyCompressor [] (deftype DefaultSnappyCompressor []
ICompressor ICompressor
(header-id [_] 1)
(compress [_ ba] (org.iq80.snappy.Snappy/compress ba)) (compress [_ ba] (org.iq80.snappy.Snappy/compress ba))
(decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba)))) (decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))

View file

@ -8,9 +8,8 @@
;;;; Interface ;;;; Interface
(defprotocol IEncryptor (defprotocol IEncryptor
(header-id [encryptor]) ; Unique, >0, <= 128 (encrypt ^bytes [encryptor pwd ba])
(encrypt ^bytes [encryptor pwd ba]) (decrypt ^bytes [encryptor pwd ba]))
(decrypt ^bytes [encryptor pwd ba]))
;;;; Default digests, ciphers, etc. ;;;; Default digests, ciphers, etc.
@ -67,8 +66,6 @@
(defrecord DefaultAES128Encryptor [key-cache] (defrecord DefaultAES128Encryptor [key-cache]
IEncryptor IEncryptor
(header-id [_] 1)
(encrypt [this typed-pwd data-ba] (encrypt [this typed-pwd data-ba]
(let [[type pwd] (destructure-typed-pwd typed-pwd) (let [[type pwd] (destructure-typed-pwd typed-pwd)
salt? (= type :salted) salt? (= type :salted)

View file

@ -69,6 +69,8 @@
(comment (memoized nil +) (comment (memoized nil +)
(memoized nil + 5 12)) (memoized nil + 5 12))
(defn ba=
  "Returns true iff byte arrays `x` and `y` are equal according to
  `java.util.Arrays/equals` (same length and same bytes at every index)."
  [^bytes x ^bytes y]
  (java.util.Arrays/equals x y))
(defn ba-concat ^bytes [^bytes ba1 ^bytes ba2] (defn ba-concat ^bytes [^bytes ba1 ^bytes ba2]
(let [s1 (alength ba1) (let [s1 (alength ba1)
s2 (alength ba2) s2 (alength ba2)