Rework v2 header format

This commit is contained in:
Peter Taoussanis 2013-06-13 22:40:44 +07:00
parent ae491e7767
commit f706a51a4d
6 changed files with 85 additions and 97 deletions

View file

@ -2,10 +2,10 @@ Current [semantic](http://semver.org/) version:
```clojure ```clojure
[com.taoensso/nippy "1.2.1"] ; Stable [com.taoensso/nippy "1.2.1"] ; Stable
[com.taoensso/nippy "2.0.0-alpha4"] ; EXPERIMENTAL! (see notes below) [com.taoensso/nippy "2.0.0-alpha5"] ; Development (testers only!)
``` ```
2.x adds pluggable compression, crypto support (also pluggable), an improved API (including much better error messages), and hugely improved performance. It **is backwards compatible**, but please note that the `freeze-to-bytes`/`thaw-from-bytes` API has been **deprecated** in favor of `freeze`/`thaw`. **CURRENTLY EXPERIMENTAL** - don't use 2.x yet for anything besides testing. 2.x adds pluggable compression, crypto support (also pluggable), an improved API (including much better error messages), and hugely improved performance. It **is backwards compatible**, but please note that the old `freeze-to-bytes`/`thaw-from-bytes` API has been **deprecated** in favor of `freeze`/`thaw`.
# Nippy, a Clojure serialization library # Nippy, a Clojure serialization library

View file

@ -1,4 +1,4 @@
(defproject com.taoensso/nippy "2.0.0-alpha4" (defproject com.taoensso/nippy "2.0.0-alpha5"
:description "Clojure serialization library" :description "Clojure serialization library"
:url "https://github.com/ptaoussanis/nippy" :url "https://github.com/ptaoussanis/nippy"
:license {:name "Eclipse Public License" :license {:name "Eclipse Public License"

View file

@ -15,14 +15,15 @@
;; TODO Allow ba or wrapped-ba input? ;; TODO Allow ba or wrapped-ba input?
;; TODO Provide ToFreeze, Frozen, Encrypted, etc. tooling helpers ;; TODO Provide ToFreeze, Frozen, Encrypted, etc. tooling helpers
;;;; Header IDs ;;;; Nippy 2.x+ header spec (4 bytes)
;; Nippy 2.x+ prefixes frozen data with a 5-byte header:
(def ^:const id-nippy-magic-prefix (byte 17)) (def ^:private ^:const head-version 1)
(def ^:const id-nippy-header-ver (byte 0)) (def ^:private head-sig (.getBytes "NPY" "UTF-8"))
;; * Compressor id (0 if no compressor) (def ^:private head-meta "Final byte stores version-dependent metadata."
;; * Encryptor id (0 if no encryptor) {(byte 0) {:version 1 :compressed? false :encrypted? false}
(def ^:const id-nippy-reserved (byte 0)) (byte 1) {:version 1 :compressed? true :encrypted? false}
(byte 2) {:version 1 :compressed? false :encrypted? true}
(byte 3) {:version 1 :compressed? true :encrypted? true}})
;;;; Data type IDs ;;;; Data type IDs
@ -171,14 +172,16 @@
;; Use Clojure's own reader as final fallback ;; Use Clojure's own reader as final fallback
(freezer Object id-reader (write-bytes s (.getBytes (pr-str x) "UTF-8"))) (freezer Object id-reader (write-bytes s (.getBytes (pr-str x) "UTF-8")))
(defn- wrap-nippy-header [data-ba compressor encryptor password] (def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))
(let [header-ba (byte-array
[id-nippy-magic-prefix (defn- wrap-header [data-ba metadata]
id-nippy-header-ver (if-let [meta-id (head-meta-id (assoc metadata :version head-version))]
(byte (if compressor (compression/header-id compressor) 0)) (let [head-ba (utils/ba-concat head-sig (byte-array [meta-id]))]
(byte (if password (encryption/header-id encryptor) 0)) (utils/ba-concat head-ba data-ba))
id-nippy-reserved])] (throw (Exception. (str "Unrecognized header metadata: " metadata)))))
(utils/ba-concat header-ba data-ba)))
(comment (wrap-header (.getBytes "foo") {:compressed? true
:encrypted? false}))
(defn freeze (defn freeze
"Serializes arg (any Clojure data type) to a byte array. Set :legacy-mode to "Serializes arg (any Clojure data type) to a byte array. Set :legacy-mode to
@ -193,7 +196,9 @@
(let [ba (.toByteArray ba) (let [ba (.toByteArray ba)
ba (if compressor (compression/compress compressor ba) ba) ba (if compressor (compression/compress compressor ba) ba)
ba (if password (encryption/encrypt encryptor password ba) ba)] ba (if password (encryption/encrypt encryptor password ba) ba)]
(if legacy-mode ba (wrap-nippy-header ba compressor encryptor password))))) (if legacy-mode ba
(wrap-header ba {:compressed? (boolean compressor)
:encrypted? (boolean password)})))))
;;;; Thawing ;;;; Thawing
@ -258,18 +263,22 @@
(throw (Exception. (str "Failed to thaw unknown type ID: " type-id)))))) (throw (Exception. (str "Failed to thaw unknown type ID: " type-id))))))
(defn- try-parse-header
  "Tries to interpret the leading 4 bytes of `ba` as a Nippy header: a 3-byte
  signature followed by a single metadata-id byte.

  Returns `[data-ba metadata-map]` when the signature bytes equal `head-sig`,
  where `metadata-map` is the `head-meta` entry for the metadata id, or
  `{:unrecognized-header? true}` when that id has no entry. Returns nil when
  the signature does not match.

  NOTE(review): presumably `utils/ba-split` yields nil for input shorter than
  the split index, in which case this also returns nil - confirm in utils."
  [ba]
  (when-let [[header-bytes payload-bytes] (utils/ba-split ba 4)]
    (let [[sig-bytes [id-byte]] (utils/ba-split header-bytes 3)
          sig-matches?          (utils/ba= sig-bytes head-sig)]
      (if sig-matches?
        ;; Signature matched: pair the remaining data with whatever the
        ;; metadata table says about the final header byte.
        [payload-bytes (get head-meta id-byte {:unrecognized-header? true})]
        nil))))
(defn thaw (defn thaw
"Deserializes frozen bytes to their original Clojure data type. "Deserializes frozen bytes to their original Clojure data type.
:legacy-mode can be set to one of the following values: :legacy-mode options:
true - Read bytes as if written by Nippy < 2.x. false - Nippy >= 2.x data only (best).
false - Read bytes as if written by Nippy >= 2.x. true - Nippy < 2.x data only (deprecated).
:auto (default) - Try read bytes as if written by Nippy >= 2.x, :auto - Mixed data (default, migrating).
fall back to reading bytes as if written by Nippy < 2.x.
In most cases you'll want :auto if you're using a preexisting data set, and In most cases you'll want :auto if you're using a preexisting data set, and
`false` otherwise. Note that error message detail will be limited under the `false` otherwise.
:auto (default) mode.
WARNING: Enabling `:read-eval?` can lead to security vulnerabilities unless WARNING: Enabling `:read-eval?` can lead to security vulnerabilities unless
you are sure you know what you're doing." you are sure you know what you're doing."
@ -279,83 +288,65 @@
compressor compression/default-snappy-compressor compressor compression/default-snappy-compressor
encryptor encryption/default-aes128-encryptor}}]] encryptor encryption/default-aes128-encryptor}}]]
(let [ex (fn [msg & [e]] (throw (Exception. (str "Thaw failed. " msg) e))) (let [ex (fn [msg & [e]] (throw (Exception. (str "Thaw failed: " msg) e)))
thaw-data (fn [data-ba compressor password] try-thaw-data
(fn [data-ba {decompress? :compressed? decrypt? :encrypted?
:or {decompress? compressor
decrypt? password}
:as head-meta}]
(let [apparent-header? (not (empty? head-meta))]
(try
(let [ba data-ba (let [ba data-ba
ba (if password (encryption/decrypt encryptor password ba) ba) ba (if decrypt? (encryption/decrypt encryptor password ba) ba)
ba (if compressor (compression/decompress compressor ba) ba) ba (if decompress? (compression/decompress compressor ba) ba)
stream (DataInputStream. (ByteArrayInputStream. ba))] stream (DataInputStream. (ByteArrayInputStream. ba))]
(binding [*read-eval* read-eval?] (thaw-from-stream stream)))) (binding [*read-eval* read-eval?] (thaw-from-stream stream)))
maybe-headers
(fn []
(when-let [[[id-magic* & _ :as headers] data-ba] (utils/ba-split ba 5)]
(when (= id-magic* id-nippy-magic-prefix) ; Not a guarantee of correctness!
[headers data-ba])))
legacy-thaw
(fn [data-ba]
(try (thaw-data data-ba compressor password)
(catch Exception e (catch Exception e
(cond password (ex "Unencrypted data or wrong password?" e) (cond decrypt? (ex "Wrong password/encryptor?" e)
compressor (ex "Encrypted or uncompressed data?" e) decompress? (ex "Encrypted data or wrong compressor?" e)
:else (ex "Encrypted and/or compressed data?" e))))) :else (if apparent-header?
(ex "Corrupt data?" e)
modern-thaw (ex "Encrypted and/or compressed data?" e)))))))]
(fn [data-ba compressed? encrypted?]
(try (thaw-data data-ba (when compressed? compressor)
(when encrypted? password))
(catch Exception e
(if (and encrypted? password)
(ex "Wrong password, or data may be corrupt?" e)
(ex "Data may be corrupt?" e)))))]
(if (= legacy-mode true) (if (= legacy-mode true)
(legacy-thaw ba) ; Read as legacy, and only as legacy (try-thaw-data ba nil)
(if-let [[[_ id-header* id-comp* id-enc* _] data-ba] (maybe-headers)]
(let [compressed? (not (zero? id-comp*))
encrypted? (not (zero? id-enc*))]
(if-let [[data-ba {:keys [unrecognized-header? compressed? encrypted?]
:as head-meta}] (try-parse-header ba)]
(if (= legacy-mode :auto) (if (= legacy-mode :auto)
(try ; Header looks okay: try read as modern, fall back to legacy (try
(modern-thaw data-ba compressed? encrypted?) ;; Header seems okay, but we won't trust its metadata for
(catch Exception _ (legacy-thaw ba))) ;; error-reporting purposes
(try-thaw-data data-ba head-meta)
(cond ; Read as modern, and only as modern (catch Exception _ (try-thaw-data ba nil)))
(> id-header* id-nippy-header-ver)
(ex "Data frozen with newer Nippy version. Please upgrade.")
(cond ; Trust metadata, give fancy error messages
unrecognized-header?
(ex "Unrecognized header. Data frozen with newer Nippy version?")
(and strict? (not encrypted?) password) (and strict? (not encrypted?) password)
(ex (str "Data is not encrypted. Try again w/o password.\n" (ex (str "Unencrypted data. Try again w/o password.\n"
"Disable `:strict?` option to ignore this error. ")) "Disable `:strict?` option to ignore this error. "))
(and strict? (not compressed?) compressor) (and strict? (not compressed?) compressor)
(ex (str "Data is not compressed. Try again w/o compressor.\n" (ex (str "Uncompressed data. Try again w/o compressor.\n"
"Disable `:strict?` option to ignore this error.")) "Disable `:strict?` option to ignore this error."))
(and compressed? (not compressor))
(ex "Compressed data. Try again with compressor.")
(and encrypted? (not password)) (and encrypted? (not password))
(ex "Data is encrypted. Please try again with a password.") (ex "Encrypted data. Try again with password.")
:else (try-thaw-data data-ba head-meta)))
(and encrypted? password
(not= id-enc* (encryption/header-id encryptor)))
(ex "Data encrypted with a different Encrypter.")
(and compressed? compressor
(not= id-comp* (compression/header-id compressor)))
(ex "Data compressed with a different Compressor.")
:else (modern-thaw data-ba compressed? encrypted?))))
;; Header definitely not okay ;; Header definitely not okay
(if (= legacy-mode :auto) (if (= legacy-mode :auto)
(legacy-thaw ba) (try-thaw-data ba nil) ; Legacy thaw
(ex (str "Not Nippy data, data frozen with Nippy < 2.x, " (ex (str "Not Nippy data, data frozen with Nippy < 2.x, "
"or data may be corrupt?\n" "or corrupt data?\n"
"See `:legacy-mode` option for data frozen with Nippy < 2.x."))))))) "See `:legacy-mode` option for data frozen with Nippy < 2.x.")))))))
(comment (thaw (freeze "hello")) (comment (thaw (freeze "hello"))
(thaw (freeze "hello" {:compressor nil})) (thaw (freeze "hello" {:compressor nil}))
(thaw (freeze "hello" {:compressor nil}) {:strict? true}) ; ex (thaw (freeze "hello" {:compressor nil}) {:legacy-mode false
:strict? true}) ; ex
(thaw (freeze "hello" {:password [:salted "p"]})) ; ex (thaw (freeze "hello" {:password [:salted "p"]})) ; ex
(thaw (freeze "hello") {:password [:salted "p"]})) (thaw (freeze "hello") {:password [:salted "p"]}))

View file

@ -6,7 +6,6 @@
;;;; Interface ;;;; Interface
(defprotocol ICompressor (defprotocol ICompressor
(header-id [compressor]) ; Unique, >0, <= 128
(compress ^bytes [compressor ba]) (compress ^bytes [compressor ba])
(decompress ^bytes [compressor ba])) (decompress ^bytes [compressor ba]))
@ -14,7 +13,6 @@
(deftype DefaultSnappyCompressor [] (deftype DefaultSnappyCompressor []
ICompressor ICompressor
(header-id [_] 1)
(compress [_ ba] (org.iq80.snappy.Snappy/compress ba)) (compress [_ ba] (org.iq80.snappy.Snappy/compress ba))
(decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba)))) (decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))

View file

@ -8,7 +8,6 @@
;;;; Interface ;;;; Interface
(defprotocol IEncryptor (defprotocol IEncryptor
(header-id [encryptor]) ; Unique, >0, <= 128
(encrypt ^bytes [encryptor pwd ba]) (encrypt ^bytes [encryptor pwd ba])
(decrypt ^bytes [encryptor pwd ba])) (decrypt ^bytes [encryptor pwd ba]))
@ -67,8 +66,6 @@
(defrecord DefaultAES128Encryptor [key-cache] (defrecord DefaultAES128Encryptor [key-cache]
IEncryptor IEncryptor
(header-id [_] 1)
(encrypt [this typed-pwd data-ba] (encrypt [this typed-pwd data-ba]
(let [[type pwd] (destructure-typed-pwd typed-pwd) (let [[type pwd] (destructure-typed-pwd typed-pwd)
salt? (= type :salted) salt? (= type :salted)

View file

@ -69,6 +69,8 @@
(comment (memoized nil +) (comment (memoized nil +)
(memoized nil + 5 12)) (memoized nil + 5 12))
(defn ba=
  "Returns true when byte arrays `x` and `y` are equal according to
  `java.util.Arrays/equals`, false otherwise."
  [^bytes x ^bytes y]
  (let [equal-contents? (java.util.Arrays/equals x y)]
    equal-contents?))
(defn ba-concat ^bytes [^bytes ba1 ^bytes ba2] (defn ba-concat ^bytes [^bytes ba1 ^bytes ba2]
(let [s1 (alength ba1) (let [s1 (alength ba1)
s2 (alength ba2) s2 (alength ba2)