Further work on freezer fallback reliability

This commit is contained in:
Peter Taoussanis 2013-12-06 18:40:13 +07:00
parent 4b1f024b8b
commit 5ea4af7356
3 changed files with 147 additions and 106 deletions

View file

@ -1,16 +1,14 @@
## v2.5.2 / 2013-12-07
* Test Serializable objects at freeze time for better reliability.
* Don't cache `serializable?`/`readable?` for types with gensym-style names (e.g. as used for anonymous fns, etc.).
* Failed serialized/reader thaws will try to return what they can (e.g. unreadable string) instead of just throwing.
* Thaw error messages now include failing type-id.
## v2.5.1 / 2013-12-03 ## v2.5.1 / 2013-12-03
Improved `freeze` fallback handling. This is a recommended upgrade for all users and a necessary upgrade for Timbre v3 Carmine appender users.
### Features
* Added experimental `inspect-ba` fn for examining data possibly frozen by Nippy. * Added experimental `inspect-ba` fn for examining data possibly frozen by Nippy.
### Changes
* Now throw exception at freeze (rather than thaw) time when trying to serialize an unreadable object using the Clojure reader. * Now throw exception at freeze (rather than thaw) time when trying to serialize an unreadable object using the Clojure reader.
### Fixes
-
## v2.4.1 → v2.5.0 ## v2.4.1 → v2.5.0
* Refactored standard Freezable protocol implementations to de-emphasise interfaces as a matter of hygiene, Ref. http://goo.gl/IFXzvh. * Refactored standard Freezable protocol implementations to de-emphasise interfaces as a matter of hygiene, Ref. http://goo.gl/IFXzvh.

View file

@ -8,7 +8,7 @@
(compression :as compression :refer (snappy-compressor)) (compression :as compression :refer (snappy-compressor))
(encryption :as encryption :refer (aes128-encryptor))]) (encryption :as encryption :refer (aes128-encryptor))])
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream (:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
DataOutputStream Serializable] DataOutputStream Serializable ObjectOutputStream ObjectInputStream]
[java.lang.reflect Method] [java.lang.reflect Method]
[java.util Date UUID] [java.util Date UUID]
[clojure.lang Keyword BigInt Ratio [clojure.lang Keyword BigInt Ratio
@ -26,7 +26,7 @@
(byte 2) {:version 1 :compressed? false :encrypted? true} (byte 2) {:version 1 :compressed? false :encrypted? true}
(byte 3) {:version 1 :compressed? true :encrypted? true}}) (byte 3) {:version 1 :compressed? true :encrypted? true}})
(def ^:private ^:const debug-mode? false) (defmacro when-debug-mode [& body] (when #_true false `(do ~@body)))
;;;; Data type IDs ;;;; Data type IDs
@ -114,8 +114,9 @@
(defmacro ^:private freezer-coll [type id & body] (defmacro ^:private freezer-coll [type id & body]
`(freezer ~type ~id `(freezer ~type ~id
#_(when (and debug-mode? (instance? ISeq ~type)) (when-debug-mode
(println (format "DEBUG - freezer-coll: %s for %s" ~type (type ~'x)))) (when (instance? ISeq ~type)
(println (format "DEBUG - freezer-coll: %s for %s" ~type (type ~'x)))))
(if (counted? ~'x) (if (counted? ~'x)
(do (.writeInt ~'s (count ~'x)) (do (.writeInt ~'s (count ~'x))
(doseq [i# ~'x] (freeze-to-stream ~'s i#))) (doseq [i# ~'x] (freeze-to-stream ~'s i#)))
@ -186,29 +187,9 @@
(.writeLong s (.getMostSignificantBits x)) (.writeLong s (.getMostSignificantBits x))
(.writeLong s (.getLeastSignificantBits x))) (.writeLong s (.getLeastSignificantBits x)))
(def reader-serializable?
  "`pr-str` will cheerfully print things that the Reader cannot read back, so
  the only way to fail at freeze (rather than thaw) time is to attempt a full
  print->read roundtrip. The outcome is remembered per `(type x)`."
  (let [results (atom {})] ; {<type> <delay of readable?>}
    (fn [x]
      (let [x-type (type x)]
        (if-let [cached (get @results x-type)]
          (deref cached)
          (locking results ; Only one thread at a time may install a new entry
            (if-let [cached (get @results x-type)] ; Re-check now that we hold the lock
              (deref cached)
              (let [fresh (delay
                            (try
                              (edn/read-string {:readers *data-readers*} (pr-str x))
                              true
                              (catch Exception _ false)))]
                (swap! results assoc x-type fresh)
                (deref fresh)))))))))
(comment (reader-serializable? "hello"))
(def ^:dynamic *final-freeze-fallback* "Alpha - subject to change." nil) (def ^:dynamic *final-freeze-fallback* "Alpha - subject to change." nil)
(defn freeze-fallback-as-str "Alpha-subject to change." [x s] (defn freeze-fallback-as-str "Alpha-subject to change." [x s]
(freeze-to-stream* {:nippy/unfreezable (str x) :type (type x)} s)) (freeze-to-stream* {:nippy/unfreezable (pr-str x) :type (type x)} s))
(comment (comment
(require '[clojure.core.async :as async]) (require '[clojure.core.async :as async])
@ -220,25 +201,24 @@
(extend-type Object (extend-type Object
Freezable Freezable
(freeze-to-stream* [x ^DataOutputStream s] (freeze-to-stream* [x ^DataOutputStream s]
(if (instance? Serializable x) (cond
(do ;; Fallback #1: Java's Serializable interface (utils/serializable? x) ; Fallback #1: Java's Serializable interface
#_(when debug-mode? (do (when-debug-mode
(println (format "DEBUG - Serializable fallback: %s" (type x)))) (println (format "DEBUG - Serializable fallback: %s" (type x))))
(write-id s id-serializable) (write-id s id-serializable)
(write-utf8 s (.getName (class x))) ; Reflect (write-utf8 s (.getName (class x))) ; Reflect
(.writeObject (java.io.ObjectOutputStream. s) x)) (.writeObject (ObjectOutputStream. s) x))
(do ;; Fallback #2: Clojure's Reader (utils/readable? x) ; Fallback #2: Clojure's Reader
(when (reader-serializable? x) (do (when-debug-mode
#_(when debug-mode? (println (format "DEBUG - Reader fallback: %s" (type x))))
(println (format "DEBUG - Reader fallback: %s" (type x)))) (write-id s id-reader)
(write-id s id-reader) (write-bytes s (.getBytes (pr-str x) "UTF-8")))
(write-bytes s (.getBytes (pr-str x) "UTF-8")))
;; Fallback #3: *final-freeze-fallback* :else ; Fallback #3: *final-freeze-fallback*
(if-let [ffb *final-freeze-fallback*] (ffb x s) (if-let [ffb *final-freeze-fallback*] (ffb x s)
(throw (Exception. (format "Unfreezable type: %s %s" (throw (Exception. (format "Unfreezable type: %s %s"
(type x) (str x))))))))) (type x) (str x))))))))
(def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta)) (def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))
@ -301,75 +281,89 @@
(defn- thaw-from-stream (defn- thaw-from-stream
[^DataInputStream s] [^DataInputStream s]
(let [type-id (.readByte s)] (let [type-id (.readByte s)]
#_(when debug-mode? (try
(println (format "DEBUG - thawing type-id: %s" type-id))) (when-debug-mode
(utils/case-eval type-id (println (format "DEBUG - thawing type-id: %s" type-id)))
id-reader (edn/read-string {:readers *data-readers*} (read-utf8 s)) (utils/case-eval type-id
id-serializable
(let [class ^Class (Class/forName (read-utf8 s))]
(cast class (.readObject (java.io.ObjectInputStream. s))))
id-bytes (read-bytes s) id-reader
id-nil nil (let [edn (read-utf8 s)]
id-boolean (.readBoolean s) (try (edn/read-string {:readers *data-readers*} edn)
(catch Exception _ {:nippy/unthawable edn
:type :reader})))
id-char (.readChar s) id-serializable
id-string (read-utf8 s) (let [class-name (read-utf8 s)
id-keyword (keyword (read-utf8 s)) object (.readObject (ObjectInputStream. s))]
(try (let [class ^Class (Class/forName class-name)]
(cast class object))
(catch Exception _ {:nippy/unthawable [class-name object]
:type :serializable})))
id-queue (read-coll s (PersistentQueue/EMPTY)) id-bytes (read-bytes s)
id-sorted-set (read-coll s (sorted-set)) id-nil nil
id-sorted-map (read-kvs s (sorted-map)) id-boolean (.readBoolean s)
id-list (into '() (rseq (read-coll s []))) id-char (.readChar s)
id-vector (read-coll s []) id-string (read-utf8 s)
id-set (read-coll s #{}) id-keyword (keyword (read-utf8 s))
id-map (read-kvs s {})
id-seq (seq (read-coll s []))
id-meta (let [m (thaw-from-stream s)] (with-meta (thaw-from-stream s) m)) id-queue (read-coll s (PersistentQueue/EMPTY))
id-sorted-set (read-coll s (sorted-set))
id-sorted-map (read-kvs s (sorted-map))
id-byte (.readByte s) id-list (into '() (rseq (read-coll s [])))
id-short (.readShort s) id-vector (read-coll s [])
id-integer (.readInt s) id-set (read-coll s #{})
id-long (.readLong s) id-map (read-kvs s {})
id-bigint (bigint (read-biginteger s)) id-seq (seq (read-coll s []))
id-float (.readFloat s) id-meta (let [m (thaw-from-stream s)] (with-meta (thaw-from-stream s) m))
id-double (.readDouble s)
id-bigdec (BigDecimal. (read-biginteger s) (.readInt s))
id-ratio (/ (bigint (read-biginteger s)) id-byte (.readByte s)
(bigint (read-biginteger s))) id-short (.readShort s)
id-integer (.readInt s)
id-long (.readLong s)
id-bigint (bigint (read-biginteger s))
id-record id-float (.readFloat s)
(let [class ^Class (Class/forName (read-utf8 s)) id-double (.readDouble s)
meth-sig (into-array Class [IPersistentMap]) id-bigdec (BigDecimal. (read-biginteger s) (.readInt s))
method ^Method (.getMethod class "create" meth-sig)]
(.invoke method class (into-array Object [(thaw-from-stream s)])))
id-date (Date. (.readLong s)) id-ratio (/ (bigint (read-biginteger s))
id-uuid (UUID. (.readLong s) (.readLong s)) (bigint (read-biginteger s)))
;;; DEPRECATED id-record
id-old-reader (edn/read-string (.readUTF s)) (let [class ^Class (Class/forName (read-utf8 s))
id-old-string (.readUTF s) meth-sig (into-array Class [IPersistentMap])
id-old-map (apply hash-map (utils/repeatedly-into [] method ^Method (.getMethod class "create" meth-sig)]
(* 2 (.readInt s)) (thaw-from-stream s))) (.invoke method class (into-array Object [(thaw-from-stream s)])))
id-old-keyword (keyword (.readUTF s))
(if-not (neg? type-id) id-date (Date. (.readLong s))
(throw (Exception. (str "Unknown type ID: " type-id))) id-uuid (UUID. (.readLong s) (.readLong s))
;; Custom types ;;; DEPRECATED
(if-let [reader (get @custom-readers type-id)] id-old-reader (edn/read-string (.readUTF s))
(try (reader s) id-old-string (.readUTF s)
(catch Exception e id-old-map (apply hash-map (utils/repeatedly-into []
(throw (Exception. (str "Reader exception for custom type ID: " (* 2 (.readInt s)) (thaw-from-stream s)))
(- type-id)) e)))) id-old-keyword (keyword (.readUTF s))
(throw (Exception. (str "No reader provided for custom type ID: "
(- type-id))))))))) (if-not (neg? type-id)
(throw (Exception. (str "Unknown type ID: " type-id)))
;; Custom types
(if-let [reader (get @custom-readers type-id)]
(try (reader s)
(catch Exception e
(throw (Exception. (str "Reader exception for custom type ID: "
(- type-id)) e))))
(throw (Exception. (str "No reader provided for custom type ID: "
(- type-id)))))))
(catch Exception e
(throw (Exception. (format "Thaw failed against type-id: %s" type-id) e))))))
(defn thaw-from-stream! (defn thaw-from-stream!
"Low-level API. Deserializes a frozen object from given DataInputStream to its "Low-level API. Deserializes a frozen object from given DataInputStream to its

View file

@ -1,6 +1,9 @@
(ns taoensso.nippy.utils (ns taoensso.nippy.utils
{:author "Peter Taoussanis"} {:author "Peter Taoussanis"}
(:require [clojure.string :as str])) (:require [clojure.string :as str]
[clojure.tools.reader.edn :as edn])
(:import [java.io ByteArrayInputStream ByteArrayOutputStream Serializable
ObjectOutputStream ObjectInputStream]))
(defmacro case-eval (defmacro case-eval
"Like `case` but evaluates test constants for their compile-time value." "Like `case` but evaluates test constants for their compile-time value."
@ -95,3 +98,49 @@
(comment (String. (ba-concat (.getBytes "foo") (.getBytes "bar"))) (comment (String. (ba-concat (.getBytes "foo") (.getBytes "bar")))
(let [[x y] (ba-split (.getBytes "foobar") 5)] (let [[x y] (ba-split (.getBytes "foobar") 5)]
[(String. x) (String. y)])) [(String. x) (String. y)]))
;;;; Fallback type tests
;; Unfortunately the only reliable way we can tell if something's
;; really serializable/readable is to actually try a full roundtrip.
(defn- memoize-type-test
  "Wraps the 1-arg predicate `f-test` so that:
    * any Exception thrown by `f-test` is reported as `false`, and
    * the outcome is remembered per `(type x)`, except for types whose printed
      name contains a gensym-style `__<digits>` suffix, which are re-tested on
      every call.
  Returns the wrapped predicate."
  [f-test]
  (let [results (atom {})] ; {<type> <delay of type-okay?>}
    (fn [x]
      (let [x-type   (type x)
            run-test #(try (f-test x) (catch Exception _ false))
            ;; A bit hackish, but there's no other obvious way to spot
            ;; gensym-named types:
            gensym?  (re-find #"__\d+" (str x-type))]
        (if gensym?
          (run-test)
          (if-let [cached (get @results x-type)]
            (deref cached)
            (locking results ; Only one thread at a time may install a new entry
              (if-let [cached (get @results x-type)] ; Re-check now that we hold the lock
                (deref cached)
                (let [fresh (delay (run-test))]
                  (swap! results assoc x-type fresh)
                  (deref fresh))))))))))
(def serializable?
  "Type test: true iff `x` implements java.io.Serializable AND survives a full
  ObjectOutputStream -> ObjectInputStream roundtrip. Yields nil for
  non-Serializable values and false when the roundtrip throws an Exception."
  (memoize-type-test
    (fn [x]
      (when (instance? Serializable x)
        (let [klass ^Class (Class/forName (.getName (class x))) ; Try 1st (fail fast)
              sink  (ByteArrayOutputStream.)]
          (.writeObject (ObjectOutputStream. sink) x)
          (let [source (ByteArrayInputStream. (.toByteArray sink))
                copy   (.readObject (ObjectInputStream. source))]
            (cast klass copy) ; Throws if the thawed object is of the wrong class
            true))))))
(def readable?
  "Type test: true iff `(pr-str x)` can be read back by the edn reader.
  The read uses `{:readers *data-readers*}` so that the check agrees with how
  reader-frozen data is read back when thawed; without it, values that print
  as custom tagged literals would be wrongly judged unreadable. Yields false
  when printing or reading throws an Exception."
  (memoize-type-test
    (fn [x]
      (edn/read-string {:readers *data-readers*} (pr-str x))
      true)))
;; REPL scratchpad (never evaluated at load time): spot checks of the two
;; fallback type tests against a String and an anonymous fn, followed by rough
;; timings of 10000 repeated calls for each case.
(comment
;; Spot checks
(serializable? "Hello world")
(serializable? (fn []))
(readable? "Hello world")
(readable? (fn []))
;; Rough timings
(time (dotimes [_ 10000] (serializable? "Hello world")))
(time (dotimes [_ 10000] (serializable? (fn []))))
(time (dotimes [_ 10000] (readable? "Hello world")))
(time (dotimes [_ 10000] (readable? (fn [])))))