Further work on freezer fallback reliability

This commit is contained in:
Peter Taoussanis 2013-12-06 18:40:13 +07:00
parent 4b1f024b8b
commit 5ea4af7356
3 changed files with 147 additions and 106 deletions

View file

@ -1,16 +1,14 @@
## v2.5.2 / 2013-12-07
* Test Serializable objects at freeze time for better reliability.
* Don't cache `serializable?`/`readable?` results for types with gensym-style names (e.g. those generated for anonymous fns).
* Failed serialized/reader thaws will try to return what they can (e.g. unreadable string) instead of just throwing.
* Thaw error messages now include failing type-id.
## v2.5.1 / 2013-12-03 ## v2.5.1 / 2013-12-03
Improved `freeze` fallback handling. This is a recommended upgrade for all users and a necessary upgrade for Timbre v3 Carmine appender users.
### Features
* Added experimental `inspect-ba` fn for examining data possibly frozen by Nippy. * Added experimental `inspect-ba` fn for examining data possibly frozen by Nippy.
### Changes
* Now throw exception at freeze (rather than thaw) time when trying to serialize an unreadable object using the Clojure reader. * Now throw exception at freeze (rather than thaw) time when trying to serialize an unreadable object using the Clojure reader.
### Fixes
-
## v2.4.1 → v2.5.0 ## v2.4.1 → v2.5.0
* Refactored standard Freezable protocol implementations to de-emphasise interfaces as a matter of hygiene, Ref. http://goo.gl/IFXzvh. * Refactored standard Freezable protocol implementations to de-emphasise interfaces as a matter of hygiene, Ref. http://goo.gl/IFXzvh.

View file

@ -8,7 +8,7 @@
(compression :as compression :refer (snappy-compressor)) (compression :as compression :refer (snappy-compressor))
(encryption :as encryption :refer (aes128-encryptor))]) (encryption :as encryption :refer (aes128-encryptor))])
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream (:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
DataOutputStream Serializable] DataOutputStream Serializable ObjectOutputStream ObjectInputStream]
[java.lang.reflect Method] [java.lang.reflect Method]
[java.util Date UUID] [java.util Date UUID]
[clojure.lang Keyword BigInt Ratio [clojure.lang Keyword BigInt Ratio
@ -26,7 +26,7 @@
(byte 2) {:version 1 :compressed? false :encrypted? true} (byte 2) {:version 1 :compressed? false :encrypted? true}
(byte 3) {:version 1 :compressed? true :encrypted? true}}) (byte 3) {:version 1 :compressed? true :encrypted? true}})
(def ^:private ^:const debug-mode? false) (defmacro when-debug-mode [& body] (when #_true false `(do ~@body)))
;;;; Data type IDs ;;;; Data type IDs
@ -114,8 +114,9 @@
(defmacro ^:private freezer-coll [type id & body] (defmacro ^:private freezer-coll [type id & body]
`(freezer ~type ~id `(freezer ~type ~id
#_(when (and debug-mode? (instance? ISeq ~type)) (when-debug-mode
(println (format "DEBUG - freezer-coll: %s for %s" ~type (type ~'x)))) (when (instance? ISeq ~type)
(println (format "DEBUG - freezer-coll: %s for %s" ~type (type ~'x)))))
(if (counted? ~'x) (if (counted? ~'x)
(do (.writeInt ~'s (count ~'x)) (do (.writeInt ~'s (count ~'x))
(doseq [i# ~'x] (freeze-to-stream ~'s i#))) (doseq [i# ~'x] (freeze-to-stream ~'s i#)))
@ -186,29 +187,9 @@
(.writeLong s (.getMostSignificantBits x)) (.writeLong s (.getMostSignificantBits x))
(.writeLong s (.getLeastSignificantBits x))) (.writeLong s (.getLeastSignificantBits x)))
(def reader-serializable?
"`pr-str` will happily print stuff that the Reader can't actually read back,
so we have to test a full roundtrip if we want to throw an exception at freeze
(rather than thaw) time."
(let [cache (atom {})] ; {<type> <serializable?>}
(fn [x]
(let [t (type x)]
(if-let [dv (@cache t)] @dv
(locking cache ; For thread racing
(if-let [dv (@cache t)] @dv ; Retry after lock acquisition
(let [dv (delay
(try (edn/read-string {:readers *data-readers*}
(pr-str x))
true
(catch Exception _ false)))]
(swap! cache assoc t dv)
@dv))))))))
(comment (reader-serializable? "hello"))
(def ^:dynamic *final-freeze-fallback* "Alpha - subject to change." nil) (def ^:dynamic *final-freeze-fallback* "Alpha - subject to change." nil)
(defn freeze-fallback-as-str "Alpha-subject to change." [x s] (defn freeze-fallback-as-str "Alpha-subject to change." [x s]
(freeze-to-stream* {:nippy/unfreezable (str x) :type (type x)} s)) (freeze-to-stream* {:nippy/unfreezable (pr-str x) :type (type x)} s))
(comment (comment
(require '[clojure.core.async :as async]) (require '[clojure.core.async :as async])
@ -220,25 +201,24 @@
(extend-type Object (extend-type Object
Freezable Freezable
(freeze-to-stream* [x ^DataOutputStream s] (freeze-to-stream* [x ^DataOutputStream s]
(if (instance? Serializable x) (cond
(do ;; Fallback #1: Java's Serializable interface (utils/serializable? x) ; Fallback #1: Java's Serializable interface
#_(when debug-mode? (do (when-debug-mode
(println (format "DEBUG - Serializable fallback: %s" (type x)))) (println (format "DEBUG - Serializable fallback: %s" (type x))))
(write-id s id-serializable) (write-id s id-serializable)
(write-utf8 s (.getName (class x))) ; Reflect (write-utf8 s (.getName (class x))) ; Reflect
(.writeObject (java.io.ObjectOutputStream. s) x)) (.writeObject (ObjectOutputStream. s) x))
(do ;; Fallback #2: Clojure's Reader (utils/readable? x) ; Fallback #2: Clojure's Reader
(when (reader-serializable? x) (do (when-debug-mode
#_(when debug-mode?
(println (format "DEBUG - Reader fallback: %s" (type x)))) (println (format "DEBUG - Reader fallback: %s" (type x))))
(write-id s id-reader) (write-id s id-reader)
(write-bytes s (.getBytes (pr-str x) "UTF-8"))) (write-bytes s (.getBytes (pr-str x) "UTF-8")))
;; Fallback #3: *final-freeze-fallback* :else ; Fallback #3: *final-freeze-fallback*
(if-let [ffb *final-freeze-fallback*] (ffb x s) (if-let [ffb *final-freeze-fallback*] (ffb x s)
(throw (Exception. (format "Unfreezable type: %s %s" (throw (Exception. (format "Unfreezable type: %s %s"
(type x) (str x))))))))) (type x) (str x))))))))
(def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta)) (def ^:private head-meta-id (reduce-kv #(assoc %1 %3 %2) {} head-meta))
@ -301,14 +281,25 @@
(defn- thaw-from-stream (defn- thaw-from-stream
[^DataInputStream s] [^DataInputStream s]
(let [type-id (.readByte s)] (let [type-id (.readByte s)]
#_(when debug-mode? (try
(when-debug-mode
(println (format "DEBUG - thawing type-id: %s" type-id))) (println (format "DEBUG - thawing type-id: %s" type-id)))
(utils/case-eval type-id (utils/case-eval type-id
id-reader (edn/read-string {:readers *data-readers*} (read-utf8 s)) id-reader
(let [edn (read-utf8 s)]
(try (edn/read-string {:readers *data-readers*} edn)
(catch Exception _ {:nippy/unthawable edn
:type :reader})))
id-serializable id-serializable
(let [class ^Class (Class/forName (read-utf8 s))] (let [class-name (read-utf8 s)
(cast class (.readObject (java.io.ObjectInputStream. s)))) object (.readObject (ObjectInputStream. s))]
(try (let [class ^Class (Class/forName class-name)]
(cast class object))
(catch Exception _ {:nippy/unthawable [class-name object]
:type :serializable})))
id-bytes (read-bytes s) id-bytes (read-bytes s)
id-nil nil id-nil nil
@ -369,7 +360,10 @@
(throw (Exception. (str "Reader exception for custom type ID: " (throw (Exception. (str "Reader exception for custom type ID: "
(- type-id)) e)))) (- type-id)) e))))
(throw (Exception. (str "No reader provided for custom type ID: " (throw (Exception. (str "No reader provided for custom type ID: "
(- type-id))))))))) (- type-id)))))))
(catch Exception e
(throw (Exception. (format "Thaw failed against type-id: %s" type-id) e))))))
(defn thaw-from-stream! (defn thaw-from-stream!
"Low-level API. Deserializes a frozen object from given DataInputStream to its "Low-level API. Deserializes a frozen object from given DataInputStream to its

View file

@ -1,6 +1,9 @@
(ns taoensso.nippy.utils (ns taoensso.nippy.utils
{:author "Peter Taoussanis"} {:author "Peter Taoussanis"}
(:require [clojure.string :as str])) (:require [clojure.string :as str]
[clojure.tools.reader.edn :as edn])
(:import [java.io ByteArrayInputStream ByteArrayOutputStream Serializable
ObjectOutputStream ObjectInputStream]))
(defmacro case-eval (defmacro case-eval
"Like `case` but evaluates test constants for their compile-time value." "Like `case` but evaluates test constants for their compile-time value."
@ -95,3 +98,49 @@
(comment (String. (ba-concat (.getBytes "foo") (.getBytes "bar"))) (comment (String. (ba-concat (.getBytes "foo") (.getBytes "bar")))
(let [[x y] (ba-split (.getBytes "foobar") 5)] (let [[x y] (ba-split (.getBytes "foobar") 5)]
[(String. x) (String. y)])) [(String. x) (String. y)]))
;;;; Fallback type tests
;; Unfortunately the only reliable way we can tell if something's
;; really serializable/readable is to actually try a full roundtrip.
(defn- memoize-type-test
  "Wraps the predicate `f-test` in a 1-arg fn that remembers its verdict per
  `(type x)`.

  * Any `Exception` thrown by `f-test` is swallowed and counts as `false`.
  * Types whose printed name contains `__<digits>` (gensym-style names) are
    never cached; `f-test` is re-run on every call for those.
  * For every other type the verdict is held in a `delay` stored in a private
    atom, and cache misses are resolved under a lock on that atom."
  [f-test]
  (let [cache (atom {})] ; {<type> <delay of type-okay?>}
    (fn [x]
      (let [t            (type x)
            ;; Heuristic only: a gensym-looking type name marks the type as
            ;; unsuitable for caching.
            gensym-like? (re-find #"__\d+" (str t))
            run-test     (fn []
                           (try
                             (f-test x)
                             (catch Exception _ false)))]
        (if gensym-like?
          (run-test)
          (let [hit (get @cache t)]
            (if hit
              (deref hit)
              ;; Miss: take the lock, then look again in case another thread
              ;; filled the slot while we were waiting.
              (locking cache
                (let [hit (get @cache t)]
                  (if hit
                    (deref hit)
                    (let [fresh (delay (run-test))]
                      (swap! cache assoc t fresh)
                      (deref fresh))))))))))))
(def serializable?
  "Type test (wrapped by `memoize-type-test`): truthy when `x` implements
  `java.io.Serializable` AND an in-memory Java serialization roundtrip of `x`
  completes without throwing. Returns nil for non-Serializable values; a
  failing roundtrip is reported as false by the wrapper."
  (memoize-type-test
    (fn [x]
      (when (instance? Serializable x)
        (let [;; Resolve the class by name up front so an unloadable class
              ;; fails before any serialization work is done.
              ^Class klass (Class/forName (.getName (class x)))
              sink         (ByteArrayOutputStream.)]
          (.writeObject (ObjectOutputStream. sink) x)
          (let [source (ByteArrayInputStream. (.toByteArray sink))
                copy   (.readObject (ObjectInputStream. source))]
            ;; Throws ClassCastException if the deserialized object is not an
            ;; instance of the resolved class.
            (cast klass copy)
            true))))))
(def readable?
  "Type test (wrapped by `memoize-type-test`): true when `(pr-str x)` can be
  read back by the EDN reader without throwing; a reader failure is reported
  as false by the wrapper.

  The read uses `{:readers *data-readers*}`, the same options the reader thaw
  path passes, so a value whose printed form is a tagged literal registered in
  `*data-readers*` is judged readable at freeze time exactly when it will be
  readable at thaw time. Note that the verdict is cached per type by the
  wrapper, so the `*data-readers*` binding in effect at the first test of a
  type is the one that decides its cached result."
  (memoize-type-test
    (fn [x]
      (edn/read-string {:readers *data-readers*} (pr-str x))
      true)))
;; REPL scratchpad (never evaluated at load time): ad-hoc checks of the two
;; fallback type tests defined above.
(comment
;; One-off calls against a String and against an anonymous fn.
(serializable? "Hello world")
(serializable? (fn []))
(readable? "Hello world")
(readable? (fn []))
;; Rough timings over 10000 calls each. String is an ordinary type, so after
;; the first call these should hit the per-type cache. Anonymous fn classes
;; presumably have gensym-style names, in which case `memoize-type-test` skips
;; the cache and re-runs the full test on every call.
(time (dotimes [_ 10000] (serializable? "Hello world")))
(time (dotimes [_ 10000] (serializable? (fn []))))
(time (dotimes [_ 10000] (readable? "Hello world")))
(time (dotimes [_ 10000] (readable? (fn [])))))