From fb6f75e4d7db2b567fdf3a8ffa841e290eb9e924 Mon Sep 17 00:00:00 2001 From: Peter Taoussanis Date: Tue, 26 Sep 2023 10:53:46 +0200 Subject: [PATCH] [new] Smarter, faster, protocol-based `freezable?` util --- src/taoensso/nippy.clj | 128 +++++++++++++++++++++++----------- src/taoensso/nippy/impl.clj | 58 +++++++++++++++ test/taoensso/nippy_tests.clj | 14 ++++ 3 files changed, 160 insertions(+), 40 deletions(-) create mode 100644 src/taoensso/nippy/impl.clj diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index e8d5b47..5b2d4a3 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -6,7 +6,7 @@ [clojure.java.io :as jio] [taoensso.encore :as enc] [taoensso.nippy - [utils :as utils] + [impl :as impl] [compression :as compression] [encryption :as encryption]]) @@ -47,7 +47,7 @@ ;; * Supports :auto freeze compressor (since this depends on :auto thaw ;; compressor) ;; -;; [2] See `IFreezable1` protocol for type-specific payload formats, +;; [2] See `IFreezable` protocol for type-specific payload formats, ;; `thaw-from-in!` for reference type-specific thaw implementations ;; (def ^:private head-sig "First 3 bytes of Nippy header" (.getBytes "NPY" StandardCharsets/UTF_8)) @@ -347,9 +347,7 @@ encryption/aes128-gcm-encryptor encryption/aes128-cbc-encryptor encryption/aes128-gcm-encryptor - {:src encryption/aes128-gcm-encryptor, :alias aes128-encryptor} - - utils/freezable?) + {:src encryption/aes128-gcm-encryptor, :alias aes128-encryptor}) ;;;; Dynamic config ;; See also `nippy.tools` ns for further dynamic config support @@ -695,6 +693,49 @@ (binding [*freeze-serializable-allowlist* #{"foo.*" "bar"}] (freeze-serializable-allowed? "foo.bar"))) +;;;; Freezing interface + +;; We extend `IFreezable` to supported types: +(defprotocol IFreezable + (-freezable? [_]) + (-freeze-without-meta! [_ data-output])) + +(defprotocol IFreezableWithMeta ; Must be a separate protocol + (-freeze-with-meta! [_ data-output])) + +(defmacro write-id [out id] `(.writeByte ~out ~id)) + +(extend-protocol IFreezableWithMeta + clojure.lang.IObj ; IMeta => `meta` will work, IObj => `with-meta` will work + (-freeze-with-meta! [x ^DataOutput data-output] + (when-let [m (when *incl-metadata?* (meta x))] + (write-id data-output id-meta) + (-freeze-without-meta! m data-output)) + (-freeze-without-meta! x data-output)) + + nil (-freeze-with-meta! [x data-output] (-freeze-without-meta! x data-output)) + Object (-freeze-with-meta! [x data-output] (-freeze-without-meta! x data-output))) + +(defmacro ^:private freezer [type & body] + `(extend-type ~type + IFreezable + (~'-freezable? [~'x] true) + (~'-freeze-without-meta! [~'x ~(with-meta 'out {:tag 'DataOutput})] ~@body))) + +(defmacro ^:private freezer* [type & body] + `(extend-type ~type + IFreezable + (~'-freezable? [~'x] nil) + (~'-freeze-without-meta! [~'x ~(with-meta 'out {:tag 'DataOutput})] ~@body))) + +(defmacro ^:private id-freezer [type id & body] + `(extend-type ~type + IFreezable + (~'-freezable? [~'x] true) + (~'-freeze-without-meta! [~'x ~(with-meta 'out {:tag 'DataOutput})] + (write-id ~'out ~id) + ~@body))) + ;;;; Freezing (do @@ -703,8 +744,6 @@ (def ^:private ^:const range-uint (- Integer/MAX_VALUE Integer/MIN_VALUE))) (do - (defmacro write-id [out id] `(.writeByte ~out ~id)) - (defmacro ^:private sm-count?* [n] `(<= ~n range-ubyte)) ; Unsigned (defmacro ^:private sm-count? [n] `(<= ~n Byte/MAX_VALUE)) (defmacro ^:private md-count? [n] `(<= ~n Short/MAX_VALUE)) @@ -719,21 +758,6 @@ (defmacro ^:private read-md-count [in] `(.readShort ~in)) (defmacro ^:private read-lg-count [in] `(.readInt ~in))) - ; We extend `IFreezable1` to supported types: -(defprotocol IFreezable1 (-freeze-without-meta! [x data-output])) -(defprotocol IFreezable2 (-freeze-with-meta! [x data-output])) -(extend-protocol IFreezable2 ; Must be a separate protocol - clojure.lang.IObj ; IMeta => `meta` will work, IObj => `with-meta` will work - (-freeze-with-meta! [x ^DataOutput data-output] - (let [m (when *incl-metadata?* (meta x))] - (when m - (write-id data-output id-meta) - (-freeze-without-meta! m data-output))) - (-freeze-without-meta! x data-output)) - - nil (-freeze-with-meta! [x data-output] (-freeze-without-meta! x data-output)) - Object (-freeze-with-meta! [x data-output] (-freeze-without-meta! x data-output))) - (defn- write-bytes-sm* [^DataOutput out ^bytes ba] (let [len (alength ba)] (write-sm-count* out len) (.write out ba 0 len))) (defn- write-bytes-sm [^DataOutput out ^bytes ba] (let [len (alength ba)] (write-sm-count out len) (.write out ba 0 len))) (defn- write-bytes-md [^DataOutput out ^bytes ba] (let [len (alength ba)] (write-md-count out len) (.write out ba 0 len))) @@ -998,7 +1022,7 @@ (catch Throwable _ nil)))) (defn try-write-readable [out x] - (when (utils/readable? x) + (when (impl/seems-readable? x) (try (write-readable out x) true @@ -1033,17 +1057,6 @@ This is a low-level util: in most cases you'll want `freeze` instead." [^DataOutput data-output x] (-freeze-with-meta! x data-output)) -(defmacro ^:private freezer [type & body] - `(extend-type ~type IFreezable1 - (~'-freeze-without-meta! [~'x ~(with-meta 'out {:tag 'DataOutput})] - ~@body))) - -(defmacro ^:private id-freezer [type id & body] - `(extend-type ~type IFreezable1 - (~'-freeze-without-meta! [~'x ~(with-meta 'out {:tag 'DataOutput})] - (write-id ~'out ~id) - ~@body))) - ;;;; Caching ; Experimental ;; Nb: don't use an auto initialValue; can cause thread-local state to @@ -1194,7 +1207,7 @@ (freezer APersistentMap (write-map out x)) (freezer PersistentList (write-counted-coll out id-list-0 id-list-sm id-list-md id-list-lg x)) (freezer LazySeq (write-uncounted-coll out id-seq-0 id-seq-sm id-seq-md id-seq-lg x)) -(freezer ISeq (write-coll out id-seq-0 id-seq-sm id-seq-md id-seq-lg x)) +(freezer* ISeq (write-coll out id-seq-0 id-seq-sm id-seq-md id-seq-lg x)) (freezer IRecord (let [class-name (.getName (class x)) ; Reflect class-name-ba (.getBytes class-name StandardCharsets/UTF_8) @@ -1246,7 +1259,7 @@ (.writeInt out (.getDays x))) nil) -(freezer Object +(freezer* Object (when-debug (println (str "freeze-fallback: " (type x)))) (if-let [ff *freeze-fallback*] (if-not (identical? ff :write-unfreezable) @@ -2015,10 +2028,10 @@ ;; Unprefixed [cust byte id][payload]: `(write-id ~out ~(coerce-custom-type-id custom-type-id)))] - `(extend-type ~type IFreezable1 - (~'-freeze-without-meta! [~x ~(with-meta out {:tag 'java.io.DataOutput})] - ~write-id-form - ~@body)))) + `(extend-type ~type + IFreezable + (~'-freezable? [~'x] true) + (~'-freeze-without-meta! [~x ~(with-meta out {:tag 'java.io.DataOutput})] ~write-id-form ~@body)))) (defmacro extend-thaw "Extends Nippy to support thawing of a custom type with given id: @@ -2150,6 +2163,41 @@ ;;;; Tools +(defn freezable? + "Alpha, subject to change. + Returns ∈ #{:native :maybe-clojure-reader :maybe-java-serializable nil}, + truthy iff Nippy seems to support freezing for the given argument. + + Important: result can be inaccurate in some cases. To be completely sure you + unfortunately need to try freeze then thaw the argument, and check the thawed + value. + + Options include: + `recursive?` - Check recursively into given arg? + `allow-clojure-reader?` - Allow freezing with Clojure's reader? + `allow-java-serializable?` - Allow freezing with Java's `Serializable`?" + + ([x] (freezable? x nil)) + ([x + {:as opts + :keys [recursive? allow-clojure-reader? allow-java-serializable?] + :or {recursive? true}}] + + (or + (and + (-freezable? x) + (and + (or + (not recursive?) (not (coll? x)) + (enc/revery? #(freezable? % opts) x))) + :native) + + (and allow-clojure-reader? (impl/seems-readable? x) :maybe-clojure-reader) + (and allow-java-serializable? (impl/seems-serializable? x) :maybe-java-serializable) + nil))) + +(comment (enc/qb 1e6 (freezable? "hello"))) ; 49.76 + (defn inspect-ba "Experimental, subject to change. Feedback welcome." ([ba ] (inspect-ba ba nil)) diff --git a/src/taoensso/nippy/impl.clj b/src/taoensso/nippy/impl.clj new file mode 100644 index 0000000..82be808 --- /dev/null +++ b/src/taoensso/nippy/impl.clj @@ -0,0 +1,58 @@ +(ns ^:no-doc taoensso.nippy.impl + "Private, implementation detail." + (:require + [clojure.string :as str] + [taoensso.encore :as enc])) + +;;;; Fallback type tests + +(defn- memoize-type-test + "Unfortunately the only ~reliable way we can tell if something's + really serializable/readable is to actually try a full roundtrip." + [test-fn] + (let [cache_ (enc/latom {})] ; { } + (fn [x] + (let [t (type x) + gensym? (re-find #"__\d+" (str t)) + cacheable? (not gensym?) ; Hack, but no obviously better solutions + test (fn [] (try (test-fn x) (catch Exception _ false)))] + + (if cacheable? + @(cache_ t #(if % % (delay (test)))) + (do (test))))))) + +(def seems-readable? (memoize-type-test (fn [x] (-> x enc/pr-edn enc/read-edn) true))) +(def seems-serializable? + (let [mtt + (memoize-type-test + (fn [x] + (let [class-name (.getName (class x)) + c (Class/forName class-name) ; Try 1st (fail fast) + bas (java.io.ByteArrayOutputStream.) + _ (.writeObject (java.io.ObjectOutputStream. bas) x) + ba (.toByteArray bas)] + + #_ + (cast c + (.readObject ; Unsafe + usu. unnecessary to check + (ObjectInputStream. + (ByteArrayInputStream. ba)))) + + true)))] + + (fn [x] + (if (instance? java.io.Serializable x) + (if (fn? x) + false ; Reports as true but is unreliable + (mtt x)) + false)))) + +(comment + (enc/qb 1e4 ; [2.52 2.53 521.34 0.63] + (seems-readable? "Hello world") ; Cacheable + (seems-serializable? "Hello world") ; Cacheable + (seems-readable? (fn [])) ; Uncacheable + (seems-serializable? (fn [])) ; Uncacheable + )) + +;;;; diff --git a/test/taoensso/nippy_tests.clj b/test/taoensso/nippy_tests.clj index fb02824..6bc31f7 100644 --- a/test/taoensso/nippy_tests.clj +++ b/test/taoensso/nippy_tests.clj @@ -354,6 +354,20 @@ "Don't try to preserve metadata on vars")]) +;;;; Freezable? + +(deftest _freezable? + [(is (= (nippy/freezable? :foo) :native)) + (is (= (nippy/freezable? [:a :b]) :native)) + (is (= (nippy/freezable? [:a (fn [])]) nil)) + (is (= (nippy/freezable? [:a (byte-array [1 2 3])]) :native)) + (is (= (nippy/freezable? [:a (java.util.Date.)]) :native)) + (is (= (nippy/freezable? (Exception.)) nil)) + (is (= (nippy/freezable? (MyType. "a" "b")) :native)) + (is (= (nippy/freezable? (MyRec. "a" "b")) :native)) + (is (= (nippy/freezable? (Exception.) {:allow-java-serializable? true}) + :maybe-java-serializable))]) + ;;;; thaw-xform (deftest _thaw-xform