diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index ab72d69..c56f1e3 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -4,7 +4,7 @@ (:require [clojure.string :as str] [clojure.java.io :as jio] - [taoensso.encore :as enc :refer [cond*]] + [taoensso.encore :as enc] [taoensso.nippy [utils :as utils] [compression :as compression] @@ -35,6 +35,7 @@ ;;;; TODO ;; - Ensure all error responses are entirely under {:nippy/_ <...>} key? +;;; Would be a nice change, but breaking. ;; - Performance would benefit from ^:static support / direct linking / etc. ;; - Ability to compile out metadata support? ;; - Auto cache keywords? When map keys? Configurable? Per-map @@ -111,14 +112,13 @@ 47 :reader-sm 51 :reader-md 52 :reader-lg - 5 :reader-lg2 ; == :reader-lg, used only for back-compatible thawing - 75 :serializable-sm - 76 :serializable-md + 75 :serializable-q-sm ; Quarantined + 76 :serializable-q-md ; '' 48 :record-sm 49 :record-md - 80 :record-lg ; Used only for back-compatible thawing + 80 :record-lg ; Unrealistic, future removal candidate 81 :type @@ -133,10 +133,12 @@ 13 :str-lg 106 :kw-sm - 14 :kw-lg + 77 :kw-md + 14 :kw-lg ; Unrealistic, future removal candidate 56 :sym-sm - 57 :sym-lg + 78 :sym-md + 57 :sym-lg ; Unrealistic, future removal candidate 58 :regex 71 :uri @@ -214,17 +216,18 @@ 68 :cached-md ;;; DEPRECATED (only support thawing) - 1 :reader-depr1 ; v0.9.2+ for +64k support + 5 :reader-lg2 ; == :reader-lg, used only for back-compatible thawing + 1 :reader-depr1 ; v0.9.2 for +64k support 11 :str-depr1 ; '' - 22 :map-depr1 ; v0.9.0+ for more efficient thaw - 12 :kw-depr1 ; v2.0.0-alpha5+ for str consistecy - 27 :map-depr2 ; v2.11+ for count/2 + 22 :map-depr1 ; v0.9.0 for more efficient thaw + 12 :kw-depr1 ; v2.0.0-alpha5 for str consistecy + 27 :map-depr2 ; v2.11 for count/2 29 :sorted-map-depr1 ; '' - 4 :boolean-depr1 ; v2.12+ for switch to true/false ids + 4 :boolean-depr1 ; v2.12 for switch to true/false ids - 46 :serializable-sm-depr1 ; v2.14.1+ for quarantined object bas - 50 :serializable-md-depr1 ; '' - 6 :serializable-lg-depr1 ; '' + 46 :serializable-uq-sm ; Unquarantined + 50 :serializable-uq-md ; '' + 6 :serializable-uq-lg ; ''; unrealistic, future removal candidate }) (comment @@ -446,38 +449,6 @@ ;;;; Freezing -#_(do - (defmacro write-id [out id] `(.writeByte ~out ~id)) - - (defmacro ^:private sm-count? [n] `(<= ~n 255)) #_(- Byte/MAX_VALUE Byte/MIN_VALUE) - (defmacro ^:private md-count? [n] `(<= ~n 65535)) #_(- Short/MAX_VALUE Short/MIN_VALUE) - - (defmacro ^:private write-sm-count [out n] - `(if (<= ~n 127) - (.writeByte ~out ~n) - (.writeByte ~out (unchecked-subtract 127 ~n)))) - - (defmacro ^:private write-md-count [out n] - `(if (<= ~n 32767) - (.writeShort ~out ~n) - (.writeShort ~out (unchecked-subtract 32767 ~n)))) - - (defmacro ^:private write-lg-count [out n] `(.writeInt ~out ~n)) - - (defmacro ^:private read-sm-count [in] - `(let [n# (.readByte ~in)] - (if (pos? n#) - n# - (unchecked-subtract 127 n#)))) - - (defmacro ^:private read-md-count [in] - `(let [n# (.readShort ~in)] - (if (pos? n#) - n# - (unchecked-subtract 32767 n#)))) - - (defmacro ^:private read-lg-count [in] `(.readInt ~in))) - (do (defmacro write-id [out id] `(.writeByte ~out ~id)) @@ -504,13 +475,8 @@ (-freeze-without-meta! m data-output))) (-freeze-without-meta! x data-output)) - nil - (-freeze-with-meta! [x data-output] - (-freeze-without-meta! x data-output)) - - Object - (-freeze-with-meta! [x data-output] - (-freeze-without-meta! x data-output))) + nil (-freeze-with-meta! [x data-output] (-freeze-without-meta! x data-output)) + Object (-freeze-with-meta! [x data-output] (-freeze-without-meta! x data-output))) (defn- write-bytes-sm [^DataOutput out ^bytes ba] (let [len (alength ba)] @@ -534,7 +500,7 @@ (if (zero? len) (write-id out id-bytes-0) (do - (cond* + (enc/cond (sm-count? len) (do (write-id out id-bytes-sm) (write-sm-count out len)) @@ -549,11 +515,6 @@ (.write out ba 0 len))))) -;; (defn- str->bytes [s] (.getBytes s charset)) -(defn- write-utf8-sm [out ^String s] (write-bytes-sm out (.getBytes s charset))) -(defn- write-utf8-md [out ^String s] (write-bytes-md out (.getBytes s charset))) -(defn- write-utf8-lg [out ^String s] (write-bytes-lg out (.getBytes s charset))) -(defn- write-utf8 [out ^String s] (write-bytes out (.getBytes s charset))) (defn- write-biginteger [out ^BigInteger n] (write-bytes-lg out (.toByteArray n))) (defn- write-str [^DataOutput out ^String s] @@ -561,7 +522,7 @@ (write-id out id-str-0) (let [ba (.getBytes s charset) len (alength ba)] - (cond* + (enc/cond (sm-count? len) (do (write-id out id-str-sm) (write-sm-count out len)) @@ -580,14 +541,20 @@ (let [s (if-let [ns (namespace kw)] (str ns "/" (name kw)) (name kw)) ba (.getBytes s charset) len (alength ba)] - (cond* + (enc/cond (sm-count? len) (do (write-id out id-kw-sm) (write-sm-count out len)) - :else ; Rare! - (do (write-id out id-kw-lg) - (write-lg-count out len))) + (md-count? len) + (do (write-id out id-kw-md) + (write-lg-count out len)) + + ;; :else ; Unrealistic + ;; (do (write-id out id-kw-lg) + ;; (write-lg-count out len)) + + :else (throw (ex-info "Keyword too long" {:full-name s}))) (.write out ba 0 len))) @@ -595,24 +562,30 @@ (let [s (if-let [ns (namespace s)] (str ns "/" (name s)) (name s)) ba (.getBytes s charset) len (alength ba)] - (cond* + (enc/cond (sm-count? len) (do (write-id out id-sym-sm) (write-sm-count out len)) - :else ; Rare! - (do (write-id out id-sym-lg) - (write-lg-count out len))) + (md-count? len) + (do (write-id out id-sym-md) + (write-lg-count out len)) + + ;; :else ; Unrealistic + ;; (do (write-id out id-sym-lg) + ;; (write-lg-count out len)) + + :else (throw (ex-info "Symbol too long" {:full-name s}))) (.write out ba 0 len))) (defn- write-long [^DataOutput out ^long n] - (cond* + (enc/cond (zero? n) (write-id out id-long-zero) (> n 0) - (cond* + (enc/cond (<= n 127 #_Byte/MAX_VALUE) (do (write-id out id-long-sm) (.writeByte out n)) @@ -630,7 +603,7 @@ (.writeLong out n))) :else - (cond* + (enc/cond (>= n -128 #_Byte/MIN_VALUE) (do (write-id out id-long-sm) (.writeByte out n)) @@ -655,9 +628,9 @@ (if (zero? cnt) (write-id out id-vec-0) (do - (cond* + (enc/cond (sm-count? cnt) - (cond* + (enc/cond (== cnt 2) (write-id out id-vec-2) (== cnt 3) (write-id out id-vec-3) :else @@ -690,7 +663,7 @@ (if (zero? cnt) (write-id out id-empty) (do - (cond* + (enc/cond (sm-count? cnt) (do (write-id out id-sm) (write-sm-count out cnt)) @@ -723,7 +696,7 @@ (if (zero? cnt) (write-id out id-empty) (do - (cond* + (enc/cond (sm-count? cnt) (do (write-id out id-sm) (write-sm-count out cnt)) @@ -759,7 +732,7 @@ (if (zero? cnt) (write-id out id-empty) (do - (cond* + (enc/cond (sm-count? cnt) (do (write-id out id-sm) (write-sm-count out cnt)) @@ -792,7 +765,7 @@ (if (zero? cnt) (write-id out id-map-0) (do - (cond* + (enc/cond (sm-count? cnt) (do (write-id out id-map-sm) (write-sm-count out cnt)) @@ -818,7 +791,7 @@ (if (zero? cnt) (write-id out id-set-0) (do - (cond* + (enc/cond (sm-count? cnt) (do (write-id out id-set-sm) (write-sm-count out cnt)) @@ -844,16 +817,23 @@ (let [class-name-ba (.getBytes class-name charset) len (alength class-name-ba)] - (cond* + (enc/cond (sm-count? len) - (do (write-id out id-serializable-sm) + (do (write-id out id-serializable-q-sm) (write-bytes-sm out class-name-ba)) - ;; Note no :serializable-lg freeze support (unrealistic name length) + (md-count? len) + (do (write-id out id-serializable-q-md) + (write-bytes-md out class-name-ba)) + + ;; :else ; Unrealistic + ;; (do (write-id out id-serializable-q-lg) + ;; (write-bytes-md out class-name-ba)) :else - (do (write-id out id-serializable-md) - (write-bytes-md out class-name-ba))) + (throw + (ex-info "Serializable class name too long" + {:class-name class-name}))) ;; Legacy: write object directly to out. ;; (.writeObject (ObjectOutputStream. out) x) @@ -869,7 +849,7 @@ (let [edn (enc/pr-edn x) edn-ba (.getBytes ^String edn charset) len (alength edn-ba)] - (cond* + (enc/cond (sm-count? len) (do (write-id out id-reader-sm) (write-bytes-sm out edn-ba)) @@ -985,7 +965,7 @@ first-occurance? (nil? ?idx)] - (cond* + (enc/cond (sm-count? idx) (case (int idx) 0 (do (write-id out id-cached-0) (when first-occurance? (-freeze-with-meta! x-val out))) @@ -1088,27 +1068,34 @@ (freezer LazySeq (write-uncounted-coll out id-seq-0 id-seq-sm id-seq-md id-seq-lg x)) (freezer ISeq (write-coll out id-seq-0 id-seq-sm id-seq-md id-seq-lg x)) (freezer IRecord - (let [cname (.getName (class x)) ; Reflect - cname-ba (.getBytes cname charset) - len (alength cname-ba)] - (cond* + (let [class-name (.getName (class x)) ; Reflect + class-name-ba (.getBytes class-name charset) + len (alength class-name-ba)] + (enc/cond (sm-count? len) (do (write-id out id-record-sm) - (write-bytes-sm out cname-ba)) + (write-bytes-sm out class-name-ba)) - ;; Note no :record-lg freeze support (unrealistic) + (md-count? len) + (do (write-id out id-record-md) + (write-bytes-md out class-name-ba)) + + ;; :else ; Unrealistic + ;; (do (write-id out id-record-lg) + ;; (write-bytes-md out class-name-ba)) :else - (do (write-id out id-record-md) - (write-bytes-md out cname-ba))) + (throw + (ex-info "Record class name too long" + {:class-name class-name}))) (-freeze-without-meta! (into {} x) out))) (freezer IType - (let [aclass (class x) - cname (.getName aclass)] + (let [aclass (class x) + class-name (.getName aclass)] (write-id out id-type) - (write-str out cname) + (write-str out class-name) (let [basis-method (.getMethod aclass "getBasis" nil) basis (.invoke basis-method nil nil)] (-run! @@ -1212,14 +1199,14 @@ (let [;; Intentionally undocumented: no-header? (or (get opts :no-header?) - (get opts :skip-header?)) + (get opts :skip-header?)) encryptor (when password encryptor) baos (ByteArrayOutputStream. 64) dos (DataOutputStream. baos)] (if (and (nil? compressor) (nil? encryptor)) - (do ; Optimized case - (when-not no-header? ; Avoid `wrap-header`'s array copy: + (do ; Optimized case + (when-not no-header? ; Avoid `wrap-header`'s array copy: (let [head-ba (get-head-ba {:compressor-id nil :encryptor-id nil})] (.write dos head-ba 0 4))) (with-cache (-freeze-with-meta! x dos)) @@ -1240,8 +1227,8 @@ (when (> (alength ba) 8192) lz4-compressor))) (if (fn? compressor) - (compressor ba) ; Assume compressor selector fn - compressor ; Assume compressor + (compressor ba) ; Assume compressor selector fn + compressor ; Assume compressor )) ba (if compressor (compress compressor ba) ba) @@ -1277,10 +1264,10 @@ id-bytes-md (read-bytes in (read-md-count in)) id-bytes-lg (read-bytes in (read-lg-count in))))) -(defn- read-utf8-sm [^DataInput in] (String. ^bytes (read-bytes in (read-sm-count in)) charset)) -(defn- read-utf8-md [^DataInput in] (String. ^bytes (read-bytes in (read-md-count in)) charset)) -(defn- read-utf8-lg [^DataInput in] (String. ^bytes (read-bytes in (read-lg-count in)) charset)) -(defn- read-utf8 +(defn- read-str-sm [^DataInput in] (String. ^bytes (read-bytes in (read-sm-count in)) charset)) +(defn- read-str-md [^DataInput in] (String. ^bytes (read-bytes in (read-md-count in)) charset)) +(defn- read-str-lg [^DataInput in] (String. ^bytes (read-bytes in (read-lg-count in)) charset)) +(defn- read-str ([^DataInput in len] (String. ^bytes (read-bytes in len) charset)) ([^DataInput in ] (enc/case-eval (.readByte in) @@ -1364,7 +1351,10 @@ {:class-name class-name :content nil :serializable-whitelist-pass? true}}))) -(defn- read-serializable [^DataInput in class-name] +(defn- read-serializable-q + "Quarantined => object serialized to ba, then ba written to output stream. + Has length prefix => can skip `readObject` in event of whitelist failure." + [^DataInput in class-name] (let [quarantined-ba (read-bytes in)] (if (serializable-whitelisted? class-name) (read-object (DataInputStream. (ByteArrayInputStream. quarantined-ba)) class-name) @@ -1373,7 +1363,10 @@ {:class-name class-name :content quarantined-ba :serializable-whitelist-pass? false}}))) -(defn- read-serializable-depr1 [^DataInput in class-name] +(defn- read-serializable-uq + "Unquarantined => object serialized directly to output stream. + No length prefix => cannot skip `readObject` in event of whitelist failure." + [^DataInput in class-name] (if (serializable-whitelisted? class-name) (read-object in class-name) (throw ; No way to skip bytes, so best we can do is throw @@ -1427,15 +1420,20 @@ (try (enc/case-eval type-id - id-reader-sm (read-edn (read-utf8 in (read-sm-count in))) - id-reader-md (read-edn (read-utf8 in (read-md-count in))) - id-reader-lg (read-edn (read-utf8 in (read-lg-count in))) - id-reader-lg2 (read-edn (read-utf8 in (read-lg-count in))) - id-serializable-sm (read-serializable in (read-utf8 in (read-sm-count in))) - id-serializable-md (read-serializable in (read-utf8 in (read-md-count in))) - id-record-sm (read-record in (read-utf8 in (read-sm-count in))) - id-record-md (read-record in (read-utf8 in (read-md-count in))) - id-record-lg (read-record in (read-utf8 in (read-lg-count in))) + id-reader-sm (read-edn (read-str in (read-sm-count in))) + id-reader-md (read-edn (read-str in (read-md-count in))) + id-reader-lg (read-edn (read-str in (read-lg-count in))) + id-reader-lg2 (read-edn (read-str in (read-lg-count in))) + id-record-sm (read-record in (read-str in (read-sm-count in))) + id-record-md (read-record in (read-str in (read-md-count in))) + id-record-lg (read-record in (read-str in (read-lg-count in))) + + id-serializable-q-sm (read-serializable-q in (read-str in (read-sm-count in))) + id-serializable-q-md (read-serializable-q in (read-str in (read-md-count in))) + + id-serializable-uq-sm (read-serializable-uq in (read-str in (read-sm-count in))) + id-serializable-uq-md (read-serializable-uq in (read-str in (read-md-count in))) + id-serializable-uq-lg (read-serializable-uq in (read-str in (read-lg-count in))) id-type (read-type in (thaw-from-in! in)) @@ -1465,13 +1463,17 @@ id-objects-lg (read-objects (object-array (read-lg-count in)) in) id-str-0 "" - id-str-sm (read-utf8 in (read-sm-count in)) - id-str-md (read-utf8 in (read-md-count in)) - id-str-lg (read-utf8 in (read-lg-count in)) - id-kw-sm (keyword (read-utf8 in (read-sm-count in))) - id-kw-lg (keyword (read-utf8 in (read-lg-count in))) - id-sym-sm (symbol (read-utf8 in (read-sm-count in))) - id-sym-lg (symbol (read-utf8 in (read-lg-count in))) + id-str-sm (read-str in (read-sm-count in)) + id-str-md (read-str in (read-md-count in)) + id-str-lg (read-str in (read-lg-count in)) + + id-kw-sm (keyword (read-str in (read-sm-count in))) + id-kw-md (keyword (read-str in (read-md-count in))) + id-kw-lg (keyword (read-str in (read-lg-count in))) + + id-sym-sm (symbol (read-str in (read-sm-count in))) + id-sym-md (symbol (read-str in (read-md-count in))) + id-sym-lg (symbol (read-str in (read-lg-count in))) id-regex (re-pattern (thaw-from-in! in)) id-vec-0 [] @@ -1540,10 +1542,6 @@ id-map-depr1 (apply hash-map (enc/repeatedly-into [] (* 2 (.readInt in)) (fn [] (thaw-from-in! in)))) - - id-serializable-sm-depr1 (read-serializable-depr1 in (read-utf8 in (read-sm-count in))) - id-serializable-md-depr1 (read-serializable-depr1 in (read-utf8 in (read-md-count in))) - id-serializable-lg-depr1 (read-serializable-depr1 in (read-utf8 in (read-lg-count in))) ;; ----------------------------------------------------------------- id-prefixed-custom (read-custom! in :prefixed (.readShort in)) @@ -1883,7 +1881,7 @@ (when (enc/bytes? ba) (let [[first2bytes nextbytes] (enc/ba-split ba 2) ?known-wrapper - (cond* + (enc/cond (enc/ba= first2bytes (.getBytes "\u0000<" charset)) :carmine/bin (enc/ba= first2bytes (.getBytes "\u0000>" charset)) :carmine/clj) diff --git a/src/taoensso/nippy/utils.clj b/src/taoensso/nippy/utils.clj index 17ccd70..b1a8d23 100644 --- a/src/taoensso/nippy/utils.clj +++ b/src/taoensso/nippy/utils.clj @@ -22,19 +22,22 @@ (def readable? (memoize-type-test (fn [x] (-> x enc/pr-edn enc/read-edn) true))) (def serializable? - (let [test-fn - (fn [x] - (let [class-name (.getName (class x)) - class ^Class (Class/forName class-name) ; Try 1st (fail fast) - bas (ByteArrayOutputStream.) - _ (.writeObject (ObjectOutputStream. bas) x) - ba (.toByteArray bas) - object (.readObject (ObjectInputStream. - (ByteArrayInputStream. ba)))] - (cast class object) - true)) + (let [mtt + (memoize-type-test + (fn [x] + (let [class-name (.getName (class x)) + c (Class/forName class-name) ; Try 1st (fail fast) + bas (ByteArrayOutputStream.) + _ (.writeObject (ObjectOutputStream. bas) x) + ba (.toByteArray bas)] - mtt (memoize-type-test test-fn)] + #_ + (cast c + (.readObject ; Unsafe + usu. unnecessary to check + (ObjectInputStream. + (ByteArrayInputStream. ba)))) + + true)))] (fn [x] (if (instance? Serializable x) @@ -44,12 +47,12 @@ false)))) (comment - (enc/qb 10000 + (enc/qb 1e4 (readable? "Hello world") ; Cacheable (serializable? "Hello world") ; Cacheable - (readable? (fn [])) ; Uncacheable - (serializable? (fn [])) ; Uncacheable - )) + (readable? (fn [])) ; Uncacheable + (serializable? (fn [])) ; Uncacheable + )) ; [5.65 5.88 1129.46 1.4] ;;;;