From c2770c6e9977eeca333a52730b5e837d5887762c Mon Sep 17 00:00:00 2001 From: Peter Taoussanis Date: Thu, 11 Jan 2024 17:46:14 +0100 Subject: [PATCH] [mod] Refactor stress data BREAKING for the very small minority of folks that use `nippy/stress-data`. Changes: 1. Make `nippy/stress-data` a function It's unnecessarily wasteful to generate and store all this data when it's not being used in the common case. 2. Make data deterministic The stress data will now generally be stable by default between different versions of Nippy, etc. This will help support an upcoming test for stable serialized output. --- src/taoensso/nippy.clj | 157 +++++++++++++---------------- test/taoensso/nippy_benchmarks.clj | 40 ++++---- test/taoensso/nippy_tests.clj | 32 +++--- wiki/1 Getting-started.md | 123 ++++++++++------------ 4 files changed, 164 insertions(+), 188 deletions(-) diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index 0d4eb44..560b61a 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -1928,101 +1928,88 @@ (deftype StressType [my-data] Object (equals [a b] (= (.-my-data a) (.-my-data ^StressType b)))) -(def stress-data "Reference data used for tests & benchmarks" - {:nil nil - :true true - :false false - :boxed-false (Boolean. false) +(defn stress-data + "Returns map of reference stress data for use by tests, benchmarks, etc." + [{:keys [comparable?] :as opts}] + (let [rng (java.util.Random. 123456) ; Seeded for determinism + rand-nth (fn [coll] (nth coll (.nextInt rng (count coll)))) + all + {:nil nil + :true true + :false false + :false-boxed (Boolean. 
false) - :char \ಬ - :str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" - :str-long (apply str (range 1000)) - :kw :keyword - :kw-ns ::keyword - :kw-long (keyword - (apply str "kw" (range 1000)) - (apply str "kw" (range 1000))) + :char \ಬ + :str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" + :str-long (reduce str (range 1024)) + :kw :keyword + :kw-ns ::keyword + :sym 'foo + :sym-ns 'foo/bar + :kw-long (keyword (reduce str "_" (range 128)) (reduce str "_" (range 128))) + :sym-long (symbol (reduce str "_" (range 128)) (reduce str "_" (range 128))) - :sym 'foo - :sym-ns 'foo/bar - :sym-long (symbol - (apply str "sym" (range 1000)) - (apply str "sym" (range 1000))) + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :float (float 3.1415926535897932384626433832795) + :double (double 3.1415926535897932384626433832795) + :bigdec (bigdec 3.1415926535897932384626433832795) + :bigint (bigint 31415926535897932384626433832795) + :ratio 22/7 - :regex #"^(https?:)?//(www\?|\?)?" + :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 (list) ()))) + :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] + :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} + :map-entry (clojure.lang.MapEntry/create "key" "val") + :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} + :meta (with-meta {:a :A} {:metakey :metaval}) + :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"] {} #{} [] ()]]]]] - ;;; Try reflect real-world data: - :many-small-numbers (vec (range 200)) - :many-small-keywords (->> (java.util.Locale/getISOLanguages) - (mapv keyword)) - :many-small-strings (->> (java.util.Locale/getISOCountries) - (mapv #(.getDisplayCountry (java.util.Locale. "en" %)))) + :regex #"^(https?:)?//(www\?|\?)?" 
+ :sorted-set (sorted-set 1 2 3 4 5) + :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :lazy-seq-empty (map identity ()) + :lazy-seq (repeatedly 64 #(do nil)) + :queue (into clojure.lang.PersistentQueue/EMPTY [:a :b :c :d :e :f :g]) + :queue-empty clojure.lang.PersistentQueue/EMPTY - :queue (enc/queue [:a :b :c :d :e :f :g]) - :queue-empty (enc/queue) - :sorted-set (sorted-set 1 2 3 4 5) - :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :uuid (java.util.UUID. 7232453380187312026 -7067939076204274491) + :uri (java.net.URI. "https://clojure.org") + :defrecord (StressRecord. "data") + :deftype (StressType. "data") + :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :objects (object-array [1 "two" {:data "data"}]) - :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 '(())))) - :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] - :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) - :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} - :map-entry (clojure.lang.MapEntry. "key" "val") - :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} - :meta (with-meta {:a :A} {:metakey :metaval}) - :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5]]]]]] + :util-date (java.util.Date. 1577884455500) + :sql-date (java.sql.Date. 1577884455500) + :instant (enc/compile-if java.time.Instant (java.time.Instant/parse "2020-01-01T13:14:15.50Z") ::skip) + :duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) ::skip) + :period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) ::skip) - :lazy-seq (repeatedly 1000 rand) - :lazy-seq-empty (map identity '()) + :throwable (Throwable. "Msg") + :exception (Exception. 
"Msg") + :ex-info (ex-info "Msg" {:data "data"}) - :byte (byte 16) - :short (short 42) - :integer (int 3) - :long (long 3) - :bigint (bigint 31415926535897932384626433832795) + :many-longs (vec (repeatedly 512 #(rand-nth (range 10)))) + :many-doubles (vec (repeatedly 512 #(double (rand-nth (range 10))))) + :many-strings (vec (repeatedly 512 #(rand-nth ["foo" "bar" "baz" "qux"]))) + :many-keywords (vec (repeatedly 512 + #(keyword + (rand-nth ["foo" "bar" "baz" "qux" nil]) + (rand-nth ["foo" "bar" "baz" "qux" ]))))}] - :float (float 3.14) - :double (double 3.14) - :bigdec (bigdec 3.1415926535897932384626433832795) + (if comparable? + (dissoc all :bytes :objects :throwable :exception :ex-info :regex) + (do all)))) - :ratio 22/7 - :uri (java.net.URI. "https://clojure.org/reference/data_structures") - :uuid (java.util.UUID/randomUUID) - :util-date (java.util.Date.) - :sql-date (java.sql.Date/valueOf "2023-06-21") - - ;;; JVM 8+ - :time-instant (enc/compile-if java.time.Instant (java.time.Instant/now) nil) - :time-duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) nil) - :time-period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) nil) - - :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) - :objects (object-array [1 "two" {:data "data"}]) - - :stress-record (StressRecord. "data") - :stress-type (StressType. "data") - - ;; Serializable - :throwable (Throwable. "Yolo") - :exception (try (/ 1 0) (catch Exception e e)) - :ex-info (ex-info "ExInfo" {:data "data"})}) - -(def stress-data-comparable - "Reference data with stuff removed that breaks roundtrip equality." - (dissoc stress-data :bytes :objects :throwable :exception :ex-info :regex)) - -(comment (let [data stress-data-comparable] (= (thaw (freeze data)) data))) - -(def stress-data-benchable - "Reference data with stuff removed that breaks reader or other utils we'll - be benching with." 
- (dissoc stress-data-comparable - :queue :queue-empty - :stress-record :stress-type - :time-instant :time-duration :time-period - :byte :uri)) +(comment + [(= (stress-data {:comparable? true}) (stress-data {:comparable? true})) + (let [d (stress-data {:comparable? true})] (= (thaw (freeze d)) d))]) ;;;; Tools diff --git a/test/taoensso/nippy_benchmarks.clj b/test/taoensso/nippy_benchmarks.clj index 7fcbd29..f5d8378 100644 --- a/test/taoensso/nippy_benchmarks.clj +++ b/test/taoensso/nippy_benchmarks.clj @@ -27,30 +27,29 @@ ;;;; Benchable data -(def data - "Map of data suitable for benching, a subset of - `nippy/stress-data-comparable`." - (reduce-kv - (fn [m k v] - (try - (-> v freeze-reader thaw-reader) - (-> v freeze-fress thaw-fress) - m - (catch Throwable _ (dissoc m k)))) - nippy/stress-data-comparable - nippy/stress-data-comparable)) +(def bench-data + "Subset of stress data suitable for benching." + (let [sd (nippy/stress-data {:comparable? true})] + (reduce-kv + (fn [m k v] + (try + (-> v freeze-reader thaw-reader) + (-> v freeze-fress thaw-fress) + m + (catch Throwable _ (dissoc m k)))) + sd sd))) (comment (clojure.set/difference - (set (keys nippy/stress-data-comparable)) - (set (keys data)))) + (set (keys (nippy/stress-data {:comparable? true}))) + (set (keys bench-data)))) ;;;; (defn- bench1 [{:keys [laps warmup] :or {laps 1e4, warmup 25e3}} freezer thawer sizer] - (let [data-frozen (freezer data) - time-freeze (enc/bench laps {:warmup-laps warmup} (freezer data)) + (let [data-frozen (freezer bench-data) + time-freeze (enc/bench laps {:warmup-laps warmup} (freezer bench-data)) time-thaw (enc/bench laps {:warmup-laps warmup} (thawer data-frozen)) data-size (sizer data-frozen)] @@ -127,19 +126,18 @@ ;;;; Compressors -(let [_ (require '[taoensso.nippy :as nippy]) - data (nippy/freeze nippy/stress-data-comparable {:compressor nil})] +(let [bench-data (nippy/freeze (nippy/stress-data {:comparable? 
true}) {:compressor nil})] (defn bench1-compressor [{:keys [laps warmup] :or {laps 1e4, warmup 2e4}} compressor] - (let [data-compressed (compr/compress compressor data) - time-compress (enc/bench laps {:warmup-laps warmup} (compr/compress compressor data)) + (let [data-compressed (compr/compress compressor bench-data) + time-compress (enc/bench laps {:warmup-laps warmup} (compr/compress compressor bench-data)) time-decompress (enc/bench laps {:warmup-laps warmup} (compr/decompress compressor data-compressed))] {:round (+ time-compress time-decompress) :compress time-compress :decompress time-decompress - :ratio (enc/round2 (/ (count data-compressed) (count data)))})) + :ratio (enc/round2 (/ (count data-compressed) (count bench-data)))})) (defn bench-compressors [bench1-opts lzma-opts] (merge diff --git a/test/taoensso/nippy_tests.clj b/test/taoensso/nippy_tests.clj index 76b9d6c..bae418a 100644 --- a/test/taoensso/nippy_tests.clj +++ b/test/taoensso/nippy_tests.clj @@ -16,7 +16,7 @@ ;;;; Config, etc. -(def test-data nippy/stress-data-comparable) +(def test-data (nippy/stress-data {:comparable? true})) (def tc-gen-recursive-any-equatable (tc-gens/recursive-gen tc-gens/container-type tc-gens/any-equatable)) @@ -35,7 +35,13 @@ ;;;; Core (deftest _core - [(println (str "Clojure version: " *clojure-version*)) + (println (str "Clojure version: " *clojure-version*)) + [(is (= test-data test-data) "Test data is comparable") + (is (= + (nippy/stress-data {:comparable? true}) + (nippy/stress-data {:comparable? true})) + "Stress data is deterministic") + (is (= test-data ((comp thaw freeze) test-data))) (is (= test-data ((comp #(thaw % {:no-header? 
true :compressor nippy/lz4-compressor @@ -47,8 +53,9 @@ #(freeze % {:password [:salted "p"]})) test-data))) - (is (= (vec (:objects nippy/stress-data)) - ((comp vec thaw freeze) (:objects nippy/stress-data)))) + (let [d (nippy/stress-data {})] + [(is (= (vec (:bytes d)) ((comp vec thaw freeze) (:bytes d)))) + (is (= (vec (:objects d)) ((comp vec thaw freeze) (:objects d))))]) (is (= test-data ((comp #(thaw % {:compressor nippy/lzma2-compressor}) #(freeze % {:compressor nippy/lzma2-compressor})) @@ -141,18 +148,15 @@ ;;;; Caching (deftest _caching - (let [stress [nippy/stress-data-comparable - nippy/stress-data-comparable - nippy/stress-data-comparable - nippy/stress-data-comparable] - cached (mapv nippy/cache stress) - cached (mapv nippy/cache stress) ; <=1 wrap auto-enforced + (let [test-data* [test-data test-data test-data test-data] ; Data with duplicates + cached (mapv nippy/cache test-data*) + cached (mapv nippy/cache test-data*) ; <=1 wrap auto-enforced ] - [(is (= stress (thaw (freeze stress {:compressor nil})))) - (is (= stress (thaw (freeze cached {:compressor nil})))) - (let [size-stress (count (freeze stress {:compressor nil})) - size-cached (count (freeze cached {:compressor nil}))] + [(is (= test-data* (thaw (freeze test-data* {:compressor nil})))) + (is (= test-data* (thaw (freeze cached {:compressor nil})))) + (let [size-stress (count (freeze test-data* {:compressor nil})) + size-cached (count (freeze cached {:compressor nil}))] (is (>= size-stress (* 3 size-cached))) (is (< size-stress (* 4 size-cached))))])) diff --git a/wiki/1 Getting-started.md b/wiki/1 Getting-started.md index 83531ba..7f6b238 100644 --- a/wiki/1 Getting-started.md +++ b/wiki/1 Getting-started.md @@ -15,96 +15,83 @@ And setup your namespace imports: # De/serializing -As an example of what it can do, let's take a look at Nippy's own reference stress data: +As an example of what it can do, let's take a look at Nippy's own reference [stress 
data](https://taoensso.github.io/nippy/taoensso.nippy.html#var-stress-data): ```clojure -nippy/stress-data -=> {:nil nil :true true :false false - :boxed-false (Boolean. false) + :false-boxed (Boolean. false) :char \ಬ :str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" - :str-long (apply str (range 1000)) + :str-long (reduce str (range 1024)) :kw :keyword :kw-ns ::keyword - :kw-long (keyword - (apply str "kw" (range 1000)) - (apply str "kw" (range 1000))) - :sym 'foo :sym-ns 'foo/bar - :sym-long (symbol - (apply str "sym" (range 1000)) - (apply str "sym" (range 1000))) + :kw-long (keyword (reduce str "_" (range 128)) (reduce str "_" (range 128))) + :sym-long (symbol (reduce str "_" (range 128)) (reduce str "_" (range 128))) - :regex #"^(https?:)?//(www\?|\?)?" + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :float (float 3.1415926535897932384626433832795) + :double (double 3.1415926535897932384626433832795) + :bigdec (bigdec 3.1415926535897932384626433832795) + :bigint (bigint 31415926535897932384626433832795) + :ratio 22/7 - ;;; Try reflect real-world data: - :many-small-numbers (vec (range 200)) - :many-small-keywords (->> (java.util.Locale/getISOLanguages) - (mapv keyword)) - :many-small-strings (->> (java.util.Locale/getISOCountries) - (mapv #(.getDisplayCountry (java.util.Locale. 
"en" %)))) + :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 (list) ()))) + :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] + :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} + :map-entry (clojure.lang.MapEntry/create "key" "val") + :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} + :meta (with-meta {:a :A} {:metakey :metaval}) + :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"] {} #{} [] ()]]]]] - :queue (enc/queue [:a :b :c :d :e :f :g]) - :queue-empty (enc/queue) - :sorted-set (sorted-set 1 2 3 4 5) - :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :regex #"^(https?:)?//(www\?|\?)?" + :sorted-set (sorted-set 1 2 3 4 5) + :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :lazy-seq-empty (map identity ()) + :lazy-seq (repeatedly 64 #(do nil)) + :queue (into clojure.lang.PersistentQueue/EMPTY [:a :b :c :d :e :f :g]) + :queue-empty clojure.lang.PersistentQueue/EMPTY - :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 '(())))) - :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] - :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) - :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} - :map-entry (clojure.lang.MapEntry. "key" "val") - :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} - :meta (with-meta {:a :A} {:metakey :metaval}) - :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5]]]]]] + :uuid (java.util.UUID. 7232453380187312026 -7067939076204274491) + :uri (java.net.URI. "https://clojure.org") + :defrecord (nippy/StressRecord. "data") + :deftype (nippy/StressType. "data") + :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :objects (object-array [1 "two" {:data "data"}]) - :lazy-seq (repeatedly 1000 rand) - :lazy-seq-empty (map identity '()) + :util-date (java.util.Date. 1577884455500) + :sql-date (java.sql.Date. 
1577884455500) + :instant (java.time.Instant/parse "2020-01-01T13:14:15.50Z") + :duration (java.time.Duration/ofSeconds 100 100) + :period (java.time.Period/of 1 1 1) - :byte (byte 16) - :short (short 42) - :integer (int 3) - :long (long 3) - :bigint (bigint 31415926535897932384626433832795) + :throwable (Throwable. "Msg") + :exception (Exception. "Msg") + :ex-info (ex-info "Msg" {:data "data"}) - :float (float 3.14) - :double (double 3.14) - :bigdec (bigdec 3.1415926535897932384626433832795) - - :ratio 22/7 - :uri (java.net.URI. "https://clojure.org/reference/data_structures") - :uuid (java.util.UUID/randomUUID) - :util-date (java.util.Date.) - :sql-date (java.sql.Date/valueOf "2023-06-21") - - ;;; JVM 8+ - :time-instant (enc/compile-if java.time.Instant (java.time.Instant/now) nil) - :time-duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) nil) - :time-period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) nil) - - :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) - :objects (object-array [1 "two" {:data "data"}]) - - :stress-record (StressRecord. "data") - :stress-type (StressType. "data") - - ;; Serializable - :throwable (Throwable. "Yolo") - :exception (try (/ 1 0) (catch Exception e e)) - :ex-info (ex-info "ExInfo" {:data "data"})} + :many-longs (vec (repeatedly 512 #(rand-nth (range 10)))) + :many-doubles (vec (repeatedly 512 #(double (rand-nth (range 10))))) + :many-strings (vec (repeatedly 512 #(rand-nth ["foo" "bar" "baz" "qux"]))) + :many-keywords (vec (repeatedly 512 + #(keyword + (rand-nth ["foo" "bar" "baz" "qux" nil]) + (rand-nth ["foo" "bar" "baz" "qux" ]))))} ``` Serialize it: ```clojure -(def frozen-stress-data (nippy/freeze nippy/stress-data)) +(def frozen-stress-data (nippy/freeze (nippy/stress-data {}))) => # ``` @@ -130,8 +117,8 @@ Nippy also gives you **dead simple data encryption**. 
Add a single option to your usual freeze/thaw calls like so: ```clojure -(nippy/freeze nippy/stress-data {:password [:salted "my-password"]}) ; Encrypt -(nippy/thaw {:password [:salted "my-password"]}) ; Decrypt +(nippy/freeze (nippy/stress-data {}) {:password [:salted "my-password"]}) ; Encrypt +(nippy/thaw encrypted-data {:password [:salted "my-password"]}) ; Decrypt ``` There's two default forms of encryption on offer: `:salted` and `:cached`. Each of these makes carefully-chosen trade-offs and is suited to one of two common use cases. See [`aes128-encryptor`](https://taoensso.github.io/nippy/taoensso.nippy.html#var-aes128-encryptor) for a detailed explanation of why/when you'd want one or the other.