diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index 0d4eb44..560b61a 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -1928,101 +1928,88 @@ (deftype StressType [my-data] Object (equals [a b] (= (.-my-data a) (.-my-data ^StressType b)))) -(def stress-data "Reference data used for tests & benchmarks" - {:nil nil - :true true - :false false - :boxed-false (Boolean. false) +(defn stress-data + "Returns map of reference stress data for use by tests, benchmarks, etc." + [{:keys [comparable?] :as opts}] + (let [rng (java.util.Random. 123456) ; Seeded for determinism + rand-nth (fn [coll] (nth coll (.nextInt rng (count coll)))) + all + {:nil nil + :true true + :false false + :false-boxed (Boolean. false) - :char \ಬ - :str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" - :str-long (apply str (range 1000)) - :kw :keyword - :kw-ns ::keyword - :kw-long (keyword - (apply str "kw" (range 1000)) - (apply str "kw" (range 1000))) + :char \ಬ + :str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" + :str-long (reduce str (range 1024)) + :kw :keyword + :kw-ns ::keyword + :sym 'foo + :sym-ns 'foo/bar + :kw-long (keyword (reduce str "_" (range 128)) (reduce str "_" (range 128))) + :sym-long (symbol (reduce str "_" (range 128)) (reduce str "_" (range 128))) - :sym 'foo - :sym-ns 'foo/bar - :sym-long (symbol - (apply str "sym" (range 1000)) - (apply str "sym" (range 1000))) + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :float (float 3.1415926535897932384626433832795) + :double (double 3.1415926535897932384626433832795) + :bigdec (bigdec 3.1415926535897932384626433832795) + :bigint (bigint 31415926535897932384626433832795) + :ratio 22/7 - :regex #"^(https?:)?//(www\?|\?)?" 
+ :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 (list) ()))) + :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] + :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} + :map-entry (clojure.lang.MapEntry/create "key" "val") + :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} + :meta (with-meta {:a :A} {:metakey :metaval}) + :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"] {} #{} [] ()]]]]] - ;;; Try reflect real-world data: - :many-small-numbers (vec (range 200)) - :many-small-keywords (->> (java.util.Locale/getISOLanguages) - (mapv keyword)) - :many-small-strings (->> (java.util.Locale/getISOCountries) - (mapv #(.getDisplayCountry (java.util.Locale. "en" %)))) + :regex #"^(https?:)?//(www\?|\?)?" + :sorted-set (sorted-set 1 2 3 4 5) + :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :lazy-seq-empty (map identity ()) + :lazy-seq (repeatedly 64 #(do nil)) + :queue (into clojure.lang.PersistentQueue/EMPTY [:a :b :c :d :e :f :g]) + :queue-empty clojure.lang.PersistentQueue/EMPTY - :queue (enc/queue [:a :b :c :d :e :f :g]) - :queue-empty (enc/queue) - :sorted-set (sorted-set 1 2 3 4 5) - :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :uuid (java.util.UUID. 7232453380187312026 -7067939076204274491) + :uri (java.net.URI. "https://clojure.org") + :defrecord (StressRecord. "data") + :deftype (StressType. "data") + :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :objects (object-array [1 "two" {:data "data"}]) - :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 '(())))) - :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] - :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) - :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} - :map-entry (clojure.lang.MapEntry. 
"key" "val") - :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} - :meta (with-meta {:a :A} {:metakey :metaval}) - :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5]]]]]] + :util-date (java.util.Date. 1577884455500) + :sql-date (java.sql.Date. 1577884455500) + :instant (enc/compile-if java.time.Instant (java.time.Instant/parse "2020-01-01T13:14:15.50Z") ::skip) + :duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) ::skip) + :period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) ::skip) - :lazy-seq (repeatedly 1000 rand) - :lazy-seq-empty (map identity '()) + :throwable (Throwable. "Msg") + :exception (Exception. "Msg") + :ex-info (ex-info "Msg" {:data "data"}) - :byte (byte 16) - :short (short 42) - :integer (int 3) - :long (long 3) - :bigint (bigint 31415926535897932384626433832795) + :many-longs (vec (repeatedly 512 #(rand-nth (range 10)))) + :many-doubles (vec (repeatedly 512 #(double (rand-nth (range 10))))) + :many-strings (vec (repeatedly 512 #(rand-nth ["foo" "bar" "baz" "qux"]))) + :many-keywords (vec (repeatedly 512 + #(keyword + (rand-nth ["foo" "bar" "baz" "qux" nil]) + (rand-nth ["foo" "bar" "baz" "qux" ]))))}] - :float (float 3.14) - :double (double 3.14) - :bigdec (bigdec 3.1415926535897932384626433832795) + (if comparable? + (dissoc all :bytes :objects :throwable :exception :ex-info :regex) + (do all)))) - :ratio 22/7 - :uri (java.net.URI. "https://clojure.org/reference/data_structures") - :uuid (java.util.UUID/randomUUID) - :util-date (java.util.Date.) 
- :sql-date (java.sql.Date/valueOf "2023-06-21") - - ;;; JVM 8+ - :time-instant (enc/compile-if java.time.Instant (java.time.Instant/now) nil) - :time-duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) nil) - :time-period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) nil) - - :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) - :objects (object-array [1 "two" {:data "data"}]) - - :stress-record (StressRecord. "data") - :stress-type (StressType. "data") - - ;; Serializable - :throwable (Throwable. "Yolo") - :exception (try (/ 1 0) (catch Exception e e)) - :ex-info (ex-info "ExInfo" {:data "data"})}) - -(def stress-data-comparable - "Reference data with stuff removed that breaks roundtrip equality." - (dissoc stress-data :bytes :objects :throwable :exception :ex-info :regex)) - -(comment (let [data stress-data-comparable] (= (thaw (freeze data)) data))) - -(def stress-data-benchable - "Reference data with stuff removed that breaks reader or other utils we'll - be benching with." - (dissoc stress-data-comparable - :queue :queue-empty - :stress-record :stress-type - :time-instant :time-duration :time-period - :byte :uri)) +(comment + [(= (stress-data {:comparable? true}) (stress-data {:comparable? true})) + (let [d (stress-data {:comparable? true})] (= (thaw (freeze d)) d))]) ;;;; Tools diff --git a/test/taoensso/nippy_benchmarks.clj b/test/taoensso/nippy_benchmarks.clj index 7fcbd29..f5d8378 100644 --- a/test/taoensso/nippy_benchmarks.clj +++ b/test/taoensso/nippy_benchmarks.clj @@ -27,30 +27,29 @@ ;;;; Benchable data -(def data - "Map of data suitable for benching, a subset of - `nippy/stress-data-comparable`." - (reduce-kv - (fn [m k v] - (try - (-> v freeze-reader thaw-reader) - (-> v freeze-fress thaw-fress) - m - (catch Throwable _ (dissoc m k)))) - nippy/stress-data-comparable - nippy/stress-data-comparable)) +(def bench-data + "Subset of stress data suitable for benching." + (let [sd (nippy/stress-data {:comparable? 
true})] + (reduce-kv + (fn [m k v] + (try + (-> v freeze-reader thaw-reader) + (-> v freeze-fress thaw-fress) + m + (catch Throwable _ (dissoc m k)))) + sd sd))) (comment (clojure.set/difference - (set (keys nippy/stress-data-comparable)) - (set (keys data)))) + (set (keys (nippy/stress-data {:comparable? true}))) + (set (keys bench-data)))) ;;;; (defn- bench1 [{:keys [laps warmup] :or {laps 1e4, warmup 25e3}} freezer thawer sizer] - (let [data-frozen (freezer data) - time-freeze (enc/bench laps {:warmup-laps warmup} (freezer data)) + (let [data-frozen (freezer bench-data) + time-freeze (enc/bench laps {:warmup-laps warmup} (freezer bench-data)) time-thaw (enc/bench laps {:warmup-laps warmup} (thawer data-frozen)) data-size (sizer data-frozen)] @@ -127,19 +126,18 @@ ;;;; Compressors -(let [_ (require '[taoensso.nippy :as nippy]) - data (nippy/freeze nippy/stress-data-comparable {:compressor nil})] +(let [bench-data (nippy/freeze (nippy/stress-data {:comparable? true}) {:compressor nil})] (defn bench1-compressor [{:keys [laps warmup] :or {laps 1e4, warmup 2e4}} compressor] - (let [data-compressed (compr/compress compressor data) - time-compress (enc/bench laps {:warmup-laps warmup} (compr/compress compressor data)) + (let [data-compressed (compr/compress compressor bench-data) + time-compress (enc/bench laps {:warmup-laps warmup} (compr/compress compressor bench-data)) time-decompress (enc/bench laps {:warmup-laps warmup} (compr/decompress compressor data-compressed))] {:round (+ time-compress time-decompress) :compress time-compress :decompress time-decompress - :ratio (enc/round2 (/ (count data-compressed) (count data)))})) + :ratio (enc/round2 (/ (count data-compressed) (count bench-data)))})) (defn bench-compressors [bench1-opts lzma-opts] (merge diff --git a/test/taoensso/nippy_tests.clj b/test/taoensso/nippy_tests.clj index 76b9d6c..bae418a 100644 --- a/test/taoensso/nippy_tests.clj +++ b/test/taoensso/nippy_tests.clj @@ -16,7 +16,7 @@ ;;;; Config, etc. 
-(def test-data nippy/stress-data-comparable) +(def test-data (nippy/stress-data {:comparable? true})) (def tc-gen-recursive-any-equatable (tc-gens/recursive-gen tc-gens/container-type tc-gens/any-equatable)) @@ -35,7 +35,13 @@ ;;;; Core (deftest _core - [(println (str "Clojure version: " *clojure-version*)) + (println (str "Clojure version: " *clojure-version*)) + [(is (= test-data test-data) "Test data is comparable") + (is (= + (nippy/stress-data {:comparable? true}) + (nippy/stress-data {:comparable? true})) + "Stress data is deterministic") + (is (= test-data ((comp thaw freeze) test-data))) (is (= test-data ((comp #(thaw % {:no-header? true :compressor nippy/lz4-compressor @@ -47,8 +53,9 @@ #(freeze % {:password [:salted "p"]})) test-data))) - (is (= (vec (:objects nippy/stress-data)) - ((comp vec thaw freeze) (:objects nippy/stress-data)))) + (let [d (nippy/stress-data {})] + [(is (= (vec (:bytes d)) ((comp vec thaw freeze) (:bytes d)))) + (is (= (vec (:objects d)) ((comp vec thaw freeze) (:objects d))))]) (is (= test-data ((comp #(thaw % {:compressor nippy/lzma2-compressor}) #(freeze % {:compressor nippy/lzma2-compressor})) @@ -141,18 +148,15 @@ ;;;; Caching (deftest _caching - (let [stress [nippy/stress-data-comparable - nippy/stress-data-comparable - nippy/stress-data-comparable - nippy/stress-data-comparable] - cached (mapv nippy/cache stress) - cached (mapv nippy/cache stress) ; <=1 wrap auto-enforced + (let [test-data* [test-data test-data test-data test-data] ; Data with duplicates + cached (mapv nippy/cache test-data*) + cached (mapv nippy/cache test-data*) ; <=1 wrap auto-enforced ] - [(is (= stress (thaw (freeze stress {:compressor nil})))) - (is (= stress (thaw (freeze cached {:compressor nil})))) - (let [size-stress (count (freeze stress {:compressor nil})) - size-cached (count (freeze cached {:compressor nil}))] + [(is (= test-data* (thaw (freeze test-data* {:compressor nil})))) + (is (= test-data* (thaw (freeze cached {:compressor nil})))) + 
(let [size-stress (count (freeze test-data* {:compressor nil})) + size-cached (count (freeze cached {:compressor nil}))] (is (>= size-stress (* 3 size-cached))) (is (< size-stress (* 4 size-cached))))])) diff --git a/wiki/1 Getting-started.md b/wiki/1 Getting-started.md index 83531ba..7f6b238 100644 --- a/wiki/1 Getting-started.md +++ b/wiki/1 Getting-started.md @@ -15,96 +15,83 @@ And setup your namespace imports: # De/serializing -As an example of what it can do, let's take a look at Nippy's own reference stress data: +As an example of what it can do, let's take a look at Nippy's own reference [stress data](https://taoensso.github.io/nippy/taoensso.nippy.html#var-stress-data): ```clojure -nippy/stress-data -=> {:nil nil :true true :false false - :boxed-false (Boolean. false) + :false-boxed (Boolean. false) :char \ಬ :str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" - :str-long (apply str (range 1000)) + :str-long (reduce str (range 1024)) :kw :keyword :kw-ns ::keyword - :kw-long (keyword - (apply str "kw" (range 1000)) - (apply str "kw" (range 1000))) - :sym 'foo :sym-ns 'foo/bar - :sym-long (symbol - (apply str "sym" (range 1000)) - (apply str "sym" (range 1000))) + :kw-long (keyword (reduce str "_" (range 128)) (reduce str "_" (range 128))) + :sym-long (symbol (reduce str "_" (range 128)) (reduce str "_" (range 128))) - :regex #"^(https?:)?//(www\?|\?)?" + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :float (float 3.1415926535897932384626433832795) + :double (double 3.1415926535897932384626433832795) + :bigdec (bigdec 3.1415926535897932384626433832795) + :bigint (bigint 31415926535897932384626433832795) + :ratio 22/7 - ;;; Try reflect real-world data: - :many-small-numbers (vec (range 200)) - :many-small-keywords (->> (java.util.Locale/getISOLanguages) - (mapv keyword)) - :many-small-strings (->> (java.util.Locale/getISOCountries) - (mapv #(.getDisplayCountry (java.util.Locale. 
"en" %)))) + :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 (list) ()))) + :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] + :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} + :map-entry (clojure.lang.MapEntry/create "key" "val") + :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} + :meta (with-meta {:a :A} {:metakey :metaval}) + :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}} + [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"] {} #{} [] ()]]]]] - :queue (enc/queue [:a :b :c :d :e :f :g]) - :queue-empty (enc/queue) - :sorted-set (sorted-set 1 2 3 4 5) - :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :regex #"^(https?:)?//(www\?|\?)?" + :sorted-set (sorted-set 1 2 3 4 5) + :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) + :lazy-seq-empty (map identity ()) + :lazy-seq (repeatedly 64 #(do nil)) + :queue (into clojure.lang.PersistentQueue/EMPTY [:a :b :c :d :e :f :g]) + :queue-empty clojure.lang.PersistentQueue/EMPTY - :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 '(())))) - :vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]] - :subvec (subvec [1 2 3 4 5 6 7 8] 2 8) - :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}} - :map-entry (clojure.lang.MapEntry. "key" "val") - :set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}} - :meta (with-meta {:a :A} {:metakey :metaval}) - :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}} - [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5]]]]]] + :uuid (java.util.UUID. 7232453380187312026 -7067939076204274491) + :uri (java.net.URI. "https://clojure.org") + :defrecord (nippy/StressRecord. "data") + :deftype (nippy/StressType. "data") + :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :objects (object-array [1 "two" {:data "data"}]) - :lazy-seq (repeatedly 1000 rand) - :lazy-seq-empty (map identity '()) + :util-date (java.util.Date. 1577884455500) + :sql-date (java.sql.Date. 
1577884455500) + :instant (java.time.Instant/parse "2020-01-01T13:14:15.50Z") + :duration (java.time.Duration/ofSeconds 100 100) + :period (java.time.Period/of 1 1 1) - :byte (byte 16) - :short (short 42) - :integer (int 3) - :long (long 3) - :bigint (bigint 31415926535897932384626433832795) + :throwable (Throwable. "Msg") + :exception (Exception. "Msg") + :ex-info (ex-info "Msg" {:data "data"}) - :float (float 3.14) - :double (double 3.14) - :bigdec (bigdec 3.1415926535897932384626433832795) - - :ratio 22/7 - :uri (java.net.URI. "https://clojure.org/reference/data_structures") - :uuid (java.util.UUID/randomUUID) - :util-date (java.util.Date.) - :sql-date (java.sql.Date/valueOf "2023-06-21") - - ;;; JVM 8+ - :time-instant (enc/compile-if java.time.Instant (java.time.Instant/now) nil) - :time-duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) nil) - :time-period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) nil) - - :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) - :objects (object-array [1 "two" {:data "data"}]) - - :stress-record (StressRecord. "data") - :stress-type (StressType. "data") - - ;; Serializable - :throwable (Throwable. "Yolo") - :exception (try (/ 1 0) (catch Exception e e)) - :ex-info (ex-info "ExInfo" {:data "data"})} + :many-longs (vec (repeatedly 512 #(rand-nth (range 10)))) + :many-doubles (vec (repeatedly 512 #(double (rand-nth (range 10))))) + :many-strings (vec (repeatedly 512 #(rand-nth ["foo" "bar" "baz" "qux"]))) + :many-keywords (vec (repeatedly 512 + #(keyword + (rand-nth ["foo" "bar" "baz" "qux" nil]) + (rand-nth ["foo" "bar" "baz" "qux" ]))))} ``` Serialize it: ```clojure -(def frozen-stress-data (nippy/freeze nippy/stress-data)) +(def frozen-stress-data (nippy/freeze (nippy/stress-data {}))) => # ``` @@ -130,8 +117,8 @@ Nippy also gives you **dead simple data encryption**. 
Add a single option to your usual freeze/thaw calls like so: ```clojure -(nippy/freeze nippy/stress-data {:password [:salted "my-password"]}) ; Encrypt -(nippy/thaw {:password [:salted "my-password"]}) ; Decrypt +(nippy/freeze (nippy/stress-data {}) {:password [:salted "my-password"]}) ; Encrypt +(nippy/thaw {:password [:salted "my-password"]}) ; Decrypt ``` There's two default forms of encryption on offer: `:salted` and `:cached`. Each of these makes carefully-chosen trade-offs and is suited to one of two common use cases. See [`aes128-encryptor`](https://taoensso.github.io/nippy/taoensso.nippy.html#var-aes128-encryptor) for a detailed explanation of why/when you'd want one or the other.