nippy/test/taoensso/nippy_tests.clj
Peter Taoussanis 0a9d67084b [new] [Storage efficiency] PoC: separate signed long types
Before:
  Longs in [  -128,   127] use 1 byte
  Longs in [-32768, 32767] use 2 bytes
  etc.

After:
  Longs in [  -255,   255] use 1 byte
  Longs in [-65535, 65535] use 2 bytes
  etc.

I.e. doubles the range of longs that can be stored by 1, 2, and 4 bytes.

This changes saves:
  - 1 byte  per long in [  128,   255], or [  -129,   -255]
  - 2 bytes per long in [32768, 65535], or [-32769, -65535]
  - 4 bytes per long ...

Is this advantage worth the extra complexity? Probably yes, given how
common longs (and colls of longs) are in Clojure.
2023-08-02 13:50:40 +02:00

379 lines
13 KiB
Clojure

(ns taoensso.nippy-tests
(:require
[clojure.test :as test :refer [deftest testing is]]
[clojure.test.check :as tc]
[clojure.test.check.generators :as tc-gens]
[clojure.test.check.properties :as tc-props]
[taoensso.encore :as enc :refer []]
[taoensso.nippy :as nippy :refer [freeze thaw]]
[taoensso.nippy.benchmarks :as benchmarks]))
(comment
(remove-ns 'taoensso.nippy-tests)
(test/run-tests 'taoensso.nippy-tests))
;;;; Config, etc.
(def test-data nippy/stress-data-comparable)
(def tc-num-tests 120)
(def tc-gens
"Like `tc-gens/any` but removes NaN (which breaks equality tests)"
(tc-gens/recursive-gen tc-gens/container-type #_simple-type
(tc-gens/one-of
[tc-gens/int tc-gens/large-integer #_tc-gens/double
(tc-gens/double* {:NaN? false})
tc-gens/char tc-gens/string tc-gens/ratio tc-gens/boolean tc-gens/keyword
tc-gens/keyword-ns tc-gens/symbol tc-gens/symbol-ns tc-gens/uuid])))
(comment (tc-gens/sample tc-gens 10))
;;;; Core
(deftest _core
[(println (str "Clojure version: " *clojure-version*))
(is (= test-data ((comp thaw freeze) test-data)))
(is (= test-data ((comp #(thaw % {:no-header? true
:compressor nippy/lz4-compressor
:encryptor nil})
#(freeze % {:no-header? true}))
test-data)))
(is (= test-data ((comp #(thaw % {:password [:salted "p"]})
#(freeze % {:password [:salted "p"]}))
test-data)))
(is (= (vec (:objects nippy/stress-data))
((comp vec thaw freeze) (:objects nippy/stress-data))))
(is (= test-data ((comp #(thaw % {:compressor nippy/lzma2-compressor})
#(freeze % {:compressor nippy/lzma2-compressor}))
test-data)))
(is (= test-data ((comp #(thaw % {:compressor nippy/lzma2-compressor
:password [:salted "p"]})
#(freeze % {:compressor nippy/lzma2-compressor
:password [:salted "p"]}))
test-data)))
(is (= test-data ((comp #(thaw % {:compressor nippy/lz4-compressor})
#(freeze % {:compressor nippy/lz4hc-compressor}))
test-data)))
(is ; Try roundtrip anything that simple-check can dream up
(:result (tc/quick-check tc-num-tests
(tc-props/for-all [val tc-gens]
(= val (thaw (freeze val)))))))
(is (enc/throws? Exception (thaw (freeze test-data {:password "malformed"}))))
(is (enc/throws? Exception (thaw (freeze test-data {:password [:salted "p"]})
{;; Necessary to prevent against JVM segfault due to
;; https://goo.gl/t0OUIo:
:v1-compatibility? false})))
(is (enc/throws? Exception (thaw (freeze test-data {:password [:salted "p"]})
{:v1-compatibility? false ; Ref. https://goo.gl/t0OUIo
:compressor nil})))
(is ; Snappy lib compatibility (for legacy versions of Nippy)
(let [^bytes raw-ba (freeze test-data {:compressor nil})
^bytes xerial-ba (org.xerial.snappy.Snappy/compress raw-ba)
^bytes iq80-ba (org.iq80.snappy.Snappy/compress raw-ba)]
(= (thaw raw-ba)
(thaw (org.xerial.snappy.Snappy/uncompress xerial-ba))
(thaw (org.xerial.snappy.Snappy/uncompress iq80-ba))
(thaw (org.iq80.snappy.Snappy/uncompress iq80-ba 0 (alength iq80-ba)))
(thaw (org.iq80.snappy.Snappy/uncompress xerial-ba 0 (alength xerial-ba))))))
(is ; CBC auto-encryptor compatibility
(= "payload"
(thaw (freeze "payload" {:password [:salted "pwd"] :encryptor nippy/aes128-cbc-encryptor})
(do {:password [:salted "pwd"]}))))
(testing "Signed long types"
(let [range-ushort+ (+ (long @#'nippy/range-ushort) 128)
range-uint+ (+ (long @#'nippy/range-uint) 128)]
[(let [r (range (- range-ushort+) range-ushort+)] (= (thaw (freeze r)) r))
(let [n range-uint+] (= (thaw (freeze n)) n))
(let [n (- range-uint+)] (= (thaw (freeze n)) n))]))])
;;;; Custom types & records
(deftype MyType [basic_field fancy-field!]) ; Note `fancy-field!` field name will be munged
(defrecord MyRec [basic_field fancy-field!])
(deftest _types
[(testing "Extend to custom type"
[(is
(enc/throws? Exception ; No thaw extension yet
(do
(alter-var-root #'nippy/*custom-readers* (constantly {}))
(nippy/extend-freeze MyType 1 [x s]
(.writeUTF s (.basic_field x))
(.writeUTF s (.fancy-field! x)))
(thaw (freeze (MyType. "basic" "fancy"))))))
(is
(do
(nippy/extend-thaw 1 [s] (MyType. (.readUTF s) (.readUTF s)))
(let [mt1 (MyType. "basic" "fancy")
^MyType mt2 (thaw (freeze mt1))]
(=
[(.basic_field mt1) (.fancy-field! mt1)]
[(.basic_field mt2) (.fancy-field! mt2)]))))])
(testing "Extend to custom Record"
(is
(do
(nippy/extend-freeze MyRec 2 [x s]
(.writeUTF s (str "foo-" (:basic_field x)))
(.writeUTF s (str "foo-" (:fancy-field! x))))
(nippy/extend-thaw 2 [s] (MyRec. (.readUTF s) (.readUTF s)))
(=
(do (MyRec. "foo-basic" "foo-fancy"))
(thaw (freeze (MyRec. "basic" "fancy")))))))
(testing "Keyword (prefixed) extensions"
(is
(do
(nippy/extend-freeze MyRec :nippy-tests/MyRec [x s]
(.writeUTF s (:basic_field x))
(.writeUTF s (:fancy-field! x)))
(nippy/extend-thaw :nippy-tests/MyRec [s] (MyRec. (.readUTF s) (.readUTF s)))
(let [mr (MyRec. "basic" "fancy")]
(= mr (thaw (freeze mr)))))))])
;;;; Caching
(deftest _caching
(let [stress [nippy/stress-data-comparable
nippy/stress-data-comparable
nippy/stress-data-comparable
nippy/stress-data-comparable]
cached (mapv nippy/cache stress)
cached (mapv nippy/cache stress) ; <=1 wrap auto-enforced
]
(is (= stress (thaw (freeze stress {:compressor nil}))))
(is (= stress (thaw (freeze cached {:compressor nil}))))
(let [size-stress (count (freeze stress {:compressor nil}))
size-cached (count (freeze cached {:compressor nil}))]
(is (>= size-stress (* 3 size-cached)))
(is (< size-stress (* 4 size-cached))))))
(deftest _caching-metadata
(let [v1 (with-meta [] {:id :v1})
v2 (with-meta [] {:id :v2})
frozen-without-caching (freeze [v1 v2 v1 v2])
frozen-with-caching
(freeze [(nippy/cache v1)
(nippy/cache v2)
(nippy/cache v1)
(nippy/cache v2)])]
(is (> (count frozen-without-caching)
(count frozen-with-caching)))
(is (= (thaw frozen-without-caching)
(thaw frozen-with-caching)))
(is (= (mapv meta (thaw frozen-with-caching))
[{:id :v1} {:id :v2} {:id :v1} {:id :v2}]))))
;;;; Stable binary representation of vals
(deftest _stable-bin
[(is (= (seq (freeze test-data))
(seq (freeze test-data)))) ; f(x)=f(y) | x=y
;; As above, but try multiple times to catch possible protocol interface races:
(is (every? true?
(repeatedly 1000 (fn [] (= (seq (freeze test-data))
(seq (freeze test-data)))))))
;; NB abandoning - no way to do this reliably w/o appropriate contracts from
;; (seq <unordered-coll>):
;;
;; (is (= (seq (-> test-data freeze))
;; (seq (-> test-data freeze thaw freeze)))) ; f(x)=f(f-1(f(x)))
;;
;; As above, but with repeated refreeze to catch possible protocol interface races:
;; (is (= (seq (freeze test-data))
;; (seq (reduce (fn [frozen _] (freeze (thaw frozen)))
;; (freeze test-data) (range 1000)))))
])
(defn qc-prop-bijection [& [n]]
(let [bin->val (atom {})
val->bin (atom {})]
(merge
(tc/quick-check (or n 1)
(tc-props/for-all [val tc-gens]
(let [;; Nb need `seq` for Clojure hash equality:
bin (hash (seq (freeze val)))]
(and
(if (contains? val->bin val)
(= (get val->bin val) bin) ; x=y => f(x)=f(y) by clj=
(do (swap! val->bin assoc val bin)
true))
(if (contains? bin->val bin)
(= (get bin->val bin) val) ; f(x)=f(y) => x=y by clj=
(do (swap! bin->val assoc bin val)
true))))))
#_{:bin->val @bin->val
:val->bin @val->bin}
nil)))
(comment
(tc-gens/sample tc-gens 10)
(:result (qc-prop-bijection 80))
(let [{:keys [result bin->val val->bin]} (qc-prop-bijection 10)]
[result (vals bin->val)]))
(deftest _gc-prop-bijection
(is (:result (qc-prop-bijection tc-num-tests))))
;;;; Thread safety
(deftest _thread-safe
[(is
(let [futures
(mapv
(fn [_]
(future
(= (thaw (freeze test-data)) test-data)))
(range 50))]
(every? deref futures)))
(is
(let [futures
(mapv
(fn [_]
(future
(= (thaw (freeze test-data {:password [:salted "password"]})
{:password [:salted "password"]})
test-data)))
(range 50))]
(every? deref futures)))
(is
(let [futures
(mapv
(fn [_]
(future
(= (thaw (freeze test-data {:password [:cached "password"]})
{:password [:cached "password"]})
test-data)))
(range 50))]
(every? deref futures)))])
;;;; Redefs
(defrecord MyFoo [] Object (toString [_] "v1"))
(defrecord MyFoo [] Object (toString [_] "v2"))
(deftest _redefs
(is (= (str (thaw (freeze (MyFoo.)))) "v2")))
;;;; Serializable
(do
(def ^:private semcn "java.util.concurrent.Semaphore")
(def ^:private sem (java.util.concurrent.Semaphore. 1))
(defn- sem? [x] (instance? java.util.concurrent.Semaphore x)))
(deftest _serializable
[(is (= nippy/*thaw-serializable-allowlist* #{"base.1" "base.2" "add.1" "add.2"})
"JVM properties override initial allowlist values")
(is (enc/throws? Exception (nippy/freeze sem {:serializable-allowlist #{}}))
"Can't freeze Serializable objects unless approved by allowlist")
(is (sem?
(nippy/thaw
(nippy/freeze sem {:serializable-allowlist #{semcn}})
{:serializable-allowlist #{semcn}}))
"Can freeze and thaw Serializable objects if approved by allowlist")
(is (sem?
(nippy/thaw
(nippy/freeze sem {:serializable-allowlist #{"java.util.concurrent.*"}})
{:serializable-allowlist #{"java.util.concurrent.*"}}))
"Strings in allowlist sets may contain \"*\" wildcards")
(let [ba (nippy/freeze sem #_{:serializable-allowlist "*"})
thawed (nippy/thaw ba {:serializable-allowlist #{}})]
[(is (= :quarantined (get-in thawed [:nippy/unthawable :cause]))
"Serializable objects will be quarantined when approved for freezing but not thawing.")
(is (sem? (nippy/read-quarantined-serializable-object-unsafe! thawed))
"Quarantined Serializable objects can still be manually force-read.")
(is (sem? (nippy/read-quarantined-serializable-object-unsafe!
(nippy/thaw (nippy/freeze thawed))))
"Quarantined Serializable objects are themselves safely transportable.")])
(let [obj
(nippy/thaw
(nippy/freeze sem)
{:serializable-allowlist "allow-and-record"})]
[(is (sem? obj)
"Special \"allow-and-record\" allowlist permits any class")
(is
(contains? (nippy/get-recorded-serializable-classes) semcn)
"Special \"allow-and-record\" allowlist records classes")])])
;;;; Metadata
(def my-var "Just a string")
(deftest _metadata
[(is
(:has-meta?
(meta
(nippy/thaw
(nippy/freeze (with-meta [] {:has-meta? true}) {:incl-metadata? true})
{:incl-metadata? true}
)))
"Metadata successfully included")
(is
(nil?
(meta
(nippy/thaw
(nippy/freeze (with-meta [] {:has-meta? true}) {:incl-metadata? true})
{:incl-metadata? false}
)))
"Metadata successfully excluded by thaw")
(is
(nil?
(meta
(nippy/thaw
(nippy/freeze (with-meta [] {:has-meta? true}) {:incl-metadata? false})
{:incl-metadata? true}
)))
"Metadata successfully excluded by freeze")
(is (var? (nippy/read-quarantined-serializable-object-unsafe!
(nippy/thaw (nippy/freeze #'my-var))))
"Don't try to preserve metadata on vars")])
;;;; Benchmarks
(deftest _benchmarks
(is (benchmarks/bench {})) ; Also tests :cached passwords
)