[new] [Storage efficiency] PoC: separate signed long types

Before:
  Longs in [  -128,   127] use 1 byte
  Longs in [-32768, 32767] use 2 bytes
  etc.

After:
  Longs in [  -255,   255] use 1 byte
  Longs in [-65535, 65535] use 2 bytes
  etc.

I.e. this roughly doubles the range of longs that can be stored in 1, 2, or 4 bytes.

This change saves:
  - 1 byte  per long in [  128,   255], or [  -255,   -129]
  - 2 bytes per long in [32768, 65535], or [-65535, -32769]
  - 4 bytes per long ...

Is this advantage worth the extra complexity? Probably yes, given how
common longs (and colls of longs) are in Clojure.
This commit is contained in:
Peter Taoussanis 2023-07-31 22:37:08 +02:00
parent fa1cc66bf3
commit 0a9d67084b
2 changed files with 50 additions and 18 deletions

View file

@ -104,11 +104,17 @@
41 [:short [[:bytes 2]]] 41 [:short [[:bytes 2]]]
42 [:integer [[:bytes 4]]] 42 [:integer [[:bytes 4]]]
0 [:long-0 []] 0 [:long-0 []]
100 [:long-sm [[:bytes 1]]]
101 [:long-md [[:bytes 2]]] 87 [:long-pos-sm [[:bytes 1]]]
102 [:long-lg [[:bytes 4]]] 88 [:long-pos-md [[:bytes 2]]]
43 [:long-xl [[:bytes 8]]] 89 [:long-pos-lg [[:bytes 4]]]
93 [:long-neg-sm [[:bytes 1]]]
94 [:long-neg-md [[:bytes 2]]]
95 [:long-neg-lg [[:bytes 4]]]
43 [:long-xl [[:bytes 8]]]
55 [:double-0 []] 55 [:double-0 []]
60 [:float [[:bytes 4]]] 60 [:float [[:bytes 4]]]
@ -211,6 +217,10 @@
;;; DEPRECATED (only support thawing) ;;; DEPRECATED (only support thawing)
;; Desc-sorted by deprecation date ;; Desc-sorted by deprecation date
100 [:long-sm_ [[:bytes 1]]] ; [2023-mm-dd v3.3.3] Switch to 2x pos/neg ids
101 [:long-md_ [[:bytes 2]]] ; [2023-mm-dd v3.3.3] Switch to 2x pos/neg ids
102 [:long-lg_ [[:bytes 4]]] ; [2023-mm-dd v3.3.3] Switch to 2x pos/neg ids
78 [:sym-md_ [[:bytes {:read 4}]]] ; [2020-11-18 v3.1.1] Buggy size field, Ref. #138 78 [:sym-md_ [[:bytes {:read 4}]]] ; [2020-11-18 v3.1.1] Buggy size field, Ref. #138
77 [:kw-md_ [[:bytes {:read 4}]]] ; [2020-11-18 v3.1.1] Buggy size field, Ref. #138 77 [:kw-md_ [[:bytes {:read 4}]]] ; [2020-11-18 v3.1.1] Buggy size field, Ref. #138
@ -612,6 +622,11 @@
;;;; Freezing ;;;; Freezing
(do
;; Width of each signed Java integer type's value range (MAX_VALUE - MIN_VALUE):
;;   range-ubyte  => 255
;;   range-ushort => 65535
;;   range-uint   => 4294967295
;; The long-freezing code compares a non-zero long's magnitude against these
;; bounds to choose the 1-, 2-, or 4-byte pos/neg encoding; the value is then
;; shifted by the type's MIN_VALUE on write so that it fits the signed
;; byte/short/int written to the stream.
(def ^:private ^:const range-ubyte (- Byte/MAX_VALUE Byte/MIN_VALUE))
(def ^:private ^:const range-ushort (- Short/MAX_VALUE Short/MIN_VALUE))
(def ^:private ^:const range-uint (- Integer/MAX_VALUE Integer/MIN_VALUE)))
(do (do
(defmacro write-id [out id] `(.writeByte ~out ~id)) (defmacro write-id [out id] `(.writeByte ~out ~id))
@ -702,17 +717,18 @@
(zero? n) (write-id out id-long-0) (zero? n) (write-id out id-long-0)
(pos? n) (pos? n)
(enc/cond (enc/cond
(<= n Byte/MAX_VALUE) (do (write-id out id-long-sm) (.writeByte out n)) (<= n range-ubyte) (do (write-id out id-long-pos-sm) (.writeByte out (+ n Byte/MIN_VALUE)))
(<= n Short/MAX_VALUE) (do (write-id out id-long-md) (.writeShort out n)) (<= n range-ushort) (do (write-id out id-long-pos-md) (.writeShort out (+ n Short/MIN_VALUE)))
(<= n Integer/MAX_VALUE) (do (write-id out id-long-lg) (.writeInt out n)) (<= n range-uint) (do (write-id out id-long-pos-lg) (.writeInt out (+ n Integer/MIN_VALUE)))
:else (do (write-id out id-long-xl) (.writeLong out n))) :else (do (write-id out id-long-xl) (.writeLong out n)))
:else :else
(enc/cond (let [y (- n)]
(>= n Byte/MIN_VALUE) (do (write-id out id-long-sm) (.writeByte out n)) (enc/cond
(>= n Short/MIN_VALUE) (do (write-id out id-long-md) (.writeShort out n)) (<= y range-ubyte) (do (write-id out id-long-neg-sm) (.writeByte out (+ y Byte/MIN_VALUE)))
(>= n Integer/MIN_VALUE) (do (write-id out id-long-lg) (.writeInt out n)) (<= y range-ushort) (do (write-id out id-long-neg-md) (.writeShort out (+ y Short/MIN_VALUE)))
:else (do (write-id out id-long-xl) (.writeLong out n))))) (<= y range-uint) (do (write-id out id-long-neg-lg) (.writeInt out (+ y Integer/MIN_VALUE)))
:else (do (write-id out id-long-xl) (.writeLong out n))))))
(defmacro ^:private -run! [proc coll] `(do (reduce #(~proc %2) nil ~coll) nil)) (defmacro ^:private -run! [proc coll] `(do (reduce #(~proc %2) nil ~coll) nil))
(defmacro ^:private -run-kv! [proc m] `(do (reduce-kv #(~proc %2 %3) nil ~m) nil)) (defmacro ^:private -run-kv! [proc m] `(do (reduce-kv #(~proc %2 %3) nil ~m) nil))
@ -1596,11 +1612,19 @@
id-short (.readShort in) id-short (.readShort in)
id-integer (.readInt in) id-integer (.readInt in)
id-long-0 0 id-long-0 0
id-long-sm (long (.readByte in)) id-long-sm_ (long (.readByte in))
id-long-md (long (.readShort in)) id-long-md_ (long (.readShort in))
id-long-lg (long (.readInt in)) id-long-lg_ (long (.readInt in))
id-long-xl (.readLong in) id-long-xl (.readLong in)
id-long-pos-sm (- (long (.readByte in)) Byte/MIN_VALUE)
id-long-pos-md (- (long (.readShort in)) Short/MIN_VALUE)
id-long-pos-lg (- (long (.readInt in)) Integer/MIN_VALUE)
id-long-neg-sm (- (- (long (.readByte in)) Byte/MIN_VALUE))
id-long-neg-md (- (- (long (.readShort in)) Short/MIN_VALUE))
id-long-neg-lg (- (- (long (.readInt in)) Integer/MIN_VALUE))
id-bigint (bigint (read-biginteger in)) id-bigint (bigint (read-biginteger in))
id-biginteger (read-biginteger in) id-biginteger (read-biginteger in)

View file

@ -86,7 +86,15 @@
(is ; CBC auto-encryptor compatibility (is ; CBC auto-encryptor compatibility
(= "payload" (= "payload"
(thaw (freeze "payload" {:password [:salted "pwd"] :encryptor nippy/aes128-cbc-encryptor}) (thaw (freeze "payload" {:password [:salted "pwd"] :encryptor nippy/aes128-cbc-encryptor})
(do {:password [:salted "pwd"]}))))]) (do {:password [:salted "pwd"]}))))
(testing "Signed long types"
(let [range-ushort+ (+ (long @#'nippy/range-ushort) 128)
range-uint+ (+ (long @#'nippy/range-uint) 128)]
[(let [r (range (- range-ushort+) range-ushort+)] (= (thaw (freeze r)) r))
(let [n range-uint+] (= (thaw (freeze n)) n))
(let [n (- range-uint+)] (= (thaw (freeze n)) n))]))])
;;;; Custom types & records ;;;; Custom types & records