Add experimental LZMA2 compressor for higher compression ratios than Snappy
This commit is contained in:
parent
512aca32aa
commit
3879cf4d62
5 changed files with 94 additions and 55 deletions
BIN
benchmarks.png
BIN
benchmarks.png
Binary file not shown.
|
Before Width: | Height: | Size: 23 KiB After Width: | Height: | Size: 21 KiB |
|
|
@ -5,7 +5,8 @@
|
|||
:url "http://www.eclipse.org/legal/epl-v10.html"}
|
||||
:dependencies [[org.clojure/clojure "1.4.0"]
|
||||
[org.clojure/tools.reader "0.7.8"]
|
||||
[org.iq80.snappy/snappy "0.3"]]
|
||||
[org.iq80.snappy/snappy "0.3"]
|
||||
[org.tukaani/xz "1.4"]]
|
||||
:profiles {:1.4 {:dependencies [[org.clojure/clojure "1.4.0"]]}
|
||||
:1.5 {:dependencies [[org.clojure/clojure "1.5.1"]]}
|
||||
:1.6 {:dependencies [[org.clojure/clojure "1.6.0-master-SNAPSHOT"]]}
|
||||
|
|
|
|||
|
|
@ -2,69 +2,56 @@
|
|||
{:author "Peter Taoussanis"}
|
||||
(:require [clojure.tools.reader.edn :as edn]
|
||||
[taoensso.nippy :as nippy :refer (freeze thaw)]
|
||||
[taoensso.nippy.utils :as utils]))
|
||||
[taoensso.nippy.compression :as compression]
|
||||
[taoensso.nippy.utils :as utils]))
|
||||
|
||||
;; Remove stuff from stress-data that breaks reader
|
||||
(def data (dissoc nippy/stress-data :queue :queue-empty :bytes))
|
||||
|
||||
(defmacro bench* [& body] `(utils/bench 10000 (do ~@body) :warmup-laps 2000))
|
||||
(defmacro bench* [& body] `(utils/bench 10000 (do ~@body) :warmup-laps 20000))
|
||||
(defn bench1
  "Benchmarks one freezer/thawer pair against the benchmark `data`.

  `freezer` is called with `data`; `thawer` is called with whatever `freezer`
  returned. The optional third argument `sizer` measures the frozen payload
  and falls back to `count` when not supplied.

  Returns a map with:
    :freeze - result of `bench*` over `(freezer data)`
    :thaw   - result of `bench*` over `(thawer frozen)`
    :round  - sum of the two results above
    :size   - `(sizer frozen)`"
  [freezer thawer & [sizer]]
  (let [;; Freeze once up front so the thaw benchmark only measures thawing.
        frozen   (freezer data)
        measure  (or sizer count)
        t-freeze (bench* (freezer data))
        t-thaw   (bench* (thawer frozen))]
    {:round  (+ t-freeze t-thaw)
     :freeze t-freeze
     :thaw   t-thaw
     :size   (measure frozen)}))
|
||||
|
||||
(defn freeze-reader [x] (pr-str x))
|
||||
(defn thaw-reader [x] (edn/read-string x))
|
||||
(def roundtrip-reader (comp thaw-reader freeze-reader))
|
||||
|
||||
(def roundtrip-defaults (comp thaw freeze))
|
||||
(def roundtrip-encrypted (comp #(thaw % {:password [:cached "p"]})
|
||||
#(freeze % {:password [:cached "p"]})))
|
||||
(def roundtrip-fast (comp thaw #(freeze % {:compressor nil})))
|
||||
|
||||
(defn bench [{:keys [reader? laps] :or {reader? true laps 1}}]
|
||||
(println)
|
||||
(println "Benching (this can take some time)")
|
||||
(defn bench [{:keys [reader? lzma2? laps] :or {laps 1}}]
|
||||
(println "\nBenching (this can take some time)")
|
||||
(println "----------------------------------")
|
||||
(dotimes [l laps]
|
||||
(println)
|
||||
(println (str "Lap " (inc l) "/" laps "..."))
|
||||
(println (str "\nLap " (inc l) "/" laps "..."))
|
||||
|
||||
(when reader?
|
||||
(println
|
||||
{:reader
|
||||
{:round (bench* (roundtrip-reader data))
|
||||
:freeze (bench* (freeze-reader data))
|
||||
:thaw (let [frozen (freeze-reader data)] (bench* (thaw-reader frozen)))
|
||||
:data-size (count (.getBytes ^String (freeze-reader data) "UTF-8"))}}))
|
||||
(when reader? ; Slow
|
||||
(println {:reader (bench1 #(pr-str %) #(edn/read-string %)
|
||||
#(count (.getBytes ^String % "UTF-8")))}))
|
||||
|
||||
(println
|
||||
{:defaults
|
||||
{:round (bench* (roundtrip-defaults data))
|
||||
:freeze (bench* (freeze data))
|
||||
:thaw (let [frozen (freeze data)] (bench* (thaw frozen)))
|
||||
:data-size (count (freeze data))}})
|
||||
(println {:default (bench1 #(freeze % {})
|
||||
#(thaw % {}))})
|
||||
(println {:encrypted (bench1 #(freeze % {:password [:cached "p"]})
|
||||
#(thaw % {:password [:cached "p"]}))})
|
||||
(println {:fast (bench1 #(freeze % {:compressor nil})
|
||||
#(thaw % {:compressor nil}))})
|
||||
|
||||
(println
|
||||
{:encrypted
|
||||
{:round (bench* (roundtrip-encrypted data))
|
||||
:freeze (bench* (freeze data {:password [:cached "p"]}))
|
||||
:thaw (let [frozen (freeze data {:password [:cached "p"]})]
|
||||
(bench* (thaw frozen {:password [:cached "p"]})))
|
||||
:data-size (count (freeze data {:password [:cached "p"]}))}})
|
||||
(when lzma2? ; Slow as molasses
|
||||
(println {:lzma2 (bench1 #(freeze % {:compressor compression/lzma2-compressor})
|
||||
#(thaw % {:compressor compression/lzma2-compressor}))})))
|
||||
|
||||
(println
|
||||
{:fast
|
||||
{:round (bench* (roundtrip-fast data))
|
||||
:freeze (bench* (freeze data {:compressor nil}))
|
||||
:thaw (let [frozen (freeze data {:compressor nil})]
|
||||
(bench* (thaw frozen)))
|
||||
:data-size (count (freeze data {:compressor nil}))}}))
|
||||
|
||||
(println)
|
||||
(println "Done! (Time for cake?)")
|
||||
(println "\nDone! (Time for cake?)")
|
||||
true)
|
||||
|
||||
(comment
|
||||
;; (bench {:reader? true :laps 2})
|
||||
;; (bench {:reader? false :laps 1})
|
||||
;; (bench {:reader? false :laps 2})
|
||||
;; (bench {:reader? true :lzma2? true :laps 1})
|
||||
;; (bench {:laps 2})
|
||||
|
||||
;;; 19 Oct 2013: Nippy v2.3.0, with lzma2 & (nb!) round=freeze+thaw
|
||||
;; {:reader {:round 67798, :freeze 23202, :thaw 44596, :size 22971}}
|
||||
;; {:default {:round 3632, :freeze 2349, :thaw 1283, :size 12369}}
|
||||
;; {:encrypted {:round 6970, :freeze 4073, :thaw 2897, :size 12388}}
|
||||
;; {:fast {:round 3294, :freeze 2109, :thaw 1185, :size 13277}}
|
||||
;; {:lzma2 {:round 145301, :freeze 123650, :thaw 21651, :size 9024}}
|
||||
|
||||
;;; 11 Oct 2013: Nippy v2.2.0, with both ztellman mods
|
||||
;; {:defaults {:round 4319, :freeze 2950, :thaw 1446, :data-size 12369}}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
(ns taoensso.nippy.compression
|
||||
"Alpha - subject to change."
|
||||
{:author "Peter Taoussanis"}
|
||||
(:require [taoensso.nippy.utils :as utils]))
|
||||
(:require [taoensso.nippy.utils :as utils])
|
||||
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
|
||||
DataOutputStream]))
|
||||
|
||||
;;;; Interface
|
||||
|
||||
|
|
@ -16,5 +18,45 @@
|
|||
(compress [_ ba] (org.iq80.snappy.Snappy/compress ba))
|
||||
(decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))
|
||||
|
||||
(def snappy-compressor "Default org.iq80.snappy.Snappy compressor."
|
||||
(->SnappyCompressor))
|
||||
(def snappy-compressor
|
||||
"Default org.iq80.snappy.Snappy compressor:
|
||||
Ratio: low.
|
||||
Write speed: very high.
|
||||
Read speed: very high.
|
||||
|
||||
A good general-purpose compressor for Redis."
|
||||
(->SnappyCompressor))
|
||||
|
||||
(deftype LZMA2Compressor [compression-level]
  ;; Compression level ∈ℕ[0,9] (low->high) with 6 LZMA2 default (we use 4)
  ;;
  ;; Wire format produced by `compress` and expected by `decompress`:
  ;;   [4-byte big-endian uncompressed length][XZ stream]
  ICompressor
  (compress [_ ba]
    ;; Returns a new byte array: the length prefix followed by the XZ stream.
    (let [ba-len (alength ^bytes ba)
          ba-os  (ByteArrayOutputStream.)]
      ;; Prefix with uncompressed length:
      (.writeInt (DataOutputStream. ba-os) ba-len)
      ;; with-open guarantees the XZ stream is closed (and thereby finished)
      ;; even if the write throws.
      (with-open [xzs (org.tukaani.xz.XZOutputStream. ba-os
                        (org.tukaani.xz.LZMA2Options. compression-level))]
        (.write xzs ^bytes ba))
      (.toByteArray ba-os)))

  (decompress [_ ba]
    ;; Returns a byte array of exactly the prefixed length. Throws if the
    ;; stream holds fewer or more uncompressed bytes than the prefix claims.
    (let [ba-is  (ByteArrayInputStream. ^bytes ba)
          ba-len (.readInt (DataInputStream. ba-is))
          out    (byte-array ba-len)]
      (with-open [xzs (org.tukaani.xz.XZInputStream. ba-is)]
        ;; A bare `.read` may deliver fewer than `ba-len` bytes, which would
        ;; leave the tail of `out` zero-filled without any error. `readFully`
        ;; either fills the whole array or throws EOFException.
        (try
          (.readFully (DataInputStream. xzs) out 0 ba-len)
          (catch java.io.EOFException e
            (throw (Exception. "LZMA2 Decompress failed: corrupt data?" e))))
        (when (not= -1 (.read xzs)) ; Good practice as extra safety measure
          (throw (Exception. "LZMA2 Decompress failed: corrupt data?"))))
      out)))
|
||||
|
||||
(def lzma2-compressor
|
||||
"Alpha - subject to change.
|
||||
Default org.tukaani.xz.LZMA2 compressor:
|
||||
Ratio: high.
|
||||
Write speed: very slow.
|
||||
Read speed: medium.
|
||||
|
||||
A specialized compressor for large, low-write, high-read data."
|
||||
(->LZMA2Compressor 4))
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
(ns taoensso.nippy.tests.main
|
||||
(:require [expectations :as test :refer :all]
|
||||
[taoensso.nippy :as nippy :refer (freeze thaw)]
|
||||
[taoensso.nippy.benchmarks :as benchmarks]))
|
||||
[taoensso.nippy.compression :as compression]
|
||||
[taoensso.nippy.benchmarks :as benchmarks]))
|
||||
|
||||
;; Remove stuff from stress-data that breaks roundtrip equality
|
||||
(def test-data (dissoc nippy/stress-data :bytes))
|
||||
|
|
@ -14,6 +15,14 @@
|
|||
(expect test-data ((comp #(thaw % {:password [:salted "p"]})
|
||||
#(freeze % {:password [:salted "p"]}))
|
||||
test-data))
|
||||
(expect test-data ((comp #(thaw % {:compressor compression/lzma2-compressor})
|
||||
#(freeze % {:compressor compression/lzma2-compressor}))
|
||||
test-data))
|
||||
(expect test-data ((comp #(thaw % {:compressor compression/lzma2-compressor
|
||||
:password [:salted "p"]})
|
||||
#(freeze % {:compressor compression/lzma2-compressor
|
||||
:password [:salted "p"]}))
|
||||
test-data))
|
||||
|
||||
(expect AssertionError (thaw (freeze test-data {:password "malformed"})))
|
||||
(expect Exception (thaw (freeze test-data {:password [:salted "p"]})))
|
||||
|
|
@ -46,4 +55,4 @@
|
|||
(nippy/extend-thaw 2 [s] (->MyRec (.readUTF s)))
|
||||
(= (->MyRec "fast-val") (thaw (freeze (->MyRec "val"))))))
|
||||
|
||||
(expect (benchmarks/bench {:reader? false})) ; Also tests :cached passwords
|
||||
(expect (benchmarks/bench {})) ; Also tests :cached passwords
|
||||
|
|
|
|||
Loading…
Reference in a new issue