Add experimental LZMA2 compressor for higher compression ratios than Snappy
This commit is contained in:
parent
512aca32aa
commit
3879cf4d62
5 changed files with 94 additions and 55 deletions
BIN
benchmarks.png
BIN
benchmarks.png
Binary file not shown.
|
Before Width: | Height: | Size: 23 KiB After Width: | Height: | Size: 21 KiB |
|
|
@ -5,7 +5,8 @@
|
||||||
:url "http://www.eclipse.org/legal/epl-v10.html"}
|
:url "http://www.eclipse.org/legal/epl-v10.html"}
|
||||||
:dependencies [[org.clojure/clojure "1.4.0"]
|
:dependencies [[org.clojure/clojure "1.4.0"]
|
||||||
[org.clojure/tools.reader "0.7.8"]
|
[org.clojure/tools.reader "0.7.8"]
|
||||||
[org.iq80.snappy/snappy "0.3"]]
|
[org.iq80.snappy/snappy "0.3"]
|
||||||
|
[org.tukaani/xz "1.4"]]
|
||||||
:profiles {:1.4 {:dependencies [[org.clojure/clojure "1.4.0"]]}
|
:profiles {:1.4 {:dependencies [[org.clojure/clojure "1.4.0"]]}
|
||||||
:1.5 {:dependencies [[org.clojure/clojure "1.5.1"]]}
|
:1.5 {:dependencies [[org.clojure/clojure "1.5.1"]]}
|
||||||
:1.6 {:dependencies [[org.clojure/clojure "1.6.0-master-SNAPSHOT"]]}
|
:1.6 {:dependencies [[org.clojure/clojure "1.6.0-master-SNAPSHOT"]]}
|
||||||
|
|
|
||||||
|
|
@ -2,69 +2,56 @@
|
||||||
{:author "Peter Taoussanis"}
|
{:author "Peter Taoussanis"}
|
||||||
(:require [clojure.tools.reader.edn :as edn]
|
(:require [clojure.tools.reader.edn :as edn]
|
||||||
[taoensso.nippy :as nippy :refer (freeze thaw)]
|
[taoensso.nippy :as nippy :refer (freeze thaw)]
|
||||||
[taoensso.nippy.utils :as utils]))
|
[taoensso.nippy.compression :as compression]
|
||||||
|
[taoensso.nippy.utils :as utils]))
|
||||||
|
|
||||||
;; Remove stuff from stress-data that breaks reader
|
;; Remove stuff from stress-data that breaks reader
|
||||||
(def data (dissoc nippy/stress-data :queue :queue-empty :bytes))
|
(def data (dissoc nippy/stress-data :queue :queue-empty :bytes))
|
||||||
|
|
||||||
(defmacro bench* [& body] `(utils/bench 10000 (do ~@body) :warmup-laps 2000))
|
(defmacro bench* [& body] `(utils/bench 10000 (do ~@body) :warmup-laps 20000))
|
||||||
|
(defn bench1
  "Benchmarks one freezer/thawer pair against the namespace-level `data`.

  `freezer` is called on `data`; `thawer` is called on the frozen result.
  The optional `sizer` is applied to the frozen result to produce `:size`
  and defaults to `count`.

  Returns a map with keys :round (sum of the freeze and thaw timings),
  :freeze, :thaw and :size. Timings are whatever `bench*` returns."
  [freezer thawer & [sizer]]
  (let [frozen   (freezer data) ; frozen once up front so the thaw timing excludes freezing
        t-freeze (bench* (freezer data))
        t-thaw   (bench* (thawer frozen))
        size-of  (or sizer count)]
    {:round  (+ t-freeze t-thaw)
     :freeze t-freeze
     :thaw   t-thaw
     :size   (size-of frozen)}))
|
||||||
|
|
||||||
(defn freeze-reader [x] (pr-str x))
|
(defn bench [{:keys [reader? lzma2? laps] :or {laps 1}}]
|
||||||
(defn thaw-reader [x] (edn/read-string x))
|
(println "\nBenching (this can take some time)")
|
||||||
(def roundtrip-reader (comp thaw-reader freeze-reader))
|
|
||||||
|
|
||||||
(def roundtrip-defaults (comp thaw freeze))
|
|
||||||
(def roundtrip-encrypted (comp #(thaw % {:password [:cached "p"]})
|
|
||||||
#(freeze % {:password [:cached "p"]})))
|
|
||||||
(def roundtrip-fast (comp thaw #(freeze % {:compressor nil})))
|
|
||||||
|
|
||||||
(defn bench [{:keys [reader? laps] :or {reader? true laps 1}}]
|
|
||||||
(println)
|
|
||||||
(println "Benching (this can take some time)")
|
|
||||||
(println "----------------------------------")
|
(println "----------------------------------")
|
||||||
(dotimes [l laps]
|
(dotimes [l laps]
|
||||||
(println)
|
(println (str "\nLap " (inc l) "/" laps "..."))
|
||||||
(println (str "Lap " (inc l) "/" laps "..."))
|
|
||||||
|
|
||||||
(when reader?
|
(when reader? ; Slow
|
||||||
(println
|
(println {:reader (bench1 #(pr-str %) #(edn/read-string %)
|
||||||
{:reader
|
#(count (.getBytes ^String % "UTF-8")))}))
|
||||||
{:round (bench* (roundtrip-reader data))
|
|
||||||
:freeze (bench* (freeze-reader data))
|
|
||||||
:thaw (let [frozen (freeze-reader data)] (bench* (thaw-reader frozen)))
|
|
||||||
:data-size (count (.getBytes ^String (freeze-reader data) "UTF-8"))}}))
|
|
||||||
|
|
||||||
(println
|
(println {:default (bench1 #(freeze % {})
|
||||||
{:defaults
|
#(thaw % {}))})
|
||||||
{:round (bench* (roundtrip-defaults data))
|
(println {:encrypted (bench1 #(freeze % {:password [:cached "p"]})
|
||||||
:freeze (bench* (freeze data))
|
#(thaw % {:password [:cached "p"]}))})
|
||||||
:thaw (let [frozen (freeze data)] (bench* (thaw frozen)))
|
(println {:fast (bench1 #(freeze % {:compressor nil})
|
||||||
:data-size (count (freeze data))}})
|
#(thaw % {:compressor nil}))})
|
||||||
|
|
||||||
(println
|
(when lzma2? ; Slow as molasses
|
||||||
{:encrypted
|
(println {:lzma2 (bench1 #(freeze % {:compressor compression/lzma2-compressor})
|
||||||
{:round (bench* (roundtrip-encrypted data))
|
#(thaw % {:compressor compression/lzma2-compressor}))})))
|
||||||
:freeze (bench* (freeze data {:password [:cached "p"]}))
|
|
||||||
:thaw (let [frozen (freeze data {:password [:cached "p"]})]
|
|
||||||
(bench* (thaw frozen {:password [:cached "p"]})))
|
|
||||||
:data-size (count (freeze data {:password [:cached "p"]}))}})
|
|
||||||
|
|
||||||
(println
|
(println "\nDone! (Time for cake?)")
|
||||||
{:fast
|
|
||||||
{:round (bench* (roundtrip-fast data))
|
|
||||||
:freeze (bench* (freeze data {:compressor nil}))
|
|
||||||
:thaw (let [frozen (freeze data {:compressor nil})]
|
|
||||||
(bench* (thaw frozen)))
|
|
||||||
:data-size (count (freeze data {:compressor nil}))}}))
|
|
||||||
|
|
||||||
(println)
|
|
||||||
(println "Done! (Time for cake?)")
|
|
||||||
true)
|
true)
|
||||||
|
|
||||||
(comment
|
(comment
|
||||||
;; (bench {:reader? true :laps 2})
|
;; (bench {:reader? true :lzma2? true :laps 1})
|
||||||
;; (bench {:reader? false :laps 1})
|
;; (bench {:laps 2})
|
||||||
;; (bench {:reader? false :laps 2})
|
|
||||||
|
;;; 19 Oct 2013: Nippy v2.3.0, with lzma2 & (nb!) round=freeze+thaw
|
||||||
|
;; {:reader {:round 67798, :freeze 23202, :thaw 44596, :size 22971}}
|
||||||
|
;; {:default {:round 3632, :freeze 2349, :thaw 1283, :size 12369}}
|
||||||
|
;; {:encrypted {:round 6970, :freeze 4073, :thaw 2897, :size 12388}}
|
||||||
|
;; {:fast {:round 3294, :freeze 2109, :thaw 1185, :size 13277}}
|
||||||
|
;; {:lzma2 {:round 145301, :freeze 123650, :thaw 21651, :size 9024}}
|
||||||
|
|
||||||
;;; 11 Oct 2013: Nippy v2.2.0, with both ztellman mods
|
;;; 11 Oct 2013: Nippy v2.2.0, with both ztellman mods
|
||||||
;; {:defaults {:round 4319, :freeze 2950, :thaw 1446, :data-size 12369}}
|
;; {:defaults {:round 4319, :freeze 2950, :thaw 1446, :data-size 12369}}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
(ns taoensso.nippy.compression
|
(ns taoensso.nippy.compression
|
||||||
"Alpha - subject to change."
|
"Alpha - subject to change."
|
||||||
{:author "Peter Taoussanis"}
|
{:author "Peter Taoussanis"}
|
||||||
(:require [taoensso.nippy.utils :as utils]))
|
(:require [taoensso.nippy.utils :as utils])
|
||||||
|
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
|
||||||
|
DataOutputStream]))
|
||||||
|
|
||||||
;;;; Interface
|
;;;; Interface
|
||||||
|
|
||||||
|
|
@ -16,5 +18,45 @@
|
||||||
(compress [_ ba] (org.iq80.snappy.Snappy/compress ba))
|
(compress [_ ba] (org.iq80.snappy.Snappy/compress ba))
|
||||||
(decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))
|
(decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))
|
||||||
|
|
||||||
(def snappy-compressor "Default org.iq80.snappy.Snappy compressor."
|
(def snappy-compressor
|
||||||
|
"Default org.iq80.snappy.Snappy compressor:
|
||||||
|
Ratio: low.
|
||||||
|
Write speed: very high.
|
||||||
|
Read speed: very high.
|
||||||
|
|
||||||
|
A good general-purpose compressor for Redis."
|
||||||
(->SnappyCompressor))
|
(->SnappyCompressor))
|
||||||
|
|
||||||
|
(deftype LZMA2Compressor [compression-level]
  ;; Compression level ∈ℕ[0,9] (low->high) with 6 LZMA2 default (we use 4)
  ;;
  ;; Payload layout produced by `compress` and expected by `decompress`:
  ;;   [4-byte int: uncompressed length][XZ stream]
  ICompressor
  (compress [_ ba]
    ;; Returns a new byte array: the length prefix followed by the XZ-compressed
    ;; contents of `ba`.
    (let [ba-len (alength ^bytes ba)
          ba-os  (ByteArrayOutputStream.)
          ;; Prefix with uncompressed length:
          _      (.writeInt (DataOutputStream. ba-os) ba-len)
          xzs    (org.tukaani.xz.XZOutputStream. ba-os
                   (org.tukaani.xz.LZMA2Options. compression-level))]
      (.write xzs ^bytes ba)
      ;; Closing finishes the XZ stream; must happen before reading the bytes out.
      (.close xzs)
      (.toByteArray ba-os)))

  (decompress [_ ba]
    ;; Returns a byte array of exactly the prefixed length.
    ;; Throws Exception when the length prefix is negative, when the XZ stream
    ;; yields fewer bytes than the prefix promises, or when it yields more.
    (let [ba-is  (ByteArrayInputStream. ^bytes ba)
          ba-len (.readInt (DataInputStream. ba-is))]
      (when (neg? ba-len)
        (throw (Exception. "LZMA2 Decompress failed: corrupt data?")))
      (let [^bytes out (byte-array ba-len)]
        (with-open [xzs (org.tukaani.xz.XZInputStream. ba-is)]
          ;; A single `read` is allowed to return fewer bytes than requested
          ;; (e.g. when the stream ends early), so keep reading until the
          ;; buffer is full and fail loudly if the stream runs dry first.
          (loop [offset 0]
            (when (< offset ba-len)
              (let [n (.read xzs out offset (- ba-len offset))]
                (when-not (pos? n)
                  (throw (Exception. "LZMA2 Decompress failed: corrupt data?")))
                (recur (+ offset n)))))
          (when (not= -1 (.read xzs)) ; Good practice as extra safety measure
            (throw (Exception. "LZMA2 Decompress failed: corrupt data?"))))
        out))))
|
||||||
|
|
||||||
|
(def lzma2-compressor
  "Alpha - subject to change.
  Default org.tukaani.xz.LZMA2 compressor (compression level 4):
    Ratio:       high.
    Write speed: very slow.
    Read speed:  medium.

  A specialized compressor for large, low-write, high-read data."
  (LZMA2Compressor. 4))
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
(ns taoensso.nippy.tests.main
|
(ns taoensso.nippy.tests.main
|
||||||
(:require [expectations :as test :refer :all]
|
(:require [expectations :as test :refer :all]
|
||||||
[taoensso.nippy :as nippy :refer (freeze thaw)]
|
[taoensso.nippy :as nippy :refer (freeze thaw)]
|
||||||
[taoensso.nippy.benchmarks :as benchmarks]))
|
[taoensso.nippy.compression :as compression]
|
||||||
|
[taoensso.nippy.benchmarks :as benchmarks]))
|
||||||
|
|
||||||
;; Remove stuff from stress-data that breaks roundtrip equality
|
;; Remove stuff from stress-data that breaks roundtrip equality
|
||||||
(def test-data (dissoc nippy/stress-data :bytes))
|
(def test-data (dissoc nippy/stress-data :bytes))
|
||||||
|
|
@ -14,6 +15,14 @@
|
||||||
(expect test-data ((comp #(thaw % {:password [:salted "p"]})
|
(expect test-data ((comp #(thaw % {:password [:salted "p"]})
|
||||||
#(freeze % {:password [:salted "p"]}))
|
#(freeze % {:password [:salted "p"]}))
|
||||||
test-data))
|
test-data))
|
||||||
|
(expect test-data ((comp #(thaw % {:compressor compression/lzma2-compressor})
|
||||||
|
#(freeze % {:compressor compression/lzma2-compressor}))
|
||||||
|
test-data))
|
||||||
|
(expect test-data ((comp #(thaw % {:compressor compression/lzma2-compressor
|
||||||
|
:password [:salted "p"]})
|
||||||
|
#(freeze % {:compressor compression/lzma2-compressor
|
||||||
|
:password [:salted "p"]}))
|
||||||
|
test-data))
|
||||||
|
|
||||||
(expect AssertionError (thaw (freeze test-data {:password "malformed"})))
|
(expect AssertionError (thaw (freeze test-data {:password "malformed"})))
|
||||||
(expect Exception (thaw (freeze test-data {:password [:salted "p"]})))
|
(expect Exception (thaw (freeze test-data {:password [:salted "p"]})))
|
||||||
|
|
@ -46,4 +55,4 @@
|
||||||
(nippy/extend-thaw 2 [s] (->MyRec (.readUTF s)))
|
(nippy/extend-thaw 2 [s] (->MyRec (.readUTF s)))
|
||||||
(= (->MyRec "fast-val") (thaw (freeze (->MyRec "val"))))))
|
(= (->MyRec "fast-val") (thaw (freeze (->MyRec "val"))))))
|
||||||
|
|
||||||
(expect (benchmarks/bench {:reader? false})) ; Also tests :cached passwords
|
(expect (benchmarks/bench {})) ; Also tests :cached passwords
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue