Add experimental LZMA2 compressor for higher compression ratios than Snappy

This commit is contained in:
Peter Taoussanis 2013-10-19 12:49:45 +07:00
parent 512aca32aa
commit 3879cf4d62
5 changed files with 94 additions and 55 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 21 KiB

View file

@ -5,7 +5,8 @@
:url "http://www.eclipse.org/legal/epl-v10.html"}
:dependencies [[org.clojure/clojure "1.4.0"]
[org.clojure/tools.reader "0.7.8"]
[org.iq80.snappy/snappy "0.3"]]
[org.iq80.snappy/snappy "0.3"]
[org.tukaani/xz "1.4"]]
:profiles {:1.4 {:dependencies [[org.clojure/clojure "1.4.0"]]}
:1.5 {:dependencies [[org.clojure/clojure "1.5.1"]]}
:1.6 {:dependencies [[org.clojure/clojure "1.6.0-master-SNAPSHOT"]]}

View file

@ -2,69 +2,56 @@
{:author "Peter Taoussanis"}
(:require [clojure.tools.reader.edn :as edn]
[taoensso.nippy :as nippy :refer (freeze thaw)]
[taoensso.nippy.utils :as utils]))
[taoensso.nippy.compression :as compression]
[taoensso.nippy.utils :as utils]))
;; Remove stuff from stress-data that breaks reader
(def data (dissoc nippy/stress-data :queue :queue-empty :bytes))
(defmacro bench* [& body] `(utils/bench 10000 (do ~@body) :warmup-laps 2000))
(defmacro bench* [& body] `(utils/bench 10000 (do ~@body) :warmup-laps 20000))
(defn bench1
  "Benchmarks one freezer/thawer pair against the namespace-level `data`.

  `freezer` is called with `data`; `thawer` is called with the frozen result.
  The optional `sizer` measures the frozen payload and defaults to `count`.

  Returns a map with :freeze and :thaw (each the value returned by `bench*`),
  :round (their sum) and :size."
  [freezer thawer & [sizer]]
  (let [frozen   (freezer data) ; Frozen once up front so thaw timing excludes freezing
        t-freeze (bench* (freezer data))
        t-thaw   (bench* (thawer frozen))
        measure  (or sizer count)]
    {:round  (+ t-freeze t-thaw)
     :freeze t-freeze
     :thaw   t-thaw
     :size   (measure frozen)}))
(defn freeze-reader [x] (pr-str x))
(defn thaw-reader [x] (edn/read-string x))
(def roundtrip-reader (comp thaw-reader freeze-reader))
(def roundtrip-defaults (comp thaw freeze))
(def roundtrip-encrypted (comp #(thaw % {:password [:cached "p"]})
#(freeze % {:password [:cached "p"]})))
(def roundtrip-fast (comp thaw #(freeze % {:compressor nil})))
(defn bench [{:keys [reader? laps] :or {reader? true laps 1}}]
(println)
(println "Benching (this can take some time)")
(defn bench [{:keys [reader? lzma2? laps] :or {laps 1}}]
(println "\nBenching (this can take some time)")
(println "----------------------------------")
(dotimes [l laps]
(println)
(println (str "Lap " (inc l) "/" laps "..."))
(println (str "\nLap " (inc l) "/" laps "..."))
(when reader?
(println
{:reader
{:round (bench* (roundtrip-reader data))
:freeze (bench* (freeze-reader data))
:thaw (let [frozen (freeze-reader data)] (bench* (thaw-reader frozen)))
:data-size (count (.getBytes ^String (freeze-reader data) "UTF-8"))}}))
(when reader? ; Slow
(println {:reader (bench1 #(pr-str %) #(edn/read-string %)
#(count (.getBytes ^String % "UTF-8")))}))
(println
{:defaults
{:round (bench* (roundtrip-defaults data))
:freeze (bench* (freeze data))
:thaw (let [frozen (freeze data)] (bench* (thaw frozen)))
:data-size (count (freeze data))}})
(println {:default (bench1 #(freeze % {})
#(thaw % {}))})
(println {:encrypted (bench1 #(freeze % {:password [:cached "p"]})
#(thaw % {:password [:cached "p"]}))})
(println {:fast (bench1 #(freeze % {:compressor nil})
#(thaw % {:compressor nil}))})
(println
{:encrypted
{:round (bench* (roundtrip-encrypted data))
:freeze (bench* (freeze data {:password [:cached "p"]}))
:thaw (let [frozen (freeze data {:password [:cached "p"]})]
(bench* (thaw frozen {:password [:cached "p"]})))
:data-size (count (freeze data {:password [:cached "p"]}))}})
(when lzma2? ; Slow as molasses
(println {:lzma2 (bench1 #(freeze % {:compressor compression/lzma2-compressor})
#(thaw % {:compressor compression/lzma2-compressor}))})))
(println
{:fast
{:round (bench* (roundtrip-fast data))
:freeze (bench* (freeze data {:compressor nil}))
:thaw (let [frozen (freeze data {:compressor nil})]
(bench* (thaw frozen)))
:data-size (count (freeze data {:compressor nil}))}}))
(println)
(println "Done! (Time for cake?)")
(println "\nDone! (Time for cake?)")
true)
(comment
;; (bench {:reader? true :laps 2})
;; (bench {:reader? false :laps 1})
;; (bench {:reader? false :laps 2})
;; (bench {:reader? true :lzma2? true :laps 1})
;; (bench {:laps 2})
;;; 19 Oct 2013: Nippy v2.3.0, with lzma2 & (nb!) round=freeze+thaw
;; {:reader {:round 67798, :freeze 23202, :thaw 44596, :size 22971}}
;; {:default {:round 3632, :freeze 2349, :thaw 1283, :size 12369}}
;; {:encrypted {:round 6970, :freeze 4073, :thaw 2897, :size 12388}}
;; {:fast {:round 3294, :freeze 2109, :thaw 1185, :size 13277}}
;; {:lzma2 {:round 145301, :freeze 123650, :thaw 21651, :size 9024}}
;;; 11 Oct 2013: Nippy v2.2.0, with both ztellman mods
;; {:defaults {:round 4319, :freeze 2950, :thaw 1446, :data-size 12369}}

View file

@ -1,7 +1,9 @@
(ns taoensso.nippy.compression
"Alpha - subject to change."
{:author "Peter Taoussanis"}
(:require [taoensso.nippy.utils :as utils]))
(:require [taoensso.nippy.utils :as utils])
(:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
DataOutputStream]))
;;;; Interface
@ -16,5 +18,45 @@
(compress [_ ba] (org.iq80.snappy.Snappy/compress ba))
(decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))
(def snappy-compressor "Default org.iq80.snappy.Snappy compressor."
(->SnappyCompressor))
(def snappy-compressor
"Default org.iq80.snappy.Snappy compressor:
Ratio: low.
Write speed: very high.
Read speed: very high.
A good general-purpose compressor for Redis."
(->SnappyCompressor))
(deftype LZMA2Compressor [compression-level]
  ;; Compression level ∈ℕ[0,9] (low->high) with 6 LZMA2 default (we use 4)
  ;;
  ;; Payload layout written by `compress` and expected by `decompress`:
  ;;   [int uncompressed length, via DataOutputStream.writeInt][XZ stream]
  ICompressor
  (compress [_ ba]
    (let [ba-len (alength ^bytes ba)
          ba-os  (ByteArrayOutputStream.)
          ;; Prefix with uncompressed length:
          _      (.writeInt (DataOutputStream. ba-os) ba-len)
          xzs    (org.tukaani.xz.XZOutputStream. ba-os
                   (org.tukaani.xz.LZMA2Options. compression-level))]
      (.write xzs ^bytes ba)
      (.close xzs) ; Must close before .toByteArray so the XZ stream is finished
      (.toByteArray ba-os)))

  (decompress [_ ba]
    (let [ba-is  (ByteArrayInputStream. ^bytes ba)
          ba-len (.readInt (DataInputStream. ba-is))
          ;; A negative prefix can only come from corrupt input; fail with a
          ;; meaningful message rather than a NegativeArraySizeException:
          _      (when (neg? ba-len)
                   (throw (Exception. "LZMA2 Decompress failed: corrupt data?")))
          out    (byte-array ba-len)
          xzs    (org.tukaani.xz.XZInputStream. ba-is)]
      ;; A single InputStream.read call may return fewer bytes than requested,
      ;; and its return value was previously ignored, so a payload shorter than
      ;; the declared length came back zero-padded. readFully keeps reading
      ;; until `ba-len` bytes are filled and throws EOFException otherwise.
      (.readFully (DataInputStream. xzs) out 0 ba-len)
      ;; The stream must now be exhausted; trailing bytes mean the length
      ;; prefix and the payload disagree:
      (when (not= -1 (.read xzs)) ; Good practice as extra safety measure
        (throw (Exception. "LZMA2 Decompress failed: corrupt data?")))
      out)))
(def lzma2-compressor
"Alpha - subject to change.
Default org.tukaani.xz.LZMA2 compressor:
Ratio: high.
Write speed: very slow.
Read speed: medium.
A specialized compressor for large, low-write, high-read data."
;; Compression level 4, on the 0-9 scale noted on LZMA2Compressor (LZMA2's own default is 6)
(->LZMA2Compressor 4))

View file

@ -1,7 +1,8 @@
(ns taoensso.nippy.tests.main
(:require [expectations :as test :refer :all]
[taoensso.nippy :as nippy :refer (freeze thaw)]
[taoensso.nippy.benchmarks :as benchmarks]))
[taoensso.nippy.compression :as compression]
[taoensso.nippy.benchmarks :as benchmarks]))
;; Remove stuff from stress-data that breaks roundtrip equality
(def test-data (dissoc nippy/stress-data :bytes))
@ -14,6 +15,14 @@
(expect test-data ((comp #(thaw % {:password [:salted "p"]})
#(freeze % {:password [:salted "p"]}))
test-data))
(expect test-data ((comp #(thaw % {:compressor compression/lzma2-compressor})
#(freeze % {:compressor compression/lzma2-compressor}))
test-data))
(expect test-data ((comp #(thaw % {:compressor compression/lzma2-compressor
:password [:salted "p"]})
#(freeze % {:compressor compression/lzma2-compressor
:password [:salted "p"]}))
test-data))
(expect AssertionError (thaw (freeze test-data {:password "malformed"})))
(expect Exception (thaw (freeze test-data {:password [:salted "p"]})))
@ -46,4 +55,4 @@
(nippy/extend-thaw 2 [s] (->MyRec (.readUTF s)))
(= (->MyRec "fast-val") (thaw (freeze (->MyRec "val"))))))
(expect (benchmarks/bench {:reader? false})) ; Also tests :cached passwords
(expect (benchmarks/bench {})) ; Also tests :cached passwords