From a6aba2c92a97defa8b6aa6914c52d79df2748361 Mon Sep 17 00:00:00 2001
From: Peter Taoussanis
Date: Sat, 5 Apr 2014 14:20:38 +0700
Subject: [PATCH] Add experimental LZ4 compressors

---
 Note: the LZ4 decompress buffer sizing in this copy was revised during review,
 so the post-image blob id (41e49b4) on the compression.clj index line is stale.

 project.clj                        |  3 +-
 src/taoensso/nippy/compression.clj | 80 +++++++++++++++++++++++++++++--
 2 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/project.clj b/project.clj
index 03daf3e..fb20648 100644
--- a/project.clj
+++ b/project.clj
@@ -14,7 +14,8 @@
    [org.clojure/tools.reader "0.8.4"]
    [com.taoensso/encore "1.3.1"]
    [org.iq80.snappy/snappy "0.3"]
-   [org.tukaani/xz "1.5"]]
+   [org.tukaani/xz "1.5"]
+   [net.jpountz.lz4/lz4 "1.2.0"]]
 
   :test-paths ["test" "src"]
   :profiles
diff --git a/src/taoensso/nippy/compression.clj b/src/taoensso/nippy/compression.clj
index 3cc5f87..41e49b4 100644
--- a/src/taoensso/nippy/compression.clj
+++ b/src/taoensso/nippy/compression.clj
@@ -1,6 +1,6 @@
-(ns taoensso.nippy.compression
-  "Alpha - subject to change."
+(ns taoensso.nippy.compression "Alpha - subject to change."
   {:author "Peter Taoussanis"}
+  (:require [taoensso.encore :as encore])
   (:import [java.io ByteArrayInputStream ByteArrayOutputStream DataInputStream
             DataOutputStream]))
 
@@ -23,7 +23,7 @@
   Write speed: very high.
    Read speed: very high.
 
-  A good general-purpose compressor for Redis."
+  A good general-purpose compressor."
   (->SnappyCompressor))
 
 (deftype LZMA2Compressor [compression-level]
@@ -57,5 +57,77 @@
   Write speed: _very_ slow (also currently single-threaded).
    Read speed: slow.
 
-  A specialized compressor for large, low-write data."
+  A specialized compressor for large, low-write data in space-sensitive
+  environments."
   (->LZMA2Compressor 0))
+
+;; ICompressor backed by net.jpountz.lz4. Output is a raw LZ4 block with no
+;; decompressed-length prefix, so `decompress` must size its own buffer.
+(deftype LZ4Compressor [^net.jpountz.lz4.LZ4Compressor compressor
+                        ^net.jpountz.lz4.LZ4SafeDecompressor decompressor]
+  ICompressor
+  (compress [_ ba]
+    (let [in-len (alength ^bytes ba)
+          max-out-len (.maxCompressedLength compressor in-len)
+          ba-out* (byte-array max-out-len) ; Worst-case sized scratch buffer
+          out-len (.compress compressor ba 0 in-len ba-out* 0 max-out-len)
+          ba-out (java.util.Arrays/copyOf ba-out* out-len)] ; Trim to fit
+      ba-out))
+
+  (decompress [_ ba]
+    ;; Valid LZ4 data cannot expand by more than ~255x, so that bounds the
+    ;; largest buffer worth trying; beyond it the input must be corrupt.
+    (let [in-len (alength ^bytes ba)
+          max-out-len (min (- Integer/MAX_VALUE 8) (max 64 (* 255 in-len)))]
+      ;; Start from the old 3x guess and double until the data fits.
+      (loop [buf-len (min max-out-len (max 64 (* 3 in-len)))]
+        (let [ba-out* (byte-array buf-len)
+              out-len (try
+                        (.decompress decompressor ba 0 in-len ba-out* 0)
+                        (catch net.jpountz.lz4.LZ4Exception e
+                          ;; Thrown for both a too-small buffer and corrupt
+                          ;; input; only give up once the bound was tried.
+                          (if (< buf-len max-out-len) -1 (throw e))))]
+          (if (neg? out-len)
+            (recur (min max-out-len (* 2 buf-len)))
+            (java.util.Arrays/copyOf ba-out* (int out-len))))))))
+
+(def ^:private ^net.jpountz.lz4.LZ4Factory lz4-factory
+  (net.jpountz.lz4.LZ4Factory/fastestInstance))
+
+(def lz4-compressor
+  "Default net.jpountz.lz4 compressor:
+        Ratio: low.
+  Write speed: very high.
+   Read speed: very high.
+
+  A good general-purpose compressor, competitive with Snappy."
+  (->LZ4Compressor (.fastCompressor lz4-factory)
+                   (.safeDecompressor lz4-factory)))
+
+(def lz4hc-compressor "Like `lz4-compressor` but trades some speed for ratio."
+  (->LZ4Compressor (.highCompressor lz4-factory)
+                   (.safeDecompressor lz4-factory)))
+
+(comment
+  (def ba-bench (.getBytes (apply str (repeatedly 1000 rand)) "UTF-8"))
+  (defn bench1 [compressor]
+    {:time (encore/bench 10000 {:nlaps-warmup 10000}
+             (->> ba-bench (compress compressor) (decompress compressor)))
+     :ratio (encore/round2 (/ (count (compress compressor ba-bench))
+                              (count ba-bench)))})
+
+  (println
+   {:snappy (bench1 snappy-compressor)
+    ;; :lzma (bench1 lzma2-compressor) ; Slow!
+    :lz4 (bench1 lz4-compressor)
+    :lz4hc (bench1 lz4hc-compressor)})
+
+  ;;; 2014 April 5, initial benchmarks
+  {:snappy {:time 2214 :ratio 0.848}
+   :lzma {:time 46684 :ratio 0.494}
+   :lz4 {:time 1327 :ratio 0.819} ; w/o uncompressed size prefix
+   :lz4hc {:time 5762 :ratio 0.763} ; ''
+   ;; :lz4 {:time 1404 :ratio 0.819} ; with uncompressed size prefix
+   ;; :lz4hc {:time 6028 :ratio 0.763} ; ''
+   })