From d53c58224904e450142c0d5f0aa48093238a7e16 Mon Sep 17 00:00:00 2001
From: Max Penet
Date: Wed, 2 Oct 2013 16:13:55 +0200
Subject: [PATCH] add initial implementation of LZ4 compression

Adds `lz4-compressor` and `lz4hc-compressor` (net.jpountz.lz4) alongside the
existing Snappy compressor, plus benchmarks and round-trip tests.

Frame layout produced by `compress`: a 4-byte big-endian *uncompressed*
length followed by the LZ4 block. The scratch buffer is sized for the worst
case, so the returned array is trimmed to the bytes the compressor actually
wrote; `decompress` reads the prefix to size its output exactly.
---
 project.clj                        |  4 ++-
 src/taoensso/nippy/benchmarks.clj  | 29 ++++++++++++++++--
 src/taoensso/nippy/compression.clj | 47 ++++++++++++++++++++++++++++--
 test/taoensso/nippy/tests/main.clj | 15 ++++++++--
 4 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/project.clj b/project.clj
index a33e7a0..110b7b2 100644
--- a/project.clj
+++ b/project.clj
@@ -6,7 +6,9 @@
   :dependencies [[org.clojure/clojure "1.4.0"]
                  [org.clojure/tools.reader "0.7.7"]
                  [expectations "1.4.55"]
-                 [org.iq80.snappy/snappy "0.3"]]
+                 [org.iq80.snappy/snappy "0.3"]
+                 [net.jpountz.lz4/lz4 "1.2.0"]
+                 [primitive-math "0.1.3"]]
   :profiles {:1.4 {:dependencies [[org.clojure/clojure "1.4.0"]]}
              :1.5 {:dependencies [[org.clojure/clojure "1.5.1"]]}
              :1.6 {:dependencies [[org.clojure/clojure "1.6.0-master-SNAPSHOT"]]}
diff --git a/src/taoensso/nippy/benchmarks.clj b/src/taoensso/nippy/benchmarks.clj
index 464611c..b2efd12 100644
--- a/src/taoensso/nippy/benchmarks.clj
+++ b/src/taoensso/nippy/benchmarks.clj
@@ -2,7 +2,8 @@
   {:author "Peter Taoussanis"}
   (:require [clojure.tools.reader.edn :as edn]
             [taoensso.nippy :as nippy :refer (freeze thaw)]
-            [taoensso.nippy.utils :as utils]))
+            [taoensso.nippy.utils :as utils]
+            [taoensso.nippy.compression :refer (lz4-compressor lz4hc-compressor)]))
 
 ;; Remove stuff from stress-data that breaks reader
 (def data (dissoc nippy/stress-data :queue :queue-empty :bytes))
@@ -16,6 +17,10 @@
 (def roundtrip-defaults  (comp thaw freeze))
 (def roundtrip-encrypted (comp #(thaw   % {:password [:cached "p"]})
                                #(freeze % {:password [:cached "p"]})))
+(def roundtrip-lz4       (comp #(thaw % {:compressor lz4-compressor})
+                               #(freeze % {:compressor lz4-compressor})))
+(def roundtrip-lz4hc     (comp #(thaw % {:compressor lz4hc-compressor})
+                               #(freeze % {:compressor lz4hc-compressor})))
 (def roundtrip-fast      (comp thaw #(freeze % {:compressor nil})))
 
 (defn bench [{:keys [reader? laps] :or {reader? true laps 1}}]
@@ -55,7 +60,25 @@
                 :freeze    (bench* (freeze data {:compressor nil}))
                 :thaw      (let [frozen (freeze data {:compressor nil})]
                              (bench* (thaw frozen)))
-                :data-size (count (freeze data {:compressor nil}))}}))
+                :data-size (count (freeze data {:compressor nil}))}})
+
+      (println
+       {:LZ4
+        {:round (bench* (roundtrip-lz4 data))
+         :freeze (bench* (freeze data {:compressor lz4-compressor}))
+         :thaw (let [frozen (freeze data {:compressor lz4-compressor})]
+                 (bench* (thaw frozen {:compressor lz4-compressor})))
+         :data-size (count (freeze data {:compressor lz4-compressor}))}})
+
+      (println
+       {:LZ4-hc
+        {:round (bench* (roundtrip-lz4hc data))
+         :freeze (bench* (freeze data {:compressor lz4hc-compressor}))
+         :thaw (let [frozen (freeze data {:compressor lz4hc-compressor})]
+                 (bench* (thaw frozen {:compressor lz4hc-compressor})))
+         :data-size (count (freeze data {:compressor lz4hc-compressor}))}})
+
+      )
 
   (println)
   (println "Done! (Time for cake?)")
@@ -113,4 +136,4 @@
   (println (bench* (roundtrip data))) ; Snappy implementations
   ;; {:no-snappy [6163 6064 6042 6176] :JNI [6489 6446 6542 6412]
   ;;  :native-array-copy [6569 6419 6414 6590]}
-  )
\ No newline at end of file
+  )
diff --git a/src/taoensso/nippy/compression.clj b/src/taoensso/nippy/compression.clj
index e5e895e..cf2a53a 100644
--- a/src/taoensso/nippy/compression.clj
+++ b/src/taoensso/nippy/compression.clj
@@ -1,7 +1,9 @@
 (ns taoensso.nippy.compression
   "Alpha - subject to change."
   {:author "Peter Taoussanis"}
-  (:require [taoensso.nippy.utils :as utils]))
+  (:require [taoensso.nippy.utils :as utils]
+            [primitive-math :as pm :refer [<< >>>]])
+  (:import (net.jpountz.lz4 LZ4Factory)))
 
 ;;;; Interface
 
@@ -17,4 +19,45 @@
   (decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))
 
 (def snappy-compressor "Default org.iq80.snappy.Snappy compressor."
-  (->SnappyCompressor))
\ No newline at end of file
+  (->SnappyCompressor))
+
+
+;;;; LZ4 Compression
+
+(def ^net.jpountz.lz4.LZ4Factory LZ4-factory
+  (net.jpountz.lz4.LZ4Factory/fastestInstance))
+
+(def ^:const int-bytes (int 4))
+
+;; Framing borrowed from Cassandra: a 4-byte big-endian *uncompressed* length
+;; prefix lets decompress size its output exactly; payload is trimmed to fit.
+(deftype LZ4Compressor
+    [^net.jpountz.lz4.LZ4Compressor compressor
+     ^net.jpountz.lz4.LZ4Decompressor decompressor]
+  ICompressor
+  (compress [_ ba]
+    (let [input-len (alength ^bytes ba)
+          max-compressed-length (.maxCompressedLength compressor input-len)
+          output (byte-array (pm/+ int-bytes max-compressed-length))
+          compressed-len (.compress compressor ba 0 input-len output int-bytes max-compressed-length)]
+      (aset-byte output 0 (pm/byte (>>> input-len 24)))
+      (aset-byte output 1 (pm/byte (>>> input-len 16)))
+      (aset-byte output 2 (pm/byte (>>> input-len 8)))
+      (aset-byte output 3 (pm/byte input-len))
+      (java.util.Arrays/copyOf ^bytes output (int (pm/+ int-bytes compressed-len)))))
+  (decompress [_ ba]
+    (let [uncompressed-len (pm/bit-or (<< (pm/byte->ubyte (aget ^bytes ba 0)) 24)
+                                      (<< (pm/byte->ubyte (aget ^bytes ba 1)) 16)
+                                      (<< (pm/byte->ubyte (aget ^bytes ba 2)) 8)
+                                      (pm/byte->ubyte (aget ^bytes ba 3)))
+          output (byte-array uncompressed-len)]
+      (.decompress decompressor ba int-bytes output 0 uncompressed-len)
+      output)))
+
+(def lz4-compressor "Default net.jpountz.lz4 compressor."
+  (->LZ4Compressor (.fastCompressor LZ4-factory)
+                   (.fastDecompressor LZ4-factory)))
+
+(def lz4hc-compressor "High compression net.jpountz.lz4 compressor."
+  (->LZ4Compressor (.highCompressor LZ4-factory)
+                   (.fastDecompressor LZ4-factory)))
diff --git a/test/taoensso/nippy/tests/main.clj b/test/taoensso/nippy/tests/main.clj
index a9d0344..bf58879 100644
--- a/test/taoensso/nippy/tests/main.clj
+++ b/test/taoensso/nippy/tests/main.clj
@@ -1,7 +1,8 @@
 (ns taoensso.nippy.tests.main
   (:require [expectations :as test :refer :all]
             [taoensso.nippy :as nippy :refer (freeze thaw)]
-            [taoensso.nippy.benchmarks :as benchmarks]))
+            [taoensso.nippy.benchmarks :as benchmarks]
+            [taoensso.nippy.compression :refer [lz4-compressor lz4hc-compressor]]))
 
 ;; Remove stuff from stress-data that breaks roundtrip equality
 (def test-data (dissoc nippy/stress-data :bytes))
@@ -30,6 +31,16 @@
        (thaw (org.iq80.snappy.Snappy/uncompress iq80-ba   0 (alength iq80-ba)))
        (thaw (org.iq80.snappy.Snappy/uncompress xerial-ba 0 (alength xerial-ba))))))
 
+(expect
+ (= test-data (thaw (freeze test-data
+                            {:compressor lz4-compressor})
+                    {:compressor lz4-compressor})))
+
+(expect
+ (= test-data (thaw (freeze test-data
+                            {:compressor lz4hc-compressor})
+                    {:compressor lz4hc-compressor})))
+
 ;;; Records (reflecting)
 (defrecord MyRec [data])
 (expect (let [rec (->MyRec "val")] (= rec (thaw (freeze rec)))))
@@ -46,4 +57,4 @@
   (nippy/extend-thaw 2 [s] (->MyRec (.readUTF s)))
   (= (->MyRec "fast-val") (thaw (freeze (->MyRec "val"))))))
 
-(expect (benchmarks/bench {:reader? false})) ; Also tests :cached passwords
\ No newline at end of file
+(expect (benchmarks/bench {:reader? false})) ; Also tests :cached passwords