add initial implementation of LZ4 compression

This commit is contained in:
Max Penet 2013-10-02 16:13:55 +02:00
parent b80fcc8552
commit d53c582249
4 changed files with 87 additions and 8 deletions

View file

@ -6,7 +6,9 @@
:dependencies [[org.clojure/clojure "1.4.0"]
[org.clojure/tools.reader "0.7.7"]
[expectations "1.4.55"]
[org.iq80.snappy/snappy "0.3"]]
[org.iq80.snappy/snappy "0.3"]
[net.jpountz.lz4/lz4 "1.2.0"]
[primitive-math "0.1.3"]]
:profiles {:1.4 {:dependencies [[org.clojure/clojure "1.4.0"]]}
:1.5 {:dependencies [[org.clojure/clojure "1.5.1"]]}
:1.6 {:dependencies [[org.clojure/clojure "1.6.0-master-SNAPSHOT"]]}

View file

@ -2,7 +2,8 @@
{:author "Peter Taoussanis"}
(:require [clojure.tools.reader.edn :as edn]
[taoensso.nippy :as nippy :refer (freeze thaw)]
[taoensso.nippy.utils :as utils]))
[taoensso.nippy.utils :as utils]
[taoensso.nippy.compression :refer (lz4-compressor lz4hc-compressor)]))
;; Remove stuff from stress-data that breaks reader
;; Benchmark payload: nippy's stress data minus the entries dropped below.
(def data
  (apply dissoc nippy/stress-data [:queue :queue-empty :bytes]))
@ -16,6 +17,10 @@
;; Round-trip helpers: each one freezes a value and then thaws the result,
;; differing only in the options handed to freeze/thaw.

;; Default options on both sides.
(def roundtrip-defaults (comp thaw freeze))

;; Same cached password used for freezing and thawing.
(def roundtrip-encrypted
  (let [opts {:password [:cached "p"]}]
    (comp #(thaw % opts)
          #(freeze % opts))))

;; LZ4 compressor on both sides.
(def roundtrip-lz4
  (let [opts {:compressor lz4-compressor}]
    (comp #(thaw % opts)
          #(freeze % opts))))

;; LZ4 high-compression compressor on both sides.
(def roundtrip-lz4hc
  (let [opts {:compressor lz4hc-compressor}]
    (comp #(thaw % opts)
          #(freeze % opts))))

;; Freeze with compression disabled, thaw with default options.
(def roundtrip-fast
  (let [freeze-uncompressed #(freeze % {:compressor nil})]
    (comp thaw freeze-uncompressed)))
(defn bench [{:keys [reader? laps] :or {reader? true laps 1}}]
@ -55,7 +60,25 @@
:freeze (bench* (freeze data {:compressor nil}))
:thaw (let [frozen (freeze data {:compressor nil})]
(bench* (thaw frozen)))
:data-size (count (freeze data {:compressor nil}))}}))
:data-size (count (freeze data {:compressor nil}))}})
(println
{:LZ4
{:round (bench* (roundtrip-lz4 data))
:freeze (bench* (freeze data {:compressor lz4-compressor}))
:thaw (let [frozen (freeze data {:compressor lz4-compressor})]
(bench* (thaw frozen {:compressor lz4-compressor})))
:data-size (count (freeze data {:compressor lz4-compressor}))}})
(println
{:LZ4-hc
{:round (bench* (roundtrip-lz4hc data))
:freeze (bench* (freeze data {:compressor lz4hc-compressor}))
:thaw (let [frozen (freeze data {:compressor lz4hc-compressor})]
(bench* (thaw frozen {:compressor lz4hc-compressor})))
:data-size (count (freeze data {:compressor lz4hc-compressor}))}})
)
(println)
(println "Done! (Time for cake?)")
@ -113,4 +136,4 @@
(println (bench* (roundtrip data))) ; Snappy implementations
;; {:no-snappy [6163 6064 6042 6176] :JNI [6489 6446 6542 6412]
;; :native-array-copy [6569 6419 6414 6590]}
)
)

View file

@ -1,7 +1,9 @@
(ns taoensso.nippy.compression
"Alpha - subject to change."
{:author "Peter Taoussanis"}
(:require [taoensso.nippy.utils :as utils]))
(:require [taoensso.nippy.utils :as utils]
[primitive-math :as pm :refer [<< >>>]])
(:import (net.jpountz.lz4 LZ4Factory)))
;;;; Interface
@ -17,4 +19,45 @@
(decompress [_ ba] (org.iq80.snappy.Snappy/uncompress ba 0 (alength ^bytes ba))))
(def snappy-compressor "Default org.iq80.snappy.Snappy compressor."
(->SnappyCompressor))
(->SnappyCompressor))
;;;; LZ4 Compression
;; Shared LZ4 factory, obtained once when the namespace loads and used to
;; build the compressor instances defined further down in this namespace.
;; The type hint stays fully qualified so it resolves from any namespace.
(def ^net.jpountz.lz4.LZ4Factory LZ4-factory
  (LZ4Factory/fastestInstance))

;; Size in bytes of the length header written in front of each LZ4 payload.
(def ^:const int-bytes (int 4))
;; Implementation borrowed from Cassandra: the compressed byte array is
;; prefixed with the *uncompressed* length (4 bytes, big-endian) so the
;; decompressor can size its output exactly, which makes decompression faster.
;; ICompressor backed by net.jpountz.lz4.
;;
;; Wire format produced by `compress` and consumed by `decompress`:
;;   bytes 0-3  uncompressed length, big-endian
;;   bytes 4..  LZ4-compressed block
(deftype LZ4Compressor
    [^net.jpountz.lz4.LZ4Compressor compressor
     ^net.jpountz.lz4.LZ4Decompressor decompressor]
  ICompressor

  ;; Returns a new byte array holding the 4-byte length header followed by
  ;; the compressed form of `ba`. The result is trimmed to the bytes actually
  ;; written rather than the worst-case buffer size.
  (compress [_ ba]
    (let [input-len (alength ^bytes ba)
          max-compressed-length (.maxCompressedLength compressor input-len)
          ;; Scratch buffer sized for the worst case; trimmed before returning.
          ^bytes output (byte-array (pm/+ int-bytes max-compressed-length))]
      ;; Header: uncompressed length, most significant byte first.
      (aset-byte output 0 (pm/byte (>>> input-len 24)))
      (aset-byte output 1 (pm/byte (>>> input-len 16)))
      (aset-byte output 2 (pm/byte (>>> input-len 8)))
      (aset-byte output 3 (pm/byte input-len))
      ;; `.compress` returns the number of compressed bytes it wrote; keep it
      ;; so the padding left over from the worst-case buffer can be dropped.
      (let [compressed-len (.compress compressor ba 0 input-len
                                      output int-bytes max-compressed-length)]
        (java.util.Arrays/copyOf output (int (pm/+ int-bytes compressed-len))))))

  ;; Reads the 4-byte length header from `ba`, allocates an output array of
  ;; exactly that size and decompresses the remainder of `ba` into it.
  (decompress [_ ba]
    (let [uncompressed-len (pm/bit-or (<< (pm/byte->ubyte (aget ^bytes ba 0)) 24)
                                      (<< (pm/byte->ubyte (aget ^bytes ba 1)) 16)
                                      (<< (pm/byte->ubyte (aget ^bytes ba 2)) 8)
                                      (pm/byte->ubyte (aget ^bytes ba 3)))
          output (byte-array uncompressed-len)]
      ;; Driven by the expected output length; no compressed length is passed,
      ;; so payloads with trailing padding are handled the same way.
      (.decompress decompressor ba int-bytes output 0 uncompressed-len)
      output)))
;; Pairs the factory's fast compressor with its fast decompressor.
(def lz4-compressor
  "Default net.jpountz.lz4 compressor."
  (let [factory LZ4-factory]
    (LZ4Compressor. (.fastCompressor factory)
                    (.fastDecompressor factory))))
;; Pairs the factory's high compressor with the same fast decompressor used
;; by the default LZ4 compressor.
(def lz4hc-compressor
  "High compression net.jpountz.lz4 compressor."
  (let [factory LZ4-factory]
    (LZ4Compressor. (.highCompressor factory)
                    (.fastDecompressor factory))))

View file

@ -1,7 +1,8 @@
(ns taoensso.nippy.tests.main
(:require [expectations :as test :refer :all]
[taoensso.nippy :as nippy :refer (freeze thaw)]
[taoensso.nippy.benchmarks :as benchmarks]))
[taoensso.nippy.benchmarks :as benchmarks]
[taoensso.nippy.compression :refer [lz4-compressor lz4hc-compressor]]))
;; Remove stuff from stress-data that breaks roundtrip equality
;; Test payload: nippy's stress data without the :bytes entry.
(def test-data
  (-> nippy/stress-data
      (dissoc :bytes)))
@ -30,6 +31,16 @@
(thaw (org.iq80.snappy.Snappy/uncompress iq80-ba 0 (alength iq80-ba)))
(thaw (org.iq80.snappy.Snappy/uncompress xerial-ba 0 (alength xerial-ba))))))
;; LZ4 round-trip: freezing test-data with lz4-compressor and thawing it with
;; the same compressor must give back a value equal to test-data.
(expect
(= test-data (thaw (freeze test-data
{:compressor lz4-compressor})
{:compressor lz4-compressor})))
;; LZ4-hc round-trip: freezing test-data with lz4hc-compressor and thawing it
;; with the same compressor must give back a value equal to test-data.
(expect
(= test-data (thaw (freeze test-data
{:compressor lz4hc-compressor})
{:compressor lz4hc-compressor})))
;;; Records (reflecting)
;; Minimal single-field record used as the subject of the record tests.
(defrecord MyRec [data])
;; A record frozen and thawed with default options must equal the original.
(expect (let [rec (->MyRec "val")] (= rec (thaw (freeze rec)))))
@ -46,4 +57,4 @@
(nippy/extend-thaw 2 [s] (->MyRec (.readUTF s)))
(= (->MyRec "fast-val") (thaw (freeze (->MyRec "val"))))))
(expect (benchmarks/bench {:reader? false})) ; Also tests :cached passwords
(expect (benchmarks/bench {:reader? false})) ; Also tests :cached passwords