diff --git a/README.md b/README.md index 04a6584..e74ffa2 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ Current [semantic](http://semver.org/) version: ```clojure -[com.taoensso/nippy "1.2.1"] +[com.taoensso/nippy "1.2.1"] ; Stable +[com.taoensso/nippy "1.3.0-alpha1"] ; Development (adds crypto support) ``` # Nippy, a Clojure serialization library @@ -17,6 +18,7 @@ Nippy is an attempt to provide a drop-in, high-performance alternative to the re * **Reader-fallback** for difficult/future types (including Clojure 1.4+ tagged literals). * **Full test coverage** for every supported type. * [Snappy](http://code.google.com/p/snappy/) **integrated de/compression** for efficient storage and network transfer. + * Enable **high-strength encryption** with a single option. (1.3.0+) ## Getting started @@ -102,9 +104,7 @@ Couldn't be simpler! ## Performance -![Performance comparison chart](https://github.com/ptaoussanis/nippy/raw/master/benchmarks/chart1.png) - -![Data size chart](https://github.com/ptaoussanis/nippy/raw/master/benchmarks/chart2.png) +![Comparison chart](https://github.com/ptaoussanis/nippy/raw/master/benchmarks/chart.png) [Detailed benchmark information](https://docs.google.com/spreadsheet/ccc?key=0AuSXb68FH4uhdE5kTTlocGZKSXppWG9sRzA5Y2pMVkE&pli=1#gid=0) is available on Google Docs. @@ -127,4 +127,4 @@ Otherwise reach me (Peter Taoussanis) at [taoensso.com](https://www.taoensso.com ## License -Copyright © 2012, 2013 Peter Taoussanis. Distributed under the [Eclipse Public License](http://www.eclipse.org/legal/epl-v10.html), the same as Clojure. +Copyright © 2012, 2013 Peter Taoussanis. Distributed under the [Eclipse Public License](http://www.eclipse.org/legal/epl-v10.html), the same as Clojure. 
\ No newline at end of file diff --git a/benchmarks/chart.png b/benchmarks/chart.png new file mode 100644 index 0000000..77608db Binary files /dev/null and b/benchmarks/chart.png differ diff --git a/benchmarks/chart1.png b/benchmarks/chart1.png deleted file mode 100644 index 02ce9a1..0000000 Binary files a/benchmarks/chart1.png and /dev/null differ diff --git a/benchmarks/chart2.png b/benchmarks/chart2.png deleted file mode 100644 index 92a7dab..0000000 Binary files a/benchmarks/chart2.png and /dev/null differ diff --git a/project.clj b/project.clj index 04227de..12f82bb 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject com.taoensso/nippy "1.2.1" +(defproject com.taoensso/nippy "1.3.0-alpha1" :description "Clojure serialization library" :url "https://github.com/ptaoussanis/nippy" :license {:name "Eclipse Public License" diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index 0817e23..6df0e03 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -2,7 +2,8 @@ "Simple, high-performance Clojure serialization library. Adapted from Deep-Freeze." {:author "Peter Taoussanis"} - (:require [taoensso.nippy.utils :as utils]) + (:require [taoensso.nippy.utils :as utils] + [taoensso.nippy.crypto :as crypto]) (:import [java.io DataInputStream DataOutputStream ByteArrayOutputStream ByteArrayInputStream] [clojure.lang Keyword BigInt Ratio PersistentQueue PersistentTreeMap @@ -166,14 +167,17 @@ (defn freeze-to-bytes "Serializes x to a byte array and returns the array." - ^bytes [x & {:keys [compress? print-dup?] - :or {compress? true + ^bytes [x & {:keys [crypto compress? print-dup? salt password] + :or {crypto crypto/crypto-default + compress? true print-dup? true}}] (let [ba (ByteArrayOutputStream.) stream (DataOutputStream. ba)] (freeze-to-stream! stream x print-dup?) - (let [ba (.toByteArray ba)] - (if compress? (utils/compress-bytes ba) ba)))) + (let [ba (.toByteArray ba) + ba (if compress? 
(utils/compress-bytes ba) ba) + ba (if password (crypto/encrypt crypto salt password ba) ba)] + ba))) ;;;; Thawing @@ -251,10 +255,13 @@ (defn thaw-from-bytes "Deserializes an object from given byte array." - [ba & {:keys [read-eval? compressed?] - :or {read-eval? false ; For `read-string` injection safety - NB!!! + [ba & {:keys [crypto compressed? read-eval? salt password] + :or {crypto crypto/crypto-default + read-eval? false ; For `read-string` injection safety - NB!!! compressed? true}}] - (-> (if compressed? (utils/uncompress-bytes ba) ba) + (-> (let [ba (if password (crypto/decrypt crypto salt password ba) ba) + ba (if compressed? (utils/uncompress-bytes ba) ba)] + ba) (ByteArrayInputStream.) (DataInputStream.) (thaw-from-stream! read-eval?))) diff --git a/src/taoensso/nippy/benchmarks.clj b/src/taoensso/nippy/benchmarks.clj index ea538f4..03f9c2e 100644 --- a/src/taoensso/nippy/benchmarks.clj +++ b/src/taoensso/nippy/benchmarks.clj @@ -1,7 +1,8 @@ (ns taoensso.nippy.benchmarks {:author "Peter Taoussanis"} (:use [taoensso.nippy :as nippy :only (freeze-to-bytes thaw-from-bytes)]) - (:require [taoensso.nippy.utils :as utils])) + (:require [taoensso.nippy.utils :as utils] + [taoensso.nippy.crypto :as crypto])) ;; Remove stuff from stress-data that breaks reader (def data (dissoc nippy/stress-data :queue :queue-empty :bytes)) @@ -10,52 +11,80 @@ (defn reader-freeze [x] (binding [*print-dup* false] (pr-str x))) (defn reader-thaw [x] (binding [*read-eval* false] (read-string x))) +(def reader-roundtrip (comp reader-thaw reader-freeze)) -(def roundtrip (comp thaw-from-bytes freeze-to-bytes)) -(def reader-roundtrip (comp reader-thaw reader-freeze)) +(def crypto-opts [:password "secret" :crypto crypto/crypto-default-cached]) -(defn autobench [] (bench (roundtrip data))) +(def roundtrip-defaults (comp nippy/thaw-from-bytes nippy/freeze-to-bytes)) +(def roundtrip-encrypted (comp #(apply nippy/thaw-from-bytes % crypto-opts) + #(apply nippy/freeze-to-bytes % 
crypto-opts))) +(def roundtrip-fast (comp #(nippy/thaw-from-bytes % :compressed? false) + #(nippy/freeze-to-bytes % :compress? false))) + +(defn autobench [] (bench (roundtrip-defaults data) + (roundtrip-encrypted data))) (comment - ;;; Times - (println - "---\n" - {:reader {:freeze (bench (reader-freeze data)) - :thaw (let [frozen (reader-freeze data)] - (bench (reader-thaw frozen))) - :round (bench (reader-roundtrip data))} + (do ; Roundtrip times + (println "Benching (this can take some time)...") + (println "-------------------------------------") - :nippy {:freeze (bench (freeze-to-bytes data)) - :thaw (let [frozen (freeze-to-bytes data)] - (bench (thaw-from-bytes frozen))) - :round (bench (roundtrip data))}}) + (println + {:reader + {:freeze (bench (reader-freeze data)) + :thaw (let [frozen (reader-freeze data)] + (bench (reader-thaw frozen))) + :round (bench (reader-roundtrip data)) + :data-size (count (.getBytes ^String (reader-freeze data) "UTF-8"))}}) - ;; Clojure 1.3.0, Nippy 0.9.2 - ;; {:reader {:freeze 28505, :thaw 36451, :round 59545}, - ;; :nippy {:freeze 3751, :thaw 4184, :round 7769}} - ;; (float (/ 59545 7769)) = 7.6644354 + (println + {:defaults + {:freeze (bench (freeze-to-bytes data)) + :thaw (let [frozen (freeze-to-bytes data)] + (bench (thaw-from-bytes frozen))) + :round (bench (roundtrip-defaults data)) + :data-size (count (freeze-to-bytes data))}}) - ;; Clojure 1.4.0, Nippy 1.0.0 (+ tagged-uuid, tagged-date) - ;; {:reader {:freeze 22595, :thaw 31148, :round 54059} - ;; :nippy {:freeze 3324, :thaw 3725, :round 6918}} - ;; (float (/ 54059 6918)) = 7.814253 + (println + {:encrypted + {:freeze (bench (apply freeze-to-bytes data crypto-opts)) + :thaw (let [frozen (apply freeze-to-bytes data crypto-opts)] + (bench (apply thaw-from-bytes frozen crypto-opts))) + :round (bench (roundtrip-encrypted data)) + :data-size (count (apply freeze-to-bytes data crypto-opts))}}) - ;; Clojure 1.5.1, Nippy 1.2.1 (+ sorted-set, sorted-map) + (println + {:fast + 
{:freeze (bench (freeze-to-bytes data :compress? false)) + :thaw (let [frozen (freeze-to-bytes data :compress? false)] + (bench (thaw-from-bytes frozen :compressed? false))) + :round (bench (roundtrip-fast data)) + :data-size (count (freeze-to-bytes data :compress? false))}}) + + (println "Done! (Time for cake?)")) + + ;;; 11 June 2013: Clojure 1.5.1, Nippy 1.3.0-alpha1 + ;; {:reader {:freeze 17042, :thaw 31579, :round 48379, :data-size 22954}} + ;; {:defaults {:freeze 3810, :thaw 5295, :round 9052, :data-size 12394}} + ;; {:encrypted {:freeze 5800, :thaw 6862, :round 12317, :data-size 12416}} + ;; {:fast {:freeze 3078, :thaw 4684, :round 8117, :data-size 13274}} + + ;;; Clojure 1.5.1, Nippy 1.2.1 (+ sorted-set, sorted-map) ;; (def data (dissoc data :sorted-set :sorted-map)) ;; {:reader {:freeze 15037, :thaw 27885, :round 43945}, ;; :nippy {:freeze 3194, :thaw 4734, :round 8380}} - ;; (float (/ 43945 8380)) = 5.2440333 + ;; {:reader-size 22975, :defaults-size 12400, :encrypted-size 12400} -;;; Data size - (let [frozen (reader-freeze data)] (count (.getBytes frozen "UTF8"))) - (let [frozen (freeze-to-bytes data)] (count frozen)) - ;; 22955, 12402 - ;; (float (/ 22955 12402)) = 1.8509111 + ;;; Clojure 1.4.0, Nippy 1.0.0 (+ tagged-uuid, tagged-date) + ;; {:reader {:freeze 22595, :thaw 31148, :round 54059} + ;; :nippy {:freeze 3324, :thaw 3725, :round 6918}} -;;; Snappy implementations - (println (bench (roundtrip data))) - ;; No Snappy: 6163 6064 6042 6176 - ;; Snappy JNI: 6489 6446 6542 6412 - ;; Snappy native array copy: 6569 6419 6414 6590 - ) \ No newline at end of file + ;;; Clojure 1.3.0, Nippy 0.9.2 + ;; {:reader {:freeze 28505, :thaw 36451, :round 59545}, + ;; :nippy {:freeze 3751, :thaw 4184, :round 7769}} + + (println (bench (roundtrip data))) ; Snappy implementations + ;; {:no-snappy [6163 6064 6042 6176] :JNI [6489 6446 6542 6412] + ;; :native-array-copy [6569 6419 6414 6590]} + ) \ No newline at end of file diff --git a/src/taoensso/nippy/crypto.clj 
b/src/taoensso/nippy/crypto.clj
new file mode 100644
index 0000000..70606d8
--- /dev/null
+++ b/src/taoensso/nippy/crypto.clj
@@ -0,0 +1,143 @@
+(ns taoensso.nippy.crypto
+  "Alpha - subject to change.
+  Simple no-nonsense crypto with reasonable defaults. Because your Clojure data
+  deserves some privacy."
+  {:author "Peter Taoussanis"}
+  (:require [taoensso.nippy.utils :as utils]))
+
+(defprotocol ICrypto "Simple cryptography interface."
+  (gen-key ^javax.crypto.spec.SecretKeySpec [crypto salt pwd]
+    "Returns an appropriate SecretKeySpec.")
+  (encrypt ^bytes [crypto salt pwd ba] "Returns encrypted bytes.")
+  (decrypt ^bytes [crypto salt pwd ba] "Returns decrypted bytes."))
+
+(defrecord CryptoAES [cipher-type default-salt key-gen-opts cache])
+
+(def ^:private ^:const aes128-block-size (int 16))
+
+(defn- sha512-key
+  "Default SHA512-based key generator. Good JVM availability without extra
+  dependencies (PBKDF2, bcrypt, scrypt, etc.). Decent security with multiple
+  rounds. VERY aggressive multiples (>64) possible+recommended when cached.
+
+  Hashes the UTF-8 bytes of `salted-pwd` (* Short/MAX_VALUE rounds-multiple)
+  times and returns the first 16 bytes of the result as an AES SecretKeySpec."
+  [^String salted-pwd & [{:keys [rounds-multiple]
+                          :or   {rounds-multiple 5}}]] ; Cacheable
+  ;; MessageDigest instances are stateful and not thread-safe, so each call
+  ;; gets its own rather than sharing one across concurrent freezes/thaws.
+  (let [md (java.security.MessageDigest/getInstance "SHA-512")]
+    (loop [^bytes ba (.getBytes salted-pwd "UTF-8")
+           n (* (int Short/MAX_VALUE) (or rounds-multiple 5))]
+      (if-not (zero? n)
+        (recur (.digest md ba) (dec n))
+        (-> ba
+            ;; 128bit keys have good JVM availability and are
+            ;; entirely sufficient, Ref. http://goo.gl/2YRQG
+            (java.util.Arrays/copyOf aes128-block-size)
+            (javax.crypto.spec.SecretKeySpec. "AES"))))))
+
+(comment
+  (time (sha512-key "hi" {:rounds-multiple 1}))   ; ~40ms per hash (fast)
+  (time (sha512-key "hi" {:rounds-multiple 5}))   ; ~180ms (default)
+  (time (sha512-key "hi" {:rounds-multiple 32}))  ; ~1200ms (conservative)
+  (time (sha512-key "hi" {:rounds-multiple 128})) ; ~4500ms (paranoid)
+  )
+
+(defn- cipher
+  "Returns a new Cipher for the given transformation string. A Cipher carries
+  per-operation state between `.init` and `.doFinal`, so instances are never
+  shared between calls."
+  ^javax.crypto.Cipher [cipher-type]
+  (javax.crypto.Cipher/getInstance cipher-type))
+
+(def ^:private ^java.security.SecureRandom rand-gen
+  (java.security.SecureRandom/getInstance "SHA1PRNG"))
+(defn- rand-bytes [size] (let [seed (make-array Byte/TYPE size)]
+                           (.nextBytes rand-gen seed) seed))
+
+(extend-type CryptoAES
+  ICrypto
+  (gen-key [{:keys [default-salt key-gen-opts cache]} salt pwd]
+    (utils/apply-memoized cache
+      sha512-key (str (or salt default-salt) pwd) key-gen-opts))
+
+  ;; Output layout: [16-byte IV in the clear][ciphertext of ba].
+  ;; The IV must NOT be part of the encrypted input: with CBC the first block
+  ;; would be E(iv XOR iv) = E(0), i.e. the same for every message under a
+  ;; key, which makes the whole ciphertext deterministic.
+  (encrypt [{:keys [cipher-type] :as crypto} salt pwd ba]
+    (let [cipher (cipher cipher-type)
+          key    (gen-key crypto salt pwd)
+          iv-ba  (rand-bytes aes128-block-size)
+          iv     (javax.crypto.spec.IvParameterSpec. iv-ba)]
+      (.init cipher javax.crypto.Cipher/ENCRYPT_MODE key iv)
+      (utils/ba-concat iv-ba (.doFinal cipher ^bytes ba))))
+
+  ;; Reads the leading 16 bytes as the IV and decrypts the remainder.
+  (decrypt [{:keys [cipher-type] :as crypto} salt pwd ba]
+    (let [cipher (cipher cipher-type)
+          key    (gen-key crypto salt pwd)
+          [iv-ba data-ba] (utils/ba-split ba aes128-block-size)
+          iv     (javax.crypto.spec.IvParameterSpec. iv-ba)]
+      (.init cipher javax.crypto.Cipher/DECRYPT_MODE key iv)
+      (.doFinal cipher data-ba))))
+
+(defn crypto-aes128
+  "Returns a new CryptoAES object with options:
+    :default-salt    - Shared fallback password salt when none is provided. If
+                       the use case allows it, a unique random salt per
+                       encrypted item is better.
+    :cache-keys?     - IMPORTANT. DO enable this if and ONLY if your use case
+                       involves only a small, finite number of unique secret
+                       keys (salt+password)s. Dramatically improves `gen-key`
+                       performance in those cases and (as a result) allows for
+                       a *much* stronger `key-work-factor`.
+    :key-work-factor - O(n) CPU time needed to generate keys. Larger factors
+                       provide more protection against brute-force attacks but
+                       make encryption+decryption slower if `:cache-keys?` is
+                       not enabled.
+
+  Some sensible values (from fast to strong):
+    Without caching: 1, 5, 10
+    With caching: 5, 32, 64, 128
+
+  See also `crypto-default` and `crypto-default-cached` for sensible ready-made
+  CryptoAES objects."
+  [& [{:keys [default-salt cache-keys? key-work-factor]
+       :or   {default-salt "XA~I3(:]3'ck5!M[z\\m`l^0mltR~y/]Arq_d9+$`e#yJssN^8"
+              key-work-factor 5}}]]
+  (CryptoAES. "AES/CBC/PKCS5Padding"
+              default-salt
+              {:rounds-multiple (int key-work-factor)}
+              (when cache-keys? (atom {}))))
+
+(def crypto-default (crypto-aes128))
+(def crypto-default-cached (crypto-aes128 {:cache-keys? true
+                                           :key-work-factor 64}))
+
+(comment
+  (time (gen-key crypto-default "my-salt" "my-password"))
+  (time (gen-key crypto-default-cached "my-salt" "my-password"))
+  (time (->> (.getBytes "Secret message" "UTF-8")
+             (encrypt crypto-default "s" "p")
+             (encrypt crypto-default "s" "p")
+             (decrypt crypto-default "s" "p")
+             (decrypt crypto-default "s" "p")
+             (String.))))
+
+;; TODO Nippy: Compress _then_ encode. Decode _then_ decompress.
+;; TODO Move tests to actual unit tests ns.
+;; TODO Update benchmarks with crypto (so 3 bars).
+;; TODO Add tests for bad decryption (e.g. 
wrong key)
+;; ;; (comment
+;; ;; (let [data (dissoc nippy/stress-data :bytes)]
+;; ;; (= data (->> data
+;; ;; (nippy/freeze-to-bytes)
+;; ;; (encrypt-aes "my-password")
+;; ;; (decrypt-aes "my-password")
+;; ;; (nippy/thaw-from-bytes))))
+
+;; ;; (let [ba (nippy/freeze-to-bytes nippy/stress-data)]
+;; ;; (time (dotimes [_ 10000] (->> ba (encrypt-aes "my-password")
+;; ;; (decrypt-aes "my-password"))))))
\ No newline at end of file
diff --git a/src/taoensso/nippy/utils.clj b/src/taoensso/nippy/utils.clj
index 30488e1..e3d90ff 100644
--- a/src/taoensso/nippy/utils.clj
+++ b/src/taoensso/nippy/utils.clj
@@ -59,4 +59,38 @@
 (catch Exception _ false)))
 
 (defn compress-bytes [^bytes ba] (Snappy/compress ba))
-(defn uncompress-bytes [^bytes ba] (Snappy/uncompress ba 0 (alength ba)))
\ No newline at end of file
+(defn uncompress-bytes [^bytes ba] (Snappy/uncompress ba 0 (alength ba)))
+
+(defn apply-memoized
+  "A cross between `memoize` and `apply`. Operates like `apply` but accepts an
+  optional cache atom holding a map of args -> delay of `(apply f args)`.
+
+  With a nil cache this is plain `(apply f args)`. Otherwise at most one
+  delay is installed per distinct args list, so callers that miss the cache
+  concurrently share a single computation instead of each running `f`."
+  [cache f & args]
+  (if-not cache
+    (apply f args)
+    (if-let [dv (@cache args)]
+      @dv
+      (let [dv (delay (apply f args))
+            ;; Only install our delay if no other thread got there first, then
+            ;; deref whichever delay actually ended up in the cache.
+            m  (swap! cache #(if (contains? % args) % (assoc % args dv)))]
+        @(m args)))))
+
+(defn ba-concat ^bytes [^bytes ba1 ^bytes ba2]
+  (let [s1  (alength ba1)
+        s2  (alength ba2)
+        out (byte-array (+ s1 s2))]
+    (System/arraycopy ba1 0 out 0 s1)
+    (System/arraycopy ba2 0 out s1 s2)
+    out))
+
+(defn ba-split [^bytes ba ^Integer idx]
+  [(java.util.Arrays/copyOfRange ba 0 idx)
+   (java.util.Arrays/copyOfRange ba idx (alength ba))])
+
+(comment (String. (ba-concat (.getBytes "foo") (.getBytes "bar")))
+  (let [[x y] (ba-split (.getBytes "foobar") 3)]
+  [(String. x) (String. 
y)])) \ No newline at end of file diff --git a/test/test_nippy/main.clj b/test/test_nippy/main.clj index 185bd68..7a7eb59 100644 --- a/test/test_nippy/main.clj +++ b/test/test_nippy/main.clj @@ -6,9 +6,12 @@ ;; Remove stuff from stress-data that breaks roundtrip equality (def test-data (dissoc nippy/stress-data :bytes)) -(def roundtrip (comp nippy/thaw-from-bytes nippy/freeze-to-bytes)) +(def roundtrip-defaults (comp nippy/thaw-from-bytes nippy/freeze-to-bytes)) +(def roundtrip-encrypted (comp #(nippy/thaw-from-bytes % :password "secret") + #(nippy/freeze-to-bytes % :password "secret"))) -(deftest test-roundtrip (is (= test-data (roundtrip test-data)))) +(deftest test-roundtrip-defaults (is (= test-data (roundtrip-defaults test-data)))) +(deftest test-roundtrip-encrypted (is (= test-data (roundtrip-encrypted test-data)))) (println "Benchmarking roundtrips (x3)") (println "----------------------------")