From 146f82e079ae500bb54e78802ce09b9cd27a955c Mon Sep 17 00:00:00 2001 From: Peter Taoussanis Date: Sat, 7 Jul 2012 02:12:59 +0700 Subject: [PATCH] Initial source, tests, benchmarks. --- README.md | 68 ++++++++- benchmarks/benchmarks.clj | 48 ++++++ benchmarks/chart1.png | Bin 0 -> 4825 bytes benchmarks/chart2.png | Bin 0 -> 4029 bytes src/taoensso/nippy.clj | 284 ++++++++++++++++++++++++++++++++++- src/taoensso/nippy/utils.clj | 13 ++ test/test_nippy/main.clj | 10 +- 7 files changed, 415 insertions(+), 8 deletions(-) create mode 100644 benchmarks/benchmarks.clj create mode 100644 benchmarks/chart1.png create mode 100644 benchmarks/chart2.png create mode 100644 src/taoensso/nippy/utils.clj diff --git a/README.md b/README.md index 27f6cf7..df21094 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Current [semantic](http://semver.org/) version: Clojure's [rich data types](http://clojure.org/datatypes) are *awesome*. And its [reader](http://clojure.org/reader) allows you to take your data just about anywhere. But the reader can be painfully slow when you've got a lot of data to crunch (like when you're serializing to a database). -Nippy is an attempt to provide a drop-in, high-performance alternative to the reader. It's a fork of [Deep-Freeze](https://github.com/halgari/deep-freeze). +Nippy is an attempt to provide a drop-in, high-performance alternative to the reader. It's a fork of [Deep-Freeze](https://github.com/halgari/deep-freeze) and is used as the [Carmine Redis client](https://github.com/ptaoussanis/carmine) serializer. ## What's In The Box? * Simple, **high-performance** all-Clojure de/serializer. @@ -19,7 +19,7 @@ Nippy is an attempt to provide a drop-in, high-performance alternative to the re ## Status [![Build Status](https://secure.travis-ci.org/ptaoussanis/nippy.png?branch=master)](http://travis-ci.org/ptaoussanis/nippy) -Nippy is relatively mature and is used as the [Carmine Redis client](https://github.com/ptaoussanis/carmine) serializer. 
The API is expected to remain more or less stable. To run tests against all supported Clojure versions, use: +Nippy is still currently *experimental*. It **has not yet been thoroughly tested in production** and its API is subject to change. To run tests against all supported Clojure versions, use: ```bash lein2 all test @@ -47,13 +47,71 @@ and `require` the library: ### De/Serializing -TODO +As an example of what Nippy can do, let's take a look at its own reference stress data: + +```clojure +nippy/stress-data +=> +{:bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :nil nil + :boolean true + + :char-utf8 \ಬ + :string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" + :string-long (apply str (range 1000)) + :keyword :keyword + + :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10))) + :list-quoted '(1 2 3 4 5 (6 7 8 (9 10))) + :list-empty (list) + :vector [1 2 3 4 5 [6 7 8 [9 10]]] + :vector-empty [] + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}} + :map-empty {} + :set #{1 2 3 4 5 #{6 7 8 #{9 10}}} + :set-empty #{} + :meta (with-meta {:a :A} {:metakey :metaval}) + :queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g)) + :queue-empty (PersistentQueue/EMPTY) + :coll (repeatedly 1000 rand) + + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :bigint (bigint 31415926535897932384626433832795) + + :float (float 3.14) + :double (double 3.14) + :bigdec (bigdec 3.1415926535897932384626433832795) + + :ratio 22/7} +``` + +Serialize it: + +```clojure +(def frozen-stress-data (nippy/freeze-to-bytes nippy/stress-data)) +=> # +``` + +Deserialize it: + +```clojure +(nippy/thaw-from-bytes frozen-stress-data) +=> {:bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + :nil nil + :boolean true + <...> } +``` + +Couldn't be simpler! 
## Performance -TODO +![Performance comparison chart](https://github.com/ptaoussanis/nippy/raw/master/benchmarks/chart1.png) -![Performance comparison chart]() +![Data size chart](https://github.com/ptaoussanis/nippy/raw/master/benchmarks/chart2.png) [Detailed benchmark information](https://docs.google.com/spreadsheet/ccc?key=0AuSXb68FH4uhdE5kTTlocGZKSXppWG9sRzA5Y2pMVkE&pli=1#gid=0) is available on Google Docs. diff --git a/benchmarks/benchmarks.clj b/benchmarks/benchmarks.clj new file mode 100644 index 0000000..d83c1ad --- /dev/null +++ b/benchmarks/benchmarks.clj @@ -0,0 +1,48 @@ +(ns taoensso.nippy.benchmarks + {:author "Peter Taoussanis"} + (:use [taoensso.nippy :as nippy :only (freeze-to-bytes thaw-from-bytes)])) + +;; Remove stuff from stress-data that breaks reader +(def bench-data (dissoc nippy/stress-data :queue :queue-empty :bytes)) + +(defn reader-freeze [x] (binding [*print-dup* false] (pr-str x))) +(defn reader-thaw [x] (binding [*read-eval* false] (read-string x))) + +(def roundtrip (comp thaw-from-bytes freeze-to-bytes)) +(def reader-roundtrip (comp reader-thaw reader-freeze)) + +(defmacro time-requests + "Warms up, then executes given number of requests and returns total execution + times in msecs." 
+ [num-requests & body] + `(do (dotimes [_# (int (/ ~num-requests 4))] ~@body) ; Warm-up + (let [start-time# (System/nanoTime)] + (dotimes [_# ~num-requests] ~@body) + (Math/round (/ (- (System/nanoTime) start-time#) 1000000.0))))) + +(comment + + ;;; Times + (println + "---\n" + (let [num 10000] + {:reader {:freeze (time-requests num (reader-freeze bench-data)) + :thaw (let [frozen (reader-freeze bench-data)] + (time-requests num (reader-thaw frozen))) + :round (time-requests num (reader-roundtrip bench-data))} + + :nippy {:freeze (time-requests num (freeze-to-bytes bench-data)) + :thaw (let [frozen (freeze-to-bytes bench-data)] + (time-requests num (thaw-from-bytes frozen))) + :round (time-requests num (roundtrip bench-data))}})) + + ;; Clojure 1.3.0, Nippy 0.9.0 + ;; {:reader {:freeze 23573, :thaw 31923, :round 53253}, + ;; :nippy {:freeze 3805, :thaw 3789, :round 7522}} + ;; (float (/ 53253 7522)) = 7.079633 + + ;;; Data size + (let [frozen (reader-freeze bench-data)] (count (.getBytes frozen "UTF8"))) + (let [frozen (freeze-to-bytes bench-data)] (count frozen)) + ;; 22711, 12168 + ) \ No newline at end of file diff --git a/benchmarks/chart1.png b/benchmarks/chart1.png new file mode 100644 index 0000000000000000000000000000000000000000..f32eca4eece49fc4d2e5cfd59ebe697df93798cf GIT binary patch literal 4825 zcmcJTc{tSl*T+#&Dee|}6zR5>AxT-95Rt}M3d7hkmKh;q9UnZ*Y(UFb7szUKHtyhywB^L?|0_KJzdS?N6#K* zVPQEAx~s0w!omh+VPSPYe1Pfc1crq&9ntr+4K;*>goK5KMMXu$#KgqK#V=mGc0|Ej9 z0|SGCf}TBl791QL5)u*`8X6WB_Wb$t7cX9fhlfW*M7(_YGBPqUDk|#Lt5?y{(HIN{ zi^ayo#Ncqa*x1;(xVYD^U&qJCCnO{!CMG5&B_$^(r=+B$rl!7m^XBc_w`pl$t*vcsZ5RsU0vPX-Cw_c?dj?1 z?d|RB>-(m7^4qs>{r&v|0|SGDgF{0@!^6YhzkeSY85tcN9UB`PA0MaF=@SzZlarG_ ze*Bo4nwp-To|&1Motji%UyO%gf6vD=Vw3t7~g(>+9=3fBxLq z*x20M+}hgO-rnAcXW7}=dDwEyo`vO)10Z!(L$BVM{*A}&sFa5J((f0c!6#U)R4?4( zJap#`EJSrwU!N>=Sm+b&7RPDm`x2pxLFW8*Ruxx}s$o1GtZ_j!jr;6ooW%r!yqb>L zh5l~KB9+m^(ZnzO*t17N=!^WH#eFfK#Rr{f&zw&Ush?`q`Q&xvF!Z1#8}A9yt`&mq 
zTJ@S;`#eBn*Rn?JS~lKYyBo+F@&B>_bNcJ%+9m_f?mYf2Tj2mJV4Vv|aeJ6oY!4m_ z-`sPI@7%_W<{H{c`-TE!VzB;LO#d&{aA-6EGzwS1*uY_oQIUh1D8f(v=*;c zMY^*8I7u??y8Mv4%ijkDhTV_zM~@w|2RI^q(F6y*WDmfAO>44iUAf# z*Y^ap#(}P6)Z276E5j0#uX$Q>5NcUL`e<1o8E?{yh1alk9XFPwPhm9A$9Z_fbWlJT z3dL4`{`*QkG&&zDr_93!7(jn?QpE5QV~gG1F76!JMEK2JX2iJ;*f5X42dxR`%yH5x zFqCt3>SqdG38sG14%=%sFB z5l~`@VcsXSK)@kllsReKTV%$|_*d6`Wrf!K_;hH>5sep(DD3XiGBF+#6a=y`EB7AO zD12$^?qZhOCE+)>e5Qx8@mTO-qecM>;%^%F-5NERZQR|xU>?X=;hhyKPaAML`2Tkd~KwaXE3C zm{`fC2W9}c1$^L3?~(Ar^t_r53>i@A!fG zT~E2dZo2;!O@DK<2WdZqRKCDCyszfIcKc%f)41CWV6IscVBN=WpY^{f1?WGMcSrJ& z0&qX%oU+WLxg}wQ8_rb{2<9}+x;&M=Il^pz!?@jBqC>ho%JZ=h(Zuh%&dGjwzb`Z= z+c3VF^ElD4w3Os&!zz7^q;FyWAiXsD%-Q`1fLU`W0`WA%SlshVDmAt`weK;~q@&7J zFRvW!{_bXJcZ>ven~2r5N4p!kauYpUo|(`^L8$euSa$3209>{(unnvwr z3ly2PNW^3x@jQAa&ToOs>+?sf(G^onIIotpGj3^h3InZr+#PXcvnu6U6yfULg zw<0a0n3ULb}(Z190^TLe zyyrh$A;!a-<3d8a2U>ytukh8m4n9!KwY6~C3S^0&q`!O~-~^9cGQ8CkEpk2n+EP$; z@f^NW3v(w4TQ0AuYhUl3+#D4InHp$7^%}R$Zd4d57h)egV%|y)cgf(mIgg7PEO*c` zOF02~gW@35lJ#%ICp=0V#18(RTxpos?Z`ek!-dBT;+YCB*Tb=TRPNx=ngDQ>ndqf=plUx1KHGb% zKx&Rdei+VVmCJleY&8wzV$Qyr8n0$GA9-ix^0-)6$%3cXS;EcTXwPF)Tken zW_)brpPJflLG(G5B)}w?QA<(!!+hdN6{Bc-2%UmNZ#M>xNxpc5Y@6gmOHxFXEIYh2 ziu(e|0a|%Sd zQkGvJ2S@~t=6fh7`;2^>-H144uC)9dDMJiXX8jKyumK?)wg^O_0*B`#H@Cz(v+irX zB7+t0G$~UN8|L#7Ju`Dd>SwrqF4f=Oh!>^|7c_{8Z(L=WqFXME{-J(eplEi@xdqQt zVpEYj%|e{u$-Qj}@)%Bw?gqLDwW6;ZaB%TKKb)azBLt%+R}sUnmQIy#TiO(b=L5TC z?Q3mLaj}a`_HK*+{G4-Kr;PXY3}0cJxU5~4$r@r#F+Bk!Of0Xz)1*yjUjf9~#$tJV zUNbsB0F%LXwlk{{>|EZHnD}^i1B^1sdet5HSOJi9+e3`|D@Q9dZ3Q*Mw|o#$5nAcf zgOZqSmuYTfGjb#GCSNgIQ;}2CFd}sh9P5AI-&KVa1nEcT?tDx+vU>ul9@|4#Ii;>+ z9nVNrA;gqyb91}E2oOi>Zv{p6Xlq)jxd22^=Fs^{aZuBW?PEeWLI8_ zc;{*#)no3Qe2M%hJ|s4wksMLfTGho%dIDd`tYT=(1q;eGYEjTNZek74`X{*`>S#Kk zcq6(WlFnrp;{KdADhfu2@>Dt7>J?>LCT)EXtI(@M-9Q}Qyr_NBsAgTSs-?vzcy`)~ z>Nh0}s>(Il+ik1kk&*?NI@(*;llgcee7WW}Ho4JpIBdCiJpBUkmTlr^!K>%IpopXW zF&+JR@Bfh>wOn&AKRa7N5Q0jL9arY*o_uX_zz9S7=xmq)CAia-#3ISe)G9@&1%mjw 
zV8_57d4gOgdFeb;RJzGkVtVhLrs)X>Ag`YT-Ksrz+8OZ^XOW8%P+b)FmeXE^uBGFT zE&gN69$UUvQrc?dG+&P<#>6{b2bCH=<$-Kpowqp9Wn{K$@=9OOu>)bYS}uUWKP{Pc zUknvXgGpU3rsffz5?jd;i;`1L8&N|U9~W1(xm3uDB&U@-De-MHi*Z7IHyy)v5JjC0 zXK%}kR>9!2bkA8ABZaUD?G*A&&7hG#eFi_JL_ZaYAEQ`>?3Pk@VvNm*17=})oy3TU z6Sep6^{}6L^CGnv)YkcQgrlY6bi(r>GOFeWAdj%oo?hYUnnFMgmfZ&i87K02N-lNK zYJJyJwh%w9=l-gVcKUIKTc8ZD&5R5~YDpCSAB-brq^+PI(4O})@0&dAj=k9w?_q_N1WX~plRp0HrV#>r~m2)^+eaoP9bOpmr*pI R!~DyN1*D;?o~H&2`d?E$BTN7Q literal 0 HcmV?d00001 diff --git a/benchmarks/chart2.png b/benchmarks/chart2.png new file mode 100644 index 0000000000000000000000000000000000000000..3ceabc3d9be09d4770bbff0bc892264c7c8997d9 GIT binary patch literal 4029 zcmd6qc{JPG+Q+rMs)wr6b6UOCmU9qusJX<@R*9IZh6qZO;;5-L&uXZ#s01mhN`sn% zpk`_&qC#k5s#(nQ*vL(Kdhh$kec!e2yVkwyeSd%KXZ@bvexAMeXMdl)emh!U56sTW z#mdCQ#17HCXTZdC8pgzQ%Kh9Kx`zmgjG#MS=<67%^YZfY@$vEV^8)|?0RaI)K|vuQ zAz@)*5fKqlQBg55F(42qAt7<|=1oaSNhv8QY5HS_laY~;m6er~larU1zjf=Df`Wpg zq9OIypHxJ3G6$xVXBy zK7IQ1*|TSEZf@@G?j9ZB zs;cVh>W?2k*3{J0*4Eb5)z#P6H#9UfHa0dlH@CF3eERgMwY9aat*yPiy|c5ktE;QK zySt~Sr?U3S$0sHxCMPGqe*HQ%H8njwJu@>iJ3Bi!H#a{&zp${dxVT86P?naKmY0`TR#sM5 zSJ&3o*4NiJHa0dlH@CL7wzs!;c6N4mclY-8_V@P>4h{|v508$Hj*pMOefvhGQcq4! 
zXf#@`&bix6Oy{>D_wE?^kXK36V8gkqottctLi#*sLo2dx4{@>y0n~viI;{C-A~(+p zrR!hRX4c@j{krS=J41oX0?EJgW;+?Ds->o+ggURB(G|AP(r(>fCvhl1kYO0P(!ah7 zQfYeFt+0S!R+-Y}iiutn?DAnd2m6cIY0irnhUK+l*n7}gJhWpq3h z`yUBFcziotAqGJdrxP!2*j_#w4|^`VF>k^w5(98}3pOBibX-8x6=XaP2{y3P%sRgxm7iuXUddKy*MRl~&n6~a zF|v>M|2Y%()6hc#U-!(WkCH(dJnoPl$mIh@c_<_(A-s&RDJgX5T+ zVLd2krLLV4vtq9|bXP(ceXDNvcSM)R#fip@oq^HUMDkZc?A%7x=0Y0fWN~gBR`g|6 zcJo1ECtXX;#C-iPy_x53J`Lsdw zMj87IFmYnePUI_aL(<@vKM1At$jOxm{mII9ee+hrX+r=!oy)F<1w$7l`BI3d5`1e< z=vJR{kT~YxTH=_*`bC&)fPihprJkn=uM3VJEYSiAkb}gUO-F8MpC82cj_3SGmP}ZH zHQ+*F4I2EXAkEQH@m5O)?@KFX*z z{5AHX$0p{l<{at1A7^-W3tKPF3HPfmb#$kNM5umY!@4|;x7?af(*SWSZM$~wI_|Yx z8YiUVOT=r+hP(M1^7tWnu^r;+vami#bZLsVfM<~=t(CNBANHcuJw=m?>VGS~uQ+TLt z<>Qo!evOVcKcX+cgIla#IE}XacJN7N$$?F?)-WDld>DtN^zNy;q^!}j-#H~=1JtS4 z>7x|iK*DnyDWc7IcfhnbVP`EY+z;o|WUx9g}n3QFZ zMQM`|#HN<0poQ=lF(tly@J-SRundBdwfhJ&zdOY9=MSQPnq0=HbC?zKQznpvjgm%l zzpW%amFp~pvU{G^F2_#fT{vWSKWDLT?KR_E{z zdbvu%>FsaKk$FpfC1$^b5VK0I@$q?pdyB^rNu>PvG6X-ZzAU}9ZWVO4=<%x4_H_tH z28!R;WL4FL(BOIWDVrr@_5P8D*+CjQLjeIvz%I50UWBpZSx^;ifur^s5cqvP67cW> z2owK7Tw0M6*oWlrQ_VRNPV99gNT|@abYA(F-o28~#tE!|hNvS(Ve z%mI+n%hZkIxv3^_vb`3xj;=cgP+^nchl-r){sNRrn#Zvm5`X!ns#VN&k1E>8T}Wi$ zd+zZYmS4*JaDy!y32(ES{6<3N#3=sHknzgCqg$K~F1AZ94s~Z72fr{Y4-7n-)JDYH zCcFtSlcH#^e_5z)^*_&ZX87`AK*vBV$JxS&J@>-_+=S?E2WVzSnCme+f<`#}W4^t! 
zYEqxrPn70ZWNoeW<@w+I1erJ;+u2c@Zfz|NUPUR7kDSy^bHMALj)76;!TG3!raij* z%P*Lc>qX(>YwP7}KlQ^eg!2PyOXWiv+Tf0I&7f1zCswI_n_A0j_>+V&15QZbvN-^hQFY2p@F;J^K|5liOeEF$TGJNpFNMU-kAwJv z@jX4Il!HP1xy<;$a_4V7()h~xq3UG?ODl^T7%w@;*kTLkb#1a^g|Wk2`igNXP{rz3 zeus=?pe)kWf8kDVQYtS8K-msX8xNQoi7^#(C?j?`Q4~di<#B#QE!-uA)gnc^?deDD znO+(P9xKS450oC_DI3r!#inTrJDIb(7RB(}2DZB2wDk`n62P!lZYbFdG; z->H&M|8!J!)$&Fl*H`eA@m92_CD$X1JJ)~>*6BIsIGje;N~k*|En``3kkfzGT{=~G zOP?SppPsXhGW})^XV#yQpXC)bmcDVz?0UbJOI*E~pp~})`fgZ7-GPKU} zE7QP?zNCKcN?!J|#=$A$j47VHK+N=gFdYsKd8C+rwZBMWWKx% zV_E3Wb!;_yAie;_J;;DxVbntqnQQ|mIsLrIrG&xSjqL`}2>YOi%&j!() za@9<$fwF@oSV8*6RsvIBojkO_HLIT;VU3^BDBh-lf*W$PpA#&$2x0ZQ&0J4;?t5^L`B2-u ngLh)0BYjZ+>h)pKY$@1g2z)k84YZdx8azA}2 literal 0 HcmV?d00001 diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index c8c86ef..d3e85ae 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -1 +1,283 @@ -(ns taoensso.nippy) +(ns taoensso.nippy + "Simple, high-performance Clojure serialization library. Adapted from + Deep-Freeze." 
+  {:author "Peter Taoussanis"}
+  (:require [taoensso.nippy.utils :as utils])
+  (:import [java.io DataInputStream DataOutputStream ByteArrayOutputStream
+            ByteArrayInputStream]
+           [org.xerial.snappy Snappy]
+           [clojure.lang IPersistentList IPersistentVector IPersistentMap
+            IPersistentSet PersistentQueue IPersistentCollection Keyword
+            BigInt Ratio]))
+
+;;;; Define type IDs
+
+(def ^:const schema-header "\u0000~0.9.0") ; Frozen ahead of every payload by freeze-to-stream!
+
+(def ^:const id-reader  (int 1)) ; Fallback: *print-dup* pr-str output
+(def ^:const id-bytes   (int 2))
+(def ^:const id-nil     (int 3))
+(def ^:const id-boolean (int 4))
+
+(def ^:const id-char    (int 10))
+(def ^:const id-string  (int 11))
+(def ^:const id-keyword (int 12))
+
+(def ^:const id-list    (int 20))
+(def ^:const id-vector  (int 21))
+(def ^:const id-old-map (int 22)) ; DEPRECATED as of 0.9.0
+(def ^:const id-set     (int 23))
+(def ^:const id-coll    (int 24)) ; Fallback: non-specific collection
+(def ^:const id-meta    (int 25))
+(def ^:const id-queue   (int 26))
+(def ^:const id-map     (int 27))
+
+(def ^:const id-byte    (int 40))
+(def ^:const id-short   (int 41))
+(def ^:const id-integer (int 42))
+(def ^:const id-long    (int 43))
+(def ^:const id-bigint  (int 44))
+
+(def ^:const id-float   (int 60))
+(def ^:const id-double  (int 61))
+(def ^:const id-bigdec  (int 62))
+
+(def ^:const id-ratio   (int 70))
+
+;;;; Shared low-level stream stuff
+
+(defn- write-id! [^DataOutputStream stream ^Integer id] (.writeByte stream id)) ; Type ID goes out as a single byte
+
+(defn- write-bytes! ; Writes a 4-byte length prefix followed by the array contents
+  [^DataOutputStream stream ^bytes ba]
+  (let [size (alength ba)]
+    (.writeInt stream size) ; Encode size of byte array
+    (.write stream ba 0 size)))
+
+(defn- read-bytes! ; Inverse of write-bytes!: reads the int length prefix, then exactly that many bytes
+  ^bytes [^DataInputStream stream]
+  (let [size (.readInt stream)
+        ba   (byte-array size)]
+    (.readFully stream ba 0 size) ba)) ; readFully, not read: a plain read may stop short and leave ba partly filled
+
+(defn- write-as-bytes!
+  "Write arbitrary object as bytes using reflection."
+  [^DataOutputStream stream obj]
+  (write-bytes! stream (.toByteArray obj)))
+
+(defn- read-biginteger!
+  "Wrapper around read-bytes!
for common case of reading to a BigInteger.
+  Note that as of Clojure 1.3, java.math.BigInteger ≠ clojure.lang.BigInt."
+  ^BigInteger [^DataInputStream stream]
+  (BigInteger. (read-bytes! stream)))
+
+;;;; Freezing
+
+(defprotocol Freezable (freeze [this stream]))
+
+(comment (meta '^:DataOutputStream s))
+
+(defmacro freezer
+  "Helper to extend Freezable protocol. Anaphoric: body sees the value being frozen as `x` and the DataOutputStream as `s`; the type ID is written before body runs."
+  [type id & body]
+  `(extend-type ~type
+     ~'Freezable
+     (~'freeze [~'x ~(with-meta 's {:tag 'DataOutputStream})]
+       (write-id! ~'s ~id)
+       ~@body)))
+
+(defmacro coll-freezer
+  "Helper to freeze simple collection types: writes the element count, then each element in order. NOTE: `body` is accepted but currently ignored."
+  [type id & body]
+  `(freezer
+    ~type ~id
+    (.writeInt ~'s (count ~'x)) ; Encode collection length
+    (doseq [i# ~'x] (freeze-to-stream!* ~'s i#))))
+
+(freezer (Class/forName "[B") id-bytes (write-bytes! s x))
+(freezer nil id-nil)
+(freezer Boolean id-boolean (.writeBoolean s x))
+
+(freezer Character id-char (.writeChar s (int x)))
+(freezer String id-string (.writeUTF s x)) ; NOTE(review): writeUTF rejects strings whose encoded form exceeds 65535 bytes
+(freezer Keyword id-keyword (.writeUTF s (if-let [kw-ns (namespace x)] (str kw-ns "/" (name x)) (name x)))) ; Keep the namespace: (name x) alone thawed :a/b as :b
+
+(declare freeze-to-stream!*)
+
+(coll-freezer IPersistentList id-list)
+(coll-freezer IPersistentVector id-vector)
+(freezer IPersistentMap id-map
+         (.writeInt s (* 2 (count x))) ; Encode num kvs
+         (doseq [[k v] x]
+           (freeze-to-stream!* s k)
+           (freeze-to-stream!* s v)))
+(coll-freezer IPersistentSet id-set)
+(coll-freezer PersistentQueue id-queue)
+(coll-freezer IPersistentCollection id-coll) ; Must be LAST collection freezer!
+
+(freezer Byte id-byte (.writeByte s x))
+(freezer Short id-short (.writeShort s x))
+(freezer Integer id-integer (.writeInt s x))
+(freezer Long id-long (.writeLong s x))
+(freezer BigInt id-bigint (write-as-bytes! s (.toBigInteger x)))
+(freezer BigInteger id-bigint (write-as-bytes! s x)) ; Shares id-bigint, so a BigInteger thaws back as a BigInt
+
+(freezer Float id-float (.writeFloat s x))
+(freezer Double id-double (.writeDouble s x))
+(freezer BigDecimal id-bigdec
+         (write-as-bytes! s (.unscaledValue x))
+         (.writeInt s (.scale x)))
+
+(freezer Ratio id-ratio
+         (write-as-bytes!
s (.numerator x))
+         (write-as-bytes! s (.denominator x)))
+
+;; Use Clojure's own reader as final fallback
+(freezer Object id-reader (.writeUTF s (pr-str x)))
+
+(defn- freeze-to-stream!* [^DataOutputStream s x] ; Writes x's metadata (if any) tagged id-meta, then x itself
+  (when-let [m (meta x)]
+    (write-id! s id-meta)
+    (freeze-to-stream!* s m))
+  (freeze x s))
+
+(defn freeze-to-stream!
+  "Serializes x to given output stream, preceded by the schema header."
+  [data-output-stream x]
+  (binding [*print-dup* true] ; For `pr-str`
+    (freeze-to-stream!* data-output-stream schema-header)
+    (freeze-to-stream!* data-output-stream x)))
+
+(defn freeze-to-bytes
+  "Serializes x to a byte array and returns the array. Output is Snappy-compressed unless compress? is false (default true)."
+  (^bytes [x] (freeze-to-bytes x true))
+  (^bytes [x compress?]
+     (let [ba     (ByteArrayOutputStream.)
+           stream (DataOutputStream. ba)]
+       (freeze-to-stream! stream x)
+       (let [ba (.toByteArray ba)] ; Shadows the stream binding with its raw bytes
+         (if compress? (Snappy/compress ba) ba)))))
+
+;;;; Thawing
+
+(declare thaw-from-stream!*)
+
+(defn coll-thaw!
+  "Helper to thaw simple collection types: reads an int count, then returns a lazy seq of that many thawed values. Callers must realize the seq before reading anything else from s."
+  [^DataInputStream s]
+  (repeatedly (.readInt s) (partial thaw-from-stream!* s)))
+
+(defn- thaw-from-stream!* ; Reads one type-ID byte and dispatches on it to rebuild the value
+  [^DataInputStream s]
+  (let [type-id (.readByte s)]
+    (utils/case-eval
+     type-id
+
+     id-reader  (read-string (.readUTF s))
+     id-bytes   (read-bytes! s)
+     id-nil     nil
+     id-boolean (.readBoolean s)
+
+     id-char    (.readChar s)
+     id-string  (.readUTF s)
+     id-keyword (keyword (.readUTF s))
+
+     id-list    (apply list (coll-thaw! s))
+     id-vector  (into [] (coll-thaw! s))
+     id-set     (into #{} (coll-thaw! s))
+     id-map     (apply hash-map (coll-thaw! s))
+     id-coll    (doall (coll-thaw! s))
+     id-queue   (into (PersistentQueue/EMPTY) (coll-thaw! s))
+
+     ;; DEPRECATED as of 0.9.0
+     id-old-map (apply hash-map (repeatedly (* 2 (.readInt s))
+                                            (partial thaw-from-stream!* s)))
+
+     id-meta    (let [m (thaw-from-stream!* s)] (with-meta (thaw-from-stream!* s) m))
+
+     id-byte    (.readByte s)
+     id-short   (.readShort s)
+     id-integer (.readInt s)
+     id-long    (.readLong s)
+     id-bigint  (bigint (read-biginteger!
s))
+
+     id-float   (.readFloat s)
+     id-double  (.readDouble s)
+     id-bigdec  (BigDecimal. (read-biginteger! s) (.readInt s))
+
+     id-ratio   (/ (bigint (read-biginteger! s))
+                   (bigint (read-biginteger! s)))
+
+     (throw (Exception. (str "Failed to thaw unknown type ID: " type-id))))))
+
+;; TODO Scheduled for Carmine version 1.0.0
+;; (defn thaw-from-stream!
+;;   "Deserializes an object from given input stream."
+;;   [data-input-stream]
+;;   (binding [*read-eval* false] ; For `read-string` injection safety - NB!!!
+;;     (let [schema-header (thaw-from-stream!* data-input-stream)]
+;;       (thaw-from-stream!* data-input-stream))))
+
+;; DEPRECATED: Includes temporary support for older versions of serialization
+;; schema that didn't include a version header. This is for people that used
+;; Carmine < 0.8.3 and haven't yet migrated their databases.
+(defn thaw-from-stream!
+  "Deserializes an object from given input stream. A leading string starting with \"\\u0000~\" is treated as the schema header and skipped; any other first value is returned as the payload."
+  [data-input-stream]
+  (binding [*read-eval* false] ; For `read-string` injection safety - NB!!!
+    (let [maybe-schema-header (thaw-from-stream!* data-input-stream)]
+      (if (and (string? maybe-schema-header)
+               (.startsWith ^String maybe-schema-header "\u0000~"))
+        (thaw-from-stream!* data-input-stream)
+        maybe-schema-header))))
+
+(defn thaw-from-bytes
+  "Deserializes an object from given byte array. Input is Snappy-uncompressed first unless compressed? is false (default true)."
+  ([ba] (thaw-from-bytes ba true))
+  ([^bytes ba compressed?] ; Hint avoids reflective Snappy/uncompress and ByteArrayInputStream. calls on every thaw
+     (->> (if compressed? (Snappy/uncompress ba) ba)
+          (ByteArrayInputStream.)
+          (DataInputStream.)
+          (thaw-from-stream!))))
+
+(def stress-data
+  "Reference data used for tests & benchmarks."
+ {;; Breaks reader, roundtrip equality + :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) + + :nil nil + :boolean true + + :char-utf8 \ಬ + :string-utf8 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ" + :string-long (apply str (range 1000)) + :keyword :keyword + + :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10))) + :list-quoted '(1 2 3 4 5 (6 7 8 (9 10))) + :list-empty (list) + :vector [1 2 3 4 5 [6 7 8 [9 10]]] + :vector-empty [] + :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}} + :map-empty {} + :set #{1 2 3 4 5 #{6 7 8 #{9 10}}} + :set-empty #{} + :meta (with-meta {:a :A} {:metakey :metaval}) + + ;; Breaks reader + :queue (-> (PersistentQueue/EMPTY) (conj :a :b :c :d :e :f :g)) + :queue-empty (PersistentQueue/EMPTY) + + :coll (repeatedly 1000 rand) + + :byte (byte 16) + :short (short 42) + :integer (int 3) + :long (long 3) + :bigint (bigint 31415926535897932384626433832795) + + :float (float 3.14) + :double (double 3.14) + :bigdec (bigdec 3.1415926535897932384626433832795) + + :ratio 22/7}) \ No newline at end of file diff --git a/src/taoensso/nippy/utils.clj b/src/taoensso/nippy/utils.clj new file mode 100644 index 0000000..6668f1c --- /dev/null +++ b/src/taoensso/nippy/utils.clj @@ -0,0 +1,13 @@ +(ns taoensso.nippy.utils + {:author "Peter Taoussanis"}) + +(defmacro case-eval + "Like `case` but evaluates test constants for their compile-time value." + [e & clauses] + (let [;; Don't evaluate default expression! + default (when (odd? (count clauses)) (last clauses)) + clauses (if default (butlast clauses) clauses)] + `(case ~e + ~@(map-indexed (fn [i# form#] (if (even? 
i#) (eval form#) form#)) + clauses) + ~(when default default)))) \ No newline at end of file diff --git a/test/test_nippy/main.clj b/test/test_nippy/main.clj index d3fbca6..ec4e41a 100644 --- a/test/test_nippy/main.clj +++ b/test/test_nippy/main.clj @@ -1,4 +1,10 @@ (ns test-nippy.main - (:use [clojure.test])) + (:use [clojure.test]) + (:require [taoensso.nippy :as nippy])) -(deftest test-nothing) +;; Remove stuff from stress-data that breaks roundtrip equality +(def test-data (dissoc nippy/stress-data :bytes)) + +(def roundtrip (comp nippy/thaw-from-bytes nippy/freeze-to-bytes)) + +(deftest test-roundtrip (is (= test-data (roundtrip test-data)))) \ No newline at end of file