From 780e06c69306c4f29e1761df63422f0ef1a591aa Mon Sep 17 00:00:00 2001 From: Peter Taoussanis Date: Sat, 21 Jul 2012 01:47:54 +0700 Subject: [PATCH] Support strings (incl. reader strings) of length > 64K. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Luc Préfontaine kindly pointed out that `.writeUTF` and `.readUTF` have a maximum length of 64k. This commit switches to a custom integer-length prefix for strings and reader types to boost their max length. Keywords are unchanged. Old string and reader type IDs are now deprecated but will continue to be supported for thawing. --- src/taoensso/nippy.clj | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/taoensso/nippy.clj b/src/taoensso/nippy.clj index 952528d..4bbff16 100644 --- a/src/taoensso/nippy.clj +++ b/src/taoensso/nippy.clj @@ -14,18 +14,20 @@ (def ^:const schema-header "\u0000~0.9.0") -(def ^:const id-reader (int 1)) ; Fallback: *print-dup* pr-str output +;; 1 (def ^:const id-bytes (int 2)) (def ^:const id-nil (int 3)) (def ^:const id-boolean (int 4)) +(def ^:const id-reader (int 5)) ; Fallback: *print-dup* pr-str output (def ^:const id-char (int 10)) -(def ^:const id-string (int 11)) +;; 11 (def ^:const id-keyword (int 12)) +(def ^:const id-string (int 13)) (def ^:const id-list (int 20)) (def ^:const id-vector (int 21)) -(def ^:const id-old-map (int 22)) ; DEPRECATED as of 0.9.0 +;; 22 (def ^:const id-set (int 23)) (def ^:const id-coll (int 24)) ; Fallback: non-specific collection (def ^:const id-meta (int 25)) @@ -44,6 +46,11 @@ (def ^:const id-ratio (int 70)) +;;; DEPRECATED +(def ^:const id-old-reader (int 1)) ; as of 0.9.1, for +64k support +(def ^:const id-old-string (int 11)) ; as of 0.9.1, for +64k support +(def ^:const id-old-map (int 22)) ; as of 0.9.0, for more efficient thaw + ;;;; Shared low-level stream stuff (defn- write-id! [^DataOutputStream stream ^Integer id] (.writeByte stream id)) @@ -101,7 +108,7 @@ (freezer Boolean id-boolean (.writeBoolean s x)) (freezer Character id-char (.writeChar s (int x))) -(freezer String id-string (.writeUTF s x)) +(freezer String id-string (write-bytes! s (.getBytes x "UTF-8"))) (freezer Keyword id-keyword (.writeUTF s (if-let [ns (namespace x)] (str ns "/" (name x)) (name x)))) @@ -137,7 +144,7 @@ (write-biginteger! s (.denominator x))) ;; Use Clojure's own reader as final fallback -(freezer Object id-reader (.writeUTF s (pr-str x))) +(freezer Object id-reader (write-bytes! s (.getBytes (pr-str x) "UTF-8"))) (defn- freeze-to-stream!* [^DataOutputStream s x] (if-let [m (meta x)] @@ -177,13 +184,13 @@ (utils/case-eval type-id - id-reader (read-string (.readUTF s)) + id-reader (read-string (String. (read-bytes! s) "UTF-8")) id-bytes (read-bytes! s) id-nil nil id-boolean (.readBoolean s) id-char (.readChar s) - id-string (.readUTF s) + id-string (String. (read-bytes! s) "UTF-8") id-keyword (keyword (.readUTF s)) id-list (apply list (coll-thaw! s)) @@ -193,10 +200,6 @@ id-coll (doall (coll-thaw! s)) id-queue (into (PersistentQueue/EMPTY) (coll-thaw! s)) - ;; DEPRECATED as of 0.9.0 - id-old-map (apply hash-map (repeatedly (* 2 (.readInt s)) - (partial thaw-from-stream!* s))) - id-meta (let [m (thaw-from-stream!* s)] (with-meta (thaw-from-stream!* s) m)) id-byte (.readByte s) @@ -212,6 +215,12 @@ id-ratio (/ (bigint (read-biginteger! s)) (bigint (read-biginteger! s))) + ;;; DEPRECATED + id-old-reader (read-string (.readUTF s)) + id-old-string (.readUTF s) + id-old-map (apply hash-map (repeatedly (* 2 (.readInt s)) + (partial thaw-from-stream!* s))) + (throw (Exception. (str "Failed to thaw unknown type ID: " type-id)))))) ;; TODO Scheduled for Carmine version 1.0.0