From 0b4d1d2ee1fee4f415f17cd83edd584fbd75d478 Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 18:05:14 +0300 Subject: [PATCH 1/9] url-encode & url-decode --- modules/reitit-core/src/reitit/impl.cljc | 39 ++++++-- perf-test/clj/reitit/impl_perf_test.clj | 116 +++++++++++++++++++++++ test/cljc/reitit/impl_test.cljc | 93 ++++++++++++++++++ 3 files changed, 241 insertions(+), 7 deletions(-) create mode 100644 perf-test/clj/reitit/impl_perf_test.clj diff --git a/modules/reitit-core/src/reitit/impl.cljc b/modules/reitit-core/src/reitit/impl.cljc index d5a41969..2d74dce2 100644 --- a/modules/reitit-core/src/reitit/impl.cljc +++ b/modules/reitit-core/src/reitit/impl.cljc @@ -166,15 +166,40 @@ ;; Path-parameters, see https://github.com/metosin/reitit/issues/75 ;; +#?(:clj + (def hex-digit + {0 "0" 1 "1" 2 "2" 3 "3" + 4 "4" 5 "5" 6 "6" 7 "7" + 8 "8" 9 "9" 10 "A" 11 "B" + 12 "C" 13 "D" 14 "E" 15 "F"})) + +#?(:clj + (defn byte->percent [byte] + (let [byte (bit-and 0xFF byte) + low-nibble (bit-and 0xF byte) + high-nibble (bit-shift-right byte 4)] + (str "%" (hex-digit high-nibble) (hex-digit low-nibble))))) + +#?(:clj + (defn percent-encode [^String unencoded] + (->> (.getBytes unencoded "UTF-8") (map byte->percent) (str/join)))) + +;; + is safe, but removed so it would work the same as with js (defn url-encode [s] - (some-> s - #?(:clj (URLEncoder/encode "UTF-8") - :cljs (js/encodeURIComponent)) - #?(:clj (.replace "+" "%20")))) + (if s + #?(:clj (str/replace s #"[^A-Za-z0-9\!'\(\)\*_~.-]+" percent-encode) + :cljs (js/encodeURIComponent s)))) (defn url-decode [s] - (some-> s #?(:clj (URLDecoder/decode "UTF-8") - :cljs (js/decodeURIComponent)))) + (if s + #?(:clj (if (.contains ^String s "%") + (URLDecoder/decode + (if (.contains ^String s "+") + (.replace ^String s "+" "%2B") + s) + "UTF-8") + s) + :cljs (js/decodeURIComponent s)))) (defprotocol IntoString (into-string [_])) @@ -203,7 +228,7 @@ (into-string [this] (str this)) nil - (into-string [this])) + (into-string [_])) (defn path-params "shallow transform of the path parameters values into strings" diff --git a/perf-test/clj/reitit/impl_perf_test.clj b/perf-test/clj/reitit/impl_perf_test.clj new file mode 100644 index 00000000..20b28d6c --- /dev/null +++ b/perf-test/clj/reitit/impl_perf_test.clj @@ -0,0 +1,116 @@ +(ns reitit.impl-perf-test + (:require [criterium.core :as cc] + [reitit.perf-utils :refer :all] + [ring.util.codec] + [reitit.impl]) + (:import (java.net URLDecoder URLEncoder))) + +;; +;; start repl with `lein perf repl` +;; perf measured with the following setup: +;; +;; Model Name: MacBook Pro +;; Model Identifier: MacBookPro11,3 +;; Processor Name: Intel Core i7 +;; Processor Speed: 2,5 GHz +;; Number of Processors: 1 +;; Total Number of Cores: 4 +;; L2 Cache (per Core): 256 KB +;; L3 Cache: 6 MB +;; Memory: 16 GB +;; + + +(defn test! [f input] + (do + (println "\u001B[33m") + (println (pr-str input) "=>" (pr-str (f input))) + (println "\u001B[0m") + (cc/quick-bench (f input)))) + +(defn url-decode-naive [s] + (URLDecoder/decode + (.replace ^String s "+" "%2B") + "UTF-8")) + +(defn decode! [] + + ;; ring + + ;; 890ns + ;; 190ns + ;; 90ns + ;; 80ns + + ;; naive + + ;; 750ns + ;; 340ns + ;; 420ns + ;; 200ns + + ;; reitit + + ;; 630ns (-29%) + ;; 12ns (-94%) + ;; 8ns (-91%) + ;; 8ns (-90%) + + (doseq [fs ['ring.util.codec/url-decode + 'url-decode-naive + 'reitit.impl/url-decode] + :let [f (deref (resolve fs))]] + (suite (str fs)) + (doseq [s ["aja%20hiljaa+sillalla" + "aja_hiljaa_sillalla" + "1+1" + "1"]] + (test! f s)))) + +(defn url-encode-naive [^String s] + (cond-> (.replace (URLEncoder/encode s "UTF-8") "+" "%20") + (.contains s "+") (.replace "%2B" "+") + (.contains s "~") (.replace "%7E" "~") + (.contains s "=") (.replace "%3D" "=") + (.contains s "!") (.replace "%21" "!") + (.contains s "'") (.replace "%27" "'") + (.contains s "(") (.replace "%28" "(") + (.contains s ")") (.replace "%29" ")"))) + +(defn encode! [] + + ;; ring + + ;; 2500ns + ;; 610ns + ;; 160ns + ;; 120ns + + ;; naive + + ;; 1000ns + ;; 440ns + ;; 570ns + ;; 200ns + + ;; reitit + + ;; 1400ns + ;; 740ns + ;; 180ns + ;; 130ns + + (doseq [fs ['ring.util.codec/url-encode + 'url-encode-naive + 'reitit.impl/url-encode] + :let [f (deref (resolve fs))]] + (suite (str fs)) + (doseq [s ["aja hiljaa+sillalla" + "aja_hiljaa_sillalla" + "1+1" + "1"]] + (test! f s)))) + +(comment + (decode!) + (encode!)) diff --git a/test/cljc/reitit/impl_test.cljc b/test/cljc/reitit/impl_test.cljc index 1d437ed8..782a3435 100644 --- a/test/cljc/reitit/impl_test.cljc +++ b/test/cljc/reitit/impl_test.cljc @@ -64,3 +64,96 @@ ;{:a ["c" "b"]} "a=c&a=b" ;{:a (seq [1 2])} "a=1&a=2" ;{:a #{"c" "b"}} "a=b&a=c" + +(deftest url-encode-test + (are [in out] + (= out (impl/url-encode in)) + + "/" "%2F" + "?" "%3F" + "#" "%23" + "[" "%5B" + "]" "%5D" + "!" "!" + #_#_"$" "$" + #_#_"&" "&" + "'" "'" + "(" "(" + ")" ")" + "*" "*" + #_#_"+" "+" + #_#_"," "," + #_#_";" ";" + #_#_"=" "=" + #_#_":" ":" + #_#_"@" "@" + "a" "a" + "z" "z" + "A" "A" + "Z" "Z" + "0" "0" + "9" "9" + "-" "-" + "." "." + "_" "_" + "~" "~" + "\000" "%00" + "\037" "%1F" + " " "%20" + "\"" "%22" + "%" "%25" + "<" "%3C" + ">" "%3E" + "\\" "%5C" + "^" "%5E" + "`" "%60" + "{" "%7B" + "|" "%7C" + "}" "%7D" + "\177" "%7F" + #_#_"\377" "%FF" + + "£0.25" "%C2%A30.25" + "€100" "%E2%82%AC100" + "«küßî»" "%C2%ABk%C3%BC%C3%9F%C3%AE%C2%BB" + "“ЌύБЇ”" "%E2%80%9C%D0%8C%CF%8D%D0%91%D0%87%E2%80%9D" + + "\000" "%00" + #_#_"\231" "%99" + #_#_"\252" "%AA" + #_#_"\377" "%FF" + + "" "" + "1" "1" + "12" "12" + "123" "123" + "1234567890" "1234567890" + + "Hello world" "Hello%20world" + "/home/foo" "%2Fhome%2Ffoo" + + " " "%20" + "+" "%2B" #_"+" + " +" "%20%2B" #_"%20+" + #_#_"1+2=3" "1+2=3" + #_#_"1 + 2 = 3" "1%20+%202%20=%203")) + +(deftest url-decode-test + (are [in out] + (= out (impl/url-decode in)) + + "1+1" "1+1" + "%21" "!" + "%61" "a" + "%31%32%33" "123" + "%2b" "+" + "%7e" "~" + "hello%20world" "hello world" + "a%2fb" "a/b" + "a/.." "a/.." + "a/." "a/." + "//a" "//a" + "a//b" "a//b" + "a//" "a//" + "/path/%C2%ABk%C3%BC%C3%9F%C3%AE%C2%BB" "/path/«küßî»" + "/path/%E2%80%9C%D0%8C%CF%8D%D0%91%D0%87%E2%80%9D" "/path/“ЌύБЇ”")) From 303b12497312fd1818e5727f1e4e32d5aa89dd51 Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 18:56:58 +0300 Subject: [PATCH 2/9] format --- modules/reitit-core/src/reitit/impl.cljc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/reitit-core/src/reitit/impl.cljc b/modules/reitit-core/src/reitit/impl.cljc index 2d74dce2..0ade6c7e 100644 --- a/modules/reitit-core/src/reitit/impl.cljc +++ b/modules/reitit-core/src/reitit/impl.cljc @@ -256,7 +256,7 @@ (goog/inherits ~type ~base-type) ~@(map - (fn [method] - `(set! (.. ~type -prototype ~(symbol (str "-" (first method)))) - (fn ~@(rest method)))) - methods))) + (fn [method] + `(set! (.. ~type -prototype ~(symbol (str "-" (first method)))) + (fn ~@(rest method)))) + methods))) From 6c23a5562a1fa3dbb4d5dd801b3a615f077c108c Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 18:57:17 +0300 Subject: [PATCH 3/9] form-encode & form-decode --- modules/reitit-core/src/reitit/impl.cljc | 17 +++++++++++++++++ test/cljc/reitit/impl_test.cljc | 12 ++++++++++++ 2 files changed, 29 insertions(+) diff --git a/modules/reitit-core/src/reitit/impl.cljc b/modules/reitit-core/src/reitit/impl.cljc index 0ade6c7e..e4426600 100644 --- a/modules/reitit-core/src/reitit/impl.cljc +++ b/modules/reitit-core/src/reitit/impl.cljc @@ -201,6 +201,23 @@ s) :cljs (js/decodeURIComponent s)))) +(defn form-encode [s] + (if s + #?(:clj (-> s + (str/replace #"[^A-Za-z0-9\!'\(\)\*_~.-\\\ ]+" percent-encode) + (^String .replace " " "+")) + :cljs (str/replace (js/encodeURIComponent s) "%20" "+")))) + +(defn form-decode [s] + (if s + #?(:clj (let [s (if (.contains ^String s "+") + (.replace ^String s "+" " ") + s)] + (if (.contains ^String s "%") + (URLDecoder/decode s "UTF-8") + s)) + :cljs (js/decodeURIComponent (str/replace s "+" " "))))) + (defprotocol IntoString (into-string [_])) diff --git a/test/cljc/reitit/impl_test.cljc b/test/cljc/reitit/impl_test.cljc index 782a3435..85526eb4 100644 --- a/test/cljc/reitit/impl_test.cljc +++ b/test/cljc/reitit/impl_test.cljc @@ -157,3 +157,15 @@ "a//" "a//" "/path/%C2%ABk%C3%BC%C3%9F%C3%AE%C2%BB" "/path/«küßî»" "/path/%E2%80%9C%D0%8C%CF%8D%D0%91%D0%87%E2%80%9D" "/path/“ЌύБЇ”")) + +(deftest form-encode-test + (are [in out] + (= out (impl/form-encode in)) + + "+632 905 123 4567" "%2B632+905+123+4567")) + +(deftest form-decode-test + (are [in out] + (= out (impl/form-decode in)) + + "%2B632+905+123+4567" "+632 905 123 4567")) From 682dd0556874c0846849055ca8c5d14c5ffab66c Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 19:28:52 +0300 Subject: [PATCH 4/9] tune perf --- modules/reitit-core/src/reitit/impl.cljc | 13 ++--- perf-test/clj/reitit/impl_perf_test.clj | 64 ++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/modules/reitit-core/src/reitit/impl.cljc b/modules/reitit-core/src/reitit/impl.cljc index e4426600..ccad2761 100644 --- a/modules/reitit-core/src/reitit/impl.cljc +++ b/modules/reitit-core/src/reitit/impl.cljc @@ -203,19 +203,14 @@ (defn form-encode [s] (if s - #?(:clj (-> s - (str/replace #"[^A-Za-z0-9\!'\(\)\*_~.-\\\ ]+" percent-encode) - (^String .replace " " "+")) + #?(:clj (URLEncoder/encode ^String s "UTF-8") :cljs (str/replace (js/encodeURIComponent s) "%20" "+")))) (defn form-decode [s] (if s - #?(:clj (let [s (if (.contains ^String s "+") - (.replace ^String s "+" " ") - s)] - (if (.contains ^String s "%") - (URLDecoder/decode s "UTF-8") - s)) + #?(:clj (if (or (.contains ^String s "%") (.contains ^String s "+")) + (URLDecoder/decode ^String s "UTF-8") + s) :cljs (js/decodeURIComponent (str/replace s "+" " "))))) (defprotocol IntoString diff --git a/perf-test/clj/reitit/impl_perf_test.clj b/perf-test/clj/reitit/impl_perf_test.clj index 20b28d6c..65f54b04 100644 --- a/perf-test/clj/reitit/impl_perf_test.clj +++ b/perf-test/clj/reitit/impl_perf_test.clj @@ -33,7 +33,7 @@ (.replace ^String s "+" "%2B") "UTF-8")) -(defn decode! [] +(defn url-decode! [] ;; ring @@ -77,7 +77,7 @@ (.contains s "(") (.replace "%28" "(") (.contains s ")") (.replace "%29" ")"))) -(defn encode! [] +(defn url-encode! [] ;; ring @@ -111,6 +111,62 @@ "1"]] (test! f s)))) +(defn form-decode! [] + + ;; ring + + ;; 280ns + ;; 130ns + ;; 43ns + ;; 25ns + + ;; reitit + + ;; 270ns (-4%) + ;; 20ns (-84%) + ;; 47ns (+8%) + ;; 12ns (-52%) + + (doseq [fs ['ring.util.codec/form-decode-str + 'reitit.impl/form-decode] + :let [f (deref (resolve fs))]] + (suite (str fs)) + (doseq [s ["%2Baja%20hiljaa+sillalla" + "aja_hiljaa_sillalla" + "1+1" + "1"]] + (test! f s)))) + +(defn form-encode! [] + + ;; ring + + ;; 240ns + ;; 120ns + ;; 130ns + ;; 31ns + + ;; reitit + + ;; 210ns + ;; 120ns + ;; 130ns + ;; 30ns + + (doseq [fs ['ring.util.codec/form-encode + 'reitit.impl/form-encode] + :let [f (deref (resolve fs))]] + (suite (str fs)) + (doseq [s ["aja hiljaa+sillalla" + "aja_hiljaa_sillalla" + "1+1" + "1"]] + (test! f s)))) + (comment - (decode!) - (encode!)) + (url-decode!) + (url-encode!) + (form-decode!) + (form-encode!)) + +(ring.util.codec/form-decode-str "%2B632+905+123+4567") From 077a1887cb7162e397568f062160d428cccf2c09 Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 19:38:16 +0300 Subject: [PATCH 5/9] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 560e5910..377db133 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * **BREAKING**: the router option key to extract body format has been renamed: `:extract-request-format` => `:reitit.coercion/extract-request-format` * should only concern you if you are not using [Muuntaja](https://github.com/metosin/muuntaja). * the `r/routes` returns just the path + data tuples as documented, not the compiled route results. To get the compiled results, use `r/compiled-routes` instead. +* new [faster](https://github.com/metosin/reitit/blob/master/perf-test/clj/reitit/impl_perf_test.clj) and more correct encoders and decoders for query & path params. * welcome route name conflict resolution! If router has routes with same names, router can't be created. fix 'em. * sequential child routes are allowed, enabling this: From bf3fb64088c08542552586f2b864522d8b410bda Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 19:43:58 +0300 Subject: [PATCH 6/9] query is a form-param --- modules/reitit-core/src/reitit/impl.cljc | 4 ++-- test/cljc/reitit/impl_test.cljc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/reitit-core/src/reitit/impl.cljc b/modules/reitit-core/src/reitit/impl.cljc index ccad2761..58fb67e6 100644 --- a/modules/reitit-core/src/reitit/impl.cljc +++ b/modules/reitit-core/src/reitit/impl.cljc @@ -256,9 +256,9 @@ [params] (->> params (map (fn [[k v]] - (str (url-encode (into-string k)) + (str (form-encode (into-string k)) "=" - (url-encode (into-string v))))) + (form-encode (into-string v))))) (str/join "&"))) (defmacro goog-extend [type base-type ctor & methods] diff --git a/test/cljc/reitit/impl_test.cljc b/test/cljc/reitit/impl_test.cljc index 85526eb4..1842d358 100644 --- a/test/cljc/reitit/impl_test.cljc +++ b/test/cljc/reitit/impl_test.cljc @@ -57,7 +57,7 @@ {:a 1} "a=1" {:a nil} "a=" {:a :b :c "d"} "a=b&c=d" - {:a "b c"} "a=b%20c")) + {:a "b c"} "a=b+c")) ; TODO: support seq values? ;{:a ["b" "c"]} "a=b&a=c" From ebecb7ee12ffa38f2b1f0f77b2f3d61379a90e25 Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 19:46:47 +0300 Subject: [PATCH 7/9] update readme --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 377db133..e00a3410 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * should only concern you if you are not using [Muuntaja](https://github.com/metosin/muuntaja). * the `r/routes` returns just the path + data tuples as documented, not the compiled route results. To get the compiled results, use `r/compiled-routes` instead. * new [faster](https://github.com/metosin/reitit/blob/master/perf-test/clj/reitit/impl_perf_test.clj) and more correct encoders and decoders for query & path params. + * query-parameters are encoded with `reitit.impl/form-encode`, so spaces are `+` instead of `%20`. * welcome route name conflict resolution! If router has routes with same names, router can't be created. fix 'em. * sequential child routes are allowed, enabling this: From 7389838b5953f774b424b8b31100619b5dd26d5a Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Wed, 1 Aug 2018 19:51:54 +0300 Subject: [PATCH 8/9] mention sources --- modules/reitit-core/src/reitit/impl.cljc | 6 +++++- test/cljc/reitit/impl_test.cljc | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/reitit-core/src/reitit/impl.cljc b/modules/reitit-core/src/reitit/impl.cljc index 58fb67e6..35a13737 100644 --- a/modules/reitit-core/src/reitit/impl.cljc +++ b/modules/reitit-core/src/reitit/impl.cljc @@ -163,7 +163,7 @@ (->> m (remove (comp nil? second)) (into {}))) ;; -;; Path-parameters, see https://github.com/metosin/reitit/issues/75 +;; Parts (c) https://github.com/lambdaisland/uri/tree/master/src/lambdaisland/uri ;; #?(:clj @@ -184,6 +184,10 @@ (defn percent-encode [^String unencoded] (->> (.getBytes unencoded "UTF-8") (map byte->percent) (str/join)))) +;; +;; encoding & decoding +;; + ;; + is safe, but removed so it would work the same as with js (defn url-encode [s] (if s diff --git a/test/cljc/reitit/impl_test.cljc b/test/cljc/reitit/impl_test.cljc index 1842d358..d7286d60 100644 --- a/test/cljc/reitit/impl_test.cljc +++ b/test/cljc/reitit/impl_test.cljc @@ -65,6 +65,8 @@ ;{:a (seq [1 2])} "a=1&a=2" ;{:a #{"c" "b"}} "a=b&a=c" +;; test from https://github.com/playframework/playframework -> UriEncodingSpec.scala + (deftest url-encode-test (are [in out] (= out (impl/url-encode in)) From 230717ba65a537ed8cf60ad27bcc1ff905b85a46 Mon Sep 17 00:00:00 2001 From: Tommi Reiman Date: Thu, 2 Aug 2018 16:06:34 +0300 Subject: [PATCH 9/9] double fast byte formatting --- modules/reitit-core/src/reitit/impl.cljc | 24 +++++------------------- perf-test/clj/reitit/impl_perf_test.clj | 2 -- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/modules/reitit-core/src/reitit/impl.cljc b/modules/reitit-core/src/reitit/impl.cljc index 35a13737..27f9fa1e 100644 --- a/modules/reitit-core/src/reitit/impl.cljc +++ b/modules/reitit-core/src/reitit/impl.cljc @@ -162,27 +162,13 @@ (defn strip-nils [m] (->> m (remove (comp nil? second)) (into {}))) -;; -;; Parts (c) https://github.com/lambdaisland/uri/tree/master/src/lambdaisland/uri -;; +#?(:clj (def +percents+ (into [] (map #(format "%%%02X" %) (range 0 256))))) -#?(:clj - (def hex-digit - {0 "0" 1 "1" 2 "2" 3 "3" - 4 "4" 5 "5" 6 "6" 7 "7" - 8 "8" 9 "9" 10 "A" 11 "B" - 12 "C" 13 "D" 14 "E" 15 "F"})) +#?(:clj (defn byte->percent [byte] + (nth +percents+ (if (< byte 0) (+ 256 byte) byte)))) -#?(:clj - (defn byte->percent [byte] - (let [byte (bit-and 0xFF byte) - low-nibble (bit-and 0xF byte) - high-nibble (bit-shift-right byte 4)] - (str "%" (hex-digit high-nibble) (hex-digit low-nibble))))) - -#?(:clj - (defn percent-encode [^String unencoded] - (->> (.getBytes unencoded "UTF-8") (map byte->percent) (str/join)))) +#?(:clj (defn percent-encode [^String s] + (->> (.getBytes s "UTF-8") (map byte->percent) (str/join)))) ;; ;; encoding & decoding diff --git a/perf-test/clj/reitit/impl_perf_test.clj b/perf-test/clj/reitit/impl_perf_test.clj index 65f54b04..1add9f05 100644 --- a/perf-test/clj/reitit/impl_perf_test.clj +++ b/perf-test/clj/reitit/impl_perf_test.clj @@ -168,5 +168,3 @@ (url-encode!) (form-decode!) (form-encode!)) - -(ring.util.codec/form-decode-str "%2B632+905+123+4567")