From c26ae0835d7bef1b8a06a126abdcbfc2499d20e3 Mon Sep 17 00:00:00 2001
From: Juho Teperi
Date: Thu, 8 Oct 2015 22:53:23 +0300
Subject: [PATCH] Optimize from-db-object performance for DBObjects

Creating a temporary sequence from a DBObject is quite slow. Using .keySet
as the reduce collection and calling .get inside the reduce is considerably
faster.

The common code between the Map and DBObject implementations can't be
shared: going through a shared function would fall back to reflection, which
would completely kill the performance, and the function can't be type hinted
because DBObject doesn't implement the Map interface.

Added a simple test case for from-db-object performance. I'm seeing a
performance increase of around 20% with this change (170ms -> 140ms).
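Roughly, the difference looks like this (a simplified sketch for illustration,
not the actual protocol implementation in this patch; the helper names and the
convert argument are made up, and only the keywordizing case is shown):

    ;; Old approach: build a temporary seq of [k v] pairs, then reduce over it.
    (defn slow-convert [^com.mongodb.DBObject input convert]
      (reduce (fn [m [^String k v]] (assoc m (keyword k) (convert v)))
              {}
              (for [k (.keySet input)] [k (.get input k)])))

    ;; New approach: reduce over the key set itself and look each value up
    ;; with .get, so no intermediate pair sequence is allocated.
    (defn fast-convert [^com.mongodb.DBObject input convert]
      (reduce (fn [m ^String k] (assoc m (keyword k) (convert (.get input k))))
              {}
              (.keySet input)))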
---
 src/clojure/monger/conversion.clj | 34 ++++++++++++++++------------------
 test/monger/test/stress_test.clj  | 11 +++++++++--
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/clojure/monger/conversion.clj b/src/clojure/monger/conversion.clj
index 2185f44..3d6f6f9 100644
--- a/src/clojure/monger/conversion.clj
+++ b/src/clojure/monger/conversion.clj
@@ -105,7 +105,6 @@



-(declare associate-pairs)
 (defprotocol ConvertFromDBObject
   (from-db-object [input keywordize] "Converts given DBObject instance to a piece of Clojure data"))

@@ -118,7 +117,12 @@

   Map
   (from-db-object [^Map input keywordize]
-    (associate-pairs (.entrySet input) keywordize))
+    (reduce (if keywordize
+              (fn [m ^String k]
+                (assoc m (keyword k) (from-db-object (.get input k) true)))
+              (fn [m ^String k]
+                (assoc m k (from-db-object (.get input k) false))))
+            {} (.keySet input)))

   List
   (from-db-object [^List input keywordize]
@@ -136,22 +140,16 @@
   (from-db-object [^DBObject input keywordize]
     ;; DBObject provides .toMap, but the implementation in
     ;; subclass GridFSFile unhelpfully throws
-    ;; UnsupportedOperationException. This part is taken from congomongo and
-    ;; may need revisiting at a later point. MK.
-    (associate-pairs (for [key-set (.keySet input)] [key-set (.get input key-set)])
-                     keywordize)))
-
-
-(defn- associate-pairs [pairs keywordize]
-  ;; Taking the keywordize test out of the fn reduces derefs
-  ;; dramatically, which was the main barrier to matching pure-Java
-  ;; performance for this marshalling. Taken from congomongo. MK.
-  (reduce (if keywordize
-            (fn [m [^String k v]]
-              (assoc m (keyword k) (from-db-object v true)))
-            (fn [m [^String k v]]
-              (assoc m k (from-db-object v false))))
-          {} (reverse pairs)))
+    ;; UnsupportedOperationException.
+    ;; This is the same code as in the Map implementation. The code can't be
+    ;; shared via a function because reflection would kill the performance,
+    ;; and DBObject and Map don't share an interface.
+    (reduce (if keywordize
+              (fn [m ^String k]
+                (assoc m (keyword k) (from-db-object (.get input k) true)))
+              (fn [m ^String k]
+                (assoc m k (from-db-object (.get input k) false))))
+            {} (.keySet input))))
diff --git a/test/monger/test/stress_test.clj b/test/monger/test/stress_test.clj
index 131f48b..ea1a06f 100644
--- a/test/monger/test/stress_test.clj
+++ b/test/monger/test/stress_test.clj
@@ -1,7 +1,7 @@
 (ns monger.test.stress-test
   (:require [monger.core :as mg]
             [monger.collection :as mc]
-            [monger.conversion :refer [to-db-object]]
+            [monger.conversion :refer [to-db-object from-db-object]]
             [clojure.test :refer :all])
   (:import [com.mongodb WriteConcern]
            java.util.Date))
@@ -30,4 +30,11 @@
       (mc/remove db collection)
       (println "Inserting " n " documents...")
       (time (mc/insert-batch db collection docs))
-      (is (= n (mc/count db collection)))))))
+      (is (= n (mc/count db collection))))))
+
+  (deftest ^{:performance true} convert-large-number-of-dbobjects-to-maps
+    (doseq [n [10 100 1000 20000 40000]]
+      (let [docs (map (fn [i]
+                        (to-db-object {:title "Untitled" :created-at (Date.) :number i}))
+                      (take n (iterate inc 1)))]
+        (time (doall (map (fn [x] (from-db-object x true)) docs)))))))