Optimize from-db-object performance for DBObjects

Creating a temporary sequence from a DBObject is quite slow. Using
.keySet as the reduce collection and calling .get inside the reduce is
considerably faster. The common code between the Map and DBObject
implementations can't be shared: reflection would completely kill the
performance, and the function can't be type-hinted because DBObject
doesn't implement the Map interface.
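
For illustration, a minimal REPL sketch of the type-hinting problem; the
helper names here are hypothetical and not part of this commit:

;; Sketch only. With reflection warnings on, an untyped shared helper
;; warns (and reflects at runtime) on every .get call:
(set! *warn-on-reflection* true)

(defn get-field [obj k]   ; hypothetical untyped helper
  (.get obj k))
;; Reflection warning: call to method get can't be resolved.

;; A ^java.util.Map hint resolves the call for Map, but DBObject doesn't
;; implement java.util.Map, so one hinted helper can't serve both types:
(defn get-map-field [^java.util.Map obj k]
  (.get obj k))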

Added a simple test case for from-db-object performance. I'm seeing a
performance increase of about 20% with this change (170ms -> 140ms).
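
For more stable numbers than a bare (time ...), something like Criterium
could be used; this is just a sketch, assumes criterium as a dev
dependency, and is not part of this commit:

(require '[criterium.core :refer [quick-bench]]
         '[monger.conversion :refer [to-db-object from-db-object]])

;; Benchmark conversion of a single small document (illustrative values).
(let [dbo (to-db-object {:title "Untitled" :number 1})]
  (quick-bench (from-db-object dbo true)))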
Juho Teperi 2015-10-08 22:53:23 +03:00
parent fe73144075
commit c26ae0835d
2 changed files with 25 additions and 20 deletions

src/clojure/monger/conversion.clj

@@ -105,7 +105,6 @@
-(declare associate-pairs)
 (defprotocol ConvertFromDBObject
   (from-db-object [input keywordize] "Converts given DBObject instance to a piece of Clojure data"))
@@ -118,7 +117,12 @@
   Map
   (from-db-object [^Map input keywordize]
-    (associate-pairs (.entrySet input) keywordize))
+    (reduce (if keywordize
+              (fn [m ^String k]
+                (assoc m (keyword k) (from-db-object (.get input k) true)))
+              (fn [m ^String k]
+                (assoc m k (from-db-object (.get input k) false))))
+            {} (.keySet input)))
 
   List
   (from-db-object [^List input keywordize]
@@ -136,22 +140,16 @@
   (from-db-object [^DBObject input keywordize]
     ;; DBObject provides .toMap, but the implementation in
     ;; subclass GridFSFile unhelpfully throws
-    ;; UnsupportedOperationException. This part is taken from congomongo and
-    ;; may need revisiting at a later point. MK.
-    (associate-pairs (for [key-set (.keySet input)] [key-set (.get input key-set)])
-                     keywordize)))
-
-(defn- associate-pairs [pairs keywordize]
-  ;; Taking the keywordize test out of the fn reduces derefs
-  ;; dramatically, which was the main barrier to matching pure-Java
-  ;; performance for this marshalling. Taken from congomongo. MK.
-  (reduce (if keywordize
-            (fn [m [^String k v]]
-              (assoc m (keyword k) (from-db-object v true)))
-            (fn [m [^String k v]]
-              (assoc m k (from-db-object v false))))
-          {} (reverse pairs)))
+    ;; UnsupportedOperationException.
+    ;; This is the same code as with Map. The code can't be shared using a
+    ;; function because reflection would kill the performance and DBObject
+    ;; and Map don't share an interface.
+    (reduce (if keywordize
+              (fn [m ^String k]
+                (assoc m (keyword k) (from-db-object (.get input k) true)))
+              (fn [m ^String k]
+                (assoc m k (from-db-object (.get input k) false))))
+            {} (.keySet input))))
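
For reference, a quick REPL check of the changed code path; BasicDBObject
implements DBObject, so this exercises the new reduce over .keySet rather
than the removed associate-pairs path (values are illustrative):

(require '[monger.conversion :refer [from-db-object]])
(import com.mongodb.BasicDBObject)

(let [dbo (doto (BasicDBObject.)
            (.put "title" "Untitled")
            (.put "number" 1))]
  (from-db-object dbo true))
;; => {:title "Untitled", :number 1}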

test/monger/test/stress_test.clj

@@ -1,7 +1,7 @@
 (ns monger.test.stress-test
   (:require [monger.core :as mg]
             [monger.collection :as mc]
-            [monger.conversion :refer [to-db-object]]
+            [monger.conversion :refer [to-db-object from-db-object]]
             [clojure.test :refer :all])
   (:import [com.mongodb WriteConcern]
            java.util.Date))
@@ -30,4 +30,11 @@
       (mc/remove db collection)
       (println "Inserting " n " documents...")
       (time (mc/insert-batch db collection docs))
-      (is (= n (mc/count db collection)))))))
+      (is (= n (mc/count db collection))))))
+
+(deftest ^{:performance true} convert-large-number-of-dbojects-to-maps
+  (doseq [n [10 100 1000 20000 40000]]
+    (let [docs (map (fn [i]
+                      (to-db-object {:title "Untitled" :created-at (Date.) :number i}))
+                    (take n (iterate inc 1)))]
+      (time (doall (map (fn [x] (from-db-object x true)) docs))))))
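
For reference, the new test can be run on its own from a REPL; a sketch,
assuming the test namespace's own setup requirements are satisfied:

(require 'monger.test.stress-test
         '[clojure.test :as t])

;; Run just the new performance test var.
(t/test-vars
  [#'monger.test.stress-test/convert-large-number-of-dbojects-to-maps])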