Skip to content

Commit

Permalink
Improve performance of removing elements from sequences
Browse files Browse the repository at this point in the history
Merging `InsertBeforeIndex` protocol into new `IndexedOps` protocol for optimized indexed based operations (which now includes inserting before and removing at)

Updating `nthpath` and `keypath` to use new helper fns

Adding tests and benchmarks
  • Loading branch information
jeff303 committed May 25, 2021
1 parent e8225f0 commit 2ace3be
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 35 deletions.
55 changes: 40 additions & 15 deletions scripts/benchmarks.clj
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@
(run-benchmark "transform values of a list"
(transform ALL inc data)
(doall (sequence (map inc) data))
(reverse (into '() (map inc) data))
))
(reverse (into '() (map inc) data))))


(let [data {:a 1 :b 2 :c 3 :d 4}]
(run-benchmark "transform values of a small map"
Expand All @@ -127,8 +127,8 @@
(into {} (map (fn [e] [(key e) (inc (val e))]) data))
(into {} (map (fn [e] [(key e) (inc (val e))])) data)
(map-vals-map-iterable data inc)
(map-vals-map-iterable-transient data inc)
))
(map-vals-map-iterable-transient data inc)))



(let [data (->> (for [i (range 1000)] [i i]) (into {}))]
Expand All @@ -152,8 +152,8 @@
(first data)
(select-any ALL data)
(select-any FIRST data)
(select-first ALL data)
))
(select-first ALL data)))


(let [data [1 2 3 4 5]]
(run-benchmark "map a function over a vector"
Expand Down Expand Up @@ -192,8 +192,8 @@
(run-benchmark "prepend to a vector"
(vec (cons 0 data))
(setval BEFORE-ELEM 0 data)
(into [0] data)
))
(into [0] data)))


(declarepath TreeValues)

Expand Down Expand Up @@ -314,8 +314,8 @@
(map (fn [[k v]] [(keyword (str *ns*) (name k)) v]))
data)
(reduce-kv (fn [m k v] (assoc m (keyword (str *ns*) (name k)) v)) {} data)
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)
))
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)))



(let [data (->> (for [i (range 1000)] [(keyword (str i)) i]) (into {}))]
Expand All @@ -324,8 +324,8 @@
(map (fn [[k v]] [(keyword (str *ns*) (name k)) v]))
data)
(reduce-kv (fn [m k v] (assoc m (keyword (str *ns*) (name k)) v)) {} data)
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)
))
(setval [MAP-KEYS NAMESPACE] (str *ns*) data)))


(defnav walker-old [afn]
(select* [this structure next-fn]
Expand All @@ -336,8 +336,8 @@
(let [data {:a [1 2 {:c '(3 4) :d {:e [1 2 3] 7 8 9 10}}]}]
(run-benchmark "walker vs. clojure.walk version"
(transform (walker number?) inc data)
(transform (walker-old number?) inc data)
))
(transform (walker-old number?) inc data)))


(let [size 1000
middle-idx (/ size 2)
Expand All @@ -354,4 +354,29 @@
(run-benchmark "before-index at 0 vs. srange vs. cons (list)"
(setval (before-index 0) v data-lst)
(setval (srange 0 0) [v] data-lst)
(cons v data-lst)))
(cons v data-lst))
(run-benchmark "set keypath and nthpath at index to NONE versus srange in middle (vector)"
(setval (nthpath middle-idx) NONE data-vec)
(setval (keypath middle-idx) NONE data-vec)
(setval (srange middle-idx (inc middle-idx)) [] data-vec))
(run-benchmark "set keypath and nthpath at index to NONE versus srange in middle (list)"
;; this case still needs to be optimized in nthpath*
(setval (nthpath middle-idx) NONE data-lst)
(setval (keypath middle-idx) NONE data-lst)
(setval (srange middle-idx (inc middle-idx)) [] data-lst))
(run-benchmark "set keypath and nthpath at beginning to NONE versus srange and subvec (vector)"
(setval (nthpath 0) NONE data-vec)
(setval (keypath 0) NONE data-vec)
(setval (srange 0 1) [] data-vec)
(subvec data-vec 1))
(run-benchmark "set keypath and nthpath at beginning to NONE versus srange and rest (list)"
;; this case still needs to be optimized in nthpath*
(setval (nthpath 0) NONE data-lst)
(setval (keypath 0) NONE data-lst)
(setval (srange 0 1) [] data-lst)
(rest data-lst))
(run-benchmark "set keypath and nthpath at end to NONE versus srange and subvec (vector)"
(setval (nthpath (dec size)) NONE data-vec)
(setval (keypath (dec size)) NONE data-vec)
(setval (srange (dec size) size) [] data-vec)
(subvec data-vec 0 (dec size))))
80 changes: 62 additions & 18 deletions src/clj/com/rpl/specter/navs.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -492,9 +492,6 @@
(defprotocol FastEmpty
(fast-empty? [s]))

(defprotocol InsertBeforeIndex
(insert-before-idx [aseq idx val]))

(defnav PosNavigator [getter updater]
(select* [this structure next-fn]
(if-not (fast-empty? structure)
Expand Down Expand Up @@ -643,7 +640,29 @@
(nth s (-> s count dec))
))

(defprotocol IndexedOps
"Fast indexed operations on sequential types"
(insert-before-idx [aseq idx val])
(remove-at-idx [aseq idx]))

;; helper fns for indexed operations
(defn- insert-before-index-list [lst idx v]
;; an implementation that is most efficient for list style structures
(let [[front back] (split-at idx lst)]
(concat front (cons v back))))

(defn- remove-at-index-vec [aseq idx]
(condp = idx
0 (subvec aseq 1)
(vec-count aseq) (subvec aseq 0 (vec-count aseq))
(into (subvec aseq 0 idx) (subvec aseq (inc idx)))))

(defn- remove-at-index-list [lst idx]
;; an implementation that is most efficient for list style structures
(condp = idx
0 (rest lst)
(let [[front back] (split-at idx lst)]
(concat front (rest back)))))

(extend-protocol FastEmpty
nil
Expand All @@ -664,7 +683,7 @@
(let [newv (next-fn vals (get structure key))]
(if (identical? newv i/NONE)
(if (sequential? structure)
(i/srange-transform* structure key (inc key) (fn [_] []))
(remove-at-idx structure key)
(dissoc structure key))
(assoc structure key newv))))

Expand Down Expand Up @@ -704,8 +723,8 @@
(if (vector? structure)
(let [newv (next-fn vals (nth structure i))]
(if (identical? newv i/NONE)
(i/srange-transform* structure i (inc i) (fn [_] []))
(assoc structure i newv)))
(remove-at-index-vec structure i)
(assoc structure i newv)))
(i/srange-transform* ; can make this much more efficient with alternate impl
structure
i
Expand All @@ -726,35 +745,60 @@
(end-fn structure)
))

(defn- insert-before-index-list [lst idx val]
;; an implementation that is most efficient for list style structures
(let [[front back] (split-at idx lst)]
(concat front (cons val back))))

(extend-protocol InsertBeforeIndex
(extend-protocol IndexedOps
nil
(insert-before-idx [_ idx val]
(if (= 0 idx)
(list val)
(throw (ex-info "For a nil structure, can only insert before index 0"
{:insertion-index idx}))))
(remove-at-idx [_ _]
;; removing from nil structure at any index should just be nil?
nil)

#?(:clj java.lang.String :cljs string)
(insert-before-idx [aseq idx val]
(apply str (insert-before-index-list aseq idx val)))
(insert-before-idx [aseq idx v]
(apply str (insert-before-index-list aseq idx v)))
(remove-at-idx [s idx]
(str (subs s 0 idx) (subs s idx)))

#?(:clj clojure.lang.LazySeq :cljs cljs.core/LazySeq)
(insert-before-idx [aseq idx val]
(insert-before-index-list aseq idx val))
(insert-before-idx [aseq idx v]
(insert-before-index-list aseq idx v))
(remove-at-idx [aseq idx]
(remove-at-index-list aseq idx))

#?(:clj clojure.lang.IPersistentVector :cljs cljs.core/PersistentVector)
(insert-before-idx [aseq idx val]
(let [front (subvec aseq 0 idx)
back (subvec aseq idx)]
(into (conj front val) back)))
(remove-at-idx [aseq idx]
(remove-at-index-vec aseq idx))

;; TODO: incorporate this into the transients namespace instead/in addition to?
#?(:clj clojure.lang.ITransientVector :cljs cljs.core/TransientVector)
(insert-before-idx [aseq idx val]
(loop [v aseq prev-val val curr-idx idx]
(if
(= curr-idx (transient-vec-count v))
(assoc! v curr-idx prev-val)
(let [curr-val (nth v curr-idx)]
(recur (assoc! v curr-idx prev-val) curr-val (inc curr-idx))))))
(remove-at-idx [aseq idx]
(loop [v aseq curr-idx idx]
(if
(< curr-idx (dec (transient-vec-count v)))
(let [next-val (nth v (inc curr-idx))]
(recur (assoc! v curr-idx next-val) (inc curr-idx)))
(pop! v))))

#?(:clj clojure.lang.IPersistentList :cljs cljs.core/List)
(insert-before-idx [aseq idx val]
(cond (= idx 0)
(if (= idx 0)
(cons val aseq)
:else (insert-before-index-list aseq idx val))))
(insert-before-index-list aseq idx val)))
(remove-at-idx [aseq idx]
(if (= idx 0)
(rest aseq)
(remove-at-index-list aseq idx))))
19 changes: 17 additions & 2 deletions test/com/rpl/specter/core_test.cljc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
(ns com.rpl.specter.core-test
#?(:cljs (:require-macros
[cljs.test :refer [is deftest]]
[cljs.test :refer [is deftest testing]]
[clojure.test.check.clojure-test :refer [defspec]]
[com.rpl.specter.cljs-test-helpers :refer [for-all+]]
[com.rpl.specter.test-helpers :refer [ic-test]]
Expand All @@ -13,7 +13,7 @@
defdynamicnav traverse-all satisfies-protpath? end-fn
vtransform]]))
(:use
#?(:clj [clojure.test :only [deftest is]])
#?(:clj [clojure.test :only [deftest is testing]])
#?(:clj [clojure.test.check.clojure-test :only [defspec]])
#?(:clj [com.rpl.specter.test-helpers :only [for-all+ ic-test]])
#?(:clj [com.rpl.specter
Expand All @@ -33,6 +33,7 @@
#?(:cljs [clojure.test.check.generators :as gen])
#?(:cljs [clojure.test.check.properties :as prop :include-macros true])
[com.rpl.specter :as s]
[com.rpl.specter.navs :as n]
[com.rpl.specter.transients :as t]
[clojure.set :as set]))

Expand Down Expand Up @@ -1331,6 +1332,7 @@
(deftest nthpath-test
(is (predand= vector? [1 2 -3 4] (transform (s/nthpath 2) - [1 2 3 4])))
(is (predand= vector? [1 2 4] (setval (s/nthpath 2) s/NONE [1 2 3 4])))
(is (predand= vector? [1 2 3] (setval (s/nthpath 3) s/NONE [1 2 3 4])))
(is (predand= (complement vector?) '(1 -2 3 4) (transform (s/nthpath 1) - '(1 2 3 4))))
(is (predand= (complement vector?) '(1 2 4) (setval (s/nthpath 2) s/NONE '(1 2 3 4))))
(is (= [0 1 [2 4 4]] (transform (s/nthpath 2 1) inc [0 1 [2 3 4]])))
Expand Down Expand Up @@ -1702,3 +1704,16 @@
(is (satisfies-protpath? FooPP "a"))
(is (not (satisfies-protpath? FooPP 1)))
)))

;; adding a separate test because these are not yet exercised by the rest of the suite
(deftest indexed-opts-transient-vectors-test
(testing "IndexedOps fns work properly for transient vectors"
(let [v (vec (range 10))]
(doseq [[f args exp] [[n/remove-at-idx [0] (vec (range 1 10))]
[n/remove-at-idx [5] [0 1 2 3 4 6 7 8 9]]
[n/remove-at-idx [9] (vec (range 9))]
[n/insert-before-idx [0 -1] (vec (range -1 10))]
[n/insert-before-idx [7 -1] [0 1 2 3 4 5 6 -1 7 8 9]]
[n/insert-before-idx [10 10] (vec (range 11))]]]
(is (= exp (-> (apply f (cons (transient v) args))
persistent!)))))))

0 comments on commit 2ace3be

Please sign in to comment.