From 572fbfa14d7c5f440ae2c9359277f9e267bcf9e1 Mon Sep 17 00:00:00 2001 From: Bartek Date: Fri, 31 Jan 2025 23:06:15 +0100 Subject: [PATCH 1/4] Optimize distinctBy implementation for NonEmptyList, NonEmptySeq and NonEmptyVector --- .../src/main/scala/cats/data/NonEmptyList.scala | 17 ++++++++--------- core/src/main/scala/cats/data/NonEmptySeq.scala | 17 ++++++++--------- .../main/scala/cats/data/NonEmptyVector.scala | 17 ++++++++--------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/core/src/main/scala/cats/data/NonEmptyList.scala b/core/src/main/scala/cats/data/NonEmptyList.scala index aae4265fdf..23b06cad3c 100644 --- a/core/src/main/scala/cats/data/NonEmptyList.scala +++ b/core/src/main/scala/cats/data/NonEmptyList.scala @@ -345,16 +345,15 @@ final case class NonEmptyList[+A](head: A, tail: List[A]) extends NonEmptyCollec override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptyList[A] = { implicit val ord: Ordering[B] = O.toOrdering - val buf = ListBuffer.empty[A] - tail.foldLeft(TreeSet(f(head): B)) { (elementsSoFar, a) => - val b = f(a) - if (elementsSoFar(b)) elementsSoFar - else { - buf += a; elementsSoFar + b - } + val bldr = List.newBuilder[A] + val seen = mutable.TreeSet.empty[B] + val it = iterator + while (it.hasNext) { + val next = it.next() + if (seen.add(f(next))) + bldr += next } - - NonEmptyList(head, buf.toList) + NonEmptyList.fromListUnsafe(bldr.result()) } /** diff --git a/core/src/main/scala/cats/data/NonEmptySeq.scala b/core/src/main/scala/cats/data/NonEmptySeq.scala index 457a14b26e..5b4bd69a69 100644 --- a/core/src/main/scala/cats/data/NonEmptySeq.scala +++ b/core/src/main/scala/cats/data/NonEmptySeq.scala @@ -241,16 +241,15 @@ final class NonEmptySeq[+A] private (val toSeq: Seq[A]) extends AnyVal with NonE override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptySeq[A] = { implicit val ord: Ordering[B] = O.toOrdering - val buf = Seq.newBuilder[A] - tail.foldLeft(TreeSet(f(head): B)) { (elementsSoFar, a) => - val b = f(a) - if (elementsSoFar(b)) elementsSoFar - else { - buf += a; elementsSoFar + b - } + val bldr = Seq.newBuilder[A] + val seen = mutable.TreeSet.empty[B] + val it = iterator + while (it.hasNext) { + val next = it.next() + if (seen.add(f(next))) + bldr += next } - - NonEmptySeq(head, buf.result()) + NonEmptySeq.fromSeqUnsafe(bldr.result()) } /** diff --git a/core/src/main/scala/cats/data/NonEmptyVector.scala b/core/src/main/scala/cats/data/NonEmptyVector.scala index 7dffe88ce3..5c8ad1e57a 100644 --- a/core/src/main/scala/cats/data/NonEmptyVector.scala +++ b/core/src/main/scala/cats/data/NonEmptyVector.scala @@ -251,16 +251,15 @@ final class NonEmptyVector[+A] private (val toVector: Vector[A]) override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptyVector[A] = { implicit val ord: Ordering[B] = O.toOrdering - val buf = Vector.newBuilder[A] - tail.foldLeft(TreeSet(f(head): B)) { (elementsSoFar, a) => - val b = f(a) - if (elementsSoFar(b)) elementsSoFar - else { - buf += a; elementsSoFar + b - } + val bldr = Vector.newBuilder[A] + val seen = mutable.TreeSet.empty[B] + val it = iterator + while (it.hasNext) { + val next = it.next() + if (seen.add(f(next))) + bldr += next } - - NonEmptyVector(head, buf.result()) + NonEmptyVector.fromVectorUnsafe(bldr.result()) } /** From 44b076d44a66d263474bf82f3f136f2f4b704562 Mon Sep 17 00:00:00 2001 From: Bartek Date: Sat, 1 Feb 2025 01:06:46 +0100 Subject: [PATCH 2/4] Improve optimizations --- .../main/scala/cats/data/NonEmptyList.scala | 23 ++++++++++++------- .../main/scala/cats/data/NonEmptySeq.scala | 20 +++++++++------- .../main/scala/cats/data/NonEmptyVector.scala | 20 +++++++++------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/core/src/main/scala/cats/data/NonEmptyList.scala b/core/src/main/scala/cats/data/NonEmptyList.scala index 23b06cad3c..8be1f838fb 100644 --- a/core/src/main/scala/cats/data/NonEmptyList.scala +++ b/core/src/main/scala/cats/data/NonEmptyList.scala @@ -345,15 +345,22 @@ final case class NonEmptyList[+A](head: A, tail: List[A]) extends NonEmptyCollec override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptyList[A] = { implicit val ord: Ordering[B] = O.toOrdering - val bldr = List.newBuilder[A] - val seen = mutable.TreeSet.empty[B] - val it = iterator - while (it.hasNext) { - val next = it.next() - if (seen.add(f(next))) - bldr += next + if (tail.isEmpty) this + else { + val bldr = List.newBuilder[A] + val seen = mutable.TreeSet.empty[B] + var rest = tail + seen.add(f(head)) + while (rest.nonEmpty) { + val next = rest.head + rest = rest.tail + if (seen.add(f(next))) { + bldr += next + } + } + + NonEmptyList(head, bldr.result()) } - NonEmptyList.fromListUnsafe(bldr.result()) } /** diff --git a/core/src/main/scala/cats/data/NonEmptySeq.scala b/core/src/main/scala/cats/data/NonEmptySeq.scala index 5b4bd69a69..f05c1d148d 100644 --- a/core/src/main/scala/cats/data/NonEmptySeq.scala +++ b/core/src/main/scala/cats/data/NonEmptySeq.scala @@ -241,15 +241,19 @@ final class NonEmptySeq[+A] private (val toSeq: Seq[A]) extends AnyVal with NonE override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptySeq[A] = { implicit val ord: Ordering[B] = O.toOrdering - val bldr = Seq.newBuilder[A] - val seen = mutable.TreeSet.empty[B] - val it = iterator - while (it.hasNext) { - val next = it.next() - if (seen.add(f(next))) - bldr += next + if (toSeq.sizeIs == 1) this + else { + val bldr = Seq.newBuilder[A] + val seen = mutable.TreeSet.empty[B] + val it = iterator + while (it.hasNext) { + val next = it.next() + if (seen.add(f(next))) + bldr += next + } + + NonEmptySeq.fromSeqUnsafe(bldr.result()) } - NonEmptySeq.fromSeqUnsafe(bldr.result()) } /** diff --git a/core/src/main/scala/cats/data/NonEmptyVector.scala b/core/src/main/scala/cats/data/NonEmptyVector.scala index 5c8ad1e57a..098000a388 100644 --- a/core/src/main/scala/cats/data/NonEmptyVector.scala +++ b/core/src/main/scala/cats/data/NonEmptyVector.scala @@ -251,15 +251,19 @@ final class NonEmptyVector[+A] private (val toVector: Vector[A]) override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptyVector[A] = { implicit val ord: Ordering[B] = O.toOrdering - val bldr = Vector.newBuilder[A] - val seen = mutable.TreeSet.empty[B] - val it = iterator - while (it.hasNext) { - val next = it.next() - if (seen.add(f(next))) - bldr += next + if (toVector.sizeIs == 1) this + else { + val bldr = Vector.newBuilder[A] + val seen = mutable.TreeSet.empty[B] + val it = iterator + while (it.hasNext) { + val next = it.next() + if (seen.add(f(next))) + bldr += next + } + + NonEmptyVector.fromVectorUnsafe(bldr.result()) } - NonEmptyVector.fromVectorUnsafe(bldr.result()) } /** From b02647fefc383115fa8932ac53737d73d1716173 Mon Sep 17 00:00:00 2001 From: Bartek Date: Sat, 1 Feb 2025 01:32:40 +0100 Subject: [PATCH 3/4] Fix compatibility problem --- core/src/main/scala/cats/data/NonEmptySeq.scala | 2 +- core/src/main/scala/cats/data/NonEmptyVector.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/cats/data/NonEmptySeq.scala b/core/src/main/scala/cats/data/NonEmptySeq.scala index f05c1d148d..f6f3ee8500 100644 --- a/core/src/main/scala/cats/data/NonEmptySeq.scala +++ b/core/src/main/scala/cats/data/NonEmptySeq.scala @@ -241,7 +241,7 @@ final class NonEmptySeq[+A] private (val toSeq: Seq[A]) extends AnyVal with NonE override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptySeq[A] = { implicit val ord: Ordering[B] = O.toOrdering - if (toSeq.sizeIs == 1) this + if (toSeq.lengthCompare(1) == 0) this else { val bldr = Seq.newBuilder[A] val seen = mutable.TreeSet.empty[B] diff --git a/core/src/main/scala/cats/data/NonEmptyVector.scala b/core/src/main/scala/cats/data/NonEmptyVector.scala index 098000a388..23770cee7c 100644 --- a/core/src/main/scala/cats/data/NonEmptyVector.scala +++ b/core/src/main/scala/cats/data/NonEmptyVector.scala @@ -251,7 +251,7 @@ final class NonEmptyVector[+A] private (val toVector: Vector[A]) override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptyVector[A] = { implicit val ord: Ordering[B] = O.toOrdering - if (toVector.sizeIs == 1) this + if (toVector.lengthCompare(1) == 0) this else { val bldr = Vector.newBuilder[A] val seen = mutable.TreeSet.empty[B] From 19f2d92be5266b2cdf9745b05140ed1f1666d745 Mon Sep 17 00:00:00 2001 From: Bartek Date: Sat, 1 Feb 2025 10:21:53 +0100 Subject: [PATCH 4/4] Move Ordering implicit into else --- core/src/main/scala/cats/data/NonEmptyList.scala | 4 ++-- core/src/main/scala/cats/data/NonEmptySeq.scala | 4 ++-- core/src/main/scala/cats/data/NonEmptyVector.scala | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/cats/data/NonEmptyList.scala b/core/src/main/scala/cats/data/NonEmptyList.scala index 8be1f838fb..207fe365a9 100644 --- a/core/src/main/scala/cats/data/NonEmptyList.scala +++ b/core/src/main/scala/cats/data/NonEmptyList.scala @@ -343,10 +343,10 @@ final case class NonEmptyList[+A](head: A, tail: List[A]) extends NonEmptyCollec override def distinct[AA >: A](implicit O: Order[AA]): NonEmptyList[AA] = distinctBy(identity[AA]) override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptyList[A] = { - implicit val ord: Ordering[B] = O.toOrdering - if (tail.isEmpty) this else { + implicit val ord: Ordering[B] = O.toOrdering + val bldr = List.newBuilder[A] val seen = mutable.TreeSet.empty[B] var rest = tail diff --git a/core/src/main/scala/cats/data/NonEmptySeq.scala b/core/src/main/scala/cats/data/NonEmptySeq.scala index f6f3ee8500..4597a70f84 100644 --- a/core/src/main/scala/cats/data/NonEmptySeq.scala +++ b/core/src/main/scala/cats/data/NonEmptySeq.scala @@ -239,10 +239,10 @@ final class NonEmptySeq[+A] private (val toSeq: Seq[A]) extends AnyVal with NonE override def distinct[AA >: A](implicit O: Order[AA]): NonEmptySeq[AA] = distinctBy(identity[AA]) override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptySeq[A] = { - implicit val ord: Ordering[B] = O.toOrdering - if (toSeq.lengthCompare(1) == 0) this else { + implicit val ord: Ordering[B] = O.toOrdering + val bldr = Seq.newBuilder[A] val seen = mutable.TreeSet.empty[B] val it = iterator diff --git a/core/src/main/scala/cats/data/NonEmptyVector.scala b/core/src/main/scala/cats/data/NonEmptyVector.scala index 23770cee7c..b80b6b27ef 100644 --- a/core/src/main/scala/cats/data/NonEmptyVector.scala +++ b/core/src/main/scala/cats/data/NonEmptyVector.scala @@ -249,10 +249,10 @@ final class NonEmptyVector[+A] private (val toVector: Vector[A]) override def distinct[AA >: A](implicit O: Order[AA]): NonEmptyVector[AA] = distinctBy(identity[AA]) override def distinctBy[B](f: A => B)(implicit O: Order[B]): NonEmptyVector[A] = { - implicit val ord: Ordering[B] = O.toOrdering - if (toVector.lengthCompare(1) == 0) this else { + implicit val ord: Ordering[B] = O.toOrdering + val bldr = Vector.newBuilder[A] val seen = mutable.TreeSet.empty[B] val it = iterator