Skip to content

Commit

Permalink
Add option to produce proper join of identical indices
Browse files Browse the repository at this point in the history
  • Loading branch information
pityka committed Feb 10, 2024
1 parent 2bd91c0 commit 2a7918d
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 5 deletions.
30 changes: 28 additions & 2 deletions saddle-core/src/main/scala/org/saddle/index/JoinerImpl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,34 @@ class JoinerImpl[@spec(Boolean, Int, Long, Double) T: ST: ORD]
private implicit def wrapArray(arr: Array[Int]): Option[Array[Int]] =
Some(arr)

def join(left: Index[T], right: Index[T], how: JoinType): ReIndexer[T] = {
if (left == right) {
/** Joins two indexes with the default handling of identical indexes.
  *
  * Delegates to the four-argument overload with
  * `forceProperSemantics = false`, i.e. when `left == right` the shortcut
  * branch of that overload is taken instead of computing a join.
  *
  * @param left
  *   left index to join
  * @param right
  *   right index to join
  * @param how
  *   join mode passed through unchanged to the four-argument overload
  */
def join(left: Index[T], right: Index[T], how: JoinType): ReIndexer[T] =
  join(left, right, how, forceProperSemantics = false)

/** Perform database joins
*
* @param left
* left index to join
* @param right
* right index to join
* @param how
* mode of operation: inner, left outer, right outer, full outer
* @param forceProperSemantics
* if false, no join is performed when left == right, and right is
* returned as is. This is correct for unique indexes, and also practical
* otherwise. If forceProperSemantics is true, the join is performed even
* between identical indexes. At the moment forceProperSemantics=true is
* used nowhere in saddle's source code (i.e. all frame joins etc. use
* the shortcut and do not produce proper joins of identical indexes with
* repeated values).
* @return
* a ReIndexer for the joined index
*/
def join(
left: Index[T],
right: Index[T],
how: JoinType,
forceProperSemantics: Boolean
): ReIndexer[T] = {
if (left == right && !forceProperSemantics) {
ReIndexer(None, None, right)
} else if (left.isUnique && right.isUnique) {
how match {
Expand Down
17 changes: 14 additions & 3 deletions saddle-core/src/test/scala/org/saddle/IndexSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import org.saddle.index.OuterJoin
class IndexSpec extends Specification {
"Index methods" should {
"contiguous works" in {
val ix = Index(1,1,0,0)
val ix = Index(1, 1, 0, 0)
ix.isContiguous must_== true
}
}
"over flow in join" in {
val ix1 = Index(array.randInt(1000000, 0, 3))
val ix2 = Index(array.randInt(10000, 0, 3))
Expand Down Expand Up @@ -42,7 +42,18 @@ class IndexSpec extends Specification {
}
"Index Joins" should {

"Outer join of same non-unique indexes " in {
"Outer join of same non-unique indexes with forceProperSemantics = true makes a join " in {
  // Two equal indexes, each holding the key 0 twice.
  val lhs = Index(0, 0)
  val rhs = Index(0, 0)
  val joiner = new org.saddle.index.JoinerImpl[Int]
  val joined =
    joiner.join(lhs, rhs, index.OuterJoin, forceProperSemantics = true)

  // With the shortcut disabled, every left 0 pairs with every right 0,
  // so the result has 2 x 2 = 4 entries.
  joined.index must_== Index(0, 0, 0, 0)
  // Each left position is repeated once per matching right position ...
  joined.lTake.get must_== Array(0, 0, 1, 1)
  // ... while the right positions cycle for each left position.
  joined.rTake.get must_== Array(0, 1, 0, 1)

}
"Outer join of same non-unique indexes with forceProperSemantics=false (the default) returns the input " in {
val ix1 = Index(0, 0)
val ix2 = Index(0, 0)
val res = ix1.join(ix2, how = index.OuterJoin)
Expand Down

0 comments on commit 2a7918d

Please sign in to comment.