Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix typo Mutlicolumn -> Multicolumn #5

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/scala/com/amazon/deequ/analyzers/Analyzer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ private[deequ] object Analyzers {
}

def entityFrom(columns: Seq[String]): Entity.Value = {
if (columns.size == 1) Entity.Column else Entity.Mutlicolumn
if (columns.size == 1) Entity.Column else Entity.Multicolumn
}

def conditionalSelection(selection: String, where: Option[String]): Column = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ case class Correlation(
secondColumn: String,
where: Option[String] = None)
extends StandardScanShareableAnalyzer[CorrelationState]("Correlation",
s"$firstColumn,$secondColumn", Entity.Mutlicolumn)
s"$firstColumn,$secondColumn", Entity.Multicolumn)
with FilterableAnalyzer {

override def aggregationFunctions(): Seq[Column] = {
Expand Down
18 changes: 10 additions & 8 deletions src/main/scala/com/amazon/deequ/analyzers/MutualInformation.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@

package com.amazon.deequ.analyzers

import com.amazon.deequ.analyzers.Analyzers.COUNT_COL
import com.amazon.deequ.analyzers.Analyzers._
import com.amazon.deequ.metrics.{DoubleMetric, Entity}
import org.apache.spark.sql.functions.{col, sum, udf}
import com.amazon.deequ.metrics.DoubleMetric
import com.amazon.deequ.metrics.Entity
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.functions.sum
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.types.StructType
import Analyzers.COUNT_COL
import com.amazon.deequ.analyzers.runners.MetricCalculationException

/**
* Mutual Information describes how much information about one column can be inferred from another
Expand Down Expand Up @@ -75,14 +77,14 @@ case class MutualInformation(columns: Seq[String], where: Option[String] = None)
val resultRow = value.head()

if (resultRow.isNullAt(0)) {
metricFromEmpty(this, "MutualInformation", columns.mkString(","), Entity.Mutlicolumn)
metricFromEmpty(this, "MutualInformation", columns.mkString(","), Entity.Multicolumn)
} else {
metricFromValue(resultRow.getDouble(0), "MutualInformation", columns.mkString(","),
Entity.Mutlicolumn)
Entity.Multicolumn)
}

case None =>
metricFromEmpty(this, "MutualInformation", columns.mkString(","), Entity.Mutlicolumn)
metricFromEmpty(this, "MutualInformation", columns.mkString(","), Entity.Multicolumn)
}
}

Expand All @@ -93,7 +95,7 @@ case class MutualInformation(columns: Seq[String], where: Option[String] = None)
}

override def toFailureMetric(exception: Exception): DoubleMetric = {
metricFromFailure(exception, "MutualInformation", columns.mkString(","), Entity.Mutlicolumn)
metricFromFailure(exception, "MutualInformation", columns.mkString(","), Entity.Multicolumn)
}

override def filterCondition: Option[String] = where
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/com/amazon/deequ/metrics/Metric.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import org.apache.spark.sql.Column
import scala.util.{Failure, Success, Try}

object Entity extends Enumeration {
val Dataset, Column, Mutlicolumn = Value
val Dataset, Column, Multicolumn = Value
}

/** Common trait for all data quality metrics */
Expand Down
8 changes: 4 additions & 4 deletions src/test/scala/com/amazon/deequ/VerificationResultTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe
("Column", "att2", "Completeness", 1.0),
("Column", "item", "Distinctness", 1.0),
("Column", "att1", "Completeness", 1.0),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25)
("Multicolumn", "att1,att2", "Uniqueness", 0.25)
)
.toDF("entity", "instance", "name", "value")

Expand All @@ -63,7 +63,7 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe
import session.implicits._
val expected = Seq(
("Column", "att1", "Completeness", 1.0),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25)
("Multicolumn", "att1,att2", "Uniqueness", 0.25)
)
.toDF("entity", "instance", "name", "value")

Expand All @@ -82,7 +82,7 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe
"""[{"entity":"Column","instance":"item","name":"Distinctness","value":1.0},
|{"entity": "Column", "instance":"att2","name":"Completeness","value":1.0},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25},
|{"entity":"Dataset","instance":"*","name":"Size","value":4.0}]"""
.stripMargin.replaceAll("\n", "")
Expand All @@ -103,7 +103,7 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe

val expectedJson =
"""[{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25}]"""
.stripMargin.replaceAll("\n", "")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec
("Column", "att1", "Completeness", 1.0),
("Column", "att2", "Completeness", 1.0),
("Column", "att2", "Uniqueness", 0.25),
("Mutlicolumn", "att1,att2", "MutualInformation",
("Multicolumn", "att1,att2", "MutualInformation",
-(0.75 * math.log(0.75) + 0.25 * math.log(0.25))))
.toDF("entity", "instance", "name", "value")

Expand Down
6 changes: 3 additions & 3 deletions src/test/scala/com/amazon/deequ/analyzers/AnalysisTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class AnalysisTest extends AnyWordSpec with Matchers with SparkContextSpec with
("Dataset", "*", "Size", 4.0),
("Column", "item", "Distinctness", 1.0),
("Column", "att1", "Completeness", 1.0),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25))
("Multicolumn", "att1,att2", "Uniqueness", 0.25))
.toDF("entity", "instance", "name", "value")

assertSameRows(successMetricsAsDataFrame, expected)
Expand All @@ -76,7 +76,7 @@ class AnalysisTest extends AnyWordSpec with Matchers with SparkContextSpec with
("Dataset", "*", "Size", 4.0),
("Column", "`item.one`", "Distinctness", 1.0),
("Column", "att1", "Completeness", 1.0),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25))
("Multicolumn", "att1,att2", "Uniqueness", 0.25))
.toDF("entity", "instance", "name", "value")

assertSameRows(successMetricsAsDataFrame, expected)
Expand Down Expand Up @@ -104,7 +104,7 @@ class AnalysisTest extends AnyWordSpec with Matchers with SparkContextSpec with
("Dataset", "*", "Size", 4.0),
("Column", "ITEM", "Distinctness", 1.0),
("Column", "ATT1", "Completeness", 1.0),
("Mutlicolumn", "ATT1,ATT2", "Uniqueness", 0.25))
("Multicolumn", "ATT1,ATT2", "Uniqueness", 0.25))
.toDF("entity", "instance", "name", "value")
assertSameRows(successMetricsAsDataFrame, expected)
}
Expand Down
12 changes: 6 additions & 6 deletions src/test/scala/com/amazon/deequ/analyzers/AnalyzerTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ class AnalyzerTests extends AnyWordSpec with Matchers with SparkContextSpec with
DoubleMetric(Entity.Column, "Uniqueness", "uniqueWithNulls", Success(1.0), uniqueWithNulls.fullColumn))
val multiColUnique = Uniqueness(Seq("unique", "nonUnique")).calculate(dfFull)
assert(multiColUnique ==
DoubleMetric(Entity.Mutlicolumn, "Uniqueness", "unique,nonUnique", Success(1.0), multiColUnique.fullColumn))
DoubleMetric(Entity.Multicolumn, "Uniqueness", "unique,nonUnique", Success(1.0), multiColUnique.fullColumn))
val multiColUniqueWithNull = Uniqueness(Seq("unique", "nonUniqueWithNulls")).calculate(dfFull)
assert(multiColUniqueWithNull ==
DoubleMetric(Entity.Mutlicolumn, "Uniqueness", "unique,nonUniqueWithNulls",
DoubleMetric(Entity.Multicolumn, "Uniqueness", "unique,nonUniqueWithNulls",
Success(1.0), multiColUniqueWithNull.fullColumn))
val multiColUniqueComb = Uniqueness(Seq("nonUnique", "onlyUniqueWithOtherNonUnique")).calculate(dfFull)
assert(multiColUniqueComb ==
DoubleMetric(Entity.Mutlicolumn, "Uniqueness", "nonUnique,onlyUniqueWithOtherNonUnique",
DoubleMetric(Entity.Multicolumn, "Uniqueness", "nonUnique,onlyUniqueWithOtherNonUnique",
Success(1.0), multiColUniqueComb.fullColumn))

}
Expand All @@ -149,7 +149,7 @@ class AnalyzerTests extends AnyWordSpec with Matchers with SparkContextSpec with

Uniqueness(Seq("nonExistingColumn", "unique")).calculate(dfFull) match {
case metric =>
assert(metric.entity == Entity.Mutlicolumn)
assert(metric.entity == Entity.Multicolumn)
assert(metric.name == "Uniqueness")
assert(metric.instance == "nonExistingColumn,unique")
assert(metric.value.compareFailureTypes(Failure(new NoSuchColumnException(""))))
Expand All @@ -175,7 +175,7 @@ class AnalyzerTests extends AnyWordSpec with Matchers with SparkContextSpec with
"compute correct metrics " in withSparkSession { sparkSession =>
val dfFull = getDfFull(sparkSession)
assert(MutualInformation("att1", "att2").calculate(dfFull) ==
DoubleMetric(Entity.Mutlicolumn, "MutualInformation", "att1,att2",
DoubleMetric(Entity.Multicolumn, "MutualInformation", "att1,att2",
Success(-(0.75 * math.log(0.75) + 0.25 * math.log(0.25)))))
}
"yields 0 for conditionally uninformative columns" in withSparkSession { sparkSession =>
Expand Down Expand Up @@ -677,7 +677,7 @@ class AnalyzerTests extends AnyWordSpec with Matchers with SparkContextSpec with
"yield 1.0 for maximal conditionally informative columns" in withSparkSession { sparkSession =>
val df = getDfWithConditionallyInformativeColumns(sparkSession)
Correlation("att1", "att2").calculate(df) shouldBe DoubleMetric(
Entity.Mutlicolumn,
Entity.Multicolumn,
"Correlation",
"att1,att2",
Success(1.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class AnalyzerContextTest extends AnyWordSpec
val expected = Seq(
("Column", "att1", "Histogram.abs.a", 3.0),
("Dataset", "*", "Size", 4.0),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25),
("Multicolumn", "att1,att2", "Uniqueness", 0.25),
("Column", "att1", "Histogram.bins", 2.0),
("Column", "att1", "Completeness", 1.0),
("Column", "item", "Distinctness", 1.0),
Expand All @@ -67,7 +67,7 @@ class AnalyzerContextTest extends AnyWordSpec
import session.implicits._
val expected = Seq(
("Column", "att1", "Completeness", 1.0),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25))
("Multicolumn", "att1,att2", "Uniqueness", 0.25))
.toDF("entity", "instance", "name", "value")

assertSameRows(successMetricsAsDataFrame, expected)
Expand All @@ -85,7 +85,7 @@ class AnalyzerContextTest extends AnyWordSpec
"""[
|{"entity":"Column","instance":"item","name":"Distinctness","value":1.0},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Mutlicolumn","instance":"att1,att2","name":"Uniqueness","value":0.25},
|{"entity":"Multicolumn","instance":"att1,att2","name":"Uniqueness","value":0.25},
|{"entity":"Column","instance":"att1","name":"Histogram.bins","value":2.0},
|{"entity":"Column","instance":"att1","name":"Histogram.abs.a","value":3.0},
|{"entity":"Column","instance":"att1","name":"Histogram.ratio.a","value":0.75},
Expand All @@ -112,7 +112,7 @@ class AnalyzerContextTest extends AnyWordSpec

val expectedJson =
"""[{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25}]"""
.stripMargin.replaceAll("\n", "")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ class SimpleResultSerdeTest extends WordSpec with Matchers with SparkContextSpec
|"instance":"att2","name":"Completeness","value":1.0},
|{"dataset_date":1507975810,"entity":"Column","region":"EU",
|"instance":"att1","name":"Completeness","value":1.0},
|{"dataset_date":1507975810,"entity":"Mutlicolumn","region":"EU",
|{"dataset_date":1507975810,"entity":"Multicolumn","region":"EU",
|"instance":"att1,att2","name":"MutualInformation","value":0.5623351446188083},
|{"dataset_date":1507975810,"entity":"Dataset","region":"EU",
|"instance":"*","name":"Size","value":4.0},
Expand Down
20 changes: 10 additions & 10 deletions src/test/scala/com/amazon/deequ/repository/AnalysisResultTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class AnalysisResultTest extends AnyWordSpec
("Dataset", "*", "Size", 4.0, DATE_ONE, "EU"),
("Column", "item", "Distinctness", 1.0, DATE_ONE, "EU"),
("Column", "att1", "Completeness", 1.0, DATE_ONE, "EU"),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
("Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
.toDF("entity", "instance", "name", "value", "dataset_date", "region")

assertSameRows(analysisResultsAsDataFrame, expected)
Expand All @@ -78,7 +78,7 @@ class AnalysisResultTest extends AnyWordSpec
|"region":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Column","instance":"item","name":"Distinctness","value":1.0,
|"region":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25,
|"region":"EU", "dataset_date":$DATE_ONE}]"""
.stripMargin.replaceAll("\n", "")
Expand All @@ -103,7 +103,7 @@ class AnalysisResultTest extends AnyWordSpec

val expected = Seq(
("Column", "att1", "Completeness", 1.0, DATE_ONE, "EU"),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
("Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
.toDF("entity", "instance", "name", "value", "dataset_date", "region")

assertSameRows(analysisResultsAsDataFrame, expected)
Expand All @@ -126,7 +126,7 @@ class AnalysisResultTest extends AnyWordSpec
val expected =
s"""[{"entity":"Column","instance":"att1","name":"Completeness","value":1.0,
|"region":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25,
|"region":"EU", "dataset_date":$DATE_ONE}]"""
.stripMargin.replaceAll("\n", "")
Expand All @@ -150,7 +150,7 @@ class AnalysisResultTest extends AnyWordSpec
("Dataset", "*", "Size", 4.0, DATE_ONE, "EU"),
("Column", "item", "Distinctness", 1.0, DATE_ONE, "EU"),
("Column", "att1", "Completeness", 1.0, DATE_ONE, "EU"),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
("Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
.toDF("entity", "instance", "name", "value", "dataset_date", "region")

assertSameRows(analysisResultsAsDataFrame, expected)
Expand All @@ -173,7 +173,7 @@ class AnalysisResultTest extends AnyWordSpec
|"region":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Column","instance":"item","name":"Distinctness","value":1.0,
|"region":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25,
|"region":"EU", "dataset_date":$DATE_ONE}]"""
.stripMargin.replaceAll("\n", "")
Expand All @@ -197,7 +197,7 @@ class AnalysisResultTest extends AnyWordSpec
("Dataset", "*", "Size", 4.0, DATE_ONE, "EU"),
("Column", "item", "Distinctness", 1.0, DATE_ONE, "EU"),
("Column", "att1", "Completeness", 1.0, DATE_ONE, "EU"),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
("Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
.toDF("entity", "instance", "name", "value", "dataset_date", "name_2")

assertSameRows(analysisResultsAsDataFrame, expected)
Expand All @@ -220,7 +220,7 @@ class AnalysisResultTest extends AnyWordSpec
|"name_2":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Column","instance":"item","name":"Distinctness","value":1.0,
|"name_2":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25,
|"name_2":"EU", "dataset_date":$DATE_ONE}]"""
.stripMargin.replaceAll("\n", "")
Expand All @@ -246,7 +246,7 @@ class AnalysisResultTest extends AnyWordSpec
("Dataset", "*", "Size", 4.0, DATE_ONE, "EU"),
("Column", "item", "Distinctness", 1.0, DATE_ONE, "EU"),
("Column", "att1", "Completeness", 1.0, DATE_ONE, "EU"),
("Mutlicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
("Multicolumn", "att1,att2", "Uniqueness", 0.25, DATE_ONE, "EU"))
.toDF("entity", "instance", "name", "value", "dataset_date", "region")

assertSameRows(analysisResultsAsDataFrame, expected)
Expand All @@ -271,7 +271,7 @@ class AnalysisResultTest extends AnyWordSpec
|"region":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Column","instance":"item","name":"Distinctness","value":1.0,
|"region":"EU", "dataset_date":$DATE_ONE},
|{"entity":"Mutlicolumn","instance":"att1,att2",
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25,
|"region":"EU", "dataset_date":$DATE_ONE}]"""
.stripMargin.replaceAll("\n", "")
Expand Down
Loading