diff --git a/src/main/resources/configs/deduplication_dbpedia.xml b/src/main/resources/configs/deduplication_dbpedia.xml
index b381b5ff..42bdbb09 100644
--- a/src/main/resources/configs/deduplication_dbpedia.xml
+++ b/src/main/resources/configs/deduplication_dbpedia.xml
@@ -19,14 +19,14 @@
id_dbpedia
- ExactMatchString
+ ExactMatch
1
id_wikidata
- ExactMatchString
+ ExactMatch
1
diff --git a/src/main/scala/de/hpi/ingestion/deduplication/similarity/ExactMatch.scala b/src/main/scala/de/hpi/ingestion/deduplication/similarity/ExactMatch.scala
index 4ceff6e4..4d30509e 100644
--- a/src/main/scala/de/hpi/ingestion/deduplication/similarity/ExactMatch.scala
+++ b/src/main/scala/de/hpi/ingestion/deduplication/similarity/ExactMatch.scala
@@ -18,9 +18,8 @@ package de.hpi.ingestion.deduplication.similarity
/**
* A binary similarity measure for exact matching of values of any type
- * @tparam T the type of data to be compared
*/
-abstract class ExactMatch[T] extends SimilarityMeasure[T] {
+object ExactMatch extends SimilarityMeasure[Any] {
/**
* Comparing the given objects on exact matching
* @param x object to be compared to y
@@ -28,15 +27,5 @@ abstract class ExactMatch[T] extends SimilarityMeasure[T] {
* @param u has no specific use in here
* @return 1.0 if given objects match exactly, 0.0 otherwise
*/
- override def compare(x: T, y: T, u: Int = 1) = if(x == y) 1.0 else 0.0
+ override def compare(x: Any, y: Any, u: Int = 1) = if(x == y) 1.0 else 0.0
}
-
-/**
- * A specific exact match similarity measure comparing strings
- */
-object ExactMatchString extends ExactMatch[String]
-
-/**
- * A specific exact match similarity measure comparing Doubles
- */
-object ExactMatchDouble extends ExactMatch[Double]
diff --git a/src/main/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasure.scala b/src/main/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasure.scala
index 20c285fc..d39a2104 100644
--- a/src/main/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasure.scala
+++ b/src/main/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasure.scala
@@ -20,7 +20,7 @@ package de.hpi.ingestion.deduplication.similarity
* Provides a method to measure the similarity of two objects
* @tparam T the type of the objects to be compared
*/
-trait SimilarityMeasure[T] extends Serializable {
+trait SimilarityMeasure[-T] extends Serializable {
/**
* Calculates a similarity score for two objects
@@ -38,8 +38,7 @@ trait SimilarityMeasure[T] extends Serializable {
*/
object SimilarityMeasure {
val dataTypes: Map[String, SimilarityMeasure[_]] = Map(
- "ExactMatchString" -> ExactMatchString,
- "ExactMatchDouble" -> ExactMatchDouble,
+ "ExactMatch" -> ExactMatch,
"MongeElkan" -> MongeElkan,
"Jaccard" -> Jaccard,
"DiceSorensen" -> DiceSorensen,
@@ -60,6 +59,6 @@ object SimilarityMeasure {
* @return the requested Similarity Measure if it exists or else ExactMatch as default
*/
def get[T](similarityMeasure: String): SimilarityMeasure[T] = {
- dataTypes.getOrElse(similarityMeasure, ExactMatchString).asInstanceOf[SimilarityMeasure[T]]
+ dataTypes.getOrElse(similarityMeasure, ExactMatch).asInstanceOf[SimilarityMeasure[T]]
}
}
diff --git a/src/test/resources/defaultDeduplication b/src/test/resources/defaultDeduplication
index 26496fa0..818ed6a9 100644
--- a/src/test/resources/defaultDeduplication
+++ b/src/test/resources/defaultDeduplication
@@ -27,7 +27,7 @@
-
name
- ExactMatchString
+ ExactMatch
0.2
1
diff --git a/src/test/resources/framework/test3.xml b/src/test/resources/framework/test3.xml
index 8dfc0995..d5edb8ef 100644
--- a/src/test/resources/framework/test3.xml
+++ b/src/test/resources/framework/test3.xml
@@ -15,7 +15,7 @@
category
- ExactMatchString
+ ExactMatch
1
diff --git a/src/test/scala/de/hpi/ingestion/deduplication/FeatureCalculationTest.scala b/src/test/scala/de/hpi/ingestion/deduplication/FeatureCalculationTest.scala
index 9f8040ce..86274983 100644
--- a/src/test/scala/de/hpi/ingestion/deduplication/FeatureCalculationTest.scala
+++ b/src/test/scala/de/hpi/ingestion/deduplication/FeatureCalculationTest.scala
@@ -19,12 +19,12 @@ package de.hpi.ingestion.deduplication
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}
import de.hpi.ingestion.deduplication.models.FeatureEntry
import de.hpi.ingestion.deduplication.models.config.SimilarityMeasureConfig
-import de.hpi.ingestion.deduplication.similarity.{ExactMatchString, SimilarityMeasure}
+import de.hpi.ingestion.deduplication.similarity.{ExactMatch, SimilarityMeasure}
import org.scalatest.{FlatSpec, Matchers}
class FeatureCalculationTest extends FlatSpec with Matchers with SharedSparkContext with RDDComparisons {
"compare" should "calculate a similarity score of two subjects from a given config" in {
- val config = SimilarityMeasureConfig[String, SimilarityMeasure[String]](ExactMatchString, 1.0)
+ val config = SimilarityMeasureConfig[String, SimilarityMeasure[String]](ExactMatch, 1.0)
val attribute = "geo_city"
val subject = TestData.subjects.head.get(attribute)
val staging = TestData.stagings.head.get(attribute)
@@ -35,7 +35,7 @@ class FeatureCalculationTest extends FlatSpec with Matchers with SharedSparkCont
}
it should "return 0.0 if one of the given subjects doesn't hold a property" in {
- val config = SimilarityMeasureConfig[String, SimilarityMeasure[String]](ExactMatchString, 1.0)
+ val config = SimilarityMeasureConfig[String, SimilarityMeasure[String]](ExactMatch, 1.0)
val attribute = "geo_city"
val subject = TestData.subjects.head.get(attribute)
val staging = TestData.subjects.last.get(attribute)
diff --git a/src/test/scala/de/hpi/ingestion/deduplication/similarity/ExactMatchUnitTest.scala b/src/test/scala/de/hpi/ingestion/deduplication/similarity/ExactMatchUnitTest.scala
index 8737277b..15277365 100644
--- a/src/test/scala/de/hpi/ingestion/deduplication/similarity/ExactMatchUnitTest.scala
+++ b/src/test/scala/de/hpi/ingestion/deduplication/similarity/ExactMatchUnitTest.scala
@@ -26,7 +26,7 @@ class ExactMatchUnitTest extends FlatSpec with Matchers {
("context", "context", 1.0))
testData.foreach(tuple =>
- ExactMatchString.compare(tuple._1, tuple._2) shouldEqual tuple._3)
+ ExactMatch.compare(tuple._1, tuple._2) shouldEqual tuple._3)
}
it should "return 1.0 or 0.0 for given doubles" in {
@@ -35,6 +35,6 @@ class ExactMatchUnitTest extends FlatSpec with Matchers {
(0.2, 0.4, 0.0))
testData.foreach(tuple =>
- ExactMatchDouble.compare(tuple._1, tuple._2) shouldEqual tuple._3)
+ ExactMatch.compare(tuple._1, tuple._2) shouldEqual tuple._3)
}
}
diff --git a/src/test/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasureTest.scala b/src/test/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasureTest.scala
index 226e530d..984a0d0f 100644
--- a/src/test/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasureTest.scala
+++ b/src/test/scala/de/hpi/ingestion/deduplication/similarity/SimilarityMeasureTest.scala
@@ -21,8 +21,7 @@ import org.scalatest.{FlatSpec, Matchers}
class SimilarityMeasureTest extends FlatSpec with Matchers {
"Similarity Measure" should "be returned given its name" in {
- SimilarityMeasure.get[String]("ExactMatchString") shouldEqual ExactMatchString
- SimilarityMeasure.get[Double]("ExactMatchDouble") shouldEqual ExactMatchDouble
+ SimilarityMeasure.get[String]("ExactMatch") shouldEqual ExactMatch
SimilarityMeasure.get[String]("MongeElkan") shouldEqual MongeElkan
SimilarityMeasure.get[String]("Jaccard") shouldEqual Jaccard
SimilarityMeasure.get[String]("DiceSorensen") shouldEqual DiceSorensen
@@ -32,6 +31,6 @@ class SimilarityMeasureTest extends FlatSpec with Matchers {
SimilarityMeasure.get[String]("Overlap") shouldEqual Overlap
SimilarityMeasure.get[String]("EuclidianDistance") shouldEqual EuclidianDistance
SimilarityMeasure.get[String]("RelativeNumbersSimilarity") shouldEqual RelativeNumbersSimilarity
- SimilarityMeasure.get[String]("Not existing") shouldEqual ExactMatchString
+ SimilarityMeasure.get[Any]("Not existing") shouldEqual ExactMatch
}
}
diff --git a/src/test/scala/de/hpi/ingestion/framework/TestData.scala b/src/test/scala/de/hpi/ingestion/framework/TestData.scala
index 2637a93d..dd30a987 100644
--- a/src/test/scala/de/hpi/ingestion/framework/TestData.scala
+++ b/src/test/scala/de/hpi/ingestion/framework/TestData.scala
@@ -17,7 +17,7 @@ limitations under the License.
package de.hpi.ingestion.framework
import de.hpi.ingestion.deduplication.models.config.{AttributeConfig, SimilarityMeasureConfig}
-import de.hpi.ingestion.deduplication.similarity.{ExactMatchString, JaroWinkler, MongeElkan}
+import de.hpi.ingestion.deduplication.similarity.{ExactMatch, JaroWinkler, MongeElkan, SimilarityMeasure}
import scala.xml.{Node, XML}
@@ -48,7 +48,7 @@ object TestData {
"category",
0.5,
List(
- SimilarityMeasureConfig(similarityMeasure = ExactMatchString, weight = 1.0)
+ SimilarityMeasureConfig(similarityMeasure = ExactMatch, weight = 1.0)
)
)
)