-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #20 from pinecone-io/rshah/formatVectors
Reformat sparse vectors
- Loading branch information
Showing
13 changed files
with
220 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
[ | ||
{ | ||
"values": [ | ||
1, | ||
2, | ||
3 | ||
] | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[ | ||
{ | ||
"id": "v1" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[ | ||
{ | ||
"id": "v1", | ||
"values": [ | ||
3, | ||
2, | ||
1 | ||
], | ||
"sparse_values": { | ||
} | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[ | ||
{ | ||
"id": "v1", | ||
"values": [ | ||
3, | ||
2, | ||
1 | ||
], | ||
"sparse_values": { | ||
"values": [ | ||
100, | ||
101 | ||
] | ||
} | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[ | ||
{ | ||
"id": "v1", | ||
"values": [ | ||
3, | ||
2, | ||
1 | ||
], | ||
"sparse_values": { | ||
"indices": [ | ||
1, | ||
2 | ||
] | ||
} | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
[ | ||
{ | ||
"id": "v1", | ||
"values": [ | ||
3, | ||
2, | ||
1 | ||
], | ||
"sparse_values": { | ||
"indices": [ | ||
null | ||
], | ||
"values": [ | ||
1 | ||
] | ||
} | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
[ | ||
{ | ||
"id": "v1", | ||
"values": [ | ||
3, | ||
2, | ||
1 | ||
], | ||
"sparse_values": { | ||
"indices": [ | ||
1 | ||
], | ||
"values": [ | ||
null | ||
] | ||
} | ||
} | ||
] |
This file was deleted.
Oops, something went wrong.
69 changes: 69 additions & 0 deletions
69
src/test/scala/io/pinecone/spark/pinecone/ParseCommonSchemaTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package io.pinecone.spark.pinecone | ||
|
||
import org.apache.spark.sql.{SaveMode, SparkSession} | ||
import org.scalatest.flatspec.AnyFlatSpec | ||
import org.scalatest.matchers.should | ||
|
||
/** Checks that malformed upsert input is rejected when it is written through the
  * Pinecone Spark data source.
  *
  * Each fixture under `src/test/resources` is read with `COMMON_SCHEMA` and then written
  * with the `io.pinecone.spark.pinecone.Pinecone` format; the write is expected to fail with
  * a `SparkException` whose cause mentions a null value in a non-nullable field.
  *
  * `BeforeAndAfterAll` is mixed in so that ScalaTest actually invokes `afterAll()` and the
  * local `SparkSession` is stopped once the suite finishes.
  */
class ParseCommonSchemaTest
    extends AnyFlatSpec
    with should.Matchers
    with org.scalatest.BeforeAndAfterAll {

  // Local two-thread Spark session shared by every test case in this suite.
  private val spark: SparkSession = SparkSession.builder()
    .appName("SchemaValidationTest")
    .master("local[2]")
    .getOrCreate()

  // Directory holding the invalidUpsertInput*.jsonl fixtures, resolved from the working directory.
  private val inputFilePath = System.getProperty("user.dir") + "/src/test/resources"

  // Placeholder connection settings.
  // NOTE(review): presumably the write fails on schema validation before these are used
  // to contact the service — confirm against the data source implementation.
  private val apiKey = "some_api_key"
  private val environment = "us-east4-gcp"
  private val projectName = "f8e8d52"
  private val indexName = "step-test"

  private val pineconeOptions: Map[String, String] = Map(
    PineconeOptions.PINECONE_API_KEY_CONF -> apiKey,
    PineconeOptions.PINECONE_ENVIRONMENT_CONF -> environment,
    PineconeOptions.PINECONE_PROJECT_NAME_CONF -> projectName,
    PineconeOptions.PINECONE_INDEX_NAME_CONF -> indexName
  )

  /** Stops the shared Spark session after the last test has run.
    *
    * Declared with `override` because the hook comes from `BeforeAndAfterAll`; without that
    * trait the method is just an ordinary, never-called member and the session stays alive.
    */
  override def afterAll(): Unit = {
    try Option(spark).foreach(_.stop())
    finally super.afterAll()
  }

  /** Registers one test case asserting that writing `file` to Pinecone fails.
    *
    * @param file     path of the multi-line JSON fixture to read
    * @param testName behaviour description used as the ScalaTest test name
    */
  def testInvalidJSON(file: String, testName: String): Unit = {
    it should testName in {
      val sparkException = intercept[org.apache.spark.SparkException] {
        val df = spark.read
          .option("multiLine", value = true)
          .option("mode", "PERMISSIVE")
          .schema(COMMON_SCHEMA)
          .json(file)
          .repartition(2)

        df.write
          .options(pineconeOptions)
          .format("io.pinecone.spark.pinecone.Pinecone")
          .mode(SaveMode.Append)
          .save()
      }

      // The assertion inspects the direct cause of the SparkException raised by the write.
      sparkException
        .getCause
        .toString should include("java.lang.NullPointerException: Null value appeared in non-nullable field:")
    }
  }

  // Fixture file name -> behaviour under test; registered in the order listed.
  private val invalidInputs: Seq[(String, String)] = Seq(
    "invalidUpsertInput1.jsonl" ->
      "throw exception for missing id",
    "invalidUpsertInput2.jsonl" ->
      "throw exception for missing values",
    "invalidUpsertInput3.jsonl" ->
      "throw exception for missing sparse vector indices and values if sparse_values is defined",
    "invalidUpsertInput4.jsonl" ->
      "throw exception for missing sparse vector indices if sparse_values and its values are defined",
    "invalidUpsertInput5.jsonl" ->
      "throw exception for missing sparse vector values if sparse_values and its indices are defined",
    "invalidUpsertInput6.jsonl" ->
      "throw exception for null in sparse vector indices",
    "invalidUpsertInput7.jsonl" ->
      "throw exception for null in sparse vector values"
  )

  invalidInputs.foreach { case (fileName, description) =>
    testInvalidJSON(s"$inputFilePath/$fileName", description)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
ThisBuild / version := "0.2.0" | ||
ThisBuild / version := "0.2.1" |