From 88e0f5b97e1927ca39da724f4ff713f9906f77dd Mon Sep 17 00:00:00 2001
From: eldernewborn
Date: Wed, 18 Sep 2024 09:29:28 -0700
Subject: [PATCH] [spark] [hadoop] [vpj] Refactor to make spark a first class
 citizen of com.linkedin.venice (#1183)

* Refactor to make spark a first class citizen of com.linkedin.venice

* refactor tests in the same fashion
---
 .../{hadoop => }/spark/SparkConstants.java    |  2 +-
 .../jobs/AbstractDataWriterSparkJob.java      | 40 +++++++++----------
 .../datawriter/jobs/DataWriterSparkJob.java   |  4 +-
 .../datawriter/partition/PartitionSorter.java |  5 ++-
 .../partition/VeniceSparkPartitioner.java     |  2 +-
 .../SparkInputRecordProcessor.java            | 19 ++++-----
 .../SparkInputRecordProcessorFactory.java     |  4 +-
 .../task/DataWriterAccumulators.java          |  2 +-
 .../task/SparkDataWriterTaskTracker.java      |  2 +-
 .../writer/SparkPartitionWriter.java          | 12 +++---
 .../writer/SparkPartitionWriterFactory.java   |  4 +-
 .../engine/SparkEngineTaskConfigProvider.java |  4 +-
 .../input/VeniceAbstractPartitionReader.java  |  2 +-
 .../input/hdfs/VeniceHdfsInputPartition.java  |  2 +-
 .../hdfs/VeniceHdfsInputPartitionReader.java  |  4 +-
 ...VeniceHdfsInputPartitionReaderFactory.java |  2 +-
 .../spark/input/hdfs/VeniceHdfsInputScan.java |  4 +-
 .../hdfs/VeniceHdfsInputScanBuilder.java      |  2 +-
 .../input/hdfs/VeniceHdfsInputTable.java      |  4 +-
 .../spark/input/hdfs/VeniceHdfsSource.java    |  4 +-
 .../spark/utils/SparkPartitionUtils.java      |  2 +-
 .../spark/utils/SparkScalaUtils.java          |  2 +-
 .../jobs/AbstractDataWriterSparkJobTest.java  | 12 +++---
 .../partition/PartitionSorterTest.java        |  2 +-
 .../partition/VeniceSparkPartitionerTest.java |  4 +-
 .../task/SparkDataWriterTaskTrackerTest.java  |  4 +-
 .../SparkEngineTaskConfigProviderTest.java    |  4 +-
 .../input/hdfs/TestSparkInputFromHdfs.java    |  6 +--
 .../spark/utils/SparkScalaUtilsTest.java      |  6 +--
 .../venice/endToEnd/PartialUpdateTest.java    |  2 +-
 .../linkedin/venice/endToEnd/TestBatch.java   |  2 +-
 .../venice/endToEnd/TestVsonStoreBatch.java   |  2 +-
 32 files changed, 85 insertions(+), 87 deletions(-)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/SparkConstants.java (98%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/jobs/AbstractDataWriterSparkJob.java (93%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/jobs/DataWriterSparkJob.java (95%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/partition/PartitionSorter.java (78%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/partition/VeniceSparkPartitioner.java (97%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/recordprocessor/SparkInputRecordProcessor.java (73%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/recordprocessor/SparkInputRecordProcessorFactory.java (88%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/task/DataWriterAccumulators.java (97%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/task/SparkDataWriterTaskTracker.java (98%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/writer/SparkPartitionWriter.java (78%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/datawriter/writer/SparkPartitionWriterFactory.java (86%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/engine/SparkEngineTaskConfigProvider.java (85%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/VeniceAbstractPartitionReader.java (96%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/VeniceHdfsInputPartition.java (93%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/VeniceHdfsInputPartitionReader.java (93%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/VeniceHdfsInputPartitionReaderFactory.java (94%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/VeniceHdfsInputScan.java (95%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/VeniceHdfsInputScanBuilder.java (89%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/VeniceHdfsInputTable.java (91%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/VeniceHdfsSource.java (89%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/utils/SparkPartitionUtils.java (96%)
 rename clients/venice-push-job/src/main/java/com/linkedin/venice/{hadoop => }/spark/utils/SparkScalaUtils.java (94%)
 rename clients/venice-push-job/src/test/java/com/linkedin/venice/{hadoop => }/spark/datawriter/jobs/AbstractDataWriterSparkJobTest.java (96%)
 rename clients/venice-push-job/src/test/java/com/linkedin/venice/{hadoop => }/spark/datawriter/partition/PartitionSorterTest.java (95%)
 rename clients/venice-push-job/src/test/java/com/linkedin/venice/{hadoop => }/spark/datawriter/partition/VeniceSparkPartitionerTest.java (96%)
 rename clients/venice-push-job/src/test/java/com/linkedin/venice/{hadoop => }/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java (98%)
 rename clients/venice-push-job/src/test/java/com/linkedin/venice/{hadoop => }/spark/engine/SparkEngineTaskConfigProviderTest.java (95%)
 rename clients/venice-push-job/src/test/java/com/linkedin/venice/{hadoop => }/spark/input/hdfs/TestSparkInputFromHdfs.java (98%)
 rename clients/venice-push-job/src/test/java/com/linkedin/venice/{hadoop => }/spark/utils/SparkScalaUtilsTest.java (80%)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/SparkConstants.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/SparkConstants.java
similarity index 98%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/SparkConstants.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/SparkConstants.java
index 657c8d0e79..e8ba243b85 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/SparkConstants.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/SparkConstants.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark;
+package com.linkedin.venice.spark;
 
 import static org.apache.spark.sql.types.DataTypes.BinaryType;
 import static org.apache.spark.sql.types.DataTypes.IntegerType;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/AbstractDataWriterSparkJob.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/jobs/AbstractDataWriterSparkJob.java
similarity index 93%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/AbstractDataWriterSparkJob.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/jobs/AbstractDataWriterSparkJob.java
index 5587c86fbc..769e5ee3a7 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/AbstractDataWriterSparkJob.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/jobs/AbstractDataWriterSparkJob.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.datawriter.jobs;
+package com.linkedin.venice.spark.datawriter.jobs;
 
 import static com.linkedin.venice.ConfigKeys.KAFKA_BOOTSTRAP_SERVERS;
 import static com.linkedin.venice.ConfigKeys.KAFKA_PRODUCER_DELIVERY_TIMEOUT_MS;
@@ -36,17 +36,17 @@
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.ZSTD_COMPRESSION_LEVEL;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.ZSTD_DICTIONARY_CREATION_REQUIRED;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.ZSTD_DICTIONARY_CREATION_SUCCESS;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.DEFAULT_SCHEMA;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.DEFAULT_SCHEMA_WITH_PARTITION;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.DEFAULT_SPARK_CLUSTER;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.KEY_COLUMN_NAME;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.PARTITION_COLUMN_NAME;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_CASE_SENSITIVE_CONFIG;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_CLUSTER_CONFIG;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_DATA_WRITER_CONF_PREFIX;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_LEADER_CONFIG;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_SESSION_CONF_PREFIX;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.VALUE_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.DEFAULT_SCHEMA;
+import static com.linkedin.venice.spark.SparkConstants.DEFAULT_SCHEMA_WITH_PARTITION;
+import static com.linkedin.venice.spark.SparkConstants.DEFAULT_SPARK_CLUSTER;
+import static com.linkedin.venice.spark.SparkConstants.KEY_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.PARTITION_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_CASE_SENSITIVE_CONFIG;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_CLUSTER_CONFIG;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_DATA_WRITER_CONF_PREFIX;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_LEADER_CONFIG;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_SESSION_CONF_PREFIX;
+import static com.linkedin.venice.spark.SparkConstants.VALUE_COLUMN_NAME;
 
 import com.github.luben.zstd.Zstd;
 import com.linkedin.venice.compression.CompressionStrategy;
@@ -55,16 +55,16 @@
 import com.linkedin.venice.hadoop.PushJobSetting;
 import com.linkedin.venice.hadoop.input.kafka.ttl.TTLResolutionPolicy;
 import com.linkedin.venice.hadoop.jobs.DataWriterComputeJob;
-import com.linkedin.venice.hadoop.spark.datawriter.partition.PartitionSorter;
-import com.linkedin.venice.hadoop.spark.datawriter.partition.VeniceSparkPartitioner;
-import com.linkedin.venice.hadoop.spark.datawriter.recordprocessor.SparkInputRecordProcessorFactory;
-import com.linkedin.venice.hadoop.spark.datawriter.task.DataWriterAccumulators;
-import com.linkedin.venice.hadoop.spark.datawriter.task.SparkDataWriterTaskTracker;
-import com.linkedin.venice.hadoop.spark.datawriter.writer.SparkPartitionWriterFactory;
-import com.linkedin.venice.hadoop.spark.utils.SparkPartitionUtils;
-import com.linkedin.venice.hadoop.spark.utils.SparkScalaUtils;
 import com.linkedin.venice.hadoop.ssl.TempFileSSLConfigurator;
 import com.linkedin.venice.hadoop.task.datawriter.DataWriterTaskTracker;
+import com.linkedin.venice.spark.datawriter.partition.PartitionSorter;
+import com.linkedin.venice.spark.datawriter.partition.VeniceSparkPartitioner;
+import com.linkedin.venice.spark.datawriter.recordprocessor.SparkInputRecordProcessorFactory;
+import com.linkedin.venice.spark.datawriter.task.DataWriterAccumulators;
+import com.linkedin.venice.spark.datawriter.task.SparkDataWriterTaskTracker;
+import com.linkedin.venice.spark.datawriter.writer.SparkPartitionWriterFactory;
+import com.linkedin.venice.spark.utils.SparkPartitionUtils;
+import com.linkedin.venice.spark.utils.SparkScalaUtils;
 import com.linkedin.venice.utils.VeniceProperties;
 import com.linkedin.venice.writer.VeniceWriter;
 import java.io.IOException;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/DataWriterSparkJob.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/jobs/DataWriterSparkJob.java
similarity index 95%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/DataWriterSparkJob.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/jobs/DataWriterSparkJob.java
index 5cd2e171a1..5bf7b5b82b 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/DataWriterSparkJob.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/jobs/DataWriterSparkJob.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.datawriter.jobs;
+package com.linkedin.venice.spark.datawriter.jobs;
 
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.ETL_VALUE_SCHEMA_TRANSFORMATION;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.FILE_KEY_SCHEMA;
@@ -12,7 +12,7 @@
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.VSON_PUSH;
 
 import com.linkedin.venice.hadoop.PushJobSetting;
-import com.linkedin.venice.hadoop.spark.input.hdfs.VeniceHdfsSource;
+import com.linkedin.venice.spark.input.hdfs.VeniceHdfsSource;
 import org.apache.hadoop.fs.Path;
 import org.apache.spark.sql.DataFrameReader;
 import org.apache.spark.sql.Dataset;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/partition/PartitionSorter.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/partition/PartitionSorter.java
similarity index 78%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/partition/PartitionSorter.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/partition/PartitionSorter.java
index 61ca53d2d0..f4b591ce69 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/partition/PartitionSorter.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/partition/PartitionSorter.java
@@ -1,5 +1,6 @@
-package com.linkedin.venice.hadoop.spark.datawriter.partition;
+package com.linkedin.venice.spark.datawriter.partition;
 
+import com.linkedin.venice.spark.datawriter.writer.SparkPartitionWriter;
 import com.linkedin.venice.utils.ArrayUtils;
 import java.io.Serializable;
 import java.util.Comparator;
@@ -10,7 +11,7 @@
  * Sort the rows based on the key and value in ascending order using unsigned byte comparison.
  *
  */
 public class PartitionSorter implements Comparator<Row>, Serializable {
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/partition/VeniceSparkPartitioner.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitioner.java
similarity index 97%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/partition/VeniceSparkPartitioner.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitioner.java
index d7caa4a2e4..ebd9727ac2 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/partition/VeniceSparkPartitioner.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitioner.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.datawriter.partition;
+package com.linkedin.venice.spark.datawriter.partition;
 
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.PARTITION_COUNT;
 
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/recordprocessor/SparkInputRecordProcessor.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/recordprocessor/SparkInputRecordProcessor.java
similarity index 73%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/recordprocessor/SparkInputRecordProcessor.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/recordprocessor/SparkInputRecordProcessor.java
index 256f8d7678..54c683461e 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/recordprocessor/SparkInputRecordProcessor.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/recordprocessor/SparkInputRecordProcessor.java
@@ -1,16 +1,13 @@
-package com.linkedin.venice.hadoop.spark.datawriter.recordprocessor;
-
-import static com.linkedin.venice.hadoop.spark.SparkConstants.DEFAULT_SCHEMA;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.KEY_COLUMN_NAME;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.VALUE_COLUMN_NAME;
+package com.linkedin.venice.spark.datawriter.recordprocessor;
 
 import com.linkedin.venice.hadoop.input.recordreader.AbstractVeniceRecordReader;
 import com.linkedin.venice.hadoop.input.recordreader.avro.IdentityVeniceRecordReader;
-import com.linkedin.venice.hadoop.spark.datawriter.task.DataWriterAccumulators;
-import com.linkedin.venice.hadoop.spark.datawriter.task.SparkDataWriterTaskTracker;
-import com.linkedin.venice.hadoop.spark.engine.SparkEngineTaskConfigProvider;
 import com.linkedin.venice.hadoop.task.datawriter.AbstractInputRecordProcessor;
 import com.linkedin.venice.hadoop.task.datawriter.DataWriterTaskTracker;
+import com.linkedin.venice.spark.SparkConstants;
+import com.linkedin.venice.spark.datawriter.task.DataWriterAccumulators;
+import com.linkedin.venice.spark.datawriter.task.SparkDataWriterTaskTracker;
+import com.linkedin.venice.spark.engine.SparkEngineTaskConfigProvider;
 import com.linkedin.venice.utils.VeniceProperties;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -36,8 +33,8 @@ public SparkInputRecordProcessor(Properties jobProperties, DataWriterAccumulator
 
   public Iterator<Row> processRecord(Row record) {
     List<Row> outputRows = new ArrayList<>();
-    ByteBuffer keyBB = ByteBuffer.wrap(record.getAs(KEY_COLUMN_NAME));
-    byte[] value = record.getAs(VALUE_COLUMN_NAME);
+    ByteBuffer keyBB = ByteBuffer.wrap(record.getAs(SparkConstants.KEY_COLUMN_NAME));
+    byte[] value = record.getAs(SparkConstants.VALUE_COLUMN_NAME);
     ByteBuffer valueBB = value == null ? null : ByteBuffer.wrap(value);
     super.processRecord(keyBB, valueBB, getRecordEmitter(outputRows), dataWriterTaskTracker);
     return outputRows.iterator();
@@ -50,7 +47,7 @@ protected AbstractVeniceRecordReader<ByteBuffer, ByteBuffer> getRecordReader(Ven
 
   private BiConsumer<byte[], byte[]> getRecordEmitter(List<Row> rows) {
     return (key, value) -> {
-      rows.add(new GenericRowWithSchema(new Object[] { key, value }, DEFAULT_SCHEMA));
+      rows.add(new GenericRowWithSchema(new Object[] { key, value }, SparkConstants.DEFAULT_SCHEMA));
     };
   }
 }
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/recordprocessor/SparkInputRecordProcessorFactory.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/recordprocessor/SparkInputRecordProcessorFactory.java
similarity index 88%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/recordprocessor/SparkInputRecordProcessorFactory.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/recordprocessor/SparkInputRecordProcessorFactory.java
index ba8bb24570..d07d7af04f 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/recordprocessor/SparkInputRecordProcessorFactory.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/recordprocessor/SparkInputRecordProcessorFactory.java
@@ -1,6 +1,6 @@
-package com.linkedin.venice.hadoop.spark.datawriter.recordprocessor;
+package com.linkedin.venice.spark.datawriter.recordprocessor;
 
-import com.linkedin.venice.hadoop.spark.datawriter.task.DataWriterAccumulators;
+import com.linkedin.venice.spark.datawriter.task.DataWriterAccumulators;
 import java.util.Iterator;
 import java.util.Properties;
 import org.apache.spark.api.java.function.FlatMapFunction;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/task/DataWriterAccumulators.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/task/DataWriterAccumulators.java
similarity index 97%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/task/DataWriterAccumulators.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/task/DataWriterAccumulators.java
index aa5252adfa..6e3477e5e9 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/task/DataWriterAccumulators.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/task/DataWriterAccumulators.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.datawriter.task;
+package com.linkedin.venice.spark.datawriter.task;
 
 import java.io.Serializable;
 import org.apache.spark.SparkContext;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/task/SparkDataWriterTaskTracker.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTracker.java
similarity index 98%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/task/SparkDataWriterTaskTracker.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTracker.java
index d6bc19ae75..debfe71de4 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/task/SparkDataWriterTaskTracker.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTracker.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.datawriter.task;
+package com.linkedin.venice.spark.datawriter.task;
 
 import com.linkedin.venice.hadoop.task.datawriter.DataWriterTaskTracker;
 
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/writer/SparkPartitionWriter.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/writer/SparkPartitionWriter.java
similarity index 78%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/writer/SparkPartitionWriter.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/writer/SparkPartitionWriter.java
index 61bb0a3237..271e811391 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/writer/SparkPartitionWriter.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/writer/SparkPartitionWriter.java
@@ -1,12 +1,12 @@
-package com.linkedin.venice.hadoop.spark.datawriter.writer;
+package com.linkedin.venice.spark.datawriter.writer;
 
-import static com.linkedin.venice.hadoop.spark.SparkConstants.KEY_COLUMN_NAME;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.VALUE_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.KEY_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.VALUE_COLUMN_NAME;
 
-import com.linkedin.venice.hadoop.spark.datawriter.task.DataWriterAccumulators;
-import com.linkedin.venice.hadoop.spark.datawriter.task.SparkDataWriterTaskTracker;
-import com.linkedin.venice.hadoop.spark.engine.SparkEngineTaskConfigProvider;
 import com.linkedin.venice.hadoop.task.datawriter.AbstractPartitionWriter;
+import com.linkedin.venice.spark.datawriter.task.DataWriterAccumulators;
+import com.linkedin.venice.spark.datawriter.task.SparkDataWriterTaskTracker;
+import com.linkedin.venice.spark.engine.SparkEngineTaskConfigProvider;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/writer/SparkPartitionWriterFactory.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/writer/SparkPartitionWriterFactory.java
similarity index 86%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/writer/SparkPartitionWriterFactory.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/writer/SparkPartitionWriterFactory.java
index a5f2108fed..6f6f689c93 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/datawriter/writer/SparkPartitionWriterFactory.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/datawriter/writer/SparkPartitionWriterFactory.java
@@ -1,6 +1,6 @@
-package com.linkedin.venice.hadoop.spark.datawriter.writer;
+package com.linkedin.venice.spark.datawriter.writer;
 
-import com.linkedin.venice.hadoop.spark.datawriter.task.DataWriterAccumulators;
+import com.linkedin.venice.spark.datawriter.task.DataWriterAccumulators;
 import java.util.Iterator;
 import java.util.Properties;
 import org.apache.spark.api.java.function.MapPartitionsFunction;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/engine/SparkEngineTaskConfigProvider.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProvider.java
similarity index 85%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/engine/SparkEngineTaskConfigProvider.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProvider.java
index 6adb45660b..cb59990161 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/engine/SparkEngineTaskConfigProvider.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProvider.java
@@ -1,6 +1,6 @@
-package com.linkedin.venice.hadoop.spark.engine;
+package com.linkedin.venice.spark.engine;
 
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_APP_NAME_CONFIG;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_APP_NAME_CONFIG;
 
 import com.linkedin.venice.hadoop.engine.EngineTaskConfigProvider;
 import java.util.Properties;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/VeniceAbstractPartitionReader.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/VeniceAbstractPartitionReader.java
similarity index 96%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/VeniceAbstractPartitionReader.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/VeniceAbstractPartitionReader.java
index 43a31eae8a..1b2d1ddb9e 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/VeniceAbstractPartitionReader.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/VeniceAbstractPartitionReader.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.input;
+package com.linkedin.venice.spark.input;
 
 import com.linkedin.venice.hadoop.input.recordreader.VeniceRecordIterator;
 import com.linkedin.venice.utils.Utils;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartition.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartition.java
similarity index 93%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartition.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartition.java
index 48325ecc34..981ebe8d59 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartition.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartition.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
 import java.net.URI;
 import org.apache.hadoop.fs.Path;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartitionReader.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartitionReader.java
similarity index 93%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartitionReader.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartitionReader.java
index 3bacdfd35f..c48ff758a5 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartitionReader.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartitionReader.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.VSON_PUSH;
 
@@ -8,7 +8,7 @@
 import com.linkedin.venice.hadoop.input.recordreader.avro.VeniceAvroRecordReader;
 import com.linkedin.venice.hadoop.input.recordreader.vson.VeniceVsonFileIterator;
 import com.linkedin.venice.hadoop.input.recordreader.vson.VeniceVsonRecordReader;
-import com.linkedin.venice.hadoop.spark.input.VeniceAbstractPartitionReader;
+import com.linkedin.venice.spark.input.VeniceAbstractPartitionReader;
 import com.linkedin.venice.utils.VeniceProperties;
 import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartitionReaderFactory.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartitionReaderFactory.java
similarity index 94%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartitionReaderFactory.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartitionReaderFactory.java
index 49d9b58d42..01c62b3938 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputPartitionReaderFactory.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputPartitionReaderFactory.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
 import com.linkedin.venice.utils.VeniceProperties;
 import org.apache.spark.sql.catalyst.InternalRow;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputScan.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputScan.java
similarity index 95%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputScan.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputScan.java
index b24b111a77..7dcb56369c 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputScan.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputScan.java
@@ -1,10 +1,10 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.INPUT_PATH_PROP;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.PATH_FILTER;
 
 import com.linkedin.venice.exceptions.VeniceException;
-import com.linkedin.venice.hadoop.spark.SparkConstants;
+import com.linkedin.venice.spark.SparkConstants;
 import com.linkedin.venice.utils.VeniceProperties;
 import java.io.IOException;
 import java.util.ArrayList;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputScanBuilder.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputScanBuilder.java
similarity index 89%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputScanBuilder.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputScanBuilder.java
index 42e900e12e..b35058062e 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputScanBuilder.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputScanBuilder.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
 import com.linkedin.venice.utils.VeniceProperties;
 import org.apache.spark.sql.connector.read.Scan;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputTable.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputTable.java
similarity index 91%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputTable.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputTable.java
index 5b8b3bbca4..14fe5c6bcd 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsInputTable.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsInputTable.java
@@ -1,6 +1,6 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
-import static com.linkedin.venice.hadoop.spark.SparkConstants.DEFAULT_SCHEMA;
+import static com.linkedin.venice.spark.SparkConstants.DEFAULT_SCHEMA;
 
 import com.linkedin.venice.utils.VeniceProperties;
 import java.util.Collections;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsSource.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsSource.java
similarity index 89%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsSource.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsSource.java
index 9e76f4f3c1..2c2fc90445 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/input/hdfs/VeniceHdfsSource.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/input/hdfs/VeniceHdfsSource.java
@@ -1,6 +1,6 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
-import static com.linkedin.venice.hadoop.spark.SparkConstants.DEFAULT_SCHEMA;
+import static com.linkedin.venice.spark.SparkConstants.DEFAULT_SCHEMA;
 
 import com.linkedin.venice.utils.VeniceProperties;
 import java.util.Map;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/utils/SparkPartitionUtils.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/utils/SparkPartitionUtils.java
similarity index 96%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/utils/SparkPartitionUtils.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/utils/SparkPartitionUtils.java
index 9fb1758a33..0b429c84a3 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/utils/SparkPartitionUtils.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/utils/SparkPartitionUtils.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.utils;
+package com.linkedin.venice.spark.utils;
 
 import java.util.Comparator;
 import org.apache.spark.Partitioner;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/utils/SparkScalaUtils.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/utils/SparkScalaUtils.java
similarity index 94%
rename from clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/utils/SparkScalaUtils.java
rename to clients/venice-push-job/src/main/java/com/linkedin/venice/spark/utils/SparkScalaUtils.java
index 33e3ac1ba4..ce42a0be16 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/spark/utils/SparkScalaUtils.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/spark/utils/SparkScalaUtils.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.utils;
+package com.linkedin.venice.spark.utils;
 
 import org.apache.spark.sql.types.StructType;
 import scala.Int;
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/AbstractDataWriterSparkJobTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/jobs/AbstractDataWriterSparkJobTest.java
similarity index 96%
rename from clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/AbstractDataWriterSparkJobTest.java
rename to clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/jobs/AbstractDataWriterSparkJobTest.java
index 5c2b198ff1..a6e6628988 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/jobs/AbstractDataWriterSparkJobTest.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/jobs/AbstractDataWriterSparkJobTest.java
@@ -1,14 +1,14 @@
-package com.linkedin.venice.hadoop.spark.datawriter.jobs;
+package com.linkedin.venice.spark.datawriter.jobs;
 
 import static com.linkedin.venice.ConfigKeys.KAFKA_CONFIG_PREFIX;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.DEFAULT_KEY_FIELD_PROP;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.DEFAULT_VALUE_FIELD_PROP;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.KEY_COLUMN_NAME;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_APP_NAME_CONFIG;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_DATA_WRITER_CONF_PREFIX;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.SPARK_SESSION_CONF_PREFIX;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.VALUE_COLUMN_NAME;
 import static com.linkedin.venice.meta.Store.UNLIMITED_STORAGE_QUOTA;
+import static com.linkedin.venice.spark.SparkConstants.KEY_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_APP_NAME_CONFIG;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_DATA_WRITER_CONF_PREFIX;
+import static com.linkedin.venice.spark.SparkConstants.SPARK_SESSION_CONF_PREFIX;
+import static com.linkedin.venice.spark.SparkConstants.VALUE_COLUMN_NAME;
 import static org.apache.spark.sql.types.DataTypes.BinaryType;
 import static org.apache.spark.sql.types.DataTypes.StringType;
 
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/partition/PartitionSorterTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/PartitionSorterTest.java
similarity index 95%
rename from clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/partition/PartitionSorterTest.java
rename to clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/PartitionSorterTest.java
index 289adee8dd..727c6418da 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/partition/PartitionSorterTest.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/PartitionSorterTest.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.datawriter.partition;
+package com.linkedin.venice.spark.datawriter.partition;
 
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/partition/VeniceSparkPartitionerTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitionerTest.java
similarity index 96%
rename from clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/partition/VeniceSparkPartitionerTest.java
rename to clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitionerTest.java
index 320ad8d69c..b7d78153e2 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/partition/VeniceSparkPartitionerTest.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/partition/VeniceSparkPartitionerTest.java
@@ -1,11 +1,11 @@
-package com.linkedin.venice.hadoop.spark.datawriter.partition;
+package com.linkedin.venice.spark.datawriter.partition;
 
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.PARTITION_COUNT;
 
 import com.linkedin.venice.exceptions.VeniceException;
-import com.linkedin.venice.hadoop.spark.SparkConstants;
 import com.linkedin.venice.partitioner.DefaultVenicePartitioner;
 import com.linkedin.venice.partitioner.VenicePartitioner;
+import com.linkedin.venice.spark.SparkConstants;
 import com.linkedin.venice.utils.ByteUtils;
 import com.linkedin.venice.utils.VeniceProperties;
 import java.util.Properties;
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java
similarity index 98%
rename from clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java
rename to clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java
index c036f0eb4d..b0f39d5537 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/datawriter/task/SparkDataWriterTaskTrackerTest.java
@@ -1,6 +1,6 @@
-package com.linkedin.venice.hadoop.spark.datawriter.task;
+package com.linkedin.venice.spark.datawriter.task;
 
-import com.linkedin.venice.hadoop.spark.SparkConstants;
+import com.linkedin.venice.spark.SparkConstants;
 import org.apache.spark.sql.SparkSession;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/engine/SparkEngineTaskConfigProviderTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProviderTest.java
similarity index 95%
rename from clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/engine/SparkEngineTaskConfigProviderTest.java
rename to clients/venice-push-job/src/test/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProviderTest.java
index cb244cd670..0bbacb2862 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/engine/SparkEngineTaskConfigProviderTest.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/engine/SparkEngineTaskConfigProviderTest.java
@@ -1,6 +1,6 @@
-package com.linkedin.venice.hadoop.spark.engine;
+package com.linkedin.venice.spark.engine;
 
-import com.linkedin.venice.hadoop.spark.SparkConstants;
+import com.linkedin.venice.spark.SparkConstants;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/input/hdfs/TestSparkInputFromHdfs.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/input/hdfs/TestSparkInputFromHdfs.java
similarity index 98%
rename from clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/input/hdfs/TestSparkInputFromHdfs.java
rename to clients/venice-push-job/src/test/java/com/linkedin/venice/spark/input/hdfs/TestSparkInputFromHdfs.java
index a7b7237366..c47def0872 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/input/hdfs/TestSparkInputFromHdfs.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/input/hdfs/TestSparkInputFromHdfs.java
@@ -1,4 +1,4 @@
-package com.linkedin.venice.hadoop.spark.input.hdfs;
+package com.linkedin.venice.spark.input.hdfs;
 
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.DEFAULT_KEY_FIELD_PROP;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.DEFAULT_VALUE_FIELD_PROP;
@@ -11,8 +11,8 @@
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.UPDATE_SCHEMA_STRING_PROP;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.VALUE_FIELD_PROP;
 import static com.linkedin.venice.hadoop.VenicePushJobConstants.VSON_PUSH;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.DEFAULT_SCHEMA;
-import static com.linkedin.venice.hadoop.spark.input.hdfs.VeniceHdfsInputTable.INPUT_TABLE_NAME;
+import static com.linkedin.venice.spark.SparkConstants.DEFAULT_SCHEMA;
+import static com.linkedin.venice.spark.input.hdfs.VeniceHdfsInputTable.INPUT_TABLE_NAME;
 import static com.linkedin.venice.utils.TestWriteUtils.DEFAULT_USER_DATA_VALUE_PREFIX;
 import static com.linkedin.venice.utils.TestWriteUtils.NAME_RECORD_V2_SCHEMA;
 import static com.linkedin.venice.utils.TestWriteUtils.STRING_TO_STRING_SCHEMA;
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/utils/SparkScalaUtilsTest.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/utils/SparkScalaUtilsTest.java
similarity index 80%
rename from clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/utils/SparkScalaUtilsTest.java
rename to clients/venice-push-job/src/test/java/com/linkedin/venice/spark/utils/SparkScalaUtilsTest.java
index 7081393d26..badf34aea8 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/spark/utils/SparkScalaUtilsTest.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/spark/utils/SparkScalaUtilsTest.java
@@ -1,7 +1,7 @@
-package com.linkedin.venice.hadoop.spark.utils;
+package com.linkedin.venice.spark.utils;
 
-import static com.linkedin.venice.hadoop.spark.SparkConstants.KEY_COLUMN_NAME;
-import static com.linkedin.venice.hadoop.spark.SparkConstants.VALUE_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.KEY_COLUMN_NAME;
+import static com.linkedin.venice.spark.SparkConstants.VALUE_COLUMN_NAME;
 import static org.apache.spark.sql.types.DataTypes.BinaryType;
 
 import org.apache.spark.sql.types.Metadata;
diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java
index 8f7fb3a627..d242d7a60b 100644
--- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java
+++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/PartialUpdateTest.java
@@ -70,7 +70,6 @@
 import com.linkedin.venice.controllerapi.VersionCreationResponse;
 import com.linkedin.venice.exceptions.VeniceException;
 import com.linkedin.venice.hadoop.VenicePushJob;
-import com.linkedin.venice.hadoop.spark.datawriter.jobs.DataWriterSparkJob;
 import com.linkedin.venice.helix.VeniceJsonSerializer;
 import com.linkedin.venice.integration.utils.ServiceFactory;
 import com.linkedin.venice.integration.utils.VeniceClusterWrapper;
@@ -93,6 +92,7 @@
 import com.linkedin.venice.schema.writecompute.WriteComputeSchemaConverter;
 import com.linkedin.venice.serialization.avro.AvroProtocolDefinition;
 import com.linkedin.venice.serialization.avro.ChunkedValueManifestSerializer;
+import com.linkedin.venice.spark.datawriter.jobs.DataWriterSparkJob;
 import com.linkedin.venice.stats.AbstractVeniceStats;
 import com.linkedin.venice.storage.protocol.ChunkedValueManifest;
 import com.linkedin.venice.tehuti.MetricsUtils;
diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java
index 9667f2aae5..833d1d11a4 100644
--- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java
+++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestBatch.java
@@ -62,11 +62,11 @@
 import com.linkedin.venice.controllerapi.UpdateStoreQueryParams;
 import com.linkedin.venice.controllerapi.VersionCreationResponse;
 import com.linkedin.venice.exceptions.VeniceException;
-import com.linkedin.venice.hadoop.spark.datawriter.jobs.DataWriterSparkJob;
 import com.linkedin.venice.integration.utils.VeniceClusterWrapper;
 import com.linkedin.venice.meta.BackupStrategy;
 import com.linkedin.venice.meta.Version;
 import com.linkedin.venice.read.RequestType;
+import com.linkedin.venice.spark.datawriter.jobs.DataWriterSparkJob;
 import com.linkedin.venice.stats.AbstractVeniceStats;
 import com.linkedin.venice.system.store.MetaStoreDataType;
 import com.linkedin.venice.systemstore.schemas.StoreMetaKey;
diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestVsonStoreBatch.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestVsonStoreBatch.java
index 932e5ee8d5..952c8aedc9 100644
--- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestVsonStoreBatch.java
+++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/endToEnd/TestVsonStoreBatch.java
@@ -23,12 +23,12 @@
 import com.linkedin.venice.compression.CompressionStrategy;
 import com.linkedin.venice.controllerapi.ControllerClient;
 import com.linkedin.venice.controllerapi.UpdateStoreQueryParams;
-import com.linkedin.venice.hadoop.spark.datawriter.jobs.DataWriterSparkJob;
 import com.linkedin.venice.integration.utils.ServiceFactory;
 import com.linkedin.venice.integration.utils.VeniceClusterWrapper;
 import com.linkedin.venice.meta.Version;
 import com.linkedin.venice.schema.vson.VsonAvroSchemaAdapter;
 import com.linkedin.venice.schema.vson.VsonSchema;
+import com.linkedin.venice.spark.datawriter.jobs.DataWriterSparkJob;
 import com.linkedin.venice.utils.KeyAndValueSchemas;
 import com.linkedin.venice.utils.Pair;
 import com.linkedin.venice.utils.TestWriteUtils;
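
For code outside this repository that consumed these classes, the net effect of the patch is a one-line import change per class: every com.linkedin.venice.hadoop.spark.* import becomes com.linkedin.venice.spark.*. A minimal sketch of the migration (the wrapper class below is hypothetical and exists only to illustrate; the import paths themselves are the ones confirmed by the diff):

// Hypothetical downstream file, for illustration only.

// Before this patch:
// import com.linkedin.venice.hadoop.spark.datawriter.jobs.DataWriterSparkJob;

// After this patch:
import com.linkedin.venice.spark.datawriter.jobs.DataWriterSparkJob;

public class SparkJobClassNamePrinter {
  public static void main(String[] args) {
    // The fully-qualified class name now lives directly under com.linkedin.venice:
    System.out.println(DataWriterSparkJob.class.getName());
    // Prints: com.linkedin.venice.spark.datawriter.jobs.DataWriterSparkJob
  }
}

Anything that references these classes by their fully-qualified names as strings (for example, reflection-based wiring or job configuration) would need the same rename, which is presumably why the integration tests above (PartialUpdateTest, TestBatch, TestVsonStoreBatch) change only their import blocks.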