-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(build): spark 3.5.2 #42
Changes from all commits
ccc4f19
763e404
473aee4
03ce301
f53d168
93ff4da
5da1083
0a7d760
0f9bb44
484cac9
b30c75f
2ed424b
8ce6125
c3b8413
6c00c0e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
export IMAGE_NAME=spark-k8s | ||
export SELF_VERSION="v3" | ||
export SELF_VERSION="v4" | ||
export SCALA_VERSION="2.12" | ||
export SPARK_VERSION="3.3.0" | ||
export HADOOP_VERSION="3.3.2" | ||
export JAVA_VERSION="11" | ||
export SPARK_VERSION="3.5.2" | ||
export HADOOP_VERSION="3.3.6" | ||
export JAVA_VERSION="8" | ||
export WITH_HIVE="true" | ||
export WITH_PYSPARK="true" | ||
bash make-distribution.sh |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,57 +40,25 @@ TERM=xterm-color ./dev/make-distribution.sh \ | |
${HIVE_INSTALL_FLAG:+"-Phive"} \ | ||
-DskipTests | ||
|
||
SPARK_MAJOR_VERSION="$(echo "${SPARK_VERSION}" | cut -d '.' -f1)" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not needed anymore, as we no longer use any Spark version that is 2.y.z |
||
HADOOP_MAJOR_VERSION="$(echo "${HADOOP_VERSION}" | cut -d '.' -f1)" | ||
HIVE_HADOOP3_HIVE_EXEC_URL=${HIVE_HADOOP3_HIVE_EXEC_URL:-https://github.com/guangie88/hive-exec-jar/releases/download/1.2.1.spark2-hadoop3/hive-exec-1.2.1.spark2.jar} | ||
|
||
# Replace Hive for Hadoop 3 since Hive 1.2.1 does not officially support Hadoop 3 when using Spark 2.y.z | ||
# Note docker-image-tool.sh takes the jars from assembly/target/scala-2.*/jars | ||
if [[ "${WITH_HIVE}" = "true" ]] && [[ "${SPARK_MAJOR_VERSION}" -eq 2 ]] && [[ "${HADOOP_MAJOR_VERSION}" -eq 3 ]]; then | ||
HIVE_EXEC_JAR_NAME="hive-exec-1.2.1.spark2.jar" | ||
TARGET_JAR_PATH="$(find assembly -type f -name "${HIVE_EXEC_JAR_NAME}")" | ||
curl -LO "${HIVE_HADOOP3_HIVE_EXEC_URL}" && mv "${HIVE_EXEC_JAR_NAME}" "${TARGET_JAR_PATH}" | ||
# Spark <= 2.4 uses ${TARGET_JAR_PATH} for Docker COPY, but Spark >= 3 uses dist/jars/ | ||
cp "${TARGET_JAR_PATH}" "dist/jars/" | ||
fi | ||
|
||
SPARK_MAJOR_VERSION="$(echo "${SPARK_VERSION}" | cut -d '.' -f1)" | ||
SPARK_MINOR_VERSION="$(echo "${SPARK_VERSION}" | cut -d '.' -f2)" | ||
HADOOP_MAJOR_VERSION="$(echo "${HADOOP_VERSION}" | cut -d '.' -f1)" | ||
|
||
if [[ ${SPARK_MAJOR_VERSION} -eq 2 && ${SPARK_MINOR_VERSION} -eq 4 ]]; then # 2.4.z | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not needed anymore, as we no longer use any Spark version that is 2.y.z |
||
# Same Dockerfiles as Spark v2.4.8, but allow override of base image to use Debian Buster | ||
# and not using PYTHONENV and instead copies pyspark out like Spark 3.y.z | ||
DOCKERFILE_BASE="../overrides/base/2.4.z/Dockerfile" | ||
DOCKERFILE_PY="../overrides/python/2.4.z/Dockerfile" | ||
else | ||
DOCKERFILE_BASE="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile" | ||
DOCKERFILE_PY="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile" | ||
fi | ||
DOCKERFILE_BASE="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile" | ||
DOCKERFILE_PY="./resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile" | ||
|
||
if [[ ${SPARK_MAJOR_VERSION} -eq 3 && ${SPARK_MINOR_VERSION} -ge 4 ]]; then # >=3.4 | ||
# From Spark v3.4.0 onwards, openjdk is not the preferred base image source as it is | ||
# deprecated and taken over by eclipse-temurin. slim-buster variants are not available | ||
# on eclipse-temurin at the moment. | ||
IMAGE_VARIANT="jre" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
IMAGE_VARIANT="jre-focal" | ||
else | ||
IMAGE_VARIANT="jre-slim-buster" | ||
fi | ||
|
||
# Temporarily remove R build due to keyserver issue | ||
# DOCKERFILE_R="./resource-managers/kubernetes/docker/src/main/dockerfiles/R/Dockerfile" | ||
|
||
SPARK_LABEL="${SPARK_VERSION}" | ||
TAG_NAME="${SELF_VERSION}_${SPARK_LABEL}_hadoop-${HADOOP_VERSION}_scala-${SCALA_VERSION}_java-${JAVA_VERSION}" | ||
|
||
# ./bin/docker-image-tool.sh \ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. R is not used anymore. |
||
# -b java_image_tag=${JAVA_VERSION}-jre-slim-buster \ | ||
# -r "${IMAGE_NAME}" \ | ||
# -t "${TAG_NAME}" \ | ||
# -f "${DOCKERFILE_BASE}" \ | ||
# -p "${DOCKERFILE_PY}" \ | ||
# -R "${DOCKERFILE_R}" \ | ||
# build | ||
|
||
./bin/docker-image-tool.sh \ | ||
-b java_image_tag=${JAVA_VERSION}-${IMAGE_VARIANT} \ | ||
-r "${IMAGE_NAME}" \ | ||
|
@@ -101,6 +69,5 @@ TAG_NAME="${SELF_VERSION}_${SPARK_LABEL}_hadoop-${HADOOP_VERSION}_scala-${SCALA_ | |
|
||
docker tag "${IMAGE_NAME}/spark:${TAG_NAME}" "${IMAGE_NAME}:${TAG_NAME}" | ||
docker tag "${IMAGE_NAME}/spark-py:${TAG_NAME}" "${IMAGE_NAME}-py:${TAG_NAME}" | ||
# docker tag "${IMAGE_NAME}/spark-r:${TAG_NAME}" "${IMAGE_NAME}-r:${TAG_NAME}" | ||
|
||
popd >/dev/null |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,21 +11,10 @@ else | |
fi | ||
|
||
TAG_NAME="${SELF_VERSION}_${SPARK_LABEL}_hadoop-${HADOOP_VERSION}_scala-${SCALA_VERSION}_java-${JAVA_VERSION}" | ||
ALT_TAG_NAME="${SPARK_LABEL}_hadoop-${HADOOP_VERSION}_scala-${SCALA_VERSION}_java-${JAVA_VERSION}" | ||
|
||
docker tag "${IMAGE_NAME}:${TAG_NAME}" "${IMAGE_ORG}/${IMAGE_NAME}:${TAG_NAME}" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Only use image tags that include the self version, to prevent overwriting previously working images when rebuilding with new base JRE images. |
||
docker push "${IMAGE_ORG}/${IMAGE_NAME}:${TAG_NAME}" | ||
docker tag "${IMAGE_NAME}:${TAG_NAME}" "${IMAGE_ORG}/${IMAGE_NAME}:${ALT_TAG_NAME}" | ||
docker push "${IMAGE_ORG}/${IMAGE_NAME}:${ALT_TAG_NAME}" | ||
|
||
# Python image push | ||
docker tag "${IMAGE_NAME}-py:${TAG_NAME}" "${IMAGE_ORG}/${IMAGE_NAME}-py:${TAG_NAME}" | ||
docker push "${IMAGE_ORG}/${IMAGE_NAME}-py:${TAG_NAME}" | ||
docker tag "${IMAGE_NAME}-py:${TAG_NAME}" "${IMAGE_ORG}/${IMAGE_NAME}-py:${ALT_TAG_NAME}" | ||
docker push "${IMAGE_ORG}/${IMAGE_NAME}-py:${ALT_TAG_NAME}" | ||
|
||
# R image push | ||
# docker tag "${IMAGE_NAME}-r:${TAG_NAME}" "${IMAGE_ORG}/${IMAGE_NAME}-r:${TAG_NAME}" | ||
# docker push "${IMAGE_ORG}/${IMAGE_NAME}-r:${TAG_NAME}" | ||
# docker tag "${IMAGE_NAME}-r:${TAG_NAME}" "${IMAGE_ORG}/${IMAGE_NAME}-r:${ALT_TAG_NAME}" | ||
# docker push "${IMAGE_ORG}/${IMAGE_NAME}-r:${ALT_TAG_NAME}" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lol, I'm pretty sure this guy (Chao Sun), who reported the issue, was a Singaporean colleague at my previous company
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Time to drop him a message to share with him how to fix it