-
Notifications
You must be signed in to change notification settings - Fork 235
/
Copy pathrun_pyspark_examples.sh
executable file
·84 lines (69 loc) · 2.1 KB
/
run_pyspark_examples.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
# shellcheck disable=SC1091,SC2034
# Bootstrap for the PySpark example runner: load the shared environment,
# enable strict/trace mode, prepare a packed virtualenv, and build the
# combined rules JSON file.
#
# env_setup.sh is sourced first, so this script's `set -ex` / pipefail are
# not yet active while it runs.
source env_setup.sh
# -e: stop at the first failing command; -x: trace every command.
set -ex
# A pipeline now fails when any stage fails, not only the last one.
set -o pipefail
#tag::package_venv[]
# Create the virtualenv only when the directory is missing; otherwise reuse it.
if [ ! -d pyspark_venv ]; then
python -m venv pyspark_venv
fi
source pyspark_venv/bin/activate
# Requirements are installed on every run, even when the venv already existed.
pip install -r ./python/requirements.txt
# The archive is built only when missing, so it is not refreshed by the
# pip install above; remove pyspark_venv.tar.gz to force a repack.
if [ ! -f pyspark_venv.tar.gz ]; then
venv-pack -o pyspark_venv.tar.gz
fi
# Set in local and client mode where the driver uses the Python present
# (requires that you have activated the venv as we did above)
PYSPARK_DRIVER_PYTHON=python
export PYSPARK_DRIVER_PYTHON
# NOTE(review): "./environment" matches the "#environment" alias passed to
# --archives further down. Spark's documented variable for the worker
# interpreter is PYSPARK_PYTHON; confirm PYTHON_PATH is the intended name.
export PYTHON_PATH=./environment/bin/python
#end::package_venv[]
# Some hack for our json magic: concatenate every se*.json in the current
# directory into the single rules file.
cat se*.json > spark_expectations_sample_rules.json
# Decide what to do after an example has failed.
# Arguments: $1 - path of the example that failed
#            $2 - exit status to terminate with
# Outputs:   prints "ok" when a "<example>.fail" marker file exists
# Returns:   0 when the marker exists; otherwise exits the shell with $2
function check_fail () {
  local example_path="$1"
  local exit_status="$2"
  # No marker file: the failure is real, so stop with the given status.
  if [ ! -f "${example_path}.fail" ]; then
    exit "$exit_status"
  fi
  echo "ok";
}
# Assembled Scala examples jar that is later passed to spark-submit via --jars.
EXAMPLE_JAR="./core/target/scala-2.13/core-assembly-0.1.0-SNAPSHOT.jar"
# Build the assembly on demand when the jar is not there yet.
[ -f "${EXAMPLE_JAR}" ] || sbt core/assembly
# Whether or not a build just ran, refuse to continue without the jar.
if ! [ -f "${EXAMPLE_JAR}" ]; then
  echo "Can't find sample jar?!?"
  exit 1
fi
# Submit one PySpark example to a local Spark master with the Iceberg
# catalogs configured, appending its combined stdout/stderr to "<example>.out".
# Globals:   EXAMPLE_JAR (read) - jar passed through --jars
# Arguments: $1 - path to the example script. If "<path>.conf" is readable,
#                 its whitespace-separated words are passed to spark-submit
#                 as extra arguments.
# Outputs:   spark-submit output, echoed to stdout by tee
# Returns:   0 on success; on failure hands the pipeline's exit status to
#            check_fail
function run_example () {
  local ex="$1"
  local -a extra_args=()
  if [[ -r "${ex}.conf" ]]; then
    # Split the file on whitespace only: no glob expansion of its words, and
    # no error noise when an example simply has no .conf file.
    # read returns non-zero at end of input, hence the explicit || true.
    read -r -d '' -a extra_args < "${ex}.conf" || true
  fi
  # "local[5]" is quoted so a file named local5 in the working directory
  # cannot replace it through pathname expansion.
  spark-submit \
    --master "local[5]" \
    --conf spark.eventLog.enabled=true \
    --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \
    --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
    --conf spark.sql.catalog.spark_catalog.type=hive \
    --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \
    --conf spark.sql.catalog.local.type=hadoop \
    --archives pyspark_venv.tar.gz#environment \
    --conf "spark.sql.catalog.local.warehouse=$PWD/warehouse" \
    "${extra_args[@]}" \
    --name "${ex}" \
    --jars "${EXAMPLE_JAR}" \
    "${ex}" 2>&1 | tee -a "${ex}.out" || check_fail "$ex" "$?"
}
# Entry point: with exactly one argument run just that example (the name is
# taken relative to python/examples); otherwise walk every *.py file there.
if (( $# == 1 )); then
  run_example "python/examples/$1"
else
  for ex in python/examples/*.py; do
    case "$ex" in
      # Any path containing "test" is left to the test suite.
      *test*)
        echo "Skipping ex $ex as it is a test and covered by our tests."
        ;;
      *)
        echo "Running $ex"
        run_example "$ex"
        ;;
    esac
  done
fi