From 619e2d736fa32825414be83ca788d9975ad89b16 Mon Sep 17 00:00:00 2001 From: Nick Badger Date: Tue, 28 Aug 2018 09:29:19 +0100 Subject: [PATCH 1/6] Run elasticsearch via 'exec' to ensure it gets pid 1 --- run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.sh b/run.sh index e381ae8..f1f2287 100755 --- a/run.sh +++ b/run.sh @@ -55,11 +55,11 @@ if [[ $(whoami) == "root" ]]; then chown -R elasticsearch:elasticsearch /data exec su-exec elasticsearch $BASE/bin/elasticsearch $ES_EXTRA_ARGS else - # the container's first process is not running as 'root', + # the container's first process is not running as 'root', # it does not have the rights to chown. however, we may # assume that it is being ran as 'elasticsearch', and that # the volumes already have the right permissions. this is # the case for kubernetes for example, when 'runAsUser: 1000' # and 'fsGroup:1000' are defined in the pod's security context. - $BASE/bin/elasticsearch $ES_EXTRA_ARGS + exec $BASE/bin/elasticsearch $ES_EXTRA_ARGS fi From 209d02c1c2494e3ef35236b39aad52138781c806 Mon Sep 17 00:00:00 2001 From: Nick Badger Date: Thu, 20 Sep 2018 11:13:43 +0100 Subject: [PATCH 2/6] Adding startup fixes --- Dockerfile | 6 +++++- run.sh | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 95e40a3..d445c26 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,11 +55,15 @@ ENV SHARD_ALLOCATION_AWARENESS_ATTR "" ENV MEMORY_LOCK false ENV REPO_LOCATIONS "" ENV DISCOVERY_SERVICE elasticsearch-discovery +ENV NETWORK_ADDRESS_CACHE_TTL 3 +ENV NETWORK_ADDRESS_CACHE_NEGATIVE_TTL 10 +ENV DISCOVERY_SERVICE elasticsearch-discovery # Volume for Elasticsearch data VOLUME ["/data"] -RUN chown elasticsearch:elasticsearch -R /usr/share/elasticsearch /data +RUN chown elasticsearch:elasticsearch -R /usr/share/elasticsearch /data && \ + chown elasticsearch:elasticsearch -R /opt/jdk-10.0.2/conf USER elasticsearch CMD ["/run.sh"] diff --git a/run.sh b/run.sh index f1f2287..32643e0 100755 --- a/run.sh +++ b/run.sh @@ -2,6 +2,18 @@ set -ex +# SIGTERM-handler +term_handler() { + if [ $PID -ne 0 ]; then + kill -SIGTERM "$PID" + wait "$PID" + sleep 10 + fi + exit 0; +} + +export NODE_NAME=${NODE_NAME:-${HOSTNAME}} + BASE=/usr/share/elasticsearch # allow for memlock if enabled @@ -49,11 +61,22 @@ if [ ! -z "${SHARD_ALLOCATION_AWARENESS_ATTR}" ]; then fi fi +# configuration overrides +if [ ! -z "${NETWORK_ADDRESS_CACHE_TTL}" ]; then + sed -i -e "s/#networkaddress.cache.ttl=-1/networkaddress.cache.ttl=${NETWORK_ADDRESS_CACHE_TTL}/" /opt/jdk-10.0.2/conf/security/java.security +fi + +if [ ! -z "${NETWORK_ADDRESS_CACHE_NEGATIVE_TTL}" ]; then + sed -i -e ""s/networkaddress.cache.negative.ttl=10/networkaddress.cache.negative.ttl=${NETWORK_ADDRESS_CACHE_NEGATIVE_TTL}/"" /opt/jdk-10.0.2/conf/security/java.security +fi + +trap 'kill ${!}; term_handler' SIGTERM + # run if [[ $(whoami) == "root" ]]; then chown -R elasticsearch:elasticsearch $BASE chown -R elasticsearch:elasticsearch /data - exec su-exec elasticsearch $BASE/bin/elasticsearch $ES_EXTRA_ARGS + exec su-exec elasticsearch $BASE/bin/elasticsearch $ES_EXTRA_ARGS & else # the container's first process is not running as 'root', # it does not have the rights to chown. however, we may @@ -61,5 +84,10 @@ else # the volumes already have the right permissions. this is # the case for kubernetes for example, when 'runAsUser: 1000' # and 'fsGroup:1000' are defined in the pod's security context. - exec $BASE/bin/elasticsearch $ES_EXTRA_ARGS + $BASE/bin/elasticsearch $ES_EXTRA_ARGS & fi + +PID="$!" + +echo "Setting ES pid to $PID" + From 6bd03f123d927624b9c5d9e5775a4f96e0b846e0 Mon Sep 17 00:00:00 2001 From: Nick Badger Date: Thu, 20 Sep 2018 11:45:01 +0100 Subject: [PATCH 3/6] pkill instead of kill --- run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.sh b/run.sh index 32643e0..0455429 100755 --- a/run.sh +++ b/run.sh @@ -5,7 +5,7 @@ set -ex # SIGTERM-handler term_handler() { if [ $PID -ne 0 ]; then - kill -SIGTERM "$PID" + pkill -SIGTERM "$PID" wait "$PID" sleep 10 fi From 141388eac3bc373be00519e27ce2dd2e80c29673 Mon Sep 17 00:00:00 2001 From: Nick Badger Date: Thu, 20 Sep 2018 12:05:15 +0100 Subject: [PATCH 4/6] Don't exit the container but wait forever --- run.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/run.sh b/run.sh index 0455429..1b11453 100755 --- a/run.sh +++ b/run.sh @@ -89,5 +89,6 @@ fi PID="$!" -echo "Setting ES pid to $PID" - +while true ; do + tail -f /dev/null & wait ${!} +done From dc1b1e4c798bbb07170825c35896afae0da2274c Mon Sep 17 00:00:00 2001 From: Francesco Ciocchetti Date: Mon, 24 Sep 2018 08:14:11 +0100 Subject: [PATCH 5/6] Fix startup script to properly manage the SIGTERM and wait after elasticsearch termed --- Dockerfile | 1 + run.sh | 42 +++++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/Dockerfile b/Dockerfile index d445c26..b28f0e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,6 +62,7 @@ ENV DISCOVERY_SERVICE elasticsearch-discovery # Volume for Elasticsearch data VOLUME ["/data"] +# Run elasticsearch as unprivileged RUN chown elasticsearch:elasticsearch -R /usr/share/elasticsearch /data && \ chown elasticsearch:elasticsearch -R /opt/jdk-10.0.2/conf USER elasticsearch diff --git a/run.sh b/run.sh index 1b11453..8e82c31 100755 --- a/run.sh +++ b/run.sh @@ -2,17 +2,22 @@ set -ex +export POST_TERM_WAIT=${POST_TERM_WAIT:-15} + # SIGTERM-handler term_handler() { if [ $PID -ne 0 ]; then - pkill -SIGTERM "$PID" + set +e + kill -15 "$PID" # SIGTERM wait "$PID" - sleep 10 + echo "Sleeping $POST_TERM_WAIT Seconds before exiting the term_handler" + sleep $POST_TERM_WAIT + set -e fi exit 0; + #exit 143; # 128 + 15 -- SIGTERM } -export NODE_NAME=${NODE_NAME:-${HOSTNAME}} BASE=/usr/share/elasticsearch @@ -21,9 +26,11 @@ if [ "$MEMORY_LOCK" == "true" ]; then ulimit -l unlimited fi +NODE_NAME=${NODE_NAME:-${HOSTNAME}} + # Set a random node name if not set. if [ -z "${NODE_NAME}" ]; then - NODE_NAME=${HOSTNAME} + NODE_NAME=$(uuidgen) fi export NODE_NAME=${NODE_NAME} @@ -48,6 +55,8 @@ if [ ! -z "${ES_PLUGINS_INSTALL}" ]; then IFS=$OLDIFS fi +# Configure Shard Allocation Awareness +# XXX: If runnig kubernetes and kubernetes is runnign in the cloud -> Fetch zone from node if [ ! -z "${SHARD_ALLOCATION_AWARENESS_ATTR}" ]; then # this will map to a file like /etc/hostname => /dockerhostname so reading that file will get the # container hostname @@ -62,31 +71,22 @@ if [ ! -z "${SHARD_ALLOCATION_AWARENESS_ATTR}" ]; then fi # configuration overrides +# CONF directory and files need to be writable by the user running the container + +## DNS Timers if [ ! -z "${NETWORK_ADDRESS_CACHE_TTL}" ]; then - sed -i -e "s/#networkaddress.cache.ttl=-1/networkaddress.cache.ttl=${NETWORK_ADDRESS_CACHE_TTL}/" /opt/jdk-10.0.2/conf/security/java.security + sed -i -e "s/#networkaddress.cache.ttl=.*/networkaddress.cache.ttl=${NETWORK_ADDRESS_CACHE_TTL}/" /opt/jdk-10.0.2/conf/security/java.security fi if [ ! -z "${NETWORK_ADDRESS_CACHE_NEGATIVE_TTL}" ]; then - sed -i -e ""s/networkaddress.cache.negative.ttl=10/networkaddress.cache.negative.ttl=${NETWORK_ADDRESS_CACHE_NEGATIVE_TTL}/"" /opt/jdk-10.0.2/conf/security/java.security + sed -i -e ""s/networkaddress.cache.negative.ttl=.*/networkaddress.cache.negative.ttl=${NETWORK_ADDRESS_CACHE_NEGATIVE_TTL}/"" /opt/jdk-10.0.2/conf/security/java.security fi +# Trap the TERM Signals trap 'kill ${!}; term_handler' SIGTERM -# run -if [[ $(whoami) == "root" ]]; then - chown -R elasticsearch:elasticsearch $BASE - chown -R elasticsearch:elasticsearch /data - exec su-exec elasticsearch $BASE/bin/elasticsearch $ES_EXTRA_ARGS & -else - # the container's first process is not running as 'root', - # it does not have the rights to chown. however, we may - # assume that it is being ran as 'elasticsearch', and that - # the volumes already have the right permissions. this is - # the case for kubernetes for example, when 'runAsUser: 1000' - # and 'fsGroup:1000' are defined in the pod's security context. - $BASE/bin/elasticsearch $ES_EXTRA_ARGS & -fi - +# run Elasticsearch in the background +$BASE/bin/elasticsearch $ES_EXTRA_ARGS & PID="$!" while true ; do From 3ccb4571848c34a8b12360beab9a0cd0eb780b57 Mon Sep 17 00:00:00 2001 From: Francesco Ciocchetti Date: Mon, 24 Sep 2018 09:40:05 +0100 Subject: [PATCH 6/6] Disable all pkilling since now run.sh entrypoint will deal with SIGTERMing the right java process --- scripts/pre-stop-master.sh | 4 +++- scripts/stop-local-routing.sh | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/pre-stop-master.sh b/scripts/pre-stop-master.sh index 95128f4..3312b6b 100755 --- a/scripts/pre-stop-master.sh +++ b/scripts/pre-stop-master.sh @@ -1,2 +1,4 @@ #!/bin/bash -pkill -SIGTERM java +# Disable since now the run.sh entrypoint will deal with SIGTERMing java +#pkill -SIGTERM java +exit 0 diff --git a/scripts/stop-local-routing.sh b/scripts/stop-local-routing.sh index 875173f..00527d3 100755 --- a/scripts/stop-local-routing.sh +++ b/scripts/stop-local-routing.sh @@ -17,9 +17,10 @@ while true ; do echo -e "Wait for node ${NODE_NAME} to become empty" SHARDS_ALLOCATION=$(curl --retry 3 -s -XGET 'http://localhost:9200/_cat/shards') if ! echo "${SHARDS_ALLOCATION}" | grep -E "${NODE_NAME}"; then - # Send Sigterm to elasticsearch once the relocation is finished - sleep 2 - pkill -SIGTERM -P 1 + # Send Sigterm to elasticsearch once the relocation is finished + # Disabled since now the run.sh entrypoint will deal with sigterming the right process + # sleep 2 + # pkill -SIGTERM -P 1 break fi sleep 2