From c8640e666f3725b89cd11b6d6612ffa278053a73 Mon Sep 17 00:00:00 2001 From: Raphael Hoegger Date: Wed, 16 Sep 2015 10:26:25 +0200 Subject: [PATCH 1/3] Added a "restartpause" option * Applied on startup failure additionally to the backoff delay (by fclairamb) * Applied on bad exit status (by fclairamb) * fixed tests (by pfuender) Default value is 0, which doesn't change the existing behavior. The goal is to impose a minimum delay between restarts to avoid overloading the host with restarts. --- supervisor/options.py | 4 +++- supervisor/process.py | 7 +++++-- supervisor/tests/base.py | 3 ++- supervisor/tests/test_options.py | 13 ++++++++++--- supervisor/tests/test_supervisord.py | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/supervisor/options.py b/supervisor/options.py index 290c91433..090336cab 100644 --- a/supervisor/options.py +++ b/supervisor/options.py @@ -874,6 +874,7 @@ def get(section, opt, *args, **kwargs): autorestart = auto_restart(get(section, 'autorestart', 'unexpected')) startsecs = integer(get(section, 'startsecs', 1)) startretries = integer(get(section, 'startretries', 3)) + restartpause = integer(get(section, 'restartpause', 0)) stopsignal = signal_number(get(section, 'stopsignal', 'TERM')) stopwaitsecs = integer(get(section, 'stopwaitsecs', 10)) stopasgroup = boolean(get(section, 'stopasgroup', 'false')) @@ -982,6 +983,7 @@ def get(section, opt, *args, **kwargs): autorestart=autorestart, startsecs=startsecs, startretries=startretries, + restartpause=restartpause, uid=uid, stdout_logfile=logfiles['stdout_logfile'], stdout_capture_maxbytes = stdout_cmaxbytes, @@ -1783,7 +1785,7 @@ def __repr__(self): class ProcessConfig(Config): req_param_names = [ 'name', 'uid', 'command', 'directory', 'umask', 'priority', - 'autostart', 'autorestart', 'startsecs', 'startretries', + 'autostart', 'autorestart', 'startsecs', 'startretries', 'restartpause', 'stdout_logfile', 'stdout_capture_maxbytes', 'stdout_events_enabled', 'stdout_syslog', 
'stdout_logfile_backups', 'stdout_logfile_maxbytes', diff --git a/supervisor/process.py b/supervisor/process.py index fbc0a71f9..336940f1c 100644 --- a/supervisor/process.py +++ b/supervisor/process.py @@ -171,7 +171,7 @@ def change_state(self, new_state, expected=True): if new_state == ProcessStates.BACKOFF: now = time.time() self.backoff += 1 - self.delay = now + self.backoff + self.delay = now + self.backoff + self.config.restartpause self.state = new_state @@ -544,6 +544,9 @@ def finish(self, pid, sts): # unexpected exit code self.spawnerr = 'Bad exit code %s' % es msg = "exited: %s (%s)" % (processname, msg + "; not expected") + self.delay = now + self.config.restartpause + if self.config.restartpause > 0: + msg += ". Will restart in %s seconds (restartpause)" % self.config.restartpause self.change_state(ProcessStates.EXITED, expected=False) self.config.options.logger.info(msg) @@ -593,7 +596,7 @@ def transition(self): if self.config.options.mood > SupervisorStates.RESTARTING: # dont start any processes if supervisor is shutting down if state == ProcessStates.EXITED: - if self.config.autorestart: + if self.config.autorestart and now > self.delay: if self.config.autorestart is RestartUnconditionally: # EXITED -> STARTING self.spawn() diff --git a/supervisor/tests/base.py b/supervisor/tests/base.py index 94932f3b6..2aa9e408c 100644 --- a/supervisor/tests/base.py +++ b/supervisor/tests/base.py @@ -507,7 +507,7 @@ def __lt__(self, other): class DummyPConfig: def __init__(self, options, name, command, directory=None, umask=None, priority=999, autostart=True, - autorestart=True, startsecs=10, startretries=999, + autorestart=True, startsecs=10, startretries=999, restartpause=2, uid=None, stdout_logfile=None, stdout_capture_maxbytes=0, stdout_events_enabled=False, stdout_logfile_backups=0, stdout_logfile_maxbytes=0, @@ -525,6 +525,7 @@ def __init__(self, options, name, command, directory=None, umask=None, self.autorestart = autorestart self.startsecs = startsecs 
self.startretries = startretries + self.restartpause = restartpause self.uid = uid self.stdout_logfile = stdout_logfile self.stdout_capture_maxbytes = stdout_capture_maxbytes diff --git a/supervisor/tests/test_options.py b/supervisor/tests/test_options.py index 9613e5014..5b7200d69 100644 --- a/supervisor/tests/test_options.py +++ b/supervisor/tests/test_options.py @@ -436,6 +436,7 @@ def test_options(self): stopwaitsecs=5 startsecs=5 startretries=10 + restartpause=2 directory=/tmp umask=002 @@ -516,6 +517,7 @@ def test_options(self): self.assertEqual(proc1.autorestart, datatypes.RestartWhenExitUnexpected) self.assertEqual(proc1.startsecs, 5) self.assertEqual(proc1.startretries, 10) + self.assertEqual(proc1.restartpause, 2) self.assertEqual(proc1.uid, 0) self.assertEqual(proc1.stdout_logfile, '/tmp/cat.log') self.assertEqual(proc1.stopsignal, signal.SIGKILL) @@ -1369,6 +1371,7 @@ def test_processes_from_section(self): autorestart = false startsecs = 100 startretries = 100 + restartpause = 2 user = root stdout_logfile = NONE stdout_logfile_backups = 1 @@ -1395,6 +1398,7 @@ def test_processes_from_section(self): self.assertEqual(pconfig.autorestart, False) self.assertEqual(pconfig.startsecs, 100) self.assertEqual(pconfig.startretries, 100) + self.assertEqual(pconfig.restartpause, 2) self.assertEqual(pconfig.uid, 0) self.assertEqual(pconfig.stdout_logfile, None) self.assertEqual(pconfig.stdout_capture_maxbytes, 0) @@ -1557,6 +1561,7 @@ def test_options_with_environment_expansions(self): stopwaitsecs=%(ENV_CAT1_STOPWAIT)s startsecs=%(ENV_CAT1_STARTWAIT)s startretries=%(ENV_CAT1_STARTRETRIES)s + restartpause=%(ENV_CAT1_RESTARTPAUSE)s directory=%(ENV_CAT1_DIR)s umask=%(ENV_CAT1_UMASK)s """) @@ -1590,6 +1595,7 @@ def test_options_with_environment_expansions(self): 'ENV_CAT1_STOPWAIT': '5', 'ENV_CAT1_STARTWAIT': '5', 'ENV_CAT1_STARTRETRIES': '10', + 'ENV_CAT1_RESTARTPAUSE': '2', 'ENV_CAT1_DIR': '/tmp', 'ENV_CAT1_UMASK': '002', } @@ -1634,6 +1640,7 @@ def 
test_options_with_environment_expansions(self): self.assertEqual(proc1.autorestart, datatypes.RestartWhenExitUnexpected) self.assertEqual(proc1.startsecs, 5) self.assertEqual(proc1.startretries, 10) + self.assertEqual(proc1.restartpause, 2) self.assertEqual(proc1.uid, 0) self.assertEqual(proc1.stdout_logfile, '/tmp/cat.log') self.assertEqual(proc1.stopsignal, signal.SIGKILL) @@ -2670,7 +2677,7 @@ def _makeOne(self, *arg, **kw): defaults = {} for name in ('name', 'command', 'directory', 'umask', 'priority', 'autostart', 'autorestart', - 'startsecs', 'startretries', 'uid', + 'startsecs', 'startretries', 'restartpause' ,'uid', 'stdout_logfile', 'stdout_capture_maxbytes', 'stdout_events_enabled', 'stdout_syslog', 'stderr_logfile', 'stderr_capture_maxbytes', @@ -2752,7 +2759,7 @@ def _makeOne(self, *arg, **kw): defaults = {} for name in ('name', 'command', 'directory', 'umask', 'priority', 'autostart', 'autorestart', - 'startsecs', 'startretries', 'uid', + 'startsecs', 'startretries', 'restartpause' ,'uid', 'stdout_logfile', 'stdout_capture_maxbytes', 'stdout_events_enabled', 'stdout_syslog', 'stderr_logfile', 'stderr_capture_maxbytes', @@ -2800,7 +2807,7 @@ def _makeOne(self, *arg, **kw): defaults = {} for name in ('name', 'command', 'directory', 'umask', 'priority', 'autostart', 'autorestart', - 'startsecs', 'startretries', 'uid', + 'startsecs', 'startretries', 'restartpause', 'uid', 'stdout_logfile', 'stdout_capture_maxbytes', 'stdout_events_enabled', 'stdout_syslog', 'stderr_logfile', 'stderr_capture_maxbytes', diff --git a/supervisor/tests/test_supervisord.py b/supervisor/tests/test_supervisord.py index df1523296..06da53a4a 100644 --- a/supervisor/tests/test_supervisord.py +++ b/supervisor/tests/test_supervisord.py @@ -318,7 +318,7 @@ def make_pconfig(name, command, **params): result = { 'name': name, 'command': command, 'directory': None, 'umask': None, 'priority': 999, 'autostart': True, - 'autorestart': True, 'startsecs': 10, 'startretries': 999, + 
'autorestart': True, 'startsecs': 10, 'startretries': 999, 'restartpause': 2, 'uid': None, 'stdout_logfile': None, 'stdout_capture_maxbytes': 0, 'stdout_events_enabled': False, 'stdout_logfile_backups': 0, 'stdout_logfile_maxbytes': 0, From 76d2a4fcc9d939aefd7aa96b7791a1f19d1ad680 Mon Sep 17 00:00:00 2001 From: Raphael Hoegger Date: Wed, 30 Dec 2015 16:36:30 +0100 Subject: [PATCH 2/3] document new "restartpause" option --- docs/configuration.rst | 12 ++++++++++++ supervisor/skel/sample.conf | 1 + 2 files changed, 13 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index 6f0a7933a..914a20aa4 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -710,6 +710,18 @@ where specified. *Introduced*: 3.0 +``restartpause`` + + Adds a pause (in seconds) between successive failed start attempts - thus + throttles failed-start attemps and prevents massive load increase during + this. + + *Default*: 0 + + *Required*: No. + + *Introduced*: 4.0 (?) + ``autorestart`` Specifies if :program:`supervisord` should automatically restart a diff --git a/supervisor/skel/sample.conf b/supervisor/skel/sample.conf index a15a168ed..2cdcd6bc6 100644 --- a/supervisor/skel/sample.conf +++ b/supervisor/skel/sample.conf @@ -68,6 +68,7 @@ serverurl=unix:///tmp/supervisor.sock ; use a unix:// URL for a unix socket ;autostart=true ; start at supervisord start (default: true) ;startsecs=1 ; # of secs prog must stay up to be running (def. 
1) ;startretries=3 ; max # of serial start failures when starting (default 3) +;restartpause=0 ; number of seconds to wait after a failed start attempt ;autorestart=unexpected ; when to restart if exited after running (def: unexpected) ;exitcodes=0,2 ; 'expected' exit codes used with autorestart (default 0,2) ;stopsignal=QUIT ; signal used to kill process (default TERM) From 6c04d477a814dbc92d2c3addc2c868b7c45af46b Mon Sep 17 00:00:00 2001 From: Raphael Hoegger Date: Thu, 31 Dec 2015 07:52:53 +0100 Subject: [PATCH 3/3] fixing typo - s/attemps/attempts/ --- docs/configuration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 914a20aa4..9238bebe4 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -713,7 +713,7 @@ where specified. ``restartpause`` Adds a pause (in seconds) between successive failed start attempts - thus - throttles failed-start attemps and prevents massive load increase during + throttles failed-start attempts and prevents massive load increase during this. *Default*: 0