# config/qumomf.conf.yml

qumomf:
  # TCP port to listen on.
  port: ':8080'
  logging:
    # Verbosity level of logging: trace, debug, info, warn, error, fatal, panic.
    # To disable logging, pass an empty string.
    level: 'debug'
    # Write logs to the local syslog daemon.
    syslog_enabled: false
    # Write logs to a file.
    file_enabled: true
    # Absolute path to the log output file.
    file_name: '/var/log/qumomf.log'
    # The max size in MB of the log file before it is rolled.
    file_max_size: 256
    # The max number of rolled files to keep.
    file_max_backups: 3
    # The max age in days to keep a log file.
    file_max_age: 5
  # Indicates whether qumomf should run in the readonly mode:
  # no auto failover will be executed.
  # Can be overwritten by cluster-specific options.
  readonly: true
  # How often qumomf should discover the cluster topology.
  cluster_discovery_time: '5s'
  # How often qumomf should analyze the cluster state.
  cluster_recovery_time: '1s'
  # Qumomf avoids flapping (cascading failures causing continuous outage and elimination of resources)
  # by introducing a block period: on any given cluster, qumomf will not kick in automated recovery
  # on an interval smaller than said period.
  # It only applies to recoveries on the same cluster.
  # There is nothing to prevent concurrent recoveries running on different clusters.
  shard_recovery_block_time: '30m'
  # Similar to the shard_recovery_block_time option but defines the recovery block period
  # only for a single instance. Used during the vshard configuration recovery.
  instance_recovery_block_time: '10m'

  # How qumomf should choose a new master during the failover.
  # Available options: idle, smart.
  # See README for the description.
  # Can be overwritten by cluster-specific options.
  elector: 'smart'
  # On crash recovery, followers that are lagging more than the given LSN must not participate in the election.
  # A value of 0 disables this feature.
  reasonable_follower_lsn_lag: 500
  # On crash recovery, followers that are lagging more than the given duration must not participate in the election.
  # A value of 0 disables this feature.
  reasonable_follower_idle: '1m'

  # Hooks invoked through the recovery process.
  # These are arrays of commands invoked via shell, in particular bash.
  hooks:
    # Shell used to invoke hooks in the format "shell -c <command>".
    shell: 'bash'
    # Deadline timeout for basic hooks.
    timeout: '5s'
    # Deadline timeout for async hooks.
    timeout_async: '10m'
    # PreFailover hooks executed before the recovery process.
    pre_failover:
      - "echo 'Will recover from {failureType} on {failureCluster}' >> /tmp/qumomf_recovery.log"
    # PostSuccessfulFailover hooks executed after the successful recovery process.
    post_successful_failover:
      - "echo 'Recovered from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}; Successor: {successorURI}' >> /tmp/qumomf_recovery.log"
    # PostUnsuccessfulFailover hooks executed after the unsuccessful recovery process.
    post_unsuccessful_failover:
      - "echo 'Failed to recover from {failureType} on {failureCluster}. Set: {failureReplicaSetUUID}; Failed: {failedURI}' >> /tmp/qumomf_recovery.log"

  # Local persistent storage to save snapshots, recoveries and other useful data.
  storage:
    # Name of the storage file.
    # NOTE(review): this is a relative path; presumably resolved against the
    # working directory of the process - confirm.
    filename: 'qumomf.db'
    # NOTE(review): presumably the deadline for opening the storage - confirm.
    connect_timeout: '1s'
    # NOTE(review): presumably the deadline for a single storage query - confirm.
    query_timeout: '1s'

# Tarantool connection options.
# Can be overwritten by cluster-specific options.
connection:
  # Credentials are kept in this file in plain text.
  # NOTE(review): restrict read access to the file if real credentials are used.
  user: 'qumomf'
  password: 'qumomf'
  # NOTE(review): presumably the limit for establishing a connection - confirm.
  connect_timeout: '500ms'
  # NOTE(review): presumably the limit for a single request - confirm.
  request_timeout: '1s'

# List of all clusters.
clusters:
  # Cluster unique name.
  qumomf_sandbox:
    # Cluster-specific value of the global qumomf.readonly option.
    readonly: false

    # During the autodiscovery qumomf will use the information
    # read from tarantool instances.
    # You may want to override the URI of the instances.
    # Each entry maps a discovered URI (key) to the URI to use instead (value).
    override_uri_rules:
      'qumomf_1_m.ddk:3301': '127.0.0.1:9303'
      'qumomf_1_s.ddk:3301': '127.0.0.1:9304'
      'qumomf_2_m.ddk:3301': '127.0.0.1:9305'
      'qumomf_2_s_1.ddk:3301': '127.0.0.1:9306'
      'qumomf_2_s_2.ddk:3301': '127.0.0.1:9307'

    # List of all routers in the cluster.
    # Used to discover the cluster topology.
    routers:
      - name: 'router_1'
        uuid: 'router_1_uuid'
        addr: '127.0.0.1:9301'

  # Cluster unique name.
  qumomf_sandbox_2:
    # Cluster-specific value of the global qumomf.elector option.
    elector: 'idle'

    # Cluster-specific Tarantool connection options.
    connection:
      user: 'tnt'
      password: 'tnt'
      connect_timeout: '10s'
      request_timeout: '10s'

    # List of priorities for the cluster instances.
    priorities:
      'a3ef657e-eb9a-4730-b420-7ea78d52797d': 0
      'bd64dd00-161e-4c99-8b3c-d3c4635e18d2': 10
      'cc4cfb9c-11d8-4810-84d2-66cfbebb0f6e': -1  # exclude from the election process

    # List of all routers in the cluster.
    # Used to discover the cluster topology.
    routers:
      - name: 'sandbox2-router1'
        uuid: '38dbe90b-9bca-4766-a98c-f02e56ddf986'
        addr: '127.0.0.1:7301'