diff --git a/build/Dockerfile b/build/Dockerfile new file mode 100644 index 0000000..ed56200 --- /dev/null +++ b/build/Dockerfile @@ -0,0 +1,16 @@ +FROM alpine:3.18 + +RUN sniper \ + && adduser -S sniper -u 1000 -G sniper + +RUN apk add --no-cache bash=5.2.15-r5 + +# config.yaml should be a configmap mounted as /app/config.yaml +# credentials.yaml should be a secret mounted as /app/credentials.yaml (or whatever path is configured in config.yaml) +COPY --chown=root:root --chmod=755 query-sniper /app/ + +WORKDIR /app + +USER sniper + +ENTRYPOINT ["/app/query-sniper"] \ No newline at end of file diff --git a/build/dev.Dockerfile b/build/dev.Dockerfile new file mode 100644 index 0000000..2c21d8f --- /dev/null +++ b/build/dev.Dockerfile @@ -0,0 +1,37 @@ +# syntax=docker/dockerfile:1 + +# Stage 1 +FROM golang:1.21.5-alpine AS builder + +ARG BUILD_SHA +ARG BUILD_TIME +ARG VERSION + +ENV GO111MODULE=on + +# Set destination for COPY +WORKDIR /build + +COPY go.sum go.mod ./ + +RUN go mod download + +COPY . . + +RUN CGO_ENABLED="0" go build -o query-sniper cmd/query-sniper/main.go + +# Stage 2 +FROM alpine:3.18.4 as runner + +RUN addgroup sniper && adduser -S sniper -u 1000 -G sniper + +RUN apk add --no-cache bash + +WORKDIR /app + +# FIXME: missing the configs, I haven't decided how to deal with that yet. +COPY --chown=sniper:sniper --from=builder --chmod=700 /build/query-sniper /app/ + +USER sniper + +ENTRYPOINT ["/app/query-sniper"] \ No newline at end of file diff --git a/cmd/query-sniper/main.go b/cmd/query-sniper/main.go new file mode 100644 index 0000000..bb2d8b8 --- /dev/null +++ b/cmd/query-sniper/main.go @@ -0,0 +1,64 @@ +package main + +import ( + "context" + "log/slog" + "os" + "os/signal" + "syscall" + + "github.com/persona-id/query-sniper/internal/configuration" + "github.com/persona-id/query-sniper/internal/sniper" +) + +func main() { + settings, err := configuration.Configure() + if err != nil { + slog.Error("Error in Configure()", slog.Any("err", err)) + os.Exit(1) + } + + setupLogger(settings.LogLevel) + + ctx, cancelFunc := context.WithCancel(context.Background()) + + go sniper.Run(ctx, settings) + + termChan := make(chan os.Signal, 1) + signal.Notify(termChan, syscall.SIGINT, syscall.SIGTERM) + <-termChan + + slog.Info("QuerySniper received TERM signal, shutting down") + + cancelFunc() + + slog.Info("QuerySniper shut down") +} + +func setupLogger(level string) { + var logLevel slog.Level + + switch level { + case "DEBUG": + logLevel = slog.LevelDebug + case "INFO": + logLevel = slog.LevelInfo + case "WARN": + logLevel = slog.LevelWarn + case "ERROR": + logLevel = slog.LevelError + default: + logLevel = slog.LevelInfo + } + + opts := &slog.HandlerOptions{ + AddSource: true, + Level: logLevel, + } + + var handler slog.Handler = slog.NewTextHandler(os.Stdout, opts) + + logger := slog.New(handler) + + slog.SetDefault(logger) +} diff --git a/cmd/query-sniper/main_test.go b/cmd/query-sniper/main_test.go new file mode 100644 index 0000000..06ab7d0 --- /dev/null +++ b/cmd/query-sniper/main_test.go @@ -0,0 +1 @@ +package main diff --git a/configs/config.yaml b/configs/config.yaml new file mode 100644 index 0000000..abe5ba0 --- /dev/null +++ b/configs/config.yaml @@ -0,0 +1,37 @@ +--- +# Path to the crednetials secret +credentials: configs/credentials.yaml + +log_level: DEBUG + +# List of databases to watch and various configuration options for them. The login creds +# are stored in the file defined above, and the keys to each database entry must match. +databases: + us1_primary: + address: 192.168.194.152:6033 + schema: persona-web-us1 + replica_lag_limit: 50s + hll_limit: 50 + long_query_limit: 60s + interval: 1s + us1_replica0: + address: 192.168.194.152:6033 + schema: persona-web-us1 + replica_lag_limit: 60s + hll_limit: 60 + long_query_limit: 120s + interval: 5s + us2_primary: + address: 192.168.194.152:6033 + schema: persona-web-us2 + replica_lag_limit: 70s + hll_limit: 70 + long_query_limit: 60s + interval: 1s + us2_replica0: + address: 192.168.194.152:6033 + schema: persona-web-us2 + replica_lag_limit: 80s + hll_limit: 80 + long_query_limit: 120s + interval: 5s diff --git a/configs/credentials.yaml b/configs/credentials.yaml new file mode 100644 index 0000000..b1e9403 --- /dev/null +++ b/configs/credentials.yaml @@ -0,0 +1,16 @@ +--- +# Auth credentials for the configured databases. +# NB: The `databases` key needs to match the keys in the config file. +databases: + us1_primary: + username: persona-web-us1 + password: persona-web-us1 + us1_replica0: + username: persona-web-us1-ro + password: persona-web-us1-ro + us2_primary: + username: persona-web-us2 + password: persona-web-us2 + us2_replica0: + username: persona-web-us2-ro + password: persona-web-us2-ro diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..2196a03 --- /dev/null +++ b/go.mod @@ -0,0 +1,34 @@ +module github.com/persona-id/query-sniper + +go 1.21.5 + +require ( + github.com/go-sql-driver/mysql v1.7.1 + github.com/openark/golib v0.0.0-20210531070646-355f37940af8 + github.com/spf13/pflag v1.0.5 + github.com/spf13/viper v1.18.1 + github.com/stretchr/testify v1.8.4 +) + +require ( + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect + github.com/magiconair/properties v1.8.7 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/pelletier/go-toml/v2 v2.1.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/sagikazarmark/locafero v0.4.0 // indirect + github.com/sagikazarmark/slog-shim v0.1.0 // indirect + github.com/sourcegraph/conc v0.3.0 // indirect + github.com/spf13/afero v1.11.0 // indirect + github.com/spf13/cast v1.6.0 // indirect + github.com/subosito/gotenv v1.6.0 // indirect + go.uber.org/atomic v1.9.0 // indirect + go.uber.org/multierr v1.9.0 // indirect + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect + golang.org/x/sys v0.15.0 // indirect + golang.org/x/text v0.14.0 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..45cc570 --- /dev/null +++ b/go.sum @@ -0,0 +1,73 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= +github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/openark/golib v0.0.0-20210531070646-355f37940af8 h1:9ciIHNuyFqRWi9NpMNw9sVLB6z1ItpP5ZhTY9Q1xVu4= +github.com/openark/golib v0.0.0-20210531070646-355f37940af8/go.mod h1:1jj8x1eDVZxgc/Z4VyamX4qTbAdHPUQA6NeVtCd8Sl8= +github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4= +github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= +github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= +github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= +github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= +github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= +github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= +github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= +github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= +github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= +github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.18.1 h1:rmuU42rScKWlhhJDyXZRKJQHXFX02chSVW1IvkPGiVM= +github.com/spf13/viper v1.18.1/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= +go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= +golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc= +golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/configuration/configuration.go b/internal/configuration/configuration.go new file mode 100644 index 0000000..c6ea73e --- /dev/null +++ b/internal/configuration/configuration.go @@ -0,0 +1,95 @@ +package configuration + +import ( + "errors" + "fmt" + "os" + "time" + + "github.com/spf13/pflag" + "github.com/spf13/viper" +) + +type Config struct { + LogLevel string `mapstructure:"log_level"` + Credentials string `mapstructure:"credentials"` + Databases map[string]struct { + Address string `mapstructure:"address"` + Schema string `mapstructure:"schema"` + Username string `mapstructure:"username"` + Password string `mapstructure:"password"` + ReplicaLagLimit time.Duration `mapstructure:"replica_lag_limit"` + HLLLimit int `mapstructure:"hll_limit"` + Interval time.Duration `mapstructure:"interval"` + LongQueryLimit time.Duration `mapstructure:"long_query_limit"` + } `mapstructure:"databases"` +} + +func Configure() (*Config, error) { + if file := os.Getenv("SNIPER_CONFIG_FILE"); file != "" { + // if the config file path is specified in the env, load that + viper.SetConfigFile(file) + } else { + // otherwise setup some default locations + viper.SetConfigName("config") + viper.SetConfigType("yaml") + viper.AddConfigPath(".") + viper.AddConfigPath("configs") + } + + // read the config file, if it exists. if not, keep on truckin' + if err := viper.ReadInConfig(); err != nil { + errVal := viper.ConfigFileNotFoundError{} + if ok := errors.As(err, &errVal); !ok { + return nil, err + } + } + + // load the credentials config and merge it into the existing configuration. + if file := os.Getenv("SNIPER_CREDS_FILE"); file != "" { + viper.SetConfigFile(file) + + err := viper.MergeInConfig() + if err != nil { + return nil, err + } + } else { + if creds := viper.GetViper().GetString("credentials"); creds != "" { + viper.SetConfigFile(creds) + + err := viper.MergeInConfig() + if err != nil { + return nil, err + } + } + } + + pflag.Bool("show-config", false, "Dump the configuration for debugging") + + err := pflag.CommandLine.MarkHidden("show-config") + if err != nil { + return nil, err + } + + pflag.Parse() + + err = viper.BindPFlags(pflag.CommandLine) + if err != nil { + return nil, err + } + + // we are only dumping the config if the secret flag show-config is specified, because the config + // contains the proxysql admin password + if viper.GetViper().GetBool("show-config") { + fmt.Println("settings", viper.GetViper().AllSettings()) + } + + settings := &Config{} + + err = viper.Unmarshal(settings) + if err != nil { + return nil, err + } + + return settings, nil +} diff --git a/internal/configuration/configuration_test.go b/internal/configuration/configuration_test.go new file mode 100644 index 0000000..baf1ab3 --- /dev/null +++ b/internal/configuration/configuration_test.go @@ -0,0 +1,41 @@ +package configuration + +import ( + "os" + "path/filepath" + "testing" + "time" + + "github.com/spf13/viper" + "github.com/stretchr/testify/assert" +) + +func TestConfigFile(t *testing.T) { + viper.Reset() + + currentDir, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + + // Get the absolute path to the test config files, to prevent copying them around in the test. + configFilePath := filepath.Join(currentDir, "../../test/test_config.yaml") + credentialsFilePath := filepath.Join(currentDir, "../../test/test_credentials.yaml") + + t.Setenv("SNIPER_CONFIG_FILE", configFilePath) + t.Setenv("SNIPER_CREDS_FILE", credentialsFilePath) + + config, err := Configure() + assert.NoError(t, err, "Configuration should not return an error") + + assert.Equal(t, config.Credentials, "test/test_credentials.yaml") + + assert.Equal(t, "127.0.0.1:3306", config.Databases["test_primary"].Address) + assert.Equal(t, "primary_user", config.Databases["test_primary"].Username) + assert.Equal(t, 15*time.Minute, config.Databases["test_primary"].ReplicaLagLimit) + assert.Equal(t, 60*time.Second, config.Databases["test_primary"].LongQueryLimit) + + assert.Equal(t, "test_schema", config.Databases["test_primary"].Schema) + assert.Equal(t, "replica_pass", config.Databases["test_replica"].Password) + assert.Equal(t, 120, config.Databases["test_replica"].HLLLimit) +} diff --git a/internal/sniper/sniper.go b/internal/sniper/sniper.go new file mode 100644 index 0000000..7e565db --- /dev/null +++ b/internal/sniper/sniper.go @@ -0,0 +1,323 @@ +package sniper + +import ( + "bytes" + "context" + "database/sql" + "fmt" + "log/slog" + "strings" + "sync" + "text/template" + "time" + + // Import the mysql driver functionality. + _ "github.com/go-sql-driver/mysql" + "github.com/openark/golib/sqlutils" + "github.com/persona-id/query-sniper/internal/configuration" +) + +type QuerySniper struct { + // The name will just be the database name from the config. + Name string + Connection *sql.DB + ReplicaLagLimit time.Duration + HLLLimit int + CheckLag bool + QueryLimit time.Duration + Interval time.Duration + Schema string + LRQQuery string +} + +type MysqlProcess struct { + ID int `db:"ID"` + DB sql.NullString `db:"DB"` + State sql.NullString `db:"STATE"` + Command string `db:"COMMAND"` + Time int `db:"TIME"` + Info sql.NullString `db:"INFO"` +} + +// Sniper constructor +// +// Parameters: +// - name: name of the sniper, the value is the databases key from the config by default. +// - *configuration.Config: the Viper config struct with all of the application configuration +// including database specific values. +// +// Returns: +// - QuerySniper: the new sniper with the correct configuration, or a blank struct on error. +// - error: any errors that occur, or nil. +func New(name string, settings *configuration.Config) (QuerySniper, error) { + // setch all database specific config via key name + dbConfig := settings.Databases[name] + dsn := fmt.Sprintf("%s:%s@tcp(%s)/", dbConfig.Username, dbConfig.Password, dbConfig.Address) + + db, err := sql.Open("mysql", dsn) + if err != nil { + return QuerySniper{}, err + } + + sniper := QuerySniper{ + Name: name, + Connection: db, + ReplicaLagLimit: dbConfig.ReplicaLagLimit, + HLLLimit: dbConfig.HLLLimit, + QueryLimit: dbConfig.LongQueryLimit, + Interval: dbConfig.Interval, + Schema: dbConfig.Schema, + } + + query, err := sniper.generateHunterQuery() + if err != nil { + return QuerySniper{}, err + } + + sniper.LRQQuery = query + + slog.Info("Created new sniper", + slog.String("name", sniper.Name), + slog.String("address", dbConfig.Address), + slog.String("schema", sniper.Schema), + slog.String("username", dbConfig.Username), + slog.Duration("lag_limit", sniper.ReplicaLagLimit), + slog.Int("hll_limit", sniper.HLLLimit), + slog.Duration("query_limit", sniper.QueryLimit), + slog.Duration("interval", sniper.Interval), + slog.String("hunt_query", sniper.LRQQuery), + ) + + return sniper, nil +} + +// Process the configuration file and construct all required QuerySnipers from it. +// +// Parameters: +// - context.Context: the background context for the sniper to use. +// - *configuration.Config: settings struct, which is loaded from the config files by go-viper. +func Run(ctx context.Context, settings *configuration.Config) { + var wg sync.WaitGroup + + for dbName := range settings.Databases { + // FIXME: maybe dont pass in all of settings? + sniper, err := New(dbName, settings) + if err != nil { + slog.Error("Error in createSnipers()", slog.Any("err", err)) + continue + } + + wg.Add(1) + + go func(ctx context.Context, s *QuerySniper) { + defer wg.Done() + s.Loop(ctx) + }(ctx, &sniper) + } + + wg.Wait() +} + +// Loop that runs in the background and checks for lag / kills queries and calls the Kill() command on queries +// that need to be removed. +// +// Parameters: +// - context.Context: the background context for the sniper to use in the loop. +func (sniper QuerySniper) Loop(ctx context.Context) { + ticker := time.NewTicker(sniper.Interval) + + for { + select { + case <-ctx.Done(): + ticker.Stop() + return + + case <-ticker.C: + lagged, err := sniper.DetectLag() + if err != nil { + slog.Error("Error in sniper.DetectLag()", slog.String("instance", sniper.Name), slog.Any("err", err)) + } + + if lagged { + slog.Warn("Lag detected, running sniper") + + queries, err := sniper.GetLongRunningQueries() + if err != nil { + slog.Error("Error in sniper.GetLongRunningQueries()", slog.Any("err", err)) + } + + slog.Info("LRQS", slog.Any("Queries", queries)) + + sniper.KillProcesses(queries) + } else { + slog.Debug("No lag detected, sleeping", slog.Duration("interval", sniper.Interval)) + } + } + } +} + +// Check for either elevated history list length or replica lag. If the instance is not a replica, +// the replica lag check should return false. +// See more info at: https://lefred.be/content/a-graph-a-day-keeps-the-doctor-away-mysql-history-list-length/ +// +// Returns: +// - bool: true if either HLL or replica lag is elevated, based on the configuration values of the sniper; returns false on error. +// - error: any errors that occur, or nil. +func (sniper QuerySniper) DetectLag() (bool, error) { + // HLL specific + query := "select count from information_schema.innodb_metrics where name = 'trx_rseg_history_len'" + + rows, err := sniper.Connection.Query(query) + if err != nil { + return false, err + } + defer rows.Close() + + hllCount := 0 + + err = sniper.Connection.QueryRow(query).Scan(&hllCount) + if err != nil { + return false, err + } + + if hllCount > sniper.HLLLimit { + return true, nil + } + + // Now check for replica lag + lag, err := sniper.GetReplicationLagFromSlaveStatus() + if err != nil { + return false, err + } + + if lag > sniper.ReplicaLagLimit { + return true, nil + } + + return false, nil +} + +// Get the value of replication lag, if it exists. +// This function was copied from https://github.com/github/gh-ost/ because it makes this tedious process +// much easier. Since you can't just do "select replication_lag from information_schema" or the like, this +// would required a LOT of extra code to parse the value. +// +// Returns: +// - time.Duration: replication lag in seconds, or nil on error +// - error: any errors that occur, or nil +func (sniper QuerySniper) GetReplicationLagFromSlaveStatus() (replicationLag time.Duration, err error) { + err = sqlutils.QueryRowsMap(sniper.Connection, `show slave status`, func(m sqlutils.RowMap) error { + slaveIORunning := m.GetString("Slave_IO_Running") + slaveSQLRunning := m.GetString("Slave_SQL_Running") + secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master") + + if !secondsBehindMaster.Valid { + return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning) + } + + replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second + return nil + }) + + return replicationLag, err +} + +// Finds all long running queries, using the query generated by generateHunterQuery(). +// +// Returns: +// - []MysqlProcess: list of long running processes, or a blank list in the event of an error. +// - error: any errors that occur, or nil. +func (sniper QuerySniper) GetLongRunningQueries() ([]MysqlProcess, error) { + rows, err := sniper.Connection.Query(sniper.LRQQuery) + if err != nil { + return []MysqlProcess{}, err + } + defer rows.Close() + + var processes []MysqlProcess + + for rows.Next() { + var process MysqlProcess + + err := rows.Scan(&process.ID, &process.DB, &process.State, &process.Command, &process.Time, &process.Info) + if err != nil { + return []MysqlProcess{}, err + } + + processes = append(processes, process) + } + + return processes, nil +} + +// Kills all of the processes in the []MysqlProcess list parameter. +// +// Parameters: +// - []MysqlProcess, list of slow queries that need to be killed. +// +// Returns: +// - count of killed queries. +func (sniper QuerySniper) KillProcesses(processes []MysqlProcess) int { + killed := 0 + + for _, process := range processes { + if process.ID <= 0 { + continue + } + + killQuery := fmt.Sprintf("KILL %d", process.ID) + + _, err := sniper.Connection.Exec(killQuery) + if err != nil { + // We log here, rather than returning err, because we don't want to stop processing all of the other queries. + slog.Error("Error killing process ID", slog.Int("process_id", process.ID), slog.Any("err", err)) + continue + } + + killed++ + } + + return killed +} + +// Generate the query used to find long running queries; uses templating to interpolate values. +// +// Returns: +// - string: the mysql query for the sniper to use +// - error: any errors that occur, or nil +func (sniper QuerySniper) generateHunterQuery() (string, error) { + tmpl := template.Must(template.New("query hunter").Parse(` + SELECT ID, DB, STATE, COMMAND, TIME, INFO + FROM information_schema.PROCESSLIST + WHERE COMMAND NOT IN ('Sleep', 'Killed') + AND INFO NOT LIKE '%PROCESSLIST%' + AND DB IS NOT NULL + {{.TimeFilter}} + {{.DBFilter}} + ORDER BY TIME DESC`)) + + type QueryParams struct { + TimeFilter string + DBFilter string + } + + // convert the duration into seconds for use in the query + timeout := sniper.QueryLimit.Seconds() + + params := QueryParams{ + TimeFilter: fmt.Sprintf("AND TIME >= %d", int(timeout)), + // FIXME: make this able to support multiple databases with IN? + DBFilter: fmt.Sprintf("AND DB in ('%s')", sniper.Schema), + } + + var queryBytes bytes.Buffer + + err := tmpl.Execute(&queryBytes, params) + if err != nil { + return "", err + } + + result := strings.Join(strings.Fields(queryBytes.String()), " ") + + return result, nil +} diff --git a/internal/sniper/sniper_test.go b/internal/sniper/sniper_test.go new file mode 100644 index 0000000..3dcca0e --- /dev/null +++ b/internal/sniper/sniper_test.go @@ -0,0 +1 @@ +package sniper diff --git a/test/test_config.yaml b/test/test_config.yaml new file mode 100644 index 0000000..a27df0c --- /dev/null +++ b/test/test_config.yaml @@ -0,0 +1,18 @@ +--- +credentials: test/test_credentials.yaml +log_level: DEBUG +databases: + test_primary: + address: 127.0.0.1:3306 + schema: test_schema + replica_lag_limit: 15m + hll_limit: 60 + long_query_limit: 60s + interval: 5s + test_replica: + address: 127.0.0.1:3307 + schema: test_schema + replica_lag_limit: 5m + hll_limit: 120 + long_query_limit: 1m + interval: 1m diff --git a/test/test_credentials.yaml b/test/test_credentials.yaml new file mode 100644 index 0000000..8e69ebb --- /dev/null +++ b/test/test_credentials.yaml @@ -0,0 +1,8 @@ +--- +databases: + test_primary: + username: primary_user + password: primary_pass + test_replica: + username: replica_user + password: replica_pass