From 59b653f95f17f85350ad54740bbbeaf2456aca07 Mon Sep 17 00:00:00 2001 From: facundo Date: Mon, 29 Jul 2024 16:46:11 -0300 Subject: [PATCH 1/6] add failing format test --- main.go | 2 -- main_test.go | 77 +++++++++++++++++++++++++++++----------------------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/main.go b/main.go index 3859ab0..ec32d1c 100644 --- a/main.go +++ b/main.go @@ -31,8 +31,6 @@ var NowTimeFun = time.Now // overridable with NGTOP_LOGS_PATH env var const DEFAULT_PATH_PATTERN = "/var/log/nginx/access.log*" const DEFAULT_DB_PATH = "./ngtop.db" - -// TODO replace with 'combined' once alias support is added const DEFAULT_LOG_FORMAT = `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"` func main() { diff --git a/main_test.go b/main_test.go index 1d6c322..f060cee 100644 --- a/main_test.go +++ b/main_test.go @@ -107,11 +107,11 @@ xx.xx.xx.xx - - [24/Jul/2024:00:06:41 +0000] "GET /blog/a-note-on-essential-comp xx.xx.xx.xx - - [24/Jul/2024:00:06:41 +0000] "GET /blog/posdata-de-borges-y-bioy HTTP/1.1" 301 169 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9"` func TestBasicQuery(t *testing.T) { - columns, rows := runCommand(t, SAMPLE_LOGS, []string{}) + columns, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{}) assertEqual(t, columns, []string{"#reqs"}) assertEqual(t, rows[0][0], "11") - columns, rows = runCommand(t, SAMPLE_LOGS, []string{"url"}) + columns, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url"}) assertEqual(t, columns, []string{"path", "#reqs"}) assertEqual(t, len(rows), 5) assertEqual(t, rows[0], []string{"/feed.xml", "3"}) @@ -122,36 +122,36 @@ func TestBasicQuery(t *testing.T) { } func TestDateFiltering(t *testing.T) { - _, rows := runCommand(t, SAMPLE_LOGS, []string{}) + _, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{}) assertEqual(t, rows[0][0], "11") - _, rows = runCommand(t, SAMPLE_LOGS, []string{"-s", "1m"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"-s", "1m"}) assertEqual(t, rows[0][0], "3") - _, rows = runCommand(t, SAMPLE_LOGS, []string{"-u", "1m"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"-u", "1m"}) assertEqual(t, rows[0][0], "8") - _, rows = runCommand(t, SAMPLE_LOGS, []string{"-s", "4m", "-u", "1m"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"-s", "4m", "-u", "1m"}) assertEqual(t, rows[0][0], "1") - _, rows = runCommand(t, SAMPLE_LOGS, []string{"-s", "1h"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"-s", "1h"}) assertEqual(t, rows[0][0], "11") - _, rows = runCommand(t, SAMPLE_LOGS, []string{"-u", "1h"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"-u", "1h"}) assertEqual(t, rows[0][0], "0") } func TestLimit(t *testing.T) { - _, rows := runCommand(t, SAMPLE_LOGS, []string{"url"}) + _, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-l", "3"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-l", "3"}) assertEqual(t, len(rows), 3) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-l", "10"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-l", "10"}) assertEqual(t, len(rows), 8) // not that many distinct urls } func TestMultiField(t *testing.T) { - columns, rows := runCommand(t, SAMPLE_LOGS, []string{"url", "method"}) + columns, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "method"}) assertEqual(t, columns, []string{"path", "method", "#reqs"}) assertEqual(t, len(rows), 5) assertEqual(t, rows[0], []string{"/feed.xml", "GET", "3"}) @@ -160,19 +160,19 @@ func TestMultiField(t *testing.T) { assertEqual(t, rows[3][1], "GET") assertEqual(t, rows[4][1], "GET") - columns, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "status"}) + columns, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "status"}) assertEqual(t, columns, []string{"path", "status", "#reqs"}) assertEqual(t, len(rows), 5) assertEqual(t, rows[0], []string{"/feed.xml", "200", "3"}) assertEqual(t, rows[1], []string{"/feed", "301", "2"}) - columns, rows = runCommand(t, SAMPLE_LOGS, []string{"method", "status"}) + columns, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"method", "status"}) assertEqual(t, columns, []string{"method", "status", "#reqs"}) assertEqual(t, len(rows), 2) assertEqual(t, rows[0], []string{"GET", "301", "6"}) assertEqual(t, rows[1], []string{"GET", "200", "5"}) - columns, rows = runCommand(t, SAMPLE_LOGS, []string{"status", "method"}) + columns, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"status", "method"}) assertEqual(t, columns, []string{"status", "method", "#reqs"}) assertEqual(t, len(rows), 2) assertEqual(t, rows[0], []string{"301", "GET", "6"}) @@ -180,65 +180,74 @@ func TestMultiField(t *testing.T) { } func TestWhereFilter(t *testing.T) { - columns, rows := runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status=200"}) + columns, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status=200"}) assertEqual(t, columns, []string{"path", "#reqs"}) assertEqual(t, len(rows), 3) assertEqual(t, rows[0], []string{"/feed.xml", "3"}) assertEqual(t, rows[1][1], "1") assertEqual(t, rows[2][1], "1") - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status=301", "-l", "10"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status=301", "-l", "10"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "method=GET"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "method=GET"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "method=get"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "method=get"}) assertEqual(t, len(rows), 5) } func TestWhereMultipleValues(t *testing.T) { - _, rows := runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status=200", "-w", "status=301"}) + _, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status=200", "-w", "status=301"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status=200", "-w", "status=301", "-l", "10"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status=200", "-w", "status=301", "-l", "10"}) assertEqual(t, len(rows), 8) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "ua=feedi"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "ua=feedi"}) assertEqual(t, len(rows), 2) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "ua=feedi", "-w", "status=200"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "ua=feedi", "-w", "status=200"}) assertEqual(t, len(rows), 1) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "ua=feedi", "-w", "status=200", "-w", "status=301"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "ua=feedi", "-w", "status=200", "-w", "status=301"}) assertEqual(t, len(rows), 2) } func TestWherePattern(t *testing.T) { - _, rows := runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "url=/feed%"}) + _, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "url=/feed%"}) assertEqual(t, len(rows), 2) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "url=/blog/%"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "url=/blog/%"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status=3%"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status=3%"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status=2%"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status=2%"}) assertEqual(t, len(rows), 3) } func TestWhereNegation(t *testing.T) { - _, rows := runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status!=200", "-l", "10"}) + _, rows := runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status!=200", "-l", "10"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status!=301", "-l", "10"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status!=301", "-l", "10"}) assertEqual(t, len(rows), 3) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status!=2%", "-l", "10"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status!=2%", "-l", "10"}) assertEqual(t, len(rows), 5) - _, rows = runCommand(t, SAMPLE_LOGS, []string{"url", "-w", "status!=3%", "-l", "10"}) + _, rows = runCommand(t, DEFAULT_LOG_FORMAT, SAMPLE_LOGS, []string{"url", "-w", "status!=3%", "-l", "10"}) assertEqual(t, len(rows), 3) } +func TestCustomFormat(t *testing.T) { + format := `$remote_addr [$time_iso8601] $server_name $document_root $host $uri $content_type` + sample := `xx.xx.xx.xx [2024-07-29T19:38:49+00:00] jorge.olano.dev /var/www/jorge jorge.olano.dev /index.html - +xx.xx.xx.xx [2024-07-29T19:38:49+00:00] jorge.olano.dev /var/www/jorge jorge.olano.dev /assets/css/main.css -` + columns, rows := runCommand(t, format, sample, []string{}) + assertEqual(t, columns, []string{"#reqs"}) + assertEqual(t, rows[0][0], "2") +} + func TestMultipleLogFiles(t *testing.T) { // TODO // more than one file in a dir, honoring the glob pattern @@ -249,7 +258,7 @@ func TestMultipleLogFiles(t *testing.T) { // ------ HELPERS -------- -func runCommand(t *testing.T, logs string, cliArgs []string) ([]string, [][]string) { +func runCommand(t *testing.T, format string, logs string, cliArgs []string) ([]string, [][]string) { // write the logs to a temp file, and point the NGTOP_LOGS_PATH env to it logFile, err := os.CreateTemp("", "access.log") assertEqual(t, err, nil) @@ -265,7 +274,7 @@ func runCommand(t *testing.T, logs string, cliArgs []string) ([]string, [][]stri os.Args = append([]string{"ngtop"}, cliArgs...) _, spec := querySpecFromCLI() - parser := NewParser(DEFAULT_LOG_FORMAT) + parser := NewParser(format) dbs, err := InitDB(dbFile.Name(), parser.Fields) assertEqual(t, err, nil) defer dbs.Close() From 6b038b480d429daa8c06feaf3570a133dce59db5 Mon Sep 17 00:00:00 2001 From: facundo Date: Mon, 29 Jul 2024 16:46:37 -0300 Subject: [PATCH 2/6] fix variable name parsing --- parser.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/parser.go b/parser.go index 5320f99..89cbd29 100644 --- a/parser.go +++ b/parser.go @@ -135,7 +135,7 @@ func formatToRegex(format string) *regexp.Regexp { } else { // found a varname, process it varname := "" - for j := i + 1; j < len(format) && ((chars[j] >= 'a' && chars[j] <= 'z') || chars[j] == '_'); j++ { + for j := i + 1; j < len(format) && isVariableNameRune(chars[j]); j++ { varname += string(chars[j]) } i += len(varname) @@ -163,6 +163,10 @@ func formatToRegex(format string) *regexp.Regexp { return regexp.MustCompile(newFormat) } +func isVariableNameRune(char rune) bool { + return (char >= 'a' && char <= 'z') || char == '_' || (char >= '0' && char <= '9') +} + func parseLogLine(pattern *regexp.Regexp, line string) (map[string]string, error) { match := pattern.FindStringSubmatch(line) if match == nil { From 8770ddc3da909e558792e8a98306c2f4356a6633 Mon Sep 17 00:00:00 2001 From: facundo Date: Mon, 29 Jul 2024 16:51:41 -0300 Subject: [PATCH 3/6] add iso time field --- fields.go | 14 ++++++++++++++ main_test.go | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fields.go b/fields.go index 7f705e3..a07b361 100644 --- a/fields.go +++ b/fields.go @@ -33,6 +33,12 @@ var KNOWN_FIELDS = []LogField{ ColumnSpec: "TIMESTAMP NOT NULL", Parse: parseTime, }, + { + LogFormatVar: "time_iso8601", + ColumnName: "time", + ColumnSpec: "TIMESTAMP NOT NULL", + Parse: parseIsoTime, + }, { LogFormatVar: "request", ColumnName: "request_raw", @@ -136,6 +142,14 @@ func parseTime(timestamp string) string { return t.Format(DB_DATE_LAYOUT) } +func parseIsoTime(timestamp string) string { + t, err := time.Parse("2006-01-02T15:04:05-07:00", timestamp) + if err != nil { + panic("can't parse log timestamp " + timestamp) + } + return t.Format(DB_DATE_LAYOUT) +} + func parseRequestDerivedFields(request string) map[string]string { result := make(map[string]string) request_parts := strings.Split(request, " ") diff --git a/main_test.go b/main_test.go index f060cee..06ba609 100644 --- a/main_test.go +++ b/main_test.go @@ -241,8 +241,8 @@ func TestWhereNegation(t *testing.T) { func TestCustomFormat(t *testing.T) { format := `$remote_addr [$time_iso8601] $server_name $document_root $host $uri $content_type` - sample := `xx.xx.xx.xx [2024-07-29T19:38:49+00:00] jorge.olano.dev /var/www/jorge jorge.olano.dev /index.html - -xx.xx.xx.xx [2024-07-29T19:38:49+00:00] jorge.olano.dev /var/www/jorge jorge.olano.dev /assets/css/main.css -` + sample := `xx.xx.xx.xx [2024-07-24T00:00:49+00:00] jorge.olano.dev /var/www/jorge jorge.olano.dev /index.html - +xx.xx.xx.xx [2024-07-24T00:00:51+00:00] jorge.olano.dev /var/www/jorge jorge.olano.dev /assets/css/main.css -` columns, rows := runCommand(t, format, sample, []string{}) assertEqual(t, columns, []string{"#reqs"}) assertEqual(t, rows[0][0], "2") From 8331087be0456d79e0e870e0387c1c79a9cd22c2 Mon Sep 17 00:00:00 2001 From: facundo Date: Mon, 29 Jul 2024 17:02:41 -0300 Subject: [PATCH 4/6] add a few more fields --- fields.go | 14 +++++++++++++- main_test.go | 9 +++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fields.go b/fields.go index a07b361..99d25da 100644 --- a/fields.go +++ b/fields.go @@ -72,13 +72,25 @@ var KNOWN_FIELDS = []LogField{ ColumnName: "status", ColumnSpec: "INTEGER", }, + { + LogFormatVar: "uri", + CLINames: []string{"path", "url", "uri"}, + ColumnName: "path", + ColumnSpec: "TEXT", + }, + { + LogFormatVar: "host", + CLINames: []string{"host", "server"}, + ColumnName: "host", + ColumnSpec: "TEXT", + }, { CLINames: []string{"method"}, ColumnName: "method", ColumnSpec: "TEXT COLLATE NOCASE", }, { - CLINames: []string{"path", "url"}, + CLINames: []string{"path", "url", "uri"}, ColumnName: "path", ColumnSpec: "TEXT", }, diff --git a/main_test.go b/main_test.go index 06ba609..f5dd092 100644 --- a/main_test.go +++ b/main_test.go @@ -246,6 +246,15 @@ xx.xx.xx.xx [2024-07-24T00:00:51+00:00] jorge.olano.dev /var/www/jorge jorge.ola columns, rows := runCommand(t, format, sample, []string{}) assertEqual(t, columns, []string{"#reqs"}) assertEqual(t, rows[0][0], "2") + + columns, rows = runCommand(t, format, sample, []string{"uri"}) + assertEqual(t, columns, []string{"path", "#reqs"}) + assertEqual(t, len(rows), 2) + + columns, rows = runCommand(t, format, sample, []string{"host"}) + assertEqual(t, columns, []string{"host", "#reqs"}) + assertEqual(t, len(rows), 1) + assertEqual(t, rows[0][0], "jorge.olano.dev") } func TestMultipleLogFiles(t *testing.T) { From 71564436d0d866137a5d6de2e34146e36cdc6a3b Mon Sep 17 00:00:00 2001 From: facundo Date: Mon, 29 Jul 2024 17:05:08 -0300 Subject: [PATCH 5/6] remove duplicated init --- parser.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/parser.go b/parser.go index 89cbd29..ee6d2bf 100644 --- a/parser.go +++ b/parser.go @@ -12,18 +12,6 @@ import ( "time" ) -func init() { - for _, field := range KNOWN_FIELDS { - COLUMN_NAME_TO_FIELD[field.ColumnName] = &field - if field.LogFormatVar != "" { - LOGVAR_TO_FIELD[field.LogFormatVar] = &field - } - for _, name := range field.CLINames { - CLI_NAME_TO_FIELD[name] = &field - } - } -} - const LOG_DATE_LAYOUT = "02/Jan/2006:15:04:05 -0700" type LogParser struct { From 88f2f7155dc09f3f68753206f4b2991edeb86f9f Mon Sep 17 00:00:00 2001 From: facundo Date: Mon, 29 Jul 2024 17:54:10 -0300 Subject: [PATCH 6/6] fix mess between competing fields --- parser.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/parser.go b/parser.go index ee6d2bf..c983c99 100644 --- a/parser.go +++ b/parser.go @@ -27,13 +27,14 @@ func NewParser(format string) *LogParser { // pick the subset of fields deducted from the regex, plus their derived fields // use a map to remove duplicates fieldSubset := make(map[string]*LogField) - for _, name := range parser.formatRegex.SubexpNames() { - if name == "" { + for _, logvar := range parser.formatRegex.SubexpNames() { + if logvar == "" { continue } - fieldSubset[name] = COLUMN_NAME_TO_FIELD[name] + field := LOGVAR_TO_FIELD[logvar] + fieldSubset[field.ColumnName] = field - for _, derived := range COLUMN_NAME_TO_FIELD[name].DerivedFields { + for _, derived := range field.DerivedFields { fieldSubset[derived] = COLUMN_NAME_TO_FIELD[derived] } } @@ -129,13 +130,12 @@ func formatToRegex(format string) *regexp.Regexp { i += len(varname) // write the proper capture group to the format regex pattern - if field, isKnownField := LOGVAR_TO_FIELD[varname]; isKnownField { + if _, isKnownField := LOGVAR_TO_FIELD[varname]; isKnownField { // if the var matches a field we care to extract, use a named group - groupname := field.ColumnName if previousWasSpace { - newFormat += "(?P<" + groupname + ">\\S+)" + newFormat += "(?P<" + varname + ">\\S+)" } else { - newFormat += "(?P<" + groupname + ">.*?)" + newFormat += "(?P<" + varname + ">.*?)" } } else { // otherwise just add a nameless group that ensures matching @@ -162,13 +162,13 @@ func parseLogLine(pattern *regexp.Regexp, line string) (map[string]string, error } result := make(map[string]string) - for i, name := range pattern.SubexpNames() { - field := COLUMN_NAME_TO_FIELD[name] - if name != "" && match[i] != "-" { + for i, logvar := range pattern.SubexpNames() { + field := LOGVAR_TO_FIELD[logvar] + if logvar != "" && match[i] != "-" { if field.Parse != nil { - result[name] = field.Parse(match[i]) + result[field.ColumnName] = field.Parse(match[i]) } else { - result[name] = match[i] + result[field.ColumnName] = match[i] } if field.ParseDerivedFields != nil {