Skip to content

Commit

Permalink
document parser
Browse files Browse the repository at this point in the history
  • Loading branch information
facundoolano committed Jul 30, 2024
1 parent b63ae07 commit fc9afe5
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
2 changes: 1 addition & 1 deletion main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ xx.xx.xx.xx [2024-07-24T00:00:51+00:00] jorge.olano.dev /var/www/jorge jorge.ola
}

func TestMultipleLogFiles(t *testing.T) {
// TODO
// TODO implement test
// more than one file in a dir, honoring the glob pattern
// include gzipped value
}
Expand Down
19 changes: 16 additions & 3 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@ import (
const LOG_DATE_LAYOUT = "02/Jan/2006:15:04:05 -0700"

type LogParser struct {
// The regular expression pattern used to extract fields from log entries.
// Derived from a format string.
formatRegex *regexp.Regexp
Fields []*LogField
// The list of fields that can be expected to be extracted from an entry by this parser.
// Results from the known fields in the format variables, plus their derived fields.
// The parser result values will be in the same order as in this slice.
Fields []*LogField
}

// Returns a new parser instance prepared to process logs in the given format.
func NewParser(format string) *LogParser {
parser := LogParser{
formatRegex: formatToRegex(format),
Expand Down Expand Up @@ -48,8 +54,10 @@ func NewParser(format string) *LogParser {
return &parser
}

// Parse the fields in the nginx access logs since the `until` time, passing them as a map into the `processFun`.
// Parse the fields in the nginx access logs since the `until` time, passing them as a slice to the `processFun`,
// in the same order as they appear in `parser.Fields`.
// Processing is interrupted when a log older than `until` is found.
// Files with '.gz' extension are gzip decompressed before processing; the rest are assumed to be plain text.
func (parser LogParser) Parse(
logFiles []string,
until *time.Time,
Expand Down Expand Up @@ -110,7 +118,9 @@ func (parser LogParser) Parse(
return nil
}

// TODO
// Constructs a regular expression from the given format string, converting known variable names
// as expressed in nginx log format expressions (e.g. `$remote_addr`) into named capture groups
// (e.g. `(?P<remote_addr>\S+)`).
func formatToRegex(format string) *regexp.Regexp {
chars := []rune(format)
var newFormat string
Expand Down Expand Up @@ -156,6 +166,9 @@ func isVariableNameRune(char rune) bool {
return (char >= 'a' && char <= 'z') || char == '_' || (char >= '0' && char <= '9')
}

// Parses the given `line` according to the given `pattern`, passing captured variables
// to parser and derived parser functions as specified by the corresponding LogField.
// Extracted fields are returned as a map with field.ColumnName as key.
func parseLogLine(pattern *regexp.Regexp, line string) (map[string]string, error) {
match := pattern.FindStringSubmatch(line)
if match == nil {
Expand Down

0 comments on commit fc9afe5

Please sign in to comment.