Skip to content

Commit

Permalink
Merge pull request #309 from telenornms/feat/banfield_transformer
Browse files Browse the repository at this point in the history
Add banfield transformer
  • Loading branch information
kamilernerd authored Oct 23, 2024
2 parents f76c5fa + 0fae1f3 commit c32ee39
Show file tree
Hide file tree
Showing 5 changed files with 259 additions and 14 deletions.
13 changes: 13 additions & 0 deletions transformer/auto.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ func init() {
Alloc: func() interface{} { return &Replace{} },
Help: "Uses a regular expression to replace the content of a metadata key, storing it to either a different metadata key, or overwriting the original.",
})
Auto.Add(skogul.Module{
Name: "replacedata",
Aliases: []string{},
Alloc: func() interface{} { return &ReplaceData{} },
Help: "Uses a regular expression to replace the content of a data key, storing it to either a different data key, or overwriting the original.",
})
Auto.Add(skogul.Module{
Name: "switch",
Aliases: []string{},
Expand Down Expand Up @@ -122,4 +128,11 @@ func init() {
Help: "Ban values from nested structure using a path e.g. Path looking like this foo.bar.1 has a structure looking like this { foo: { bar: { 1: hello } } }. The last element in the path will get removed from the tree in this case 1: hello, you will end up having a tree looking like this { foo: { bar: {} } }.",
AutoMake: false,
})
Auto.Add(skogul.Module{
Name: "banfield",
Aliases: []string{},
Alloc: func() interface{} { return &BanField{} },
Help: "Remove single fields in a metric based on a regular expression criteria",
AutoMake: false,
})
}
75 changes: 75 additions & 0 deletions transformer/ban_field.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package transformer

import (
"fmt"
"github.com/telenornms/skogul"
"regexp"
"sync"
)

// BanField removes single fields from a metric when the field's value
// matches a regular expression. Data and Metadata are handled
// independently: SourceData/RegexpData target a key in metric Data,
// SourceMetadata/RegexpMetadata target a key in metric Metadata.
// Either pair may be left empty to skip that side.
type BanField struct {
SourceData string `doc:"Data field to ban"`
RegexpData string `doc:"Regex to match value of source-data field"`
regexpData *regexp.Regexp // compiled form of RegexpData, built once on first Transform
SourceMetadata string `doc:"Metadata field to ban"`
RegexpMetadata string `doc:"Regex to match value of source-metadata field"`
regexpMetadata *regexp.Regexp // compiled form of RegexpMetadata, built once on first Transform
errData error // compile error for RegexpData, re-checked on every Transform call
errMetadata error // compile error for RegexpMetadata, re-checked on every Transform call
init sync.Once // guards the one-time regexp compilation in Transform
}

// Transform deletes the configured Data and/or Metadata field from every
// metric in the container when the field's current value matches the
// corresponding regular expression. Fields holding non-string values are
// left untouched rather than panicking on a failed type assertion.
func (b *BanField) Transform(c *skogul.Container) error {
	b.init.Do(func() {
		b.regexpData, b.errData = regexp.Compile(b.RegexpData)
		b.regexpMetadata, b.errMetadata = regexp.Compile(b.RegexpMetadata)
	})

	// Verify() should reject bad regexes before we ever get here, but
	// the compile errors are kept so repeated Transform calls fail
	// loudly instead of dereferencing a nil *regexp.Regexp.
	if b.errData != nil {
		return fmt.Errorf("unable to compile regexp `%s': %w", b.RegexpData, b.errData)
	}
	if b.errMetadata != nil {
		return fmt.Errorf("unable to compile regexp `%s': %w", b.RegexpMetadata, b.errMetadata)
	}

	for _, metric := range c.Metrics {
		if b.SourceData != "" {
			// Checked assertion: the original `str.(string)` cast
			// panicked on any non-string value. A non-string value
			// simply never matches and is never banned. Indexing a
			// nil map is safe in Go, so no nil check is needed.
			if str, ok := metric.Data[b.SourceData].(string); ok && b.regexpData.MatchString(str) {
				delete(metric.Data, b.SourceData)
			}
		}
		if b.SourceMetadata != "" {
			if str, ok := metric.Metadata[b.SourceMetadata].(string); ok && b.regexpMetadata.MatchString(str) {
				delete(metric.Metadata, b.SourceMetadata)
			}
		}
	}

	return nil
}

// Verify validates the configuration without transforming anything: a
// configured source field requires a matching regex, and both regexes
// must compile. Compile errors are wrapped with %w so callers can
// inspect them with errors.Is/errors.As.
func (b *BanField) Verify() error {
	if b.SourceData != "" && b.RegexpData == "" {
		return fmt.Errorf("regexpdata field has to have a value when sourcedata is provided")
	}
	if b.SourceMetadata != "" && b.RegexpMetadata == "" {
		return fmt.Errorf("regexpmetadata field has to have a value when sourcemetadata is provided")
	}

	// Compiling an empty pattern succeeds, so unconfigured sides pass
	// through these checks harmlessly.
	if _, err := regexp.Compile(b.RegexpData); err != nil {
		return fmt.Errorf("failed to compile regexp for regexpdata field %v: %w", b.RegexpData, err)
	}
	if _, err := regexp.Compile(b.RegexpMetadata); err != nil {
		return fmt.Errorf("failed to compile regexp for regexpmetadata field %v: %w", b.RegexpMetadata, err)
	}
	return nil
}
41 changes: 41 additions & 0 deletions transformer/ban_field_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package transformer_test

import (
"testing"

"github.com/telenornms/skogul"
"github.com/telenornms/skogul/transformer"
)

// TestBanField checks that BanField removes the targeted data and
// metadata fields whose values match the configured regexes, and that a
// non-targeted field survives the transform.
func TestBanField(t *testing.T) {
	metric := skogul.Metric{}
	metric.Metadata = make(map[string]interface{})
	metric.Metadata["foofoo"] = "barBAR"
	metric.Data = make(map[string]interface{})
	metric.Data["foo"] = "BAR"
	// "baz" is not targeted and must survive the transform untouched.
	metric.Data["baz"] = "foobar"
	c := skogul.Container{}
	c.Metrics = []*skogul.Metric{&metric}

	ban := transformer.BanField{
		SourceData:     "foo",
		RegexpData:     "BAR",
		SourceMetadata: "foofoo",
		RegexpMetadata: "barBAR",
	}

	if err := ban.Verify(); err != nil {
		t.Fatalf("ban_field Verify() returned non-nil err: %v", err)
	}

	t.Logf("Container before transform:\n%v", c)
	// Fatalf, not Errorf: continuing after a failed Transform would
	// only produce misleading follow-up failures.
	if err := ban.Transform(&c); err != nil {
		t.Fatalf("ban_field returned non-nil err: %v", err)
	}

	t.Logf("Container after transform:\n%v", c)

	// Distinct messages per assertion so a failure pinpoints which
	// field was mishandled.
	if _, ok := c.Metrics[0].Metadata["foofoo"]; ok {
		t.Fatal("ban_field transformer failed to ban metadata key-value pair")
	}
	if _, ok := c.Metrics[0].Data["foo"]; ok {
		t.Fatal("ban_field transformer failed to ban data key-value pair")
	}
	if v, ok := c.Metrics[0].Data["baz"]; !ok || v != "foobar" {
		t.Fatal("ban_field transformer removed or altered a non-matching data field")
	}
}
68 changes: 54 additions & 14 deletions transformer/cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,24 @@ import (
)

type Cast struct {
MetadataStrings []string `doc:"List of metadatafields that should be strings"`
MetadataInts []string `doc:"List of metadatafields that should be integers"`
MetadataFloats []string `doc:"List of metadatafields that should be 64-bit floats"`
MetadataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."`
MetadataIpToDec []string `doc:"List of metadatafields containing IP addresses that should be decimals"`
MetadataJson []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."`
MetadataTopJson string `doc:"Metadata-field containing text-encoded JSON which will replace all other metadata after being decoded."`
DataStrings []string `doc:"List of datafields that should be strings"`
DataInts []string `doc:"List of datafields that should be integers"`
DataFloats []string `doc:"List of datafields that should be 64-bit floats"`
DataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."`
DataIpToDec []string `doc:"List of datafields containing IP addresses that should be decimals"`
DataJson []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."`
DataTopJson string `doc:"Data-field containing text-encoded JSON which will replace all other data after being decoded."`
MetadataStrings []string `doc:"List of metadatafields that should be strings"`
MetadataInts []string `doc:"List of metadatafields that should be integers"`
MetadataFloats []string `doc:"List of metadatafields that should be 64-bit floats"`
MetadataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."`
MetadataIpToDec []string `doc:"List of metadatafields containing IP addresses that should be decimals"`
MetadataJson []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."`
MetadataTopJson string `doc:"Metadata-field containing text-encoded JSON which will replace all other metadata after being decoded."`
DataStrings []string `doc:"List of datafields that should be strings"`
DataInts []string `doc:"List of datafields that should be integers"`
DataFloats []string `doc:"List of datafields that should be 64-bit floats"`
DataFlatFloats []string `doc:"List of metadatafields that are floats which should be expressed as plain, non-exponential numbers in text. E.g.: Large serial numbers will be written as plain numbers, not 1.1231215e+10. If the field is a non-float, it will be left as is."`
DataIpToDec []string `doc:"List of datafields containing IP addresses that should be decimals"`
DataJson []string `doc:"List of fields that will be json-decoded. E.g.: Original value is encoded as text string, but contains json."`
DataTopJson string `doc:"Data-field containing text-encoded JSON which will replace all other data after being decoded."`
DataBlobsToStrings []string `doc:"List of datafields containning blob (byte-array) values that should be strings"`
DataStringsToBlobs []string `doc:"List of datafields containing a string that should be blobs (byte-arrays)"`
MetadataBlobsToStrings []string `doc:"List of metadatafields containning blob (byte-array) values that should be strings"`
MetadataStringsToBlobs []string `doc:"List of metadatafields containing a string that should be blobs (byte-arrays)"`
}

// Transform enforces the Cast rules
Expand Down Expand Up @@ -95,6 +99,24 @@ func (cast *Cast) Transform(c *skogul.Container) error {

}
}
for _, value := range cast.DataBlobsToStrings {
if c.Metrics[mi].Data[value] != nil {
_, ok := c.Metrics[mi].Data[value].(string)
if ok {
continue
}
c.Metrics[mi].Data[value] = fmt.Sprintf("%s", c.Metrics[mi].Data[value])
}
}
for _, value := range cast.DataStringsToBlobs {
if c.Metrics[mi].Data[value] != nil {
cpy := fmt.Sprintf("%s", c.Metrics[mi].Data[value])
delete(c.Metrics[mi].Data, value)
c.Metrics[mi].Data[value] = make([]byte, len(cpy))
c.Metrics[mi].Data[value] = []byte(cpy)
}
}

for _, value := range cast.MetadataJson {
if c.Metrics[mi].Metadata[value] != nil {
tmp1, ok := c.Metrics[mi].Metadata[value].(string)
Expand Down Expand Up @@ -232,6 +254,24 @@ func (cast *Cast) Transform(c *skogul.Container) error {
c.Metrics[mi].Metadata[value] = cast.Inet6Aton(s)
}
}

for _, value := range cast.MetadataBlobsToStrings {
if c.Metrics[mi].Metadata[value] != nil {
_, ok := c.Metrics[mi].Metadata[value].(string)
if ok {
continue
}
c.Metrics[mi].Metadata[value] = fmt.Sprintf("%s", c.Metrics[mi].Metadata[value])
}
}
for _, value := range cast.MetadataStringsToBlobs {
if c.Metrics[mi].Metadata[value] != nil {
cpy := fmt.Sprintf("%s", c.Metrics[mi].Metadata[value])
delete(c.Metrics[mi].Metadata, value)
c.Metrics[mi].Metadata[value] = make([]byte, len(cpy))
c.Metrics[mi].Metadata[value] = []byte(cpy)
}
}
}
return nil
}
Expand Down
76 changes: 76 additions & 0 deletions transformer/edit.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,79 @@ func (replace *Replace) Verify() error {
skogul.Assert(regex != nil)
return nil
}

// ReplaceData is a copy of the Replace transformer but works on the Data field
// Replace executes a regular expression replacement of metric data.
// ReplaceData executes a regular expression replacement on a metric data
// field. It mirrors the Replace transformer, which performs the same
// operation on metadata.
type ReplaceData struct {
Source string `doc:"Data key to read from."`
Destination string `doc:"Data key to write to. Defaults to overwriting the source-key if left blank. Destination key will always be overwritten, e.g., even if the source key is missing, the key located at the destination will be removed."`
Regex string `doc:"Regular expression to match."`
Replacement string `doc:"Replacement text. Can also use $1, $2, etc to reference sub-matches. Defaults to empty string - remove matching items."`
regex *regexp.Regexp // compiled form of Regex, built once on first Transform
once sync.Once // guards one-time compilation and Destination defaulting
err error // compile error from the once-guarded setup, asserted nil in Transform
}

// Transform executes the regular expression replacement
func (replace *ReplaceData) Transform(c *skogul.Container) error {
replace.once.Do(func() {
if replace.Destination == "" {
replace.Destination = replace.Source
}
replace.regex, replace.err = regexp.Compile(replace.Regex)
})
// Verify() should catch this, so there's no reasonable way this
// should happen. But in the off chance that a regex compiles on
// the first attempt but not the second.... (e.g.: some serious
// bugs). It will also catch our own bugs, if, for some reason, we
// manage to botch up Verify() under some corner case.
skogul.Assert(replace.err == nil)

for mi := range c.Metrics {
if c.Metrics[mi].Data == nil {
continue
}
if c.Metrics[mi].Data[replace.Source] == nil {
delete(c.Metrics[mi].Data, replace.Destination)
continue
}
// FIXME: This should be a type cast to allow working with
// both text strings (as per now) and []byte strings.
// Similar to what is done in the ban transformer.
str, ok := c.Metrics[mi].Data[replace.Source].(string)
if !ok {
// FIXME: What to do? It's tempting to copy the
// key, but that could mean multiple references to
// the same memory, which can create unexpected
// behavior if other transformers want to modify
// just one of the headers.
repLog.WithField("source", replace.Source).Printf("Unable to transform non-string field %s with content %v", replace.Source, c.Metrics[mi].Data[replace.Source])
// This is to confirm with the documentation and
// ensure that this isn't exploited by providing a
// bogus Source-field only to be able to provide a
// custom destination field.
delete(c.Metrics[mi].Data, replace.Destination)
continue
}
c.Metrics[mi].Data[replace.Destination] = string(replace.regex.ReplaceAll([]byte(str), []byte(replace.Replacement)))
}
return nil
}

// Verify checks that the required variables are set and that the regular
// expression compiles
func (replace *ReplaceData) Verify() error {
if replace.Source == "" {
return skogul.MissingArgument("Source")
}
if replace.Regex == "" {
return skogul.MissingArgument("Regex")
}
regex, err := regexp.Compile(replace.Regex)

if err != nil {
return fmt.Errorf("replace transformer regex `%s' didn't compile: %w", replace.Regex, err)
}
skogul.Assert(regex != nil)
return nil
}

0 comments on commit c32ee39

Please sign in to comment.