diff --git a/README.md b/README.md index 9b9bad0..7cc0671 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,8 @@ Or use it as a lib in internal commands, it's the `gen` package ## Avro schema validation +### avrosv + A small Avro schema validation command-line utility is also available. This simple tool leverages the schema parsing functionality of the library, showing validation errors or optionally dumping parsed schemas to the console. It can be used in CI/CD pipelines to validate schema changes in a repository. @@ -223,6 +225,20 @@ Check the options and usage with `-h`: avrosv -h ``` +### Name Validation + +Avro names are validated according to the +[Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names). + +However, the official Java library does not validate said names accordingly, resulting to some files out in the wild +to have invalid names. Thus, this library has a configuration option to allow for these invalid names to be parsed. + +```go +avro.SkipNameValidation = true +``` + +Note that this variable is global, so ideally you'd need to unset it after you're done with the invalid schema. + ## Go Version Support This library supports the last two versions of Go. While the minimum Go version is diff --git a/ocf/ocf.go b/ocf/ocf.go index f258bde..137cc03 100644 --- a/ocf/ocf.go +++ b/ocf/ocf.go @@ -155,7 +155,7 @@ type encoderConfig struct { EncodingConfig avro.API } -// EncoderFunc represents an configuration function for Encoder. +// EncoderFunc represents a configuration function for Encoder. type EncoderFunc func(cfg *encoderConfig) // WithBlockLength sets the block length on the encoder. diff --git a/ocf/ocf_test.go b/ocf/ocf_test.go index 102a1da..5eee2bb 100644 --- a/ocf/ocf_test.go +++ b/ocf/ocf_test.go @@ -195,6 +195,43 @@ func TestDecoder_WithDeflate(t *testing.T) { assert.Equal(t, 1, count) } +func TestDecoder_InvalidName(t *testing.T) { + type record struct { + Hello int `avro:"hello"` + What string `avro:"what"` + } + want := record{ + What: "yes", + Hello: 1, + } + + f, err := os.Open("testdata/invalid-name.avro") + if err != nil { + t.Error(err) + return + } + t.Cleanup(func() { _ = f.Close() }) + + avro.SkipNameValidation = true + defer func() { avro.SkipNameValidation = false }() + + dec, err := ocf.NewDecoder(f) + require.NoError(t, err) + + var count int + for dec.HasNext() { + count++ + var got record + err = dec.Decode(&got) + + require.NoError(t, err) + assert.Equal(t, want, got) + } + + require.NoError(t, dec.Error()) + assert.Equal(t, 1, count) +} + func TestDecoder_WithDeflateHandlesInvalidData(t *testing.T) { f, err := os.Open("testdata/deflate-invalid-data.avro") if err != nil { diff --git a/ocf/testdata/invalid-name.avro b/ocf/testdata/invalid-name.avro new file mode 100644 index 0000000..cf8e249 Binary files /dev/null and b/ocf/testdata/invalid-name.avro differ diff --git a/reader.go b/reader.go index f2b74aa..0f34cb0 100644 --- a/reader.go +++ b/reader.go @@ -60,7 +60,7 @@ func (r *Reader) Reset(b []byte) *Reader { return r } -// ReportError record a error in iterator instance with current position. +// ReportError record an error in iterator instance with current position. func (r *Reader) ReportError(operation, msg string) { if r.Error != nil && !errors.Is(r.Error, io.EOF) { return diff --git a/schema.go b/schema.go index 084283c..aa3b1c5 100644 --- a/schema.go +++ b/schema.go @@ -1549,6 +1549,10 @@ func validateName(name string) error { return errors.New("name must be a non-empty") } + if SkipNameValidation { + return nil + } + if strings.IndexFunc(name[:1], invalidNameFirstChar) > -1 { return fmt.Errorf("invalid name %s", name) } diff --git a/schema_internal_test.go b/schema_internal_test.go index 7e029b2..36a3f1a 100644 --- a/schema_internal_test.go +++ b/schema_internal_test.go @@ -96,6 +96,16 @@ func TestProperties_PropGetsFromEmptySet(t *testing.T) { assert.Nil(t, p.Prop("test")) } +func TestName_InvalidNameFirstCharButValidationSkipped(t *testing.T) { + SkipNameValidation = true + t.Cleanup(func() { + SkipNameValidation = false + }) + + _, err := newName("+bar", "foo", nil) + assert.NoError(t, err) +} + func TestIsValidDefault(t *testing.T) { tests := []struct { name string diff --git a/schema_parse.go b/schema_parse.go index 2be5a37..76e21e1 100644 --- a/schema_parse.go +++ b/schema_parse.go @@ -15,6 +15,13 @@ import ( // DefaultSchemaCache is the default cache for schemas. var DefaultSchemaCache = &SchemaCache{} +// SkipNameValidation sets whether to skip name validation. +// Avro spec incurs a strict naming convention for names and aliases, however official Avro tools do not follow that +// More info: +// https://lists.apache.org/thread/39v98os6wdpyr6w31xdkz0yzol51fsrr +// https://github.com/apache/avro/pull/1995 +var SkipNameValidation = false + // Parse parses a schema string. func Parse(schema string) (Schema, error) { return ParseBytes([]byte(schema)) @@ -25,7 +32,7 @@ func ParseWithCache(schema, namespace string, cache *SchemaCache) (Schema, error return ParseBytesWithCache([]byte(schema), namespace, cache) } -// MustParse parses a schema string, panicing if there is an error. +// MustParse parses a schema string, panicking if there is an error. func MustParse(schema string) Schema { parsed, err := Parse(schema) if err != nil {