Skip to content

Commit

Permalink
Make InferStrings accept any valid JSON value
Browse files Browse the repository at this point in the history
  • Loading branch information
bombsimon committed Oct 2, 2023
1 parent fe1b071 commit 46856ca
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 12 deletions.
61 changes: 49 additions & 12 deletions inferrer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package jtdinfer

import (
"encoding/json"
"strconv"
)

// Inferrer represents the `InferredSchema` with its state combined with the
Expand Down Expand Up @@ -32,27 +33,63 @@ func (i *Inferrer) IntoSchema() Schema {
return i.Inference.IntoSchema(i.Hints)
}

// InferStrings runs inference over rows, where every row is the textual form
// of a single JSON value: an object, an array, a string, a number, a boolean
// or null.
//
// Rows are decoded and inferred in order. Processing stops at the first row
// that is not valid JSON, and the inferrer is returned with the state it had
// before that row. An empty or nil slice yields the inferrer exactly as
// created from hints.
//
// If you already have your data as Go values, such as a slice of numbers or a
// map of strings, you can pass them directly to `Infer`. This is just a
// convenience for callers that only have strings.
func InferStrings(rows []string, hints *Hints) *Inferrer {
	inferrer := NewInferrer(hints)

	for _, row := range rows {
		// Decoding into an empty interface lets encoding/json choose the Go
		// type matching the JSON value (bool, float64, string, []any,
		// map[string]any or nil). Seeding the interface with a typed value
		// beforehand has no effect because Unmarshal replaces it, so no
		// per-row type detection is needed.
		var toInfer any
		if err := json.Unmarshal([]byte(row), &toInfer); err != nil {
			return inferrer
		}

		inferrer = inferrer.Infer(toInfer)
	}

	return inferrer
}

// isBool reports whether value is exactly one of the JSON boolean literals
// `true` or `false`. The comparison is case sensitive and does not tolerate
// surrounding whitespace.
func isBool(value string) bool {
	switch value {
	case "true", "false":
		return true
	default:
		return false
	}
}

// isObject reports whether value is valid JSON whose top-level value is an
// object.
//
// The value is decoded into an empty interface and the resulting dynamic type
// is inspected. Decoding straight into a map would also succeed for the JSON
// literal `null`, which is not an object.
func isObject(value string) bool {
	var decoded any
	if err := json.Unmarshal([]byte(value), &decoded); err != nil {
		return false
	}

	_, ok := decoded.(map[string]any)

	return ok
}

// isArray reports whether value is valid JSON whose top-level value is an
// array.
//
// The value is decoded into an empty interface and the resulting dynamic type
// is inspected. Decoding straight into a slice would also succeed for the
// JSON literal `null`, which is not an array.
func isArray(value string) bool {
	var decoded any
	if err := json.Unmarshal([]byte(value), &decoded); err != nil {
		return false
	}

	_, ok := decoded.([]any)

	return ok
}

// isNumber reports whether value is a JSON number that can be represented as
// a float64.
//
// strconv.ParseFloat on its own is more permissive than the JSON grammar: it
// also accepts tokens such as "NaN", "Inf", "+1", ".5", "01" and hexadecimal
// floats. json.Valid filters those out first, and ParseFloat then rejects the
// remaining valid JSON values that are not bare numbers (objects, arrays,
// strings, booleans and null) as well as numbers out of float64 range.
func isNumber(value string) bool {
	if !json.Valid([]byte(value)) {
		return false
	}

	_, err := strconv.ParseFloat(value, 64)

	return err == nil
}
124 changes: 124 additions & 0 deletions inferrer_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,137 @@
package jtdinfer

import (
"fmt"
"math"
"strconv"
"testing"
"time"

jtd "github.com/jsontypedef/json-typedef-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestInferString checks that InferStrings, given a single row, produces the
// expected schema for each kind of JSON value: booleans, objects, arrays,
// numbers, strings, timestamps and null.
func TestInferString(t *testing.T) {
	cases := []struct {
		description    string
		values         []string
		expectedSchema Schema
	}{
		{
			description: "boolean true value",
			values:      []string{"true"},
			expectedSchema: Schema{
				Type: jtd.TypeBoolean,
			},
		},
		{
			description: "boolean false value",
			values:      []string{"false"},
			expectedSchema: Schema{
				Type: jtd.TypeBoolean,
			},
		},
		{
			description: "object",
			values:      []string{`{"name":"Joe"}`},
			expectedSchema: Schema{
				Properties: map[string]Schema{
					"name": {
						Type: jtd.TypeString,
					},
				},
			},
		},
		{
			description: "array",
			values:      []string{`[1, 2, 3]`},
			expectedSchema: Schema{
				Elements: &Schema{
					Type: jtd.TypeUint8,
				},
			},
		},
		{
			description: "unsigned integer",
			values:      []string{"1"},
			expectedSchema: Schema{
				Type: jtd.TypeUint8,
			},
		},
		{
			description: "signed integer",
			values:      []string{"-1"},
			expectedSchema: Schema{
				Type: jtd.TypeInt8,
			},
		},
		{
			// The input is the smallest int32, so the label says "min";
			// the previous "max" wording did not match the value under test.
			description: "signed min integer",
			values:      []string{strconv.Itoa(math.MinInt32)},
			expectedSchema: Schema{
				Type: jtd.TypeInt32,
			},
		},
		{
			description: "float without fraction",
			values:      []string{"1.0"},
			expectedSchema: Schema{
				Type: jtd.TypeUint8,
			},
		},
		{
			description: "positive float",
			values:      []string{"1.1"},
			expectedSchema: Schema{
				Type: jtd.TypeFloat64,
			},
		},
		{
			description: "negative float",
			values:      []string{"-1.1"},
			expectedSchema: Schema{
				Type: jtd.TypeFloat64,
			},
		},
		{
			description: "string",
			values:      []string{`"string"`},
			expectedSchema: Schema{
				Type: jtd.TypeString,
			},
		},
		{
			description: "number in string is still string",
			values:      []string{`"2.2"`},
			expectedSchema: Schema{
				Type: jtd.TypeString,
			},
		},
		{
			// The row is the current time formatted as RFC 3339 and wrapped
			// in quotes so that it is a JSON string.
			description: "timestamp",
			values:      []string{fmt.Sprintf(`"%s"`, time.Now().Format(time.RFC3339))},
			expectedSchema: Schema{
				Type: jtd.TypeTimestamp,
			},
		},
		{
			description: "null",
			values:      []string{"null"},
			expectedSchema: Schema{
				Nullable: true,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			gotSchema := InferStrings(tc.values, NewHints()).IntoSchema()
			assert.EqualValues(t, tc.expectedSchema, gotSchema)
		})
	}
}

func TestJTDInfer(t *testing.T) {
rows := []string{
`{"name": "Joe", "age": 42, "hobbies": ["code", "animals"]}`,
Expand Down

0 comments on commit 46856ca

Please sign in to comment.