From 0054d7c299d6a09823812cf503674698c641fbdd Mon Sep 17 00:00:00 2001 From: Paul Mach Date: Tue, 9 Jan 2024 22:14:27 -0800 Subject: [PATCH 1/2] encoding/wkt: improve unmarshalling performance --- encoding/wkt/benchmarks_test.go | 115 +++++ encoding/wkt/testdata/polygon.json | 1 + encoding/wkt/unmarshal.go | 708 ++++++++++++++++++----------- encoding/wkt/unmarshal_test.go | 273 ++++++++++- 4 files changed, 821 insertions(+), 276 deletions(-) create mode 100644 encoding/wkt/benchmarks_test.go create mode 100644 encoding/wkt/testdata/polygon.json diff --git a/encoding/wkt/benchmarks_test.go b/encoding/wkt/benchmarks_test.go new file mode 100644 index 0000000..fd48ec0 --- /dev/null +++ b/encoding/wkt/benchmarks_test.go @@ -0,0 +1,115 @@ +package wkt + +import ( + "encoding/json" + "io/ioutil" + "testing" + + "github.com/paulmach/orb" +) + +func BenchmarkUnmarshalPoint(b *testing.B) { + var mp orb.MultiPolygon + loadJSON(b, "testdata/polygon.json", &mp) + + text := MarshalString(orb.Point{-81.60644531, 41.51377887}) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := Unmarshal(text) + if err != nil { + b.Fatalf("unexpected error: %v", err) + } + } +} + +func BenchmarkUnmarshalLineString_small(b *testing.B) { + ls := orb.LineString{{1, 2}, {3, 4}} + + text := MarshalString(ls) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := Unmarshal(text) + if err != nil { + b.Fatalf("unexpected error: %v", err) + } + } +} + +func BenchmarkUnmarshalLineString(b *testing.B) { + var mp orb.MultiPolygon + loadJSON(b, "testdata/polygon.json", &mp) + + text := MarshalString(orb.LineString(mp[0][0])) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := Unmarshal(text) + if err != nil { + b.Fatalf("unexpected error: %v", err) + } + } +} + +func BenchmarkUnmarshalPolygon(b *testing.B) { + var mp orb.MultiPolygon + loadJSON(b, "testdata/polygon.json", &mp) + + text := MarshalString(mp[0]) + + b.ReportAllocs() + 
b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := Unmarshal(text) + if err != nil { + b.Fatalf("unexpected error: %v", err) + } + } +} + +func BenchmarkUnmarshalMultiPolygon_small(b *testing.B) { + mp := orb.MultiPolygon{{{{1, 2}, {3, 4}}}, {{{5, 6}, {7, 8}}, {{1, 2}, {5, 4}}}} + + text := MarshalString(mp) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := Unmarshal(text) + if err != nil { + b.Fatalf("unexpected error: %v", err) + } + } +} + +func BenchmarkUnmarshalMultiPolygon(b *testing.B) { + var mp orb.MultiPolygon + loadJSON(b, "testdata/polygon.json", &mp) + + text := MarshalString(mp) + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := Unmarshal(text) + if err != nil { + b.Fatalf("unexpected error: %v", err) + } + } +} + +func loadJSON(tb testing.TB, filename string, obj interface{}) { + data, err := ioutil.ReadFile(filename) + if err != nil { + tb.Fatalf("failed to load mvt file: %v", err) + } + + err = json.Unmarshal(data, obj) + if err != nil { + tb.Fatalf("unmarshal error: %v", err) + } +} diff --git a/encoding/wkt/testdata/polygon.json b/encoding/wkt/testdata/polygon.json new file mode 100644 index 0000000..9505d83 --- /dev/null +++ b/encoding/wkt/testdata/polygon.json @@ -0,0 +1 @@ 
+[[[[-81.60644531,41.51377887],[-81.60639143,41.51373958],[-81.60644531,41.51369176],[-81.60644531,41.51377887]]],[[[-81.60644531,41.51367126],[-81.6062813,41.51356772],[-81.60629702,41.51355379],[-81.60626037,41.51353058],[-81.60628858,41.51350556],[-81.60598468,41.51331372],[-81.60566371,41.51359859],[-81.60597318,41.51379406],[-81.6059553,41.51381001],[-81.60601091,41.51384512],[-81.60602843,41.51382951],[-81.60618132,41.51392611],[-81.60618635,41.5139216],[-81.60622929,41.51394851],[-81.60623998,41.5139405],[-81.60629505,41.51397366],[-81.60630574,41.51396357],[-81.60644531,41.51405409],[-81.60644531,41.51425983],[-81.60639305,41.5143673],[-81.60644531,41.51439781],[-81.60644531,41.51475154],[-81.60588452,41.51475938],[-81.60587113,41.51480956],[-81.60586314,41.51486378],[-81.60586844,41.51493212],[-81.60588452,41.51500436],[-81.60591668,41.5150727],[-81.60595961,41.51513492],[-81.60601872,41.51519317],[-81.60608574,41.51524342],[-81.60617422,41.51528546],[-81.6062654,41.51531768],[-81.60635658,41.51533981],[-81.60643168,41.51534781],[-81.60644531,41.51534813],[-81.60644531,41.51584208],[-81.60601827,41.51584402],[-81.60601585,41.51523467],[-81.60549563,41.51523992],[-81.60551064,41.51584617],[-81.60477168,41.51585316],[-81.60474204,41.51443308],[-81.60470063,41.51230842],[-81.6047009,41.51210689],[-81.604738,41.51208018],[-81.60480645,41.51207272],[-81.60506247,41.51219568],[-81.605367,41.5123212],[-81.60554531,41.51237737],[-81.60552124,41.51245479],[-81.60556409,41.51251197],[-81.60570629,41.51253612],[-81.60577869,41.51250202],[-81.6058431,41.51249186],[-81.60589134,41.51245681],[-81.60623064,41.51248789],[-81.60644531,41.51248933],[-81.60644531,41.51367126]],[[-81.6056876,41.5138203],[-81.60559005,41.51375969],[-81.60557478,41.51377348],[-81.6055401,41.51375189],[-81.60555699,41.51373662],[-81.60545719,41.5136746],[-81.6053793,41.51374489],[-81.6053961,41.51375532],[-81.60537849,41.51377119],[-81.60536367,41.51378458],[-81.6053493,41.51377557],[-81.60527465
,41.51384317],[-81.60537149,41.5139033],[-81.60538649,41.51388978],[-81.60542242,41.51391212],[-81.60540652,41.51392638],[-81.60550345,41.51398658],[-81.60557801,41.51391918],[-81.60556202,41.51390929],[-81.60557927,41.51389368],[-81.60559661,41.51387801],[-81.6056126,41.51388797],[-81.6056876,41.5138203]],[[-81.60579783,41.51453089],[-81.6057133,41.51447506],[-81.60569533,41.5144908],[-81.60564817,41.51446006],[-81.60566793,41.51444358],[-81.6055852,41.51439279],[-81.60551145,41.51445959],[-81.60553759,41.51447358],[-81.60549402,41.51451077],[-81.60546922,41.51449517],[-81.60539341,41.51456048],[-81.60548063,41.51461618],[-81.60549528,41.5146032],[-81.60554504,41.51463131],[-81.60552618,41.51464631],[-81.60561134,41.51469898],[-81.60561538,41.51469548],[-81.60568707,41.51463131],[-81.60566901,41.51461968],[-81.60570916,41.51458456],[-81.60572668,41.51459559],[-81.60579783,41.51453089]],[[-81.60521015,41.51392402],[-81.60511349,41.51386422],[-81.60509939,41.51387626],[-81.60506462,41.51385521],[-81.6050807,41.51384209],[-81.60497937,41.51378041],[-81.60490158,41.51384707],[-81.60491972,41.51385871],[-81.60488891,41.51388581],[-81.60487355,41.5138768],[-81.60479702,41.51394662],[-81.60489287,41.51400629],[-81.60490966,41.51399378],[-81.60494452,41.51401577],[-81.60492637,41.51402936],[-81.60502429,41.5140897],[-81.60509939,41.51402142],[-81.6050834,41.51401126],[-81.60511762,41.51398321],[-81.60513164,41.51399277],[-81.60521015,41.51392402]],[[-81.6054783,41.51421017],[-81.6053864,41.51415387],[-81.60537643,41.51416449],[-81.60533484,41.51413988],[-81.60534687,41.51412931],[-81.60525569,41.51407261],[-81.6051779,41.51414189],[-81.60519937,41.51415636],[-81.60516784,41.51418252],[-81.60514439,41.51416947],[-81.60506992,41.51424131],[-81.60515984,41.51429896],[-81.60517116,41.514289],[-81.60521078,41.51431308],[-81.60519874,41.51432311],[-81.60529064,41.51438041],[-81.60537104,41.5143071],[-81.60535217,41.514296],[-81.60537769,41.51427091],[-81.60539853,41.51428201],[-
81.6054783,41.51421017]],[[-81.6055534,41.51291369],[-81.60533879,41.51277909],[-81.6052503,41.51285846],[-81.60527312,41.51287447],[-81.60523692,41.51290871],[-81.60512562,41.51283936],[-81.60497937,41.51297396],[-81.60509067,41.51304432],[-81.60505178,41.51307936],[-81.60502097,41.51306329],[-81.6048775,41.51319688],[-81.60509337,41.51333047],[-81.60523961,41.51320091],[-81.6052168,41.5131808],[-81.60525704,41.51314569],[-81.60536969,41.51321302],[-81.60551055,41.51308239],[-81.6054032,41.51301506],[-81.60544075,41.512979],[-81.60546761,41.512994],[-81.6055534,41.51291369]],[[-81.60622058,41.51403689],[-81.60612473,41.51397319],[-81.60610865,41.51398819],[-81.6060711,41.51396761],[-81.6060878,41.51395308],[-81.60599402,41.51389429],[-81.60592018,41.5139586],[-81.60593293,41.51396862],[-81.60589871,41.51399727],[-81.60588604,41.5139892],[-81.60580825,41.514057],[-81.60590679,41.51411828],[-81.60592153,41.5141057],[-81.60595638,41.51412528],[-81.60593904,41.5141394],[-81.60603552,41.51420008],[-81.60611197,41.51413227],[-81.60609589,41.51412232],[-81.60613083,41.51409366],[-81.60614485,41.51410221],[-81.60622058,41.51403689]],[[-81.60540868,41.51522539],[-81.6053979,41.51474371],[-81.60496778,41.51474909],[-81.60496455,41.51459936],[-81.6047998,41.51460138],[-81.60480788,41.51496219],[-81.60521123,41.51495707],[-81.60521401,41.515084],[-81.60486062,41.51508837],[-81.60487193,41.51559662],[-81.60530133,41.51559117],[-81.60529819,41.51545416],[-81.60500345,41.51545786],[-81.60500219,41.51540351],[-81.60507145,41.5154027],[-81.60506867,41.51528061],[-81.60499258,41.51528156],[-81.6049915,41.51523057],[-81.60540868,41.51522539]],[[-81.60527995,41.51574897],[-81.60527555,41.51565258],[-81.60492709,41.51566139],[-81.6049314,41.51575778],[-81.60527995,41.51574897]],[[-81.60543671,41.51256827],[-81.6052194,41.51243172],[-81.60502905,41.51259437],[-81.60524905,41.51273496],[-81.60543671,41.51256827]],[[-81.60583708,41.51515396],[-81.60579962,41.51497456],[-81.6057053,41.5149
8566],[-81.60574294,41.51516506],[-81.60583708,41.51515396]],[[-81.60598117,41.51270879],[-81.60575588,41.51256827],[-81.60557621,41.51273597],[-81.60580007,41.51287447],[-81.60598117,41.51270879]],[[-81.60599061,41.51309046],[-81.6057733,41.51295189],[-81.60558421,41.51311959],[-81.60580421,41.51325722],[-81.60599061,41.51309046]],[[-81.60639018,41.51283229],[-81.60616281,41.51269157],[-81.60597309,41.51285927],[-81.60620512,41.513001],[-81.60639018,41.51283229]]]] diff --git a/encoding/wkt/unmarshal.go b/encoding/wkt/unmarshal.go index b0d12f6..d9659d4 100644 --- a/encoding/wkt/unmarshal.go +++ b/encoding/wkt/unmarshal.go @@ -1,6 +1,7 @@ package wkt import ( + "bytes" "errors" "regexp" "strconv" @@ -22,147 +23,388 @@ var ( doubleParen = regexp.MustCompile(`\)[\s|\t]*\)([\s|\t]*,[\s|\t]*)\([\s|\t]*\(`) singleParen = regexp.MustCompile(`\)([\s|\t]*,[\s|\t]*)\(`) - noParen = regexp.MustCompile(`([\s|\t]*,[\s|\t]*)`) ) // UnmarshalPoint returns the point represented by the wkt string. // Will return ErrIncorrectGeometry if the wkt is not a point. 
-func UnmarshalPoint(s string) (p orb.Point, err error) { - geom, err := Unmarshal(s) +func UnmarshalPoint(s string) (orb.Point, error) { + s = trimSpace(s) + prefix := upperPrefix(s) + if !bytes.HasPrefix(prefix, []byte("POINT")) { + return orb.Point{}, ErrIncorrectGeometry + } + + return unmarshalPoint(s) +} + +func unmarshalPoint(s string) (orb.Point, error) { + s, err := trimSpaceBrackets(s[5:]) if err != nil { return orb.Point{}, err } - g, ok := geom.(orb.Point) + + tp, err := parsePoint(s) + if err != nil { + return orb.Point{}, err + } + + return tp, nil +} + +// parsePoint pase point by (x y) +func parsePoint(s string) (p orb.Point, err error) { + one, two, ok := cut(s, " ") if !ok { - return orb.Point{}, ErrIncorrectGeometry + return orb.Point{}, ErrNotWKT } - return g, nil + + x, err := strconv.ParseFloat(one, 64) + if err != nil { + return orb.Point{}, ErrNotWKT + } + + y, err := strconv.ParseFloat(two, 64) + if err != nil { + return orb.Point{}, ErrNotWKT + } + + return orb.Point{x, y}, nil } // UnmarshalMultiPoint returns the multi-point represented by the wkt string. // Will return ErrIncorrectGeometry if the wkt is not a multi-point. 
-func UnmarshalMultiPoint(s string) (p orb.MultiPoint, err error) { - geom, err := Unmarshal(s) +func UnmarshalMultiPoint(s string) (orb.MultiPoint, error) { + s = trimSpace(s) + prefix := upperPrefix(s) + if !bytes.HasPrefix(prefix, []byte("MULTIPOINT")) { + return nil, ErrIncorrectGeometry + } + + return unmarshalMultiPoint(s) +} + +func unmarshalMultiPoint(s string) (orb.MultiPoint, error) { + if strings.EqualFold(s, "MULTIPOINT EMPTY") { + return orb.MultiPoint{}, nil + } + + s, err := trimSpaceBrackets(s[10:]) if err != nil { return nil, err } - g, ok := geom.(orb.MultiPoint) - if !ok { - return nil, ErrIncorrectGeometry + count := strings.Count(s, ",") + mp := make(orb.MultiPoint, 0, count+1) + + err = splitOnComma(s, func(p string) error { + p, err := trimSpaceBrackets(p) + if err != nil { + return err + } + + tp, err := parsePoint(p) + if err != nil { + return err + } + + mp = append(mp, tp) + return nil + }) + if err != nil { + return nil, err } - return g, nil + + return mp, nil } // UnmarshalLineString returns the linestring represented by the wkt string. // Will return ErrIncorrectGeometry if the wkt is not a linestring. 
-func UnmarshalLineString(s string) (p orb.LineString, err error) { - geom, err := Unmarshal(s) +func UnmarshalLineString(s string) (orb.LineString, error) { + s = trimSpace(s) + prefix := upperPrefix(s) + if !bytes.HasPrefix(prefix, []byte("LINESTRING")) { + return nil, ErrIncorrectGeometry + } + + return unmarshalLineString(s) +} + +func unmarshalLineString(s string) (orb.LineString, error) { + if strings.EqualFold(s, "LINESTRING EMPTY") { + return orb.LineString{}, nil + } + + s, err := trimSpaceBrackets(s[10:]) if err != nil { return nil, err } - g, ok := geom.(orb.LineString) - if !ok { - return nil, ErrIncorrectGeometry + + count := strings.Count(s, ",") + ls := make(orb.LineString, 0, count+1) + + err = splitOnComma(s, func(p string) error { + tp, err := parsePoint(p) + if err != nil { + return err + } + + ls = append(ls, tp) + return nil + }) + if err != nil { + return nil, err } - return g, nil + + return ls, nil } // UnmarshalMultiLineString returns the multi-linestring represented by the wkt string. // Will return ErrIncorrectGeometry if the wkt is not a multi-linestring. 
-func UnmarshalMultiLineString(s string) (p orb.MultiLineString, err error) { - geom, err := Unmarshal(s) +func UnmarshalMultiLineString(s string) (orb.MultiLineString, error) { + s = trimSpace(s) + prefix := upperPrefix(s) + if !bytes.HasPrefix(prefix, []byte("MULTILINESTRING")) { + return nil, ErrIncorrectGeometry + } + + return unmarshalMultiLineString(s) +} + +func unmarshalMultiLineString(s string) (orb.MultiLineString, error) { + if strings.EqualFold(s, "MULTILINESTRING EMPTY") { + return orb.MultiLineString{}, nil + } + + s, err := trimSpaceBrackets(s[15:]) if err != nil { return nil, err } - g, ok := geom.(orb.MultiLineString) - if !ok { - return nil, ErrIncorrectGeometry + + var tmls orb.MultiLineString + err = splitByRegexpYield( + s, + singleParen, + func(i int) { + tmls = make(orb.MultiLineString, 0, i) + }, + func(ls string) error { + ls, err := trimSpaceBrackets(ls) + if err != nil { + return err + } + + count := strings.Count(ls, ",") + tls := make(orb.LineString, 0, count+1) + + err = splitOnComma(ls, func(p string) error { + tp, err := parsePoint(p) + if err != nil { + return err + } + + tls = append(tls, tp) + return nil + }) + if err != nil { + return err + } + + tmls = append(tmls, tls) + return nil + }, + ) + if err != nil { + return nil, err } - return g, nil + + return tmls, nil } // UnmarshalPolygon returns the polygon represented by the wkt string. // Will return ErrIncorrectGeometry if the wkt is not a polygon. 
-func UnmarshalPolygon(s string) (p orb.Polygon, err error) { - geom, err := Unmarshal(s) +func UnmarshalPolygon(s string) (orb.Polygon, error) { + s = trimSpace(s) + prefix := upperPrefix(s) + if !bytes.HasPrefix(prefix, []byte("POLYGON")) { + return nil, ErrIncorrectGeometry + } + + return unmarshalPolygon(s) +} + +func unmarshalPolygon(s string) (orb.Polygon, error) { + if strings.EqualFold(s, "POLYGON EMPTY") { + return orb.Polygon{}, nil + } + + s, err := trimSpaceBrackets(s[7:]) if err != nil { return nil, err } - g, ok := geom.(orb.Polygon) - if !ok { - return nil, ErrIncorrectGeometry + + var poly orb.Polygon + err = splitByRegexpYield( + s, + singleParen, + func(i int) { + poly = make(orb.Polygon, 0, i) + }, + func(r string) error { + r, err := trimSpaceBrackets(r) + if err != nil { + return err + } + + count := strings.Count(r, ",") + ring := make(orb.Ring, 0, count+1) + + err = splitOnComma(r, func(p string) error { + tp, err := parsePoint(p) + if err != nil { + return err + } + ring = append(ring, tp) + return nil + }) + if err != nil { + return err + } + + poly = append(poly, ring) + return nil + }, + ) + if err != nil { + return nil, err } - return g, nil + + return poly, nil } // UnmarshalMultiPolygon returns the multi-polygon represented by the wkt string. // Will return ErrIncorrectGeometry if the wkt is not a multi-polygon. 
-func UnmarshalMultiPolygon(s string) (p orb.MultiPolygon, err error) { - geom, err := Unmarshal(s) +func UnmarshalMultiPolygon(s string) (orb.MultiPolygon, error) { + s = trimSpace(s) + prefix := upperPrefix(s) + if !bytes.HasPrefix(prefix, []byte("MULTIPOLYGON")) { + return nil, ErrIncorrectGeometry + } + + return unmarshalMultiPolygon(s) +} + +func unmarshalMultiPolygon(s string) (orb.MultiPolygon, error) { + if strings.EqualFold(s, "MULTIPOLYGON EMPTY") { + return orb.MultiPolygon{}, nil + } + + s, err := trimSpaceBrackets(s[12:]) if err != nil { return nil, err } - g, ok := geom.(orb.MultiPolygon) - if !ok { - return nil, ErrIncorrectGeometry + + var mpoly orb.MultiPolygon + err = splitByRegexpYield( + s, + doubleParen, + func(i int) { + mpoly = make(orb.MultiPolygon, 0, i) + }, + func(poly string) error { + poly, err := trimSpaceBrackets(poly) + if err != nil { + return err + } + + var tpoly orb.Polygon + err = splitByRegexpYield( + poly, + singleParen, + func(i int) { + tpoly = make(orb.Polygon, 0, i) + }, + func(r string) error { + r, err := trimSpaceBrackets(r) + if err != nil { + return err + } + + count := strings.Count(r, ",") + tr := make(orb.Ring, 0, count+1) + + err = splitOnComma(r, func(s string) error { + tp, err := parsePoint(s) + if err != nil { + return err + } + + tr = append(tr, tp) + return nil + }) + if err != nil { + return err + } + + tpoly = append(tpoly, tr) + return nil + }, + ) + if err != nil { + return err + } + + mpoly = append(mpoly, tpoly) + return nil + }, + ) + if err != nil { + return nil, err } - return g, nil + + return mpoly, nil } // UnmarshalCollection returns the geometry collection represented by the wkt string. // Will return ErrIncorrectGeometry if the wkt is not a geometry collection. 
-func UnmarshalCollection(s string) (p orb.Collection, err error) { - geom, err := Unmarshal(s) - if err != nil { - return orb.Collection{}, err - } - g, ok := geom.(orb.Collection) - if !ok { +func UnmarshalCollection(s string) (orb.Collection, error) { + s = trimSpace(s) + prefix := upperPrefix(s) + if !bytes.HasPrefix(prefix, []byte("GEOMETRYCOLLECTION")) { return nil, ErrIncorrectGeometry } - return g, nil + + return unmarshalCollection(s) } -// trimSpaceBrackets trim space and brackets -func trimSpaceBrackets(s string) (string, error) { - s = strings.Trim(s, " ") - if len(s) == 0 { - return "", nil +func unmarshalCollection(s string) (orb.Collection, error) { + if strings.EqualFold(s, "GEOMETRYCOLLECTION EMPTY") { + return orb.Collection{}, nil } - if s[0] == '(' { - s = s[1:] - } else { - return "", ErrNotWKT + if len(s) == 18 { // just GEOMETRYCOLLECTION + return nil, ErrNotWKT } - if s[len(s)-1] == ')' { - s = s[:len(s)-1] - } else { - return "", ErrNotWKT + geometries := splitGeometryCollection(s[18:]) + if len(geometries) == 0 { + return orb.Collection{}, nil } - return strings.Trim(s, " "), nil -} -// parsePoint pase point by (x y) -func parsePoint(s string) (p orb.Point, err error) { - ps := strings.Split(s, " ") - if len(ps) != 2 { - return orb.Point{}, ErrNotWKT - } + c := make(orb.Collection, 0, len(geometries)) + for _, g := range geometries { + if len(g) == 0 { + continue + } - x, err := strconv.ParseFloat(ps[0], 64) - if err != nil { - return orb.Point{}, err - } + tg, err := Unmarshal(g) + if err != nil { + return nil, err + } - y, err := strconv.ParseFloat(ps[1], 64) - if err != nil { - return orb.Point{}, err + c = append(c, tg) } - return orb.Point{x, y}, nil + return c, nil } // splitGeometryCollection split GEOMETRYCOLLECTION to more geometry @@ -175,7 +417,7 @@ func splitGeometryCollection(s string) (r []string) { stack = append(stack, v) continue } - if v >= 'A' && v < 'Z' { + if ('A' <= v && v < 'Z') || ('a' <= v && v < 'z') { t := 
string(stack) r = append(r, t[:len(t)-1]) stack = make([]rune, 0) @@ -192,221 +434,177 @@ func splitGeometryCollection(s string) (r []string) { } // Unmarshal return a geometry by parsing the WKT string. -func Unmarshal(s string) (geom orb.Geometry, err error) { - s = strings.ToUpper(strings.Trim(s, " ")) - switch { - case strings.Contains(s, "GEOMETRYCOLLECTION"): - if s == "GEOMETRYCOLLECTION EMPTY" { - return orb.Collection{}, nil - } - - s = strings.ReplaceAll(s, "GEOMETRYCOLLECTION", "") - if len(s) == 0 { - return nil, ErrNotWKT - } - - tc := orb.Collection{} - geometries := splitGeometryCollection(s) - if len(geometries) == 0 { - return nil, err - } - - for _, g := range geometries { - if len(g) == 0 { - continue - } - - tg, err := Unmarshal(g) - if err != nil { - return nil, err - } - - tc = append(tc, tg) - } +func Unmarshal(s string) (orb.Geometry, error) { + var ( + g orb.Geometry + err error + ) + + s = trimSpace(s) + prefix := upperPrefix(s) + + if bytes.HasPrefix(prefix, []byte("POINT")) { + g, err = unmarshalPoint(s) + } else if bytes.HasPrefix(prefix, []byte("LINESTRING")) { + g, err = unmarshalLineString(s) + } else if bytes.HasPrefix(prefix, []byte("POLYGON")) { + g, err = unmarshalPolygon(s) + } else if bytes.HasPrefix(prefix, []byte("MULTIPOINT")) { + g, err = unmarshalMultiPoint(s) + } else if bytes.HasPrefix(prefix, []byte("MULTILINESTRING")) { + g, err = unmarshalMultiLineString(s) + } else if bytes.HasPrefix(prefix, []byte("MULTIPOLYGON")) { + g, err = unmarshalMultiPolygon(s) + } else if bytes.HasPrefix(prefix, []byte("GEOMETRYCOLLECTION")) { + g, err = unmarshalCollection(s) + } else { + return nil, ErrUnsupportedGeometry + } - geom = tc + if err != nil { + return nil, err + } - case strings.Contains(s, "MULTIPOINT"): - if s == "MULTIPOINT EMPTY" { - return orb.MultiPoint{}, nil - } + return g, nil +} - s, err := trimSpaceBrackets(strings.ReplaceAll(s, "MULTIPOINT", "")) +// splitByRegexpYield splits the input by the regexp. 
// The first callback, set, is called exactly once, before any piece is
// yielded, with the number of pieces that will follow, so the caller can
// pre-size its result. The second callback, yield, is then called once per
// piece, in order; the first error it returns stops the split and is returned.
//
// re must contain a capture group: only the text matched by group 1 is
// treated as the separator and dropped. Anything the expression matches
// outside that group stays attached to the neighbouring pieces.
//
// We use a yield function because it was faster/used less memory than
// allocating an array of the results.
func splitByRegexpYield(s string, re *regexp.Regexp, set func(int), yield func(string) error) error {
	matches := re.FindAllStringSubmatchIndex(s, -1)
	set(len(matches) + 1)

	start := 0
	for _, m := range matches {
		// m[2] and m[3] are the start and end of capture group 1.
		if err := yield(s[start:m[2]]); err != nil {
			return err
		}
		start = m[3]
	}

	return yield(s[start:])
}

// splitOnComma is a hand-rolled replacement for splitting on the regex
// \s*,\s* i.e. a comma with optional whitespace on each side, e.g. ' , '.
// Whitespace is the set the regexp class \s matches: space, tab, newline,
// carriage return and form feed.
//
// yield is called once per piece, in order, and always at least once (an
// empty input yields a single empty piece). The first error it returns stops
// the split and is returned.
//
// We use a yield function because it was faster/used less memory than
// allocating an array of the results.
func splitOnComma(s string, yield func(s string) error) error {
	// In WKT points are separated by commas, coordinates in a point are
	// separated by spaces, e.g. "1 2,3 4, 5 6 , 7 8".
	// We want to split this and find each point.

	var (
		// at is right after the previous whitespace-comma-whitespace match.
		// Once the next match is found, s[at:start] is the piece to yield.
		at int

		// start is where the current run of whitespace and commas began.
		start int

		// A whitespace character or a comma opens a run. The run only
		// counts as a separator if a comma was seen somewhere inside it.
		inRun, sawComma bool
	)

	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ',':
			if !inRun {
				inRun = true
				start = i
			}
			sawComma = true
			continue
		case ' ', '\t', '\n', '\r', '\f':
			if !inRun {
				inRun = true
				start = i
			}
			continue
		}

		// s[i] is part of a value: close the run that just ended, if any.
		if sawComma {
			if err := yield(s[at:start]); err != nil {
				return err
			}
			at = i
		}
		inRun = false
		sawComma = false
	}

	return yield(s[at:])
}
trimSpaceBrackets(strings.ReplaceAll(s, "MULTIPOLYGON", "")) - if err != nil { - return nil, err - } +// trimSpaceBrackets trim space and brackets +func trimSpaceBrackets(s string) (string, error) { + s = trimSpace(s) + if len(s) == 0 { + return s, nil + } - tmpoly := orb.MultiPolygon{} - for _, poly := range splitByRegexp(s, doubleParen) { - poly, err := trimSpaceBrackets(poly) - if err != nil { - return nil, err - } + if s[0] == '(' { + s = s[1:] + } else { + return "", ErrNotWKT + } - tpoly := orb.Polygon{} - for _, r := range splitByRegexp(poly, singleParen) { - r, err := trimSpaceBrackets(r) - if err != nil { - return nil, err - } + if s[len(s)-1] == ')' { + s = s[:len(s)-1] + } else { + return "", ErrNotWKT + } - tr := orb.Ring{} - for _, p := range splitByRegexp(r, noParen) { - tp, err := parsePoint(p) - if err != nil { - return nil, err - } + return trimSpace(s), nil +} - tr = append(tr, tp) - } +func trimSpace(s string) string { + if len(s) == 0 { + return s + } - tpoly = append(tpoly, tr) - } + var start, end int - tmpoly = append(tmpoly, tpoly) + for start = 0; start < len(s); start++ { + if v := s[start]; v != ' ' && v != '\t' && v != '\n' { + break } + } - geom = tmpoly - - case strings.Contains(s, "POLYGON"): - if s == "POLYGON EMPTY" { - return orb.Polygon{}, nil + for end = len(s) - 1; end >= 0; end-- { + if v := s[end]; v != ' ' && v != '\t' && v != '\n' { + break } + } - s, err := trimSpaceBrackets(strings.ReplaceAll(s, "POLYGON", "")) - if err != nil { - return nil, err - } + if start >= end { + return "" + } - rings := splitByRegexp(s, singleParen) - tpoly := make(orb.Polygon, 0, len(rings)) - for _, r := range rings { - r, err := trimSpaceBrackets(r) - if err != nil { - return nil, err - } + return s[start : end+1] +} - ps := splitByRegexp(r, noParen) - tring := orb.Ring{} - for _, p := range ps { - tp, err := parsePoint(p) - if err != nil { - return nil, err - } - tring = append(tring, tp) - } - tpoly = append(tpoly, tring) +// gets the 
ToUpper case of the first 20 chars. +// This is to determin the type without doing a full strings.ToUpper +func upperPrefix(s string) []byte { + prefix := make([]byte, 20) + for i := 0; i < 20 && i < len(s); i++ { + if 'a' <= s[i] && s[i] <= 'z' { + prefix[i] = s[i] - ('a' - 'A') + } else { + prefix[i] = s[i] } - geom = tpoly - default: - return nil, ErrUnsupportedGeometry } - return + return prefix } -func splitByRegexp(s string, re *regexp.Regexp) []string { - indexes := re.FindAllStringSubmatchIndex(s, -1) - start := 0 - result := make([]string, len(indexes)+1) - for i, element := range indexes { - result[i] = s[start:element[2]] - start = element[3] +// coppied here from strings.Cut so we don't require go1.18 +func cut(s, sep string) (before, after string, found bool) { + if i := strings.Index(s, sep); i >= 0 { + return s[:i], s[i+len(sep):], true } - result[len(indexes)] = s[start:] - return result + return s, "", false } diff --git a/encoding/wkt/unmarshal_test.go b/encoding/wkt/unmarshal_test.go index e0603a8..c32fc50 100644 --- a/encoding/wkt/unmarshal_test.go +++ b/encoding/wkt/unmarshal_test.go @@ -1,6 +1,9 @@ package wkt import ( + "encoding/json" + "reflect" + "strings" "testing" "github.com/paulmach/orb" @@ -118,13 +121,40 @@ func TestUnmarshalPoint(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - geom, err := UnmarshalPoint(tc.s) + // you know it's a point + p, err := UnmarshalPoint(tc.s) if err != nil { t.Fatal(err) } - if !geom.Equal(tc.expected) { - t.Log(geom) + if !p.Equal(tc.expected) { + t.Log(p) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // lower case + p, err = UnmarshalPoint(" " + strings.ToLower(tc.s)) + if err != nil { + t.Fatal(err) + } + + if !p.Equal(tc.expected) { + t.Log(p) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // via generic unmarshal + geom, err := Unmarshal(tc.s) + if err != nil { + t.Fatal(err) + } + + p = geom.(orb.Point) + + if 
!p.Equal(tc.expected) { + t.Log(p) t.Log(tc.expected) t.Errorf("incorrect wkt unmarshalling") } @@ -195,13 +225,40 @@ func TestUnmarshalMultiPoint(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - geom, err := UnmarshalMultiPoint(tc.s) + // you know it's multipoint + mp, err := UnmarshalMultiPoint(tc.s) if err != nil { t.Fatal(err) } - if !geom.Equal(tc.expected) { - t.Log(geom) + if !mp.Equal(tc.expected) { + t.Log(mp) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // lower case + mp, err = UnmarshalMultiPoint(" " + strings.ToLower(tc.s)) + if err != nil { + t.Fatal(err) + } + + if !mp.Equal(tc.expected) { + t.Log(mp) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // via generic unmarshall + geom, err := Unmarshal(tc.s) + if err != nil { + t.Fatal(err) + } + + mp = geom.(orb.MultiPoint) + + if !mp.Equal(tc.expected) { + t.Log(mp) t.Log(tc.expected) t.Errorf("incorrect wkt unmarshalling") } @@ -267,13 +324,40 @@ func TestUnmarshalLineString(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - geom, err := UnmarshalLineString(tc.s) + // you know it's a linestring + ls, err := UnmarshalLineString(tc.s) + if err != nil { + t.Fatal(err) + } + + if !ls.Equal(tc.expected) { + t.Log(ls) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // lower case + ls, err = UnmarshalLineString(" " + strings.ToLower(tc.s)) + if err != nil { + t.Fatal(err) + } + + if !ls.Equal(tc.expected) { + t.Log(ls) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // via generic unmarshal + geom, err := Unmarshal(tc.s) if err != nil { t.Fatal(err) } - if !geom.Equal(tc.expected) { - t.Log(geom) + ls = geom.(orb.LineString) + + if !ls.Equal(tc.expected) { + t.Log(ls) t.Log(tc.expected) t.Errorf("incorrect wkt unmarshalling") } @@ -334,13 +418,40 @@ func TestUnmarshalMultiLineString(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t 
*testing.T) { - geom, err := UnmarshalMultiLineString(tc.s) + // you know it's a linestring + mls, err := UnmarshalMultiLineString(tc.s) if err != nil { t.Fatal(err) } - if !geom.Equal(tc.expected) { - t.Log(geom) + if !mls.Equal(tc.expected) { + t.Log(mls) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // lower case + mls, err = UnmarshalMultiLineString(" " + strings.ToLower(tc.s)) + if err != nil { + t.Fatal(err) + } + + if !mls.Equal(tc.expected) { + t.Log(mls) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // via generic unmarshal + geom, err := Unmarshal(tc.s) + if err != nil { + t.Fatal(err) + } + + mls = geom.(orb.MultiLineString) + + if !mls.Equal(tc.expected) { + t.Log(mls) t.Log(tc.expected) t.Errorf("incorrect wkt unmarshalling") } @@ -411,13 +522,40 @@ func TestUnmarshalPolygon(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - geom, err := UnmarshalPolygon(tc.s) + // you know it's a polygon + p, err := UnmarshalPolygon(tc.s) if err != nil { t.Fatal(err) } - if !geom.Equal(tc.expected) { - t.Log(geom) + if !p.Equal(tc.expected) { + t.Log(p) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // lower case + p, err = UnmarshalPolygon(strings.ToLower(tc.s)) + if err != nil { + t.Fatal(err) + } + + if !p.Equal(tc.expected) { + t.Log(p) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // via generic unmarshal + geom, err := Unmarshal(tc.s) + if err != nil { + t.Fatal(err) + } + + p = geom.(orb.Polygon) + + if !p.Equal(tc.expected) { + t.Log(p) t.Log(tc.expected) t.Errorf("incorrect wkt unmarshalling") } @@ -483,12 +621,38 @@ func TestUnmarshalMutilPolygon(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - geom, err := UnmarshalMultiPolygon(tc.s) + // you know it's a multipolygon + mp, err := UnmarshalMultiPolygon(tc.s) if err != nil { t.Fatal(err) } - if !geom.Equal(tc.expected) { - t.Log(geom) + if 
!mp.Equal(tc.expected) { + t.Log(mp) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // lower case + mp, err = UnmarshalMultiPolygon(" " + strings.ToLower(tc.s)) + if err != nil { + t.Fatal(err) + } + if !mp.Equal(tc.expected) { + t.Log(mp) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // via generic unmarshal + geom, err := Unmarshal(tc.s) + if err != nil { + t.Fatal(err) + } + + mp = geom.(orb.MultiPolygon) + + if !mp.Equal(tc.expected) { + t.Log(mp) t.Log(tc.expected) t.Errorf("incorrect wkt unmarshalling") } @@ -566,12 +730,40 @@ func TestUnmarshalCollection(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - geom, err := UnmarshalCollection(tc.s) + // you know it's a collection + c, err := UnmarshalCollection(tc.s) if err != nil { t.Fatal(err) } - if !geom.Equal(tc.expected) { - t.Log(geom) + + if !c.Equal(tc.expected) { + t.Log(c) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // lower case + c, err = UnmarshalCollection(" " + strings.ToLower(tc.s)) + if err != nil { + t.Fatal(err) + } + + if !c.Equal(tc.expected) { + t.Log(c) + t.Log(tc.expected) + t.Errorf("incorrect wkt unmarshalling") + } + + // via generic unmarshal + geom, err := Unmarshal(tc.s) + if err != nil { + t.Fatal(err) + } + + c = geom.(orb.Collection) + + if !c.Equal(tc.expected) { + t.Log(c) t.Log(tc.expected) t.Errorf("incorrect wkt unmarshalling") } @@ -616,3 +808,42 @@ func TestUnmarshalCollection_errors(t *testing.T) { }) } } + +func TestSplitOnComma(t *testing.T) { + cases := []struct { + name string + input string + expected []string + }{ + { + name: "comma", + input: "0 1,3 0,4 3,0 4,0 1", + expected: []string{"0 1", "3 0", "4 3", "0 4", "0 1"}, + }, + { + name: "comma spaces", + input: "0 1 ,3 0, 4 3 , 0 4 , 0 1", + expected: []string{"0 1", "3 0", "4 3", "0 4", "0 1"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + var results []string + 
splitOnComma(tc.input, func(s string) error { + results = append(results, s) + return nil + }) + if !reflect.DeepEqual(tc.expected, results) { + t.Log(tc.input) + + data, _ := json.Marshal(results) + t.Log(string(data)) + + t.Log(tc.expected) + t.Errorf("incorrect results") + } + + }) + } +} From 81828b8d4bc768b67eff3a3690c8757374119c83 Mon Sep 17 00:00:00 2001 From: Paul Mach Date: Tue, 9 Jan 2024 22:33:35 -0800 Subject: [PATCH 2/2] check error because the linter demands it. --- encoding/wkt/unmarshal_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/encoding/wkt/unmarshal_test.go b/encoding/wkt/unmarshal_test.go index c32fc50..ae09cba 100644 --- a/encoding/wkt/unmarshal_test.go +++ b/encoding/wkt/unmarshal_test.go @@ -830,10 +830,14 @@ func TestSplitOnComma(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { var results []string - splitOnComma(tc.input, func(s string) error { + err := splitOnComma(tc.input, func(s string) error { results = append(results, s) return nil }) + if err != nil { + t.Fatalf("impossible error: %v", err) + } + if !reflect.DeepEqual(tc.expected, results) { t.Log(tc.input)