Skip to content

Commit

Permalink
core(parser) - create generic parser engine
Browse files Browse the repository at this point in the history
  • Loading branch information
PxyUp committed Jan 23, 2024
1 parent 04bd73c commit cedffcb
Show file tree
Hide file tree
Showing 14 changed files with 474 additions and 864 deletions.
2 changes: 1 addition & 1 deletion examples/plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (pl *plugin) Format(parsedValue builder.Jsonable, field *config.PluginField
err := json.Unmarshal(field.Config, pl)
if err != nil {
logger.Errorw("cant unmarshal plugin configuration", "error", err.Error())
return builder.Null()
return builder.NullValue
}
return builder.String(fmt.Sprintf("Hello %s", pl.Name))
}
Expand Down
2 changes: 1 addition & 1 deletion examples/plugin/hardcoder/hardcoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func (pl *plugin) Format(parsedValue builder.Jsonable, field *config.PluginField
err := json.Unmarshal(field.Config, pl)
if err != nil {
logger.Errorw("cant unmarshal plugin configuration", "error", err.Error())
return builder.Null()
return builder.NullValue
}
return builder.String(fmt.Sprintf("Hello %s", pl.Name))
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ type Jsonable interface {

// Package-level values shared by callers of the builder package.
var (
// EMPTY is the value produced by PureString for the empty string.
EMPTY = PureString("")

// NullValue holds the result of a single Null() call made at package
// initialisation, so callers can return it instead of calling Null() again.
NullValue = Null()
)
286 changes: 40 additions & 246 deletions pkg/parser/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,236 +2,32 @@ package parser

import (
"bytes"
builder "github.com/PxyUp/fitter/pkg/builder"
"strconv"
"sync"

"github.com/PuerkitoBio/goquery"
"github.com/PxyUp/fitter/pkg/builder"
"github.com/PxyUp/fitter/pkg/config"
"github.com/PxyUp/fitter/pkg/logger"
"strconv"
)

// htmlParser builds JSON-able values out of an HTML document according to a
// config.Model.
type htmlParser struct {
// logger is forwarded to buildGeneratedField for base fields that carry a
// Generated config.
logger logger.Logger
// body is the raw document passed to newHTML.
body []byte
// parserBody is the root selection of the parsed document; Parse returns a
// null result when it is nil.
parserBody *goquery.Selection
}

// newHTML parses body as an HTML document and returns a parser rooted at the
// document selection, using the null logger until WithLogger is called.
//
// If the document cannot be parsed, parserBody is left nil instead of
// dereferencing a nil document; Parse turns a nil parserBody into a null
// result.
func newHTML(body []byte) *htmlParser {
	p := &htmlParser{
		body:   body,
		logger: logger.Null,
	}

	document, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
	if err != nil {
		// goquery returns a nil document together with the error, so
		// touching document.Selection here would panic.
		return p
	}

	p.parserBody = document.Selection
	return p
}

// WithLogger replaces the parser's logger and returns the same parser so the
// call can be chained.
func (h *htmlParser) WithLogger(logger logger.Logger) *htmlParser {
h.logger = logger
return h
}

// Parse evaluates model against the parsed document and returns both the raw
// value and its JSON form.
//
// The model is resolved in priority order: BaseField, then ArrayConfig, then
// ObjectConfig. A null result is returned when the document could not be
// parsed (parserBody is nil), when model is nil, or when none of the three
// configs is set; the last two cases previously dereferenced a nil pointer.
// The returned error is always nil.
func (h *htmlParser) Parse(model *config.Model) (*ParseResult, error) {
	var res builder.Jsonable

	switch {
	case h.parserBody == nil || model == nil:
		res = builder.Null()
	case model.BaseField != nil:
		res = h.buildBaseField(h.parserBody, model.BaseField, nil)
	case model.ArrayConfig != nil:
		res = h.buildArray(model.ArrayConfig)
	case model.ObjectConfig != nil:
		res = h.buildObject(model.ObjectConfig)
	default:
		// Nothing to build: mirror resolveField, which yields null for an
		// empty field definition.
		res = builder.Null()
	}

	return &ParseResult{
		RawResult: res.Raw(),
		Json:      res.ToJson(),
	}, nil
}

// buildArray builds an array value for the given config, starting from the
// document root selection.
func (h *htmlParser) buildArray(array *config.ArrayConfig) builder.Jsonable {
return h.buildArrayField(h.parserBody, array)
}

// buildObject builds an object value for the given config, starting from the
// document root selection.
func (h *htmlParser) buildObject(object *config.ObjectConfig) builder.Jsonable {
return h.buildObjectField(h.parserBody, object)
}

// buildObjectField resolves every field of object against parent and returns
// the results as a single object value keyed by field name.
//
// Each field is resolved in its own goroutine. The mutex guards only the
// write into the result map; holding it across resolveField (as before)
// forced the goroutines to run one at a time.
func (h *htmlParser) buildObjectField(parent *goquery.Selection, object *config.ObjectConfig) builder.Jsonable {
	kv := make(map[string]builder.Jsonable, len(object.Fields))

	var (
		wg sync.WaitGroup
		mu sync.Mutex
	)

	for key, value := range object.Fields {
		wg.Add(1)
		// Pass key/value as arguments so each goroutine gets its own copy.
		go func(k string, v *config.Field) {
			defer wg.Done()

			resolved := h.resolveField(parent, v, nil)

			mu.Lock()
			kv[k] = resolved
			mu.Unlock()
		}(key, value)
	}

	wg.Wait()

	return builder.Object(kv)
}

// buildStaticArray builds an array whose elements are configured individually
// by index in cfg.Items, each resolved against the document root.
//
// The array length is cfg.Length when it is positive, otherwise the number of
// configured items. Items whose index does not fit in that length are skipped;
// previously they caused an index-out-of-range panic inside a goroutine, which
// terminates the whole process. Slots without a configured item stay nil.
// NOTE(review): confirm builder.Array tolerates nil entries.
func (h *htmlParser) buildStaticArray(cfg *config.StaticArrayConfig) builder.Jsonable {
	length := len(cfg.Items)
	if cfg.Length > 0 {
		length = int(cfg.Length)
	}

	values := make([]builder.Jsonable, length)

	var wg sync.WaitGroup

	for key, value := range cfg.Items {
		// Compare as uint64 so a large uint32 key cannot wrap negative on
		// platforms where int is 32 bits wide.
		if uint64(key) >= uint64(length) {
			continue
		}

		wg.Add(1)
		go func(k uint32, v *config.Field) {
			defer wg.Done()

			// Each goroutine writes a distinct slot, so no lock is needed.
			arrIndex := k
			values[k] = h.resolveField(h.parserBody, v, &arrIndex)
		}(key, value)
	}

	wg.Wait()

	return builder.Array(values)
}

// buildFirstOfField resolves the candidate fields in order against parent and
// returns the first result that is not empty. When every candidate is empty
// (or there are none) it returns a null value.
func (h *htmlParser) buildFirstOfField(parent *goquery.Selection, fields []*config.Field, index *uint32) builder.Jsonable {
	for i := range fields {
		candidate := h.resolveField(parent, fields[i], index)
		if candidate.IsEmpty() {
			continue
		}
		return candidate
	}

	return builder.Null()
}

// resolveField dispatches on which part of field is configured and builds the
// matching value relative to parent. The order of precedence is FirstOf,
// BaseField, ObjectConfig, ArrayConfig; a field with none of them set resolves
// to a null value. index is passed through to the FirstOf and BaseField
// builders.
func (h *htmlParser) resolveField(parent *goquery.Selection, field *config.Field, index *uint32) builder.Jsonable {
	switch {
	case len(field.FirstOf) != 0:
		return h.buildFirstOfField(parent, field.FirstOf, index)
	case field.BaseField != nil:
		return h.buildBaseField(parent, field.BaseField, index)
	case field.ObjectConfig != nil:
		return h.buildObjectField(parent, field.ObjectConfig)
	case field.ArrayConfig != nil:
		return h.buildArrayField(parent, field.ArrayConfig)
	default:
		return builder.Null()
	}
}

func (h *htmlParser) buildArrayField(parent *goquery.Selection, array *config.ArrayConfig) builder.Jsonable {
if array.StaticConfig != nil {
return h.buildStaticArray(array.StaticConfig)
}

if array.RootPath != "" {
parent = parent.Find(array.RootPath)
}

size := parent.Length()
if array.LengthLimit > 0 {
size = int(array.LengthLimit)
}

values := make([]builder.Jsonable, size)

if array.ItemConfig.Field != nil {
var wg sync.WaitGroup
parent.Each(func(i int, s *goquery.Selection) {
if i >= size {
return
}
wg.Add(1)
go func(index int, selection *goquery.Selection) {
defer wg.Done()

arrIndex := uint32(index)
values[index] = h.buildBaseField(selection, array.ItemConfig.Field, &arrIndex)
}(i, s)

})
wg.Wait()
return builder.Array(values)
}

if array.ItemConfig.ArrayConfig != nil {
var wg sync.WaitGroup
parent.Each(func(i int, s *goquery.Selection) {
if i >= size {
return
}

wg.Add(1)
go func(index int, selection *goquery.Selection) {
defer wg.Done()

values[index] = h.buildArrayField(selection, array.ItemConfig.ArrayConfig)
}(i, s)
})
wg.Wait()
return builder.Array(values)
}
func selectionToArray(parent *goquery.Selection) []*goquery.Selection {
tmp := make([]*goquery.Selection, len(parent.Nodes))

var wg sync.WaitGroup
parent.Each(func(i int, s *goquery.Selection) {
if i >= size {
return
}

wg.Add(1)
go func(index int, selection *goquery.Selection) {
defer wg.Done()

values[index] = h.buildObjectField(selection, array.ItemConfig)
}(i, s)
parent.Each(func(i int, selection *goquery.Selection) {
tmp[i] = selection
})
wg.Wait()

return builder.Array(values)
return tmp
}

func (h *htmlParser) fillUpBaseField(source *goquery.Selection, field *config.BaseField) builder.Jsonable {
func htmlFillUpBaseField(source *goquery.Selection, field *config.BaseField) builder.Jsonable {
if source.Length() <= 0 {
return builder.Null()
return builder.NullValue
}

if field.Type == config.HtmlString {
htmlString, err := source.Html()
if err != nil {
return builder.Null()
return builder.NullValue
}
return builder.String(htmlString)
}
Expand All @@ -241,7 +37,7 @@ func (h *htmlParser) fillUpBaseField(source *goquery.Selection, field *config.Ba
if field.HTMLAttribute != "" {
attrValue, attrExists := source.First().Attr(field.HTMLAttribute)
if !attrExists {
return builder.Null()
return builder.NullValue
}
text = attrValue
} else {
Expand All @@ -250,71 +46,69 @@ func (h *htmlParser) fillUpBaseField(source *goquery.Selection, field *config.Ba

switch field.Type {
case config.Null:
return builder.Null()
return builder.NullValue
case config.RawString:
return builder.String(text, false)
case config.String:
return builder.String(text)
case config.Bool:
boolValue, err := strconv.ParseBool(text)
if err != nil {
return builder.Null()
return builder.NullValue
}
return builder.Bool(boolValue)
case config.Float:
float32Value, err := strconv.ParseFloat(text, 32)
if err != nil {
return builder.Null()
return builder.NullValue
}
return builder.Float(float32(float32Value))
case config.Float64:
float64Value, err := strconv.ParseFloat(text, 64)
if err != nil {
return builder.Null()
return builder.NullValue
}
return builder.Float64(float64Value)
case config.Int:
intValue, err := strconv.ParseInt(text, 10, 32)
if err != nil {
return builder.Null()
return builder.NullValue
}
return builder.Int(int(intValue))
case config.Int64:
int64Value, err := strconv.ParseInt(text, 10, 64)
if err != nil {
return builder.Null()
return builder.NullValue
}
return builder.Int64(int64Value)
}

return builder.Null()
}

func (h *htmlParser) buildFirstOfBaseField(source *goquery.Selection, fields []*config.BaseField, index *uint32) builder.Jsonable {
for _, value := range fields {
tempValue := h.buildBaseField(source, value, index)
if !tempValue.IsEmpty() {
return tempValue
}
}

return builder.Null()
return builder.NullValue
}

func (h *htmlParser) buildBaseField(source *goquery.Selection, field *config.BaseField, index *uint32) builder.Jsonable {
if len(field.FirstOf) != 0 {
return h.buildFirstOfBaseField(source, field.FirstOf, index)
}

if field.Path != "" {
source = source.Find(field.Path)
}
func NewHTML(body []byte, logger logger.Logger) *engineParser[*goquery.Selection] {
document, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))

tempValue := h.fillUpBaseField(source, field)
return &engineParser[*goquery.Selection]{
getText: func(r *goquery.Selection) string {
return r.First().Text()
},
parserBody: document.Selection,
logger: logger,
getAll: func(parent *goquery.Selection, path string) []*goquery.Selection {
if path == "" {
return selectionToArray(parent)
}

if field.Generated != nil {
return buildGeneratedField(tempValue, field.Type, field.Generated, h.logger, index)
res := parent.Find(path)
return selectionToArray(res)
},
getOne: func(parent *goquery.Selection, path string) *goquery.Selection {
if path == "" {
return parent
}
return parent.Find(path)
},
customFillUpBaseField: htmlFillUpBaseField,
}

return tempValue
}
Loading

0 comments on commit cedffcb

Please sign in to comment.