Skip to content

Commit

Permalink
Merge pull request #247 from ddosify/develop
Browse files Browse the repository at this point in the history
implement html extraction feature
  • Loading branch information
fatihbaltaci authored Nov 24, 2023
2 parents daeabc1 + 30023a2 commit ae9bcd5
Show file tree
Hide file tree
Showing 11 changed files with 273 additions and 13 deletions.
2 changes: 2 additions & 0 deletions config/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type RegexCaptureConf struct {
type capturePath struct {
JsonPath *string `json:"json_path"`
XPath *string `json:"xpath"`
XpathHtml *string `json:"xpath_html"`
RegExp *RegexCaptureConf `json:"regexp"`
From string `json:"from"` // body,header,cookie
CookieName *string `json:"cookie_name"`
Expand Down Expand Up @@ -375,6 +376,7 @@ func stepToScenarioStep(s step) (types.ScenarioStep, error) {
capConf := types.EnvCaptureConf{
JsonPath: path.JsonPath,
Xpath: path.XPath,
XpathHtml: path.XpathHtml,
Name: name,
From: types.SourceType(path.From),
Key: path.HeaderKey,
Expand Down
22 changes: 20 additions & 2 deletions core/scenario/scripting/assertion/assert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ func TestAssert(t *testing.T) {
expected: true,
},
{
input: `equals(xml_path("//item/title"),"ABC")`,
input: `equals(xpath("//item/title"),"ABC")`,
envs: &evaluator.AssertEnv{
Body: `<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
Expand All @@ -502,6 +502,19 @@ func TestAssert(t *testing.T) {

expected: true,
},
{
input: `equals(html_path("//body/h1"),"ABC")`,
envs: &evaluator.AssertEnv{
Body: `<!DOCTYPE html>
<html>
<body>
<h1>ABC</h1>
</body>
</html>`,
},

expected: true,
},
{
input: "equals(cookies.test.value, \"value\")",
envs: &evaluator.AssertEnv{
Expand Down Expand Up @@ -790,7 +803,12 @@ func TestAssert(t *testing.T) {
expectedError: "ArgumentError",
},
{
input: "xml_path(23)", // arg must be string
input: "xpath(23)", // arg must be string
expected: false,
expectedError: "ArgumentError",
},
{
input: "html_path(23)", // arg must be string
expected: false,
expectedError: "ArgumentError",
},
Expand Down
9 changes: 9 additions & 0 deletions core/scenario/scripting/assertion/evaluator/evaluator.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ func Eval(node ast.Node, env *AssertEnv, receivedMap map[string]interface{}) (in
}
}
return xmlExtract(env.Body, xpath)
case HTMLPATH:
html, ok := args[0].(string)
if !ok {
return false, ArgumentError{
msg: "htmlpath must be a string",
wrappedErr: nil,
}
}
return htmlExtract(env.Body, html)
case REGEXP:
regexp, ok := args[1].(string)
if !ok {
Expand Down
9 changes: 8 additions & 1 deletion core/scenario/scripting/assertion/evaluator/function.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ var xmlExtract = func(source interface{}, xPath string) (interface{}, error) {
return val, err
}

// htmlExtract evaluates xPath against an HTML source by delegating to
// extraction.ExtractFromHtml and returns its result and error unchanged.
var htmlExtract = func(source interface{}, xPath string) (interface{}, error) {
	return extraction.ExtractFromHtml(source, xPath)
}

var regexExtract = func(source interface{}, xPath string, matchNo int64) (interface{}, error) {
val, err := extraction.ExtractWithRegex(source, types.RegexCaptureConf{
Exp: &xPath,
Expand Down Expand Up @@ -194,6 +199,7 @@ var assertionFuncMap = map[string]struct{}{
IN: {},
JSONPATH: {},
XMLPATH: {},
HTMLPATH: {},
REGEXP: {},
EXISTS: {},
CONTAINS: {},
Expand All @@ -216,7 +222,8 @@ const (
EQUALS = "equals"
IN = "in"
JSONPATH = "json_path"
XMLPATH = "xml_path"
XMLPATH = "xpath"
HTMLPATH = "html_path"
REGEXP = "regexp"
EXISTS = "exists"
CONTAINS = "contains"
Expand Down
14 changes: 14 additions & 0 deletions core/scenario/scripting/extraction/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ func Extract(source interface{}, ce types.EnvCaptureConf) (val interface{}, err
val, err = ExtractWithRegex(source, *ce.RegExp)
} else if ce.Xpath != nil {
val, err = ExtractFromXml(source, *ce.Xpath)
} else if ce.XpathHtml != nil {
val, err = ExtractFromHtml(source, *ce.XpathHtml)
}
case types.Cookie:
cookies := source.(map[string]*http.Cookie)
Expand Down Expand Up @@ -111,6 +113,18 @@ func ExtractFromXml(source interface{}, xPath string) (interface{}, error) {
}
}

// ExtractFromHtml runs the xPath query against an HTML source.
//
// source must be either a []byte or a string; each is handed to the matching
// htmlExtractor method. Any other dynamic type (including a nil interface)
// yields an empty string together with an error.
func ExtractFromHtml(source interface{}, xPath string) (interface{}, error) {
	extractor := htmlExtractor{}

	if raw, isBytes := source.([]byte); isBytes {
		return extractor.extractFromByteSlice(raw, xPath)
	}
	if text, isString := source.(string); isString {
		return extractor.extractFromString(text, xPath)
	}

	return "", fmt.Errorf("Unsupported type for extraction source")
}

type ExtractionError struct { // UnWrappable
msg string
wrappedErr error
Expand Down
43 changes: 43 additions & 0 deletions core/scenario/scripting/extraction/html.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package extraction

import (
"bytes"
"fmt"

"github.com/antchfx/htmlquery"
)

// htmlExtractor groups the XPath-over-HTML extraction methods. It carries no
// state, so the zero value is ready to use.
type htmlExtractor struct{}

// extractFromByteSlice parses source as HTML and evaluates xPath against it.
//
// Only the first matching node is considered. The returned value is the Data
// of that node's first child. When the matched node has no children (an empty
// element such as <h1></h1>, a void element such as <input>, or a text() match)
// the node's own inner text is returned instead of dereferencing a nil child.
//
// An error is returned when the source cannot be parsed, when the expression
// cannot be evaluated, or when nothing matches.
func (xe htmlExtractor) extractFromByteSlice(source []byte, xPath string) (interface{}, error) {
	rootNode, err := htmlquery.Parse(bytes.NewReader(source))
	if err != nil {
		return nil, err
	}

	// returns the first matched element
	foundNode, err := htmlquery.Query(rootNode, xPath)
	if foundNode == nil || err != nil {
		return nil, fmt.Errorf("no match for the xPath_html: %s", xPath)
	}

	// A childless match has a nil FirstChild; reading FirstChild.Data would
	// panic, so fall back to the node's own text content.
	if foundNode.FirstChild == nil {
		return htmlquery.InnerText(foundNode), nil
	}

	// NOTE(review): if the first child is itself an element, Data is presumably
	// its tag name rather than its text — confirm this is the intended result.
	return foundNode.FirstChild.Data, nil
}

// extractFromString parses source as HTML and evaluates xPath against it.
//
// Only the first matching node is considered. The returned value is the Data
// of that node's first child. When the matched node has no children (an empty
// element such as <h1></h1>, a void element such as <input>, or a text() match)
// the node's own inner text is returned instead of dereferencing a nil child.
//
// An error is returned when the source cannot be parsed, when the expression
// cannot be evaluated, or when nothing matches.
func (xe htmlExtractor) extractFromString(source string, xPath string) (interface{}, error) {
	rootNode, err := htmlquery.Parse(bytes.NewBufferString(source))
	if err != nil {
		return nil, err
	}

	// returns the first matched element
	foundNode, err := htmlquery.Query(rootNode, xPath)
	if foundNode == nil || err != nil {
		return nil, fmt.Errorf("no match for this xpath_html")
	}

	// A childless match has a nil FirstChild; reading FirstChild.Data would
	// panic, so fall back to the node's own text content.
	if foundNode.FirstChild == nil {
		return htmlquery.InnerText(foundNode), nil
	}

	// NOTE(review): if the first child is itself an element, Data is presumably
	// its tag name rather than its text — confirm this is the intended result.
	return foundNode.FirstChild.Data, nil
}
120 changes: 120 additions & 0 deletions core/scenario/scripting/extraction/html_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package extraction

import (
"fmt"
"strings"
"testing"
)

// TestHtmlExtraction checks that the text of a single <h1> under <body> is
// extracted from a byte-slice source with the xpath "//body/h1".
func TestHtmlExtraction(t *testing.T) {
	expected := "Html Title"
	htmlSource := fmt.Sprintf(`<!DOCTYPE html>
<html>
<body>
<h1>%s</h1>
<p>My first paragraph.</p>
</body>
</html>`, expected)

	xe := htmlExtractor{}
	xpath := "//body/h1"
	val, err := xe.extractFromByteSlice([]byte(htmlSource), xpath)
	if err != nil {
		// Stop here: val is nil on error, so asserting it to string would panic.
		t.Fatalf("TestHtmlExtraction %v", err)
	}

	got, ok := val.(string)
	if !ok {
		t.Fatalf("TestHtmlExtraction expected a string, got: %T", val)
	}

	if !strings.EqualFold(got, expected) {
		t.Errorf("TestHtmlExtraction expected: %s, got: %s", expected, got)
	}
}

// TestHtmlExtractionSeveralNode checks that when several nodes match the
// xpath, only the first one is extracted.
func TestHtmlExtractionSeveralNode(t *testing.T) {
	// should extract only the first one
	expected := "Html Title"
	htmlSource := fmt.Sprintf(`<!DOCTYPE html>
<html>
<body>
<h1>%s</h1>
<h1>another node</h1>
<p>My first paragraph.</p>
</body>
</html>`, expected)

	xe := htmlExtractor{}
	xpath := "//h1"
	val, err := xe.extractFromByteSlice([]byte(htmlSource), xpath)
	if err != nil {
		// Stop here: val is nil on error, so asserting it to string would panic.
		t.Fatalf("TestHtmlExtractionSeveralNode %v", err)
	}

	got, ok := val.(string)
	if !ok {
		t.Fatalf("TestHtmlExtractionSeveralNode expected a string, got: %T", val)
	}

	if !strings.EqualFold(got, expected) {
		t.Errorf("TestHtmlExtractionSeveralNode expected: %s, got: %s", expected, got)
	}
}

// TestHtmlExtraction_PathNotFound checks that querying for an element that is
// absent from the document ("//h2") produces an error.
func TestHtmlExtraction_PathNotFound(t *testing.T) {
	title := "XML Title"
	document := fmt.Sprintf(`<!DOCTYPE html>
<html>
<body>
<h1>%s</h1>
<h1>another node</h1>
<p>My first paragraph.</p>
</body>
</html>`, title)

	extractor := htmlExtractor{}
	if _, err := extractor.extractFromByteSlice([]byte(document), "//h2"); err == nil {
		t.Errorf("TestHtmlExtraction_PathNotFound, should be err, got :%v", err)
	}
}

// TestInvalidHtml checks that extraction from a source that is not an HTML
// document, and contains no <input> element, yields an error for "//input".
func TestInvalidHtml(t *testing.T) {
	htmlSource := `invalid html source`

	xe := htmlExtractor{}
	xpath := "//input"
	_, err := xe.extractFromByteSlice([]byte(htmlSource), xpath)

	if err == nil {
		t.Errorf("TestInvalidHtml, should be err, got :%v", err)
	}
}

// TestHtmlComplexExtraction checks that an <h1> is still extracted when the
// document also contains an inline <script> full of special characters.
func TestHtmlComplexExtraction(t *testing.T) {
	expected := "Html Title"
	htmlSource := fmt.Sprintf(`<!DOCTYPE html>
<html>
<body>
<script>
if (typeof resourceLoadedSuccessfully === "function") {
resourceLoadedSuccessfully();
}
$(() => {
typeof cssVars === "function" && cssVars({onlyLegacy: true});
})
var trackGeoLocation = false;
alert('#@=$*€');
</script>
<h1>%s</h1>
<p>My first paragraph.</p>
</body>
</html>`, expected)

	xe := htmlExtractor{}
	xpath := "//body/h1"
	val, err := xe.extractFromByteSlice([]byte(htmlSource), xpath)
	if err != nil {
		// Stop here: val is nil on error, so asserting it to string would panic.
		t.Fatalf("TestHtmlComplexExtraction %v", err)
	}

	got, ok := val.(string)
	if !ok {
		t.Fatalf("TestHtmlComplexExtraction expected a string, got: %T", val)
	}

	if !strings.EqualFold(got, expected) {
		t.Errorf("TestHtmlComplexExtraction expected: %s, got: %s", expected, got)
	}
}
11 changes: 6 additions & 5 deletions core/types/scenario.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (s *Scenario) validate() error {

// add global envs
for key := range s.Envs {
if !envVarNameRegexp.Match([]byte(key)) { // not a valid env definition
if !envVarNameRegexp.MatchString(key) { // not a valid env definition
return fmt.Errorf("env key is not valid: %s", key)
}
definedEnvs[key] = struct{}{} // exist
Expand All @@ -98,7 +98,7 @@ func (s *Scenario) validate() error {
return fmt.Errorf("csv key can not have dot in it: %s", key)
}
for _, s := range splitted {
if !envVarNameRegexp.Match([]byte(s)) { // not a valid env definition
if !envVarNameRegexp.MatchString(s) { // not a valid env definition
return fmt.Errorf("csv key is not valid: %s", key)
}
}
Expand All @@ -112,7 +112,7 @@ func (s *Scenario) validate() error {

// enrich Envs map with captured envs from each step
for _, ce := range st.EnvsToCapture {
if !envVarNameRegexp.Match([]byte(ce.Name)) { // not a valid env definition
if !envVarNameRegexp.MatchString(ce.Name) { // not a valid env definition
return fmt.Errorf("captured env key is not valid: %s", ce.Name)
}
definedEnvs[ce.Name] = struct{}{}
Expand Down Expand Up @@ -251,6 +251,7 @@ type RegexCaptureConf struct {
type EnvCaptureConf struct {
JsonPath *string `json:"json_path"`
Xpath *string `json:"xpath"`
XpathHtml *string `json:"xpath_html"`
RegExp *RegexCaptureConf `json:"regexp"`
Name string `json:"as"`
From SourceType `json:"from"`
Expand Down Expand Up @@ -339,9 +340,9 @@ func validateCaptureConf(conf EnvCaptureConf) error {
}
}

if conf.From == Body && conf.JsonPath == nil && conf.RegExp == nil && conf.Xpath == nil {
if conf.From == Body && conf.JsonPath == nil && conf.RegExp == nil && conf.Xpath == nil && conf.XpathHtml == nil {
return CaptureConfigError{
msg: fmt.Sprintf("%s, one of json_path, regexp, xpath key must be specified when extracting from body", conf.Name),
msg: fmt.Sprintf("%s, one of json_path, regexp, xpath or xpath_html key must be specified when extracting from body", conf.Name),
}
}

Expand Down
Loading

0 comments on commit ae9bcd5

Please sign in to comment.