Merge pull request #246 from bastoscorp/develop

html-query-feature
getanteon · Nov 22, 2023 · 30023a2 · 30023a2
2 parents ffc945f + 0d16767
commit 30023a2
Show file tree

Hide file tree

Showing 11 changed files with 270 additions and 10 deletions.
diff --git a/config/json.go b/config/json.go
@@ -70,6 +70,7 @@ type RegexCaptureConf struct {
 type capturePath struct {
 	JsonPath   *string           `json:"json_path"`
 	XPath      *string           `json:"xpath"`
+	XpathHtml  *string           `json:"xpath_html"`
 	RegExp     *RegexCaptureConf `json:"regexp"`
 	From       string            `json:"from"` // body,header,cookie
 	CookieName *string           `json:"cookie_name"`
@@ -375,6 +376,7 @@ func stepToScenarioStep(s step) (types.ScenarioStep, error) {
 		capConf := types.EnvCaptureConf{
 			JsonPath:   path.JsonPath,
 			Xpath:      path.XPath,
+			XpathHtml:  path.XpathHtml,
 			Name:       name,
 			From:       types.SourceType(path.From),
 			Key:        path.HeaderKey,

diff --git a/core/scenario/scripting/assertion/assert_test.go b/core/scenario/scripting/assertion/assert_test.go
@@ -488,7 +488,7 @@ func TestAssert(t *testing.T) {
 			expected: true,
 		},
 		{
-			input: `equals(xml_path("//item/title"),"ABC")`,
+			input: `equals(xpath("//item/title"),"ABC")`,
 			envs: &evaluator.AssertEnv{
 				Body: `<?xml version="1.0" encoding="UTF-8" ?>
 		<rss version="2.0">
@@ -502,6 +502,19 @@ func TestAssert(t *testing.T) {
 
 			expected: true,
 		},
+		{
+			input: `equals(html_path("//body/h1"),"ABC")`,
+			envs: &evaluator.AssertEnv{
+				Body: `<!DOCTYPE html>
+				<html>
+				<body>
+				<h1>ABC</h1>
+				</body>
+				</html>`,
+			},
+
+			expected: true,
+		},
 		{
 			input: "equals(cookies.test.value, \"value\")",
 			envs: &evaluator.AssertEnv{
@@ -790,7 +803,12 @@ func TestAssert(t *testing.T) {
 			expectedError: "ArgumentError",
 		},
 		{
-			input:         "xml_path(23)", // arg must be string
+			input:         "xpath(23)", // arg must be string
+			expected:      false,
+			expectedError: "ArgumentError",
+		},
+		{
+			input:         "html_path(23)", // arg must be string
 			expected:      false,
 			expectedError: "ArgumentError",
 		},

diff --git a/core/scenario/scripting/assertion/evaluator/evaluator.go b/core/scenario/scripting/assertion/evaluator/evaluator.go
@@ -152,6 +152,15 @@ func Eval(node ast.Node, env *AssertEnv, receivedMap map[string]interface{}) (in
 						}
 					}
 					return xmlExtract(env.Body, xpath)
+				case HTMLPATH:
+					html, ok := args[0].(string)
+					if !ok {
+						return false, ArgumentError{
+							msg:        "htmlpath must be a string",
+							wrappedErr: nil,
+						}
+					}
+					return htmlExtract(env.Body, html)
 				case REGEXP:
 					regexp, ok := args[1].(string)
 					if !ok {

diff --git a/core/scenario/scripting/assertion/evaluator/function.go b/core/scenario/scripting/assertion/evaluator/function.go
@@ -133,6 +133,11 @@ var xmlExtract = func(source interface{}, xPath string) (interface{}, error) {
 	return val, err
 }
 
+// htmlExtract evaluates expr against src via extraction.ExtractFromHtml.
+var htmlExtract = func(src interface{}, expr string) (interface{}, error) {
+	return extraction.ExtractFromHtml(src, expr)
+}
+
 var regexExtract = func(source interface{}, xPath string, matchNo int64) (interface{}, error) {
 	val, err := extraction.ExtractWithRegex(source, types.RegexCaptureConf{
 		Exp: &xPath,
@@ -194,6 +199,7 @@ var assertionFuncMap = map[string]struct{}{
 	IN:           {},
 	JSONPATH:     {},
 	XMLPATH:      {},
+	HTMLPATH:     {},
 	REGEXP:       {},
 	EXISTS:       {},
 	CONTAINS:     {},
@@ -216,7 +222,8 @@ const (
 	EQUALS       = "equals"
 	IN           = "in"
 	JSONPATH     = "json_path"
-	XMLPATH      = "xml_path"
+	XMLPATH      = "xpath"
+	HTMLPATH     = "html_path"
 	REGEXP       = "regexp"
 	EXISTS       = "exists"
 	CONTAINS     = "contains"

diff --git a/core/scenario/scripting/extraction/base.go b/core/scenario/scripting/extraction/base.go
@@ -49,6 +49,8 @@ func Extract(source interface{}, ce types.EnvCaptureConf) (val interface{}, err
 			val, err = ExtractWithRegex(source, *ce.RegExp)
 		} else if ce.Xpath != nil {
 			val, err = ExtractFromXml(source, *ce.Xpath)
+		} else if ce.XpathHtml != nil {
+			val, err = ExtractFromHtml(source, *ce.XpathHtml)
 		}
 	case types.Cookie:
 		cookies := source.(map[string]*http.Cookie)
@@ -111,6 +113,18 @@ func ExtractFromXml(source interface{}, xPath string) (interface{}, error) {
 	}
 }
 
+// ExtractFromHtml applies xPath to an HTML source through htmlExtractor.
+// Only []byte and string sources are handled; any other type is an error.
+func ExtractFromHtml(source interface{}, xPath string) (interface{}, error) {
+	switch src := source.(type) {
+	case []byte: // from response body
+		return htmlExtractor{}.extractFromByteSlice(src, xPath)
+	case string: // from response header
+		return htmlExtractor{}.extractFromString(src, xPath)
+	}
+	return "", fmt.Errorf("Unsupported type for extraction source")
+}
+
 type ExtractionError struct { // UnWrappable
 	msg        string
 	wrappedErr error

diff --git a/core/scenario/scripting/extraction/html.go b/core/scenario/scripting/extraction/html.go
@@ -0,0 +1,43 @@
+package extraction
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/antchfx/htmlquery"
+)
+
+// htmlExtractor extracts text from HTML documents using XPath expressions.
+type htmlExtractor struct {
+}
+
+// extractFromByteSlice parses source as HTML and returns, as a string, the
+// text content of the first node matched by xPath. It returns an error when
+// parsing fails, when xPath cannot be evaluated, or when no node matches.
+func (xe htmlExtractor) extractFromByteSlice(source []byte, xPath string) (interface{}, error) {
+	rootNode, err := htmlquery.Parse(bytes.NewReader(source))
+	if err != nil {
+		return nil, err
+	}
+
+	// Query returns only the first matched node. Its error is reported
+	// separately so a bad expression is not mistaken for a missing element.
+	foundNode, err := htmlquery.Query(rootNode, xPath)
+	if err != nil {
+		return nil, fmt.Errorf("invalid xpath_html %q: %w", xPath, err)
+	}
+	if foundNode == nil {
+		return nil, fmt.Errorf("no match for the xPath_html: %s", xPath)
+	}
+
+	// Do not dereference foundNode.FirstChild: it is nil for an empty element
+	// such as <h1></h1>, and for an element whose first child is a tag it
+	// holds the tag name rather than text.
+	return htmlquery.InnerText(foundNode), nil
+}
+
+// extractFromString is the string counterpart of extractFromByteSlice and
+// shares its parsing, matching and error behaviour.
+func (xe htmlExtractor) extractFromString(source string, xPath string) (interface{}, error) {
+	return xe.extractFromByteSlice([]byte(source), xPath)
+}
diff --git a/core/scenario/scripting/extraction/html_test.go b/core/scenario/scripting/extraction/html_test.go
@@ -0,0 +1,115 @@
+package extraction
+
+import (
+	"fmt"
+	"testing"
+)
+
+// extractHTMLString runs the byte-slice extractor on source and stops the
+// calling test at once if extraction fails or yields a non-string, so no test
+// goes on to type-assert a nil result.
+func extractHTMLString(t *testing.T, source, xpath string) string {
+	t.Helper()
+
+	val, err := htmlExtractor{}.extractFromByteSlice([]byte(source), xpath)
+	if err != nil {
+		t.Fatalf("%s: unexpected error: %v", t.Name(), err)
+	}
+	got, ok := val.(string)
+	if !ok {
+		t.Fatalf("%s: expected a string result, got %T", t.Name(), val)
+	}
+	return got
+}
+
+// TestHtmlExtraction checks extraction of a single matching element.
+func TestHtmlExtraction(t *testing.T) {
+	expected := "Html Title"
+	htmlSource := fmt.Sprintf(`<!DOCTYPE html>
+	<html>
+	<body>
+	<h1>%s</h1>
+	<p>My first paragraph.</p>
+	</body>
+	</html>`, expected)
+
+	if got := extractHTMLString(t, htmlSource, "//body/h1"); got != expected {
+		t.Errorf("TestHtmlExtraction expected: %s, got: %s", expected, got)
+	}
+}
+
+// TestHtmlExtractionSeveralNode checks that only the first of several
+// matching elements is returned.
+func TestHtmlExtractionSeveralNode(t *testing.T) {
+	expected := "Html Title"
+	htmlSource := fmt.Sprintf(`<!DOCTYPE html>
+	<html>
+	<body>
+	<h1>%s</h1>
+	<h1>another node</h1>
+	<p>My first paragraph.</p>
+	</body>
+	</html>`, expected)
+
+	if got := extractHTMLString(t, htmlSource, "//h1"); got != expected {
+		t.Errorf("TestHtmlExtractionSeveralNode expected: %s, got: %s", expected, got)
+	}
+}
+
+// TestHtmlExtraction_PathNotFound checks that an expression matching no
+// element produces an error.
+func TestHtmlExtraction_PathNotFound(t *testing.T) {
+	htmlSource := `<!DOCTYPE html>
+	<html>
+	<body>
+	<h1>Html Title</h1>
+	<h1>another node</h1>
+	<p>My first paragraph.</p>
+	</body>
+	</html>`
+
+	xe := htmlExtractor{}
+	if _, err := xe.extractFromByteSlice([]byte(htmlSource), "//h2"); err == nil {
+		t.Errorf("TestHtmlExtraction_PathNotFound, should be err, got :%v", err)
+	}
+}
+
+// TestInvalidHtml checks that extraction from non-HTML text returns an error.
+// NOTE(review): the HTML parser is presumably lenient with plain text, so the
+// error here likely comes from the failed match rather than from parsing —
+// confirm before relying on this as a parse-failure test.
+func TestInvalidHtml(t *testing.T) {
+	htmlSource := `invalid html source`
+
+	xe := htmlExtractor{}
+	if _, err := xe.extractFromByteSlice([]byte(htmlSource), "//input"); err == nil {
+		t.Errorf("TestInvalidHtml, should be err, got :%v", err)
+	}
+}
+
+// TestHtmlComplexExtraction checks that inline script content with special
+// characters ahead of the target element does not disturb extraction.
+func TestHtmlComplexExtraction(t *testing.T) {
+	expected := "Html Title"
+	htmlSource := fmt.Sprintf(`<!DOCTYPE html>
+	<html>
+	<body>
+	<script>
+		if (typeof resourceLoadedSuccessfully === "function") {
+			resourceLoadedSuccessfully();
+		}
+		$(() => {
+			typeof cssVars === "function" && cssVars({onlyLegacy: true});
+		})
+		var trackGeoLocation = false;
+		alert('#@=$*€');
+		</script>
+	<h1>%s</h1>
+	<p>My first paragraph.</p>
+	</body>
+	</html>`, expected)
+
+	if got := extractHTMLString(t, htmlSource, "//body/h1"); got != expected {
+		t.Errorf("TestHtmlComplexExtraction expected: %s, got: %s", expected, got)
+	}
+}
diff --git a/core/types/scenario.go b/core/types/scenario.go
@@ -251,6 +251,7 @@ type RegexCaptureConf struct {
 type EnvCaptureConf struct {
 	JsonPath   *string           `json:"json_path"`
 	Xpath      *string           `json:"xpath"`
+	XpathHtml  *string           `json:"xpath_html"`
 	RegExp     *RegexCaptureConf `json:"regexp"`
 	Name       string            `json:"as"`
 	From       SourceType        `json:"from"`
@@ -339,9 +340,9 @@ func validateCaptureConf(conf EnvCaptureConf) error {
 		}
 	}
 
-	if conf.From == Body && conf.JsonPath == nil && conf.RegExp == nil && conf.Xpath == nil {
+	if conf.From == Body && conf.JsonPath == nil && conf.RegExp == nil && conf.Xpath == nil && conf.XpathHtml == nil {
 		return CaptureConfigError{
-			msg: fmt.Sprintf("%s, one of json_path, regexp, xpath key must be specified when extracting from body", conf.Name),
+			msg: fmt.Sprintf("%s, one of json_path, regexp, xpath or xpath_html key must be specified when extracting from body", conf.Name),
 		}
 	}
 

diff --git a/engine_docs/README.md b/engine_docs/README.md
@@ -681,7 +681,8 @@ If Ddosify can't receive the response for a request, that step is marked as Fail
 | `not`   | ( param `bool` ) | returns converse of given param |
 | `range`   | ( param `int`, low `int`,high `int` ) | returns param is in range of [low,high): low is included, high is not included. |
 | `json_path`   | ( json_path `string`) | extracts from response body using given json path |
-| `xml_path`   | ( xpath `string` ) | extracts from response body using given xml path |
+| `xpath`   | ( xpath `string` ) | extracts from response body using given xml path |
+| `html_path`   | ( xpath `string` ) | extracts from the HTML response body using the given XPath expression |
 | `regexp` | ( param `any`, regexp `string`, matchNo `int` ) | extracts from given value in the first parameter using given regular expression |
 
 ### Operators
@@ -707,6 +708,7 @@ If Ddosify can't receive the response for a request, that step is marked as Fail
 | `status_code != 500`   | same as preceding one|
 | `equals(json_path(\"employees.0.name\"),\"Name\")`   | checks if json extracted value is equal to "Name"|
 | `equals(xpath(\"//item/title\"),\"ABC\")`   | checks if xml extracted value is equal to "ABC" |
+| `equals(html_path(\"//body/h1\"),\"ABC\")`   | checks if html extracted value is equal to "ABC" |
 | `equals(variables.x,100)`   | checks if `x` variable coming from global or captured variables is equal to 100|
 | `equals(variables.x,variables.y)`   | checks if variables `x` and `y` are equal to each other |
 | `equals_on_file(body,\"file.json\")`   | reads from file.json and compares response body with read file |
@@ -761,7 +763,7 @@ Unlike assertions focused on individual steps, which determine the success or fa
 | `less_than(fail_count_perc,0.05)` | Fail count percentage should be less than 5%              |
 
 ## Correlation
-Ddosify enables you to capture variables from steps using **json_path**, **xpath**, or **regular expressions**. Later, in the subsequent steps, you can inject both the captured variables and the scenario-scoped global variables.
+Ddosify enables you to capture variables from steps using **json_path**, **xpath**, **xpath_html**, or **regular expressions**. Later, in the subsequent steps, you can inject both the captured variables and the scenario-scoped global variables.
 
 > **:warning: Points to keep in mind**
 > - You must specify **'header_key'** when capturing from header.
@@ -792,7 +794,7 @@ ddosify -config ddosify_config_correlation.json -debug
 }
 ```
 
-### Capture With XPath
+### Capture With XPath on XML
 ```json
 {
     "steps": [
@@ -805,6 +807,20 @@ ddosify -config ddosify_config_correlation.json -debug
 }
 ```
 
+### Capture With XPath on HTML
+```json
+{
+    "steps": [
+        {
+            "capture_env": {
+                "TITLE" :{"from":"body","xpath_html":"//body/h1"}
+            }
+        }
+    ]
+}
+```
+
+
 ### Capture With Regular Expressions
 ```json
 {

diff --git a/go.mod b/go.mod
@@ -17,9 +17,10 @@ require (
 )
 
 require (
-	github.com/antchfx/xpath v1.2.1 // indirect
+	github.com/antchfx/htmlquery v1.3.0
+	github.com/antchfx/xpath v1.2.3 // indirect
 	github.com/go-ole/go-ole v1.2.6 // indirect
-	github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
 	github.com/jaswdr/faker v1.10.2 // indirect
 	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
 	github.com/mattn/go-isatty v0.0.14 // indirect