Skip to content

Commit

Permalink
NewXMLElement add index
Browse files Browse the repository at this point in the history
  • Loading branch information
Shinku-Chen committed Oct 24, 2024
1 parent 5224b97 commit 9efd916
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
4 changes: 2 additions & 2 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -1199,8 +1199,8 @@ func (c *Collector) handleOnXML(resp *Response) error {
}

for _, cc := range c.xmlCallbacks {
for _, n := range htmlquery.Find(doc, cc.Query) {
e := NewXMLElementFromHTMLNode(resp, n)
for i, n := range htmlquery.Find(doc, cc.Query) {
e := NewXMLElementFromHTMLNode(resp, n, i)
if c.debugger != nil {
c.debugger.Event(createEvent("xml", resp.Request.ID, c.ID, map[string]string{
"selector": cc.Query,
Expand Down
5 changes: 4 additions & 1 deletion xmlelement.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ type XMLElement struct {
// based on how the XMLElement was created.
DOM interface{}
isHTML bool
// Index stores the zero-based position of the current element among the elements matched by an OnXML callback's query.
Index int
}

// NewXMLElementFromHTMLNode creates an XMLElement from an html.Node.
func NewXMLElementFromHTMLNode(resp *Response, s *html.Node) *XMLElement {
func NewXMLElementFromHTMLNode(resp *Response, s *html.Node, idx int) *XMLElement {
return &XMLElement{
Name: s.Data,
Request: resp.Request,
Expand All @@ -49,6 +51,7 @@ func NewXMLElementFromHTMLNode(resp *Response, s *html.Node) *XMLElement {
DOM: s,
attributes: s.Attr,
isHTML: true,
Index: idx,
}
}

Expand Down
10 changes: 5 additions & 5 deletions xmlelement_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func TestAttr(t *testing.T) {
resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)}
doc, _ := htmlquery.Parse(strings.NewReader(htmlPage))
xmlNode := htmlquery.FindOne(doc, "/html")
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode)
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode, 0)

if xmlElem.Attr("xmlns") != "http://www.w3.org/1999/xhtml" {
t.Fatalf("failed xmlns attribute test: %v != http://www.w3.org/1999/xhtml", xmlElem.Attr("xmlns"))
Expand All @@ -67,7 +67,7 @@ func TestChildText(t *testing.T) {
resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)}
doc, _ := htmlquery.Parse(strings.NewReader(htmlPage))
xmlNode := htmlquery.FindOne(doc, "/html")
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode)
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode, 0)

if text := xmlElem.ChildText("//p"); text != "This is a regular text paragraph." {
t.Fatalf("failed child tag test: %v != This is a regular text paragraph.", text)
Expand All @@ -81,7 +81,7 @@ func TestChildTexts(t *testing.T) {
resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)}
doc, _ := htmlquery.Parse(strings.NewReader(htmlPage))
xmlNode := htmlquery.FindOne(doc, "/html")
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode)
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode, 0)
expected := []string{"First bullet of a bullet list.", "This is the second bullet."}
if texts := xmlElem.ChildTexts("//li"); reflect.DeepEqual(texts, expected) == false {
t.Fatalf("failed child tags test: %v != %v", texts, expected)
Expand All @@ -94,7 +94,7 @@ func TestChildAttr(t *testing.T) {
resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)}
doc, _ := htmlquery.Parse(strings.NewReader(htmlPage))
xmlNode := htmlquery.FindOne(doc, "/html")
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode)
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode, 0)

if attr := xmlElem.ChildAttr("/body/ul/li[1]", "class"); attr != "list-item-1" {
t.Fatalf("failed child attribute test: %v != list-item-1", attr)
Expand All @@ -108,7 +108,7 @@ func TestChildAttrs(t *testing.T) {
resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)}
doc, _ := htmlquery.Parse(strings.NewReader(htmlPage))
xmlNode := htmlquery.FindOne(doc, "/html")
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode)
xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode, 0)

attrs := xmlElem.ChildAttrs("/body/ul/li", "class")
if len(attrs) != 2 {
Expand Down

0 comments on commit 9efd916

Please sign in to comment.