Skip to content

Commit

Permalink
Resolve xml:base URLs without switching out the BaseStack
Browse files Browse the repository at this point in the history
This provides an equivalent fix that avoids the inelegant swapping out of
the BaseStack. It also leaves `goxpp`'s public API unchanged; the trade-off
is that `XmlBaseResolveUrl` is essentially copied into `gofeed`.
  • Loading branch information
cristoper committed Feb 29, 2024
1 parent cf5c66f commit d6a83eb
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 17 deletions.
20 changes: 6 additions & 14 deletions atom/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -658,20 +658,12 @@ func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) {
InnerXML string `xml:",innerxml"`
}

// DecodeElement pops the base stack if the element contains an xml:base
// attribute, so we need to save and restore it before resolving any
// relative URLs below
oldBase := p.BaseStack
// get current base URL before it is clobbered by DecodeElement
base := p.BaseStack.Top()
err := p.DecodeElement(&text)
if err != nil {
return "", err
}
newBase := p.BaseStack
p.BaseStack = oldBase
defer func() {
// pop base when we're done with the decoded element
p.BaseStack = newBase
}()

result := text.InnerXML
result = strings.TrimSpace(result)
Expand All @@ -682,7 +674,7 @@ func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) {
if strings.Contains(result, "<![CDATA[") {
result = shared.StripCDATA(result)
if lowerType == "html" || strings.Contains(lowerType, "xhtml") {
result, _ = shared.ResolveHTML(p, result)
result, _ = shared.ResolveHTML(base, result)
}
} else {
// decode non-CDATA contents depending on type
Expand All @@ -693,12 +685,12 @@ func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) {
result, err = shared.DecodeEntities(result)
} else if strings.Contains(lowerType, "xhtml") {
result = ap.stripWrappingDiv(result)
result, _ = shared.ResolveHTML(p, result)
result, _ = shared.ResolveHTML(base, result)
} else if lowerType == "html" {
result = ap.stripWrappingDiv(result)
result, err = shared.DecodeEntities(result)
if err == nil {
result, _ = shared.ResolveHTML(p, result)
result, _ = shared.ResolveHTML(base, result)
}
} else {
decodedStr, err := base64.StdEncoding.DecodeString(result)
Expand All @@ -711,7 +703,7 @@ func (ap *Parser) parseAtomText(p *xpp.XMLPullParser) (string, error) {
// resolve relative URIs in URI-containing elements according to xml:base
name := strings.ToLower(p.Name)
if atomUriElements[name] {
resolved, err := p.XmlBaseResolveUrl(result)
resolved, err := shared.XmlBaseResolveUrl(base, result)
if resolved != nil && err == nil {
result = resolved.String()
}
Expand Down
27 changes: 24 additions & 3 deletions internal/shared/xmlbase.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package shared
import (
"bytes"
"fmt"
"net/url"
"strings"

xpp "github.com/mmcdole/goxpp"
Expand Down Expand Up @@ -95,11 +96,31 @@ func resolveAttrs(p *xpp.XMLPullParser) error {
return nil
}

// XmlBaseResolveUrl resolves the URL reference u against the base URL b.
//
// If u cannot be parsed, a nil URL and the parse error are returned. If b is
// nil there is nothing to resolve against, so the parsed form of u is
// returned as-is. Otherwise the result is u resolved against b, where a
// non-empty base path that lacks a trailing slash is treated as a directory
// (a "/" is appended) whenever u is non-empty.
//
// b is never modified: the directory-style slash is applied to a local copy,
// so the caller's base URL stays intact and repeated calls with the same base
// give results that do not depend on call order.
func XmlBaseResolveUrl(b *url.URL, u string) (*url.URL, error) {
	relURL, err := url.Parse(u)
	if err != nil {
		return nil, err
	}

	if b == nil {
		return relURL, nil
	}

	// Shallow copy so that the path tweak below does not leak into the
	// caller's URL. ResolveReference only reads from its receiver.
	base := *b
	if base.Path != "" && u != "" && base.Path[len(base.Path)-1] != '/' {
		// There's no reason someone would use a path in xml:base if they
		// didn't mean for it to be a directory.
		base.Path += "/"
	}
	return base.ResolveReference(relURL), nil
}

// Transforms html by resolving any relative URIs in attributes
// if an error occurs during parsing or serialization, then the original string
// is returned along with the error.
func ResolveHTML(p *xpp.XMLPullParser, relHTML string) (string, error) {
if p.BaseStack.Top() == nil {
func ResolveHTML(base *url.URL, relHTML string) (string, error) {
if base == nil {
return relHTML, nil
}

Expand All @@ -117,7 +138,7 @@ func ResolveHTML(p *xpp.XMLPullParser, relHTML string) (string, error) {
if n.Type == html.ElementNode {
for i, a := range n.Attr {
if htmlURIAttrs[a.Key] {
absVal, err := p.XmlBaseResolveUrl(a.Val)
absVal, err := XmlBaseResolveUrl(base, a.Val)
if absVal != nil && err == nil {
n.Attr[i].Val = absVal.String()
}
Expand Down

0 comments on commit d6a83eb

Please sign in to comment.