Skip to content

Commit

Permalink
Fixed newline handling and input character filtering for processing
Browse files Browse the repository at this point in the history
  • Loading branch information
phax committed Jun 7, 2023
1 parent 8c70510 commit 05a6d0a
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 21 deletions.
142 changes: 125 additions & 17 deletions ph-css/src/main/java/com/helger/css/parser/CSSParseHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,103 @@ public static String extractStringValue (@Nullable final String sStr)
return sStr;
}

/**
* A special char iterator based on
* https://www.w3.org/TR/css-syntax-3/#css-filter-code-points
*
* @author Philip Helger
*/
private static final class CSSCharIterator
{
private final char [] m_aSrc;
private final int m_nSrcLen;
private int m_nIndex = 0;

public CSSCharIterator (@Nonnull final String sSrc)
{
m_aSrc = sSrc.toCharArray ();
m_nSrcLen = sSrc.length ();
}

public boolean hasNext ()
{
return m_nIndex < m_nSrcLen;
}

private char _current ()
{
return m_aSrc[m_nIndex];
}

private char _next ()
{
return m_aSrc[m_nIndex + 1];
}

/**
* @return Next character to come without modifying the index
*/
public char lookahead ()
{
char ret = _current ();
switch (ret)
{
case 0:
ret = (char) 0xfffd;
break;
case '\f':
ret = '\n';
break;
case '\r':
// No matter if followed by \n or not
ret = '\n';
break;
}
return ret;
}

/**
* @return Next character and advancing the index (by 1 or 2)
*/
public char next ()
{
// See
char ret = _current ();
int nAdvance = 1;
switch (ret)
{
case 0:
ret = (char) 0xfffd;
break;
case '\f':
ret = '\n';
break;
case '\r':
{
if (hasNext () && _next () == '\n')
{
// Handle \r\n as one \n
nAdvance = 2;
}
ret = '\n';
break;
}
}
// Move forward
m_nIndex += nAdvance;
return ret;
}
}

private static boolean _isNewLine (final char c)
{
// \r is NOT a new line char
return c == '\n';
}

private static boolean _isWhitespace (final char c)
{
return c == '\n' || c == '\t' || c == ' ';
return _isNewLine (c) || c == '\t' || c == ' ';
}

private static boolean _isHexChar (final char c)
Expand All @@ -113,26 +207,26 @@ public static String unescapeURL (@Nonnull final String sEscapedURL)
return sEscapedURL;
}

// The source length is always longer
final int nSrcLen = sEscapedURL.length ();
final StringBuilder aSB = new StringBuilder (nSrcLen);
int nCharIndex = 0;
while (nCharIndex < nSrcLen)
// The source length is never shorter

This comment has been minimized.

Copy link
@shagkur

shagkur Jun 8, 2023

Contributor

@phax Again, genernal note: Might make sense to extract this unescape logic in it's own general unescape method and have it available at other places during the parsing? To me the spec doesn't state the the escaping only applies to the url, does it?

This comment has been minimized.

Copy link
@shagkur

shagkur Jun 13, 2023

Contributor

As discussed in #91 there're 2 layers of unescaping when reading a CSS. And this one here is and should only be valid for unescaping a URL value. Comment solved.

final StringBuilder aSB = new StringBuilder (sEscapedURL.length ());

final CSSCharIterator aIter = new CSSCharIterator (sEscapedURL);
while (aIter.hasNext ())
{
final char c = sEscapedURL.charAt (nCharIndex);
nCharIndex++;
final char c = aIter.next ();

if (c == URL_ESCAPE_CHAR)
{
int nCodePoint = 0;
int nHexCount = 0;
while (nHexCount <= 6 && nCharIndex < nSrcLen)

while (nHexCount <= 6 && aIter.hasNext ())

This comment has been minimized.

Copy link
@shagkur

shagkur Jun 8, 2023

Contributor

@phax Shouldn't this be nHexCount < 6? Spec states "up to 6"

{
final char cNext = sEscapedURL.charAt (nCharIndex);
final char cNext = aIter.lookahead ();
if (_isHexChar (cNext))
{
aIter.next ();
nHexCount++;
nCharIndex++;
nCodePoint = (nCodePoint * 16) + StringHelper.getHexValue (cNext);
}
else
Expand All @@ -142,11 +236,11 @@ public static String unescapeURL (@Nonnull final String sEscapedURL)
if (nHexCount > 0)
{
// Check for a trailing whitespace and evtl. skip it
if (nCharIndex < nSrcLen)
if (aIter.hasNext ())
{
final char cNext = sEscapedURL.charAt (nCharIndex);
final char cNext = aIter.lookahead ();
if (_isWhitespace (cNext))
nCharIndex++;
aIter.next ();
}

if (nCodePoint > '\uFFFF')
Expand All @@ -156,13 +250,27 @@ public static String unescapeURL (@Nonnull final String sEscapedURL)
}
else
{
// Append \ verbose
aSB.append (c);
// No hex char found - check for newline
// Goal is to make "\\nx" should become "x"
final char cNext = aIter.lookahead ();
if (_isNewLine (cNext))
{
// Consume newline char
aIter.next ();

// Take following char as it is
aSB.append (aIter.next ());
}
else
{
// Append \ verbose
aSB.append (c);
}
}
}
else
{
// Copy as is
// Copy char as is
aSB.append (c);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,9 @@ public void testSplitNumber ()
@Test
public void testUnescapeCSSURL ()
{
// Nothing to escape
assertEquals ("bla.gif", CSSParseHelper.unescapeURL ("bla.gif"));
assertEquals ("/foo/bla.gif", CSSParseHelper.unescapeURL ("/foo/bla.gif"));
if (false)
assertEquals ("/foo/bla().gif", CSSParseHelper.unescapeURL ("/foo/bla\\(\\).gif"));
if (false)
assertEquals ("\\\\server\\foo\\bla.gif", CSSParseHelper.unescapeURL ("\\\\\\\\server\\\\foo\\\\bla.gif"));
assertEquals ("/home/data/image.png", CSSParseHelper.unescapeURL ("\\2f home\\2f data\\2f image.png"));
assertEquals ("/home/data/image.png", CSSParseHelper.unescapeURL ("\\2fhome\\2f data\\2fimage.png"));
assertEquals ("/home /data /image.png", CSSParseHelper.unescapeURL ("\\2fhome \\2f data \\2f image.png"));
Expand All @@ -79,5 +76,13 @@ public void testUnescapeCSSURL ()
assertEquals ("AZ", CSSParseHelper.unescapeURL ("\\00041 Z"));
assertEquals ("AZ", CSSParseHelper.unescapeURL ("\\000041Z"));
assertEquals ("AZ", CSSParseHelper.unescapeURL ("\\000041 Z"));
assertEquals ("a not so very long title", CSSParseHelper.unescapeURL ("a not so very long title"));
assertEquals ("a not so very long title", CSSParseHelper.unescapeURL ("a not s\\\no very long title"));
assertEquals ("a not so very long title", CSSParseHelper.unescapeURL ("a not s\\\r\no very long title"));
assertEquals ("a not so very long title", CSSParseHelper.unescapeURL ("a not s\\\fo very long title"));
assertEquals ("A\nZ", CSSParseHelper.unescapeURL ("\\041 \nZ"));
assertEquals ("A\nZ", CSSParseHelper.unescapeURL ("\\041 \r\nZ"));
assertEquals ("A\n", CSSParseHelper.unescapeURL ("\\041 \n"));
assertEquals ("A\n", CSSParseHelper.unescapeURL ("\\041 \r\n"));
}
}

0 comments on commit 05a6d0a

Please sign in to comment.