Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for bad byte offset locations when bytes precede '%PDF-'. #29

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 130 additions & 18 deletions src/PdfSharp/Pdf.IO/Parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1031,30 +1031,142 @@ internal PdfTrailer ReadTrailer()
throw new Exception("The StartXRef table could not be found, the file cannot be opened.");

ReadSymbol(Symbol.StartXRef);
_lexer.Position = ReadInteger();
int startxref = _lexer.Position = ReadInteger();

// Must be before the first 'goto valid_xref;' statement.
int xref_offset = 0;

// Check for valid startxref
if (IsValidXref())
{
goto valid_xref;
}

// If we reach this point, we have an invalid startxref
// First look for bytes preceding "%PDF-". Some pdf producers ignore these.
if (length >= 1024)
{
// "%PDF-" should be in this range
string header = _lexer.ReadRawString(0, 1024);
idx = header.IndexOf("%PDF-", StringComparison.Ordinal);
}
else
{
string header = _lexer.ReadRawString(0, length);
idx = header.IndexOf("%PDF-", StringComparison.Ordinal);
}

if (idx > 0)
{
//_lexer.ByteOffset = idx;
_lexer.Position = startxref + idx;
if (IsValidXref())
{
xref_offset = idx;
goto valid_xref;
}
}

valid_xref:
_lexer.Position = startxref + xref_offset;

// Read all trailers.
while (true)
{
PdfTrailer trailer = ReadXRefTableAndTrailer(_document._irefTable, xref_offset);
// 1st trailer seems to be the best.
if (_document._trailer == null)
_document._trailer = trailer;
int prev = trailer.Elements.GetInteger(PdfTrailer.Keys.Prev);
if (prev == 0)
break;
//if (prev > lexer.PdfLength)
// break;
_lexer.Position = prev;
}

return _document._trailer;
}

// Read all trailers.
/// <summary>
/// Checks that the current _lexer location is a valid xref.
/// </summary>
/// <returns></returns>
private bool IsValidXref()
{
int length = _lexer.PdfLength;
int position = _lexer.Position;
// Make sure not inside a stream.

string content = "";
int content_pos = position;
while (true)
{
PdfTrailer trailer = ReadXRefTableAndTrailer(_document._irefTable);
// 1st trailer seems to be the best.
if (_document._trailer == null)
_document._trailer = trailer;
int prev = trailer.Elements.GetInteger(PdfTrailer.Keys.Prev);
if (prev == 0)
// look for stream and endstream in 1k chunks.
int read_length = Math.Min(1024, length - content_pos);
content += _lexer.ReadRawString(content_pos, read_length);

int ss = content.IndexOf("stream", StringComparison.Ordinal);
int es = content.IndexOf("endstream", StringComparison.Ordinal);
int eof = content.IndexOf("%%EOF", StringComparison.Ordinal);

if (ss != es)
{
if (ss == -1)
{
if (eof != -1 && eof < es)
break;
else
return false;
}
else if (es == -1)
break;
else if (ss < es)
break;
else if (ss > es)
{
if (eof != -1 && eof < ss && eof < es)
break;
else
return false;
}
}

if (eof != -1)
break;
//if (prev > lexer.PdfLength)
// break;
_lexer.Position = prev;

content_pos = content_pos + read_length;
if (content_pos + read_length >= length)
{
// reached the end of the document without finding either.
break;
}
}

return _document._trailer;
_lexer.Position = position;

Symbol symbol = ScanNextToken();
if (symbol == Symbol.XRef)
{
return true;
}

if (symbol == Symbol.Integer)
{
// Just because we have an integer, doesn't mean the startxref is actually valid
if (ScanNextToken() == Symbol.Integer && ScanNextToken() == Symbol.Obj)
{
return true;
}
}

return false;
}

/// <summary>
/// Reads cross reference table(s) and trailer(s).
/// </summary>
private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable, int xrefOffset)
{
Debug.Assert(xrefTable != null);

Expand All @@ -1072,7 +1184,7 @@ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
int length = ReadInteger();
for (int id = start; id < start + length; id++)
{
int position = ReadInteger();
int position = ReadInteger() + xrefOffset;
int generation = ReadInteger();
ReadSymbol(Symbol.Keyword);
string token = _lexer.Token;
Expand Down Expand Up @@ -1114,10 +1226,10 @@ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
return null;
}

/// <summary>
/// Reads cross reference stream(s).
/// </summary>
private PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
/// <summary>
/// Reads cross reference stream(s).
/// </summary>
private PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
{
// Read cross reference stream.
//Debug.Assert(_lexer.Symbol == Symbol.Integer);
Expand Down