diff --git a/src/PdfSharp/Pdf.IO/Parser.cs b/src/PdfSharp/Pdf.IO/Parser.cs
index a5d12ac3..c08e0af6 100644
--- a/src/PdfSharp/Pdf.IO/Parser.cs
+++ b/src/PdfSharp/Pdf.IO/Parser.cs
@@ -1031,30 +1031,142 @@ internal PdfTrailer ReadTrailer()
throw new Exception("The StartXRef table could not be found, the file cannot be opened.");
ReadSymbol(Symbol.StartXRef);
- _lexer.Position = ReadInteger();
+ int startxref = _lexer.Position = ReadInteger();
+
+ // Must be before the first 'goto valid_xref;' statement.
+ int xref_offset = 0;
+
+ // Check for valid startxref
+ if (IsValidXref())
+ {
+ goto valid_xref;
+ }
+
+ // If we reach this point, we have an invalid startxref
+ // First look for bytes preceding "%PDF-". Some pdf producers ignore these.
+ if (length >= 1024)
+ {
+ // "%PDF-" should be in this range
+ string header = _lexer.ReadRawString(0, 1024);
+ idx = header.IndexOf("%PDF-", StringComparison.Ordinal);
+ }
+ else
+ {
+ string header = _lexer.ReadRawString(0, length);
+ idx = header.IndexOf("%PDF-", StringComparison.Ordinal);
+ }
+
+ if (idx > 0)
+ {
+ //_lexer.ByteOffset = idx;
+ _lexer.Position = startxref + idx;
+ if (IsValidXref())
+ {
+ xref_offset = idx;
+ goto valid_xref;
+ }
+ }
+
+ valid_xref:
+ _lexer.Position = startxref + xref_offset;
+
+ // Read all trailers.
+ while (true)
+ {
+ PdfTrailer trailer = ReadXRefTableAndTrailer(_document._irefTable, xref_offset);
+ // 1st trailer seems to be the best.
+ if (_document._trailer == null)
+ _document._trailer = trailer;
+ int prev = trailer.Elements.GetInteger(PdfTrailer.Keys.Prev);
+ if (prev == 0)
+ break;
+ //if (prev > lexer.PdfLength)
+ // break;
+ _lexer.Position = prev;
+ }
+
+ return _document._trailer;
+ }
- // Read all trailers.
+ ///
+ /// Checks that the current _lexer location is a valid xref.
+ ///
+ ///
+ private bool IsValidXref()
+ {
+ int length = _lexer.PdfLength;
+ int position = _lexer.Position;
+ // Make sure not inside a stream.
+
+ string content = "";
+ int content_pos = position;
while (true)
{
- PdfTrailer trailer = ReadXRefTableAndTrailer(_document._irefTable);
- // 1st trailer seems to be the best.
- if (_document._trailer == null)
- _document._trailer = trailer;
- int prev = trailer.Elements.GetInteger(PdfTrailer.Keys.Prev);
- if (prev == 0)
+ // look for stream and endstream in 1k chunks.
+ int read_length = Math.Min(1024, length - content_pos);
+ content += _lexer.ReadRawString(content_pos, read_length);
+
+ int ss = content.IndexOf("stream", StringComparison.Ordinal);
+ int es = content.IndexOf("endstream", StringComparison.Ordinal);
+ int eof = content.IndexOf("%%EOF", StringComparison.Ordinal);
+
+ if (ss != es)
+ {
+ if (ss == -1)
+ {
+ if (eof != -1 && eof < es)
+ break;
+ else
+ return false;
+ }
+ else if (es == -1)
+ break;
+ else if (ss < es)
+ break;
+ else if (ss > es)
+ {
+ if (eof != -1 && eof < ss && eof < es)
+ break;
+ else
+ return false;
+ }
+ }
+
+ if (eof != -1)
break;
- //if (prev > lexer.PdfLength)
- // break;
- _lexer.Position = prev;
+
+ content_pos = content_pos + read_length;
+ if (content_pos + read_length >= length)
+ {
+ // reached the end of the document without finding either.
+ break;
+ }
}
- return _document._trailer;
+ _lexer.Position = position;
+
+ Symbol symbol = ScanNextToken();
+ if (symbol == Symbol.XRef)
+ {
+ return true;
+ }
+
+ if (symbol == Symbol.Integer)
+ {
+ // Just because we have an integer, doesn't mean the startxref is actually valid
+ if (ScanNextToken() == Symbol.Integer && ScanNextToken() == Symbol.Obj)
+ {
+ return true;
+ }
+ }
+
+ return false;
}
///
/// Reads cross reference table(s) and trailer(s).
///
- private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
+ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable, int xrefOffset)
{
Debug.Assert(xrefTable != null);
@@ -1072,7 +1184,7 @@ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
int length = ReadInteger();
for (int id = start; id < start + length; id++)
{
- int position = ReadInteger();
+ int position = ReadInteger() + xrefOffset;
int generation = ReadInteger();
ReadSymbol(Symbol.Keyword);
string token = _lexer.Token;
@@ -1114,10 +1226,10 @@ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
return null;
}
- ///
- /// Reads cross reference stream(s).
- ///
- private PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
+ ///
+ /// Reads cross reference stream(s).
+ ///
+ private PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
{
// Read cross reference stream.
//Debug.Assert(_lexer.Symbol == Symbol.Integer);