diff --git a/azure-pipelines.yml b/azure-pipelines.yml
new file mode 100644
index 00000000..066d959f
--- /dev/null
+++ b/azure-pipelines.yml
@@ -0,0 +1,50 @@
+# PDFsharp build and NuGet packaging pipeline
+# Restores, builds and tests the solution, then packs the projects and pushes the packages to an internal feed.
+# Add steps that publish symbols, save build artifacts, and more:
+# https://docs.microsoft.com/azure/devops/pipelines/languages/dotnet-core
+
+trigger:
+- Release
+
+pool:
+ vmImage: 'windows-latest'
+
+variables:
+ solution: '**/*.sln'
+ buildPlatform: 'Any CPU'
+ buildConfiguration: 'Release'
+
+steps:
+- task: NuGetToolInstaller@1
+
+- task: NuGetCommand@2
+ inputs:
+ restoreSolution: '$(solution)'
+
+- task: VSBuild@1
+ inputs:
+ solution: '$(solution)'
+ msbuildArgs: '/p:DeployOnBuild=true /p:WebPublishMethod=Package /p:PackageAsSingleFile=true /p:SkipInvalidConfigurations=true /p:DesktopBuildPackageLocation="$(build.artifactStagingDirectory)\WebApp.zip" /p:DeployIisAppPath="Default Web Site"'
+ platform: '$(buildPlatform)'
+ configuration: '$(buildConfiguration)'
+
+- task: VSTest@2
+ inputs:
+ platform: '$(buildPlatform)'
+ configuration: '$(buildConfiguration)'
+
+- task: NuGetCommand@2
+ inputs:
+ command: 'pack'
+ packagesToPack: '**/*.csproj'
+ versioningScheme: 'byPrereleaseNumber'
+ majorVersion: '1'
+ minorVersion: '0'
+ patchVersion: '0'
+
+- task: NuGetCommand@2
+ inputs:
+ command: 'push'
+ packagesToPush: '$(Build.ArtifactStagingDirectory)/**/*.nupkg;!$(Build.ArtifactStagingDirectory)/**/*.symbols.nupkg'
+ nuGetFeedType: 'internal'
+ publishVstsFeed: 'b23e5b36-79b9-4a22-a765-20dec00e216d'
diff --git a/src/PdfSharp-gdi/PdfSharp-gdi.csproj b/src/PdfSharp-gdi/PdfSharp-gdi.csproj
index 941eee61..4ab868f8 100644
--- a/src/PdfSharp-gdi/PdfSharp-gdi.csproj
+++ b/src/PdfSharp-gdi/PdfSharp-gdi.csproj
@@ -98,6 +98,28 @@
none
AllRules.ruleset
+
+ true
+ bin\x64\Debug\
+ TRACE;DEBUG;GDI;UseGdiObjects
+ 285212672
+ 4096
+ x64
+ default
+ prompt
+ AllRules.ruleset
+
+
+ bin\x64\Release\
+ TRACE;GDI;UseGdiObjects
+ 285212672
+ bin\Release\PdfSharp-gdi.xml
+ true
+ true
+ 4096
+ x64
+ AllRules.ruleset
+
System
diff --git a/src/PdfSharp/Pdf.Advanced/PdfTrailer.cs b/src/PdfSharp/Pdf.Advanced/PdfTrailer.cs
index cd56e94f..3810ca9a 100644
--- a/src/PdfSharp/Pdf.Advanced/PdfTrailer.cs
+++ b/src/PdfSharp/Pdf.Advanced/PdfTrailer.cs
@@ -75,7 +75,7 @@ public PdfTrailer(PdfCrossReferenceStream trailer)
if (id != null)
Elements.SetValue(Keys.ID, id);
}
-
+
public int Size
{
get { return Elements.GetInteger(Keys.Size); }
@@ -218,6 +218,37 @@ internal void Finish()
_document._irefTable.IsUnderConstruction = false;
}
+        /// <summary>
+        /// Populates this trailer from the objects registered in the document's cross reference table.
+        /// </summary>
+        /// <param name="parser">The parser used to read the objects of the file.</param>
+        internal void ConstructFromDocument(Parser parser)
+        {
+            // TODO - May need to also search for encryption related trailer info
+            PdfCrossReferenceTable table = _document._irefTable;
+            Elements.SetInteger(Keys.Size, table.ObjectTable.Count);
+
+            // Read every referenced object and remember the catalog dictionary that becomes the
+            // root: the first one found, replaced only by one with a higher generation number.
+            PdfDictionary catalog = null;
+            foreach (var iref in table.AllReferences)
+            {
+                PdfObject current = parser.ReadObject(null, iref.ObjectID, false, false);
+                if (!(current is PdfDictionary candidate))
+                    continue;
+
+                // Only dictionaries whose type entry names a catalog qualify.
+                if (!(candidate.Elements[PdfCatalog.Keys.Type] as PdfName == "/Catalog"))
+                    continue;
+
+                if (catalog == null || candidate.ObjectID.GenerationNumber > catalog.ObjectID.GenerationNumber)
+                    catalog = candidate;
+            }
+
+            if (catalog != null)
+                Elements.SetReference(Keys.Root, catalog);
+        }
+
///
/// Predefined keys of this dictionary.
///
diff --git a/src/PdfSharp/Pdf.IO/Lexer.cs b/src/PdfSharp/Pdf.IO/Lexer.cs
index 5bff4193..c0e54b70 100644
--- a/src/PdfSharp/Pdf.IO/Lexer.cs
+++ b/src/PdfSharp/Pdf.IO/Lexer.cs
@@ -76,98 +76,138 @@ public int Position
}
}
- ///
- /// Reads the next token and returns its type. If the token starts with a digit, the parameter
- /// testReference specifies how to treat it. If it is false, the lexer scans for a single integer.
- /// If it is true, the lexer checks if the digit is the prefix of a reference. If it is a reference,
- /// the token is set to the object ID followed by the generation number separated by a blank
- /// (the 'R' is omitted from the token).
- ///
- // /// Indicates whether to test the next token if it is a reference.
- public Symbol ScanNextToken()
+        /// <summary>
+        /// Reads the next token and returns its type.
+        /// </summary>
+        /// <remarks>
+        /// Convenience overload of <see cref="ScanNextToken(out int)"/> for callers that do not need
+        /// the start position of the token.
+        /// </remarks>
+        /// <returns>The symbol of the scanned token.</returns>
+        public Symbol ScanNextToken()
+        {
+            return ScanNextToken(out _);
+        }
+
+        /// <summary>
+        /// Reads the next token, returns its type and reports where the token starts.
+        /// </summary>
+        /// <param name="position">Receives the start position of the next token.</param>
+        public Symbol ScanNextToken(out int position)
+        {
+            if (!TryScanNextToken(out Symbol symbol, out position))
+            {
+                // Report the character that was actually rejected: for a lone '>' that is the character
+                // following it, otherwise the character the scan stopped at.
+                // NOTE(review): assumes _currChar still holds that character when scanning fails - confirm.
+                ParserDiagnostics.HandleUnexpectedCharacter(_currChar == '>' ? _nextChar : _currChar);
+            }
+            return symbol;
+        }
+
+ public bool TryScanNextToken(out Symbol symbol, out int position)
{
Again:
_token = new StringBuilder();
char ch = MoveToNonWhiteSpace();
- switch (ch)
+ position = Position;
+ switch (ch)
{
case '%':
// Eat comments, the parser doesn't handle them
//return symbol = ScanComment();
- ScanComment();
+ symbol = _symbol = ScanComment();
+ // Do not eat EOF
+ if (symbol == Symbol.Eof)
+ return true;
goto Again;
case '/':
- return _symbol = ScanName();
-
- //case 'R':
- // if (Lexer.IsWhiteSpace(nextChar))
- // {
- // ScanNextChar();
- // return Symbol.R;
- // }
- // break;
-
+ symbol = _symbol = ScanName();
+ return true;
+
case '+': //TODO is it so easy?
case '-':
- return _symbol = ScanNumber();
+ symbol = _symbol = ScanNumber();
+ return true;
case '(':
- return _symbol = ScanLiteralString();
+ symbol = _symbol = ScanLiteralString();
+ return true;
case '[':
ScanNextChar(true);
- return _symbol = Symbol.BeginArray;
+ symbol = _symbol = Symbol.BeginArray;
+ return true;
case ']':
ScanNextChar(true);
- return _symbol = Symbol.EndArray;
+ symbol = _symbol = Symbol.EndArray;
+ return true;
case '<':
if (_nextChar == '<')
{
ScanNextChar(true);
ScanNextChar(true);
- return _symbol = Symbol.BeginDictionary;
+ symbol = _symbol = Symbol.BeginDictionary;
+ return true;
}
- return _symbol = ScanHexadecimalString();
+ symbol = _symbol = ScanHexadecimalString();
+ return true;
case '>':
if (_nextChar == '>')
{
ScanNextChar(true);
ScanNextChar(true);
- return _symbol = Symbol.EndDictionary;
+ symbol = _symbol = Symbol.EndDictionary;
+ return true;
}
- ParserDiagnostics.HandleUnexpectedCharacter(_nextChar);
- break;
+
+ symbol = _symbol = Symbol.None;
+ return false;
case '.':
- return _symbol = ScanNumber();
+ symbol = _symbol = ScanNumber();
+ return true;
}
if (char.IsDigit(ch))
#if true_
- return ScanNumberOrReference();
+ symbol = ScanNumberOrReference();
+ return true;
#else
if (PeekReference())
- return _symbol = ScanNumber();
+ {
+ symbol = _symbol = ScanNumber();
+ return true;
+ }
else
- return _symbol = ScanNumber();
+ {
+ symbol = _symbol = ScanNumber();
+ return true;
+ }
#endif
if (char.IsLetter(ch))
- return _symbol = ScanKeyword();
+ {
+ symbol = _symbol = ScanKeyword();
+ return true;
+ }
if (ch == Chars.EOF)
- return _symbol = Symbol.Eof;
+ {
+ symbol = _symbol = Symbol.Eof;
+ return true;
+ }
// #???
-
- ParserDiagnostics.HandleUnexpectedCharacter(ch);
- return _symbol = Symbol.None;
+
+ symbol = _symbol = Symbol.None;
+ return false;
}
-
+
///
/// Reads the raw content of a stream.
///
@@ -190,7 +230,77 @@ public byte[] ReadStream(int length)
else
pos = _idxChar + 1;
- _pdfSteam.Position = pos;
+ // Producer:
+ // Problem: Incorrect stream length
+ // Fix: Find the endstream keyword and measure the length
+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
+
+ // Producer:
+ // Problem: Not all pdf producers add a eol marker before endstream
+ // Fix: double check for endstream without the eol marker
+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
+
+ // Producer:
+ // Problem: Some pdf producers replace the eol marker with a carriage return
+ // Fix: double check for endstream without the eol marker
+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
+
+ // Verify stream length and resolve if bad
+ string nendstream = $"{'\n'}endstream";
+ string rendstream = $"{'\r'}endstream";
+ string rnendstream = $"{'\r'}{'\n'}endstream";
+ string endstream = "endstream";
+
+ string postStream = ReadRawString(pos + length, rnendstream.Length);
+
+ bool bValid = postStream.StartsWith(nendstream) ||
+ postStream.StartsWith(rendstream) ||
+ postStream.StartsWith(rnendstream) ||
+ postStream.StartsWith(endstream); // Not all pdf producers add a eol marker before endstream
+
+ if (!bValid)
+ {
+ string[] endstreamValues = { nendstream, rendstream, endstream };
+
+                // Returns the smallest index at which any of the endstream markers occurs in val, or -1 if none occurs.
+                int IndexOfEndStream(string val)
+                {
+                    int offset = -1;
+                    foreach (var es in endstreamValues)
+                    {
+                        int o = val.IndexOf(es, StringComparison.Ordinal);
+                        // A marker that was not found (-1) must not replace an index that was already found.
+                        if (o != -1 && (offset == -1 || o < offset))
+                        {
+                            offset = o;
+                        }
+                    }
+
+                    return offset;
+                }
+
+
+ // find the first endstream occurrence
+ // first check to see if it is within the specified stream length.
+ int idxOffset = IndexOfEndStream(postStream);
+ if (idxOffset != -1)
+ {
+ length = length + idxOffset;
+ }
+
+ if (idxOffset == -1)
+ {
+ // TODO:: read in chunks
+ postStream = ReadRawString(pos, _pdfLength - pos);
+ idxOffset = IndexOfEndStream(postStream);
+ if (idxOffset != -1)
+ {
+ length = idxOffset;
+ }
+ }
+ }
+
+ _pdfSteam.Position = pos;
byte[] bytes = new byte[length];
int read = _pdfSteam.Read(bytes, 0, length);
Debug.Assert(read == length);
@@ -247,20 +357,78 @@ public Symbol ScanName()
while (true)
{
char ch = AppendAndScanNextChar();
- if (IsWhiteSpace(ch) || IsDelimiter(ch) || ch == Chars.EOF)
- return _symbol = Symbol.Name;
- if (ch == '#')
- {
- ScanNextChar(true);
- char[] hex = new char[2];
- hex[0] = _currChar;
- hex[1] = _nextChar;
- ScanNextChar(true);
- // TODO Check syntax
- ch = (char)(ushort)int.Parse(new string(hex), NumberStyles.AllowHexSpecifier);
- _currChar = ch;
- }
+ if (ch == '#')
+ {
+ ScanNextChar(true);
+ char[] hex = new char[2];
+ hex[0] = _currChar;
+ hex[1] = _nextChar;
+ ScanNextChar(true);
+ // TODO Check syntax
+ ch = (char)(ushort)int.Parse(new string(hex), NumberStyles.AllowHexSpecifier);
+ _currChar = ch;
+ continue;
+ }
+
+ if (IsNameOrCommentDelimiter(ch) || ch == Chars.EOF)
+ {
+ return _symbol = Symbol.Name;
+ }
+
+ if (IsWhiteSpace(ch))
+ {
+ //TODO: Check that the white space is valid.
+ return _symbol = Symbol.Name;
+ }
+
+ //Handle invalid delimiters
+ switch (ch)
+ {
+ case '(':
+ //TODO: Handle invalid delimiters
+ return _symbol = Symbol.Name;
+ case ')':
+ //TODO: Handle invalid delimiters
+ return _symbol = Symbol.Name;
+ case '<':
+ //TODO: Handle invalid delimiters
+ return _symbol = Symbol.Name;
+ case '>':
+ //TODO: Handle invalid delimiters
+ return _symbol = Symbol.Name;
+ case '[':
+ //TODO: Not Complete
+ if (IsWhiteSpace(_nextChar) || IsDelimiter(_nextChar) || char.IsNumber(_nextChar) || _nextChar == '.' || _nextChar == '-' || PeekArrayKeyword())
+ {
+ return _symbol = Symbol.Name;
+ }
+ break;
+ case ']':
+ //TODO: Not Complete
+ if (IsWhiteSpace(_nextChar) || IsDelimiter(_nextChar) || _nextChar == Chars.EOF)
+ {
+ return _symbol = Symbol.Name;
+ }
+
+ string tkn = Token;
+
+ int position = Position;
+ ScanNextChar(true);
+ MoveToNonWhiteSpace();
+ bool isRef = PeekReference();
+ Position = position;
+ _token = new StringBuilder(tkn);
+ if (isRef)
+ return _symbol = Symbol.Name;
+ break;
+ case '{':
+ //TODO: Handle invalid delimiters
+ return _symbol = Symbol.Name;
+ case '}':
+ //TODO: Handle invalid delimiters
+ return _symbol = Symbol.Name;
+ }
}
}
@@ -634,20 +802,22 @@ internal char ScanNextChar(bool handleCRLF)
// Treat single CR as LF.
_currChar = Chars.LF;
}
- }
+ //Console.WriteLine();
+ }
}
+ //Console.Write(_currChar);
return _currChar;
}
- /////
- ///// Resets the current token to the empty string.
- /////
- //void ClearToken()
- //{
- // _token.Length = 0;
- //}
+ /////
+ ///// Resets the current token to the empty string.
+ /////
+ //void ClearToken()
+ //{
+ // _token.Length = 0;
+ //}
- bool PeekReference()
+ bool PeekReference()
{
// A Reference has the form "nnn mmm R". The implementation of the parser used a
// reduce/shift algorithm in the first place. But this case is the only one we need to
@@ -695,6 +865,39 @@ bool PeekReference()
return false;
}
+        // Looks ahead, behind the current character and any white space, and reports whether the
+        // next word is one of the keywords null, true, or false.
+        // The position and the current token are restored before the method returns.
+        bool PeekArrayKeyword()
+        {
+            StringBuilder savedToken = _token;
+            int savedPosition = Position;
+            ScanNextChar(true);
+
+            // Move to the first character of the word.
+            char ch = MoveToNonWhiteSpace();
+
+            // Collect the word in a fresh token.
+            _token = new StringBuilder();
+
+            // Also stop at the end of the file: EOF is tested separately from white space and
+            // delimiters elsewhere in this lexer (see ScanName), so without this test a word that
+            // runs up to the end of the file would never terminate the loop.
+            while (ch != Chars.EOF && !IsWhiteSpace(ch) && !IsDelimiter(ch))
+            {
+                ch = AppendAndScanNextChar();
+            }
+
+            string word = _token.ToString();
+            bool isKeyword = word == "null" || word == "true" || word == "false";
+
+            // Undo the look-ahead.
+            Position = savedPosition;
+            _token = savedToken;
+
+            return isKeyword;
+        }
+
///
/// Appends current character to the token and reads next one.
///
@@ -882,10 +1085,24 @@ internal static bool IsDelimiter(char ch)
return false;
}
- ///
- /// Gets the length of the PDF output.
- ///
- public int PdfLength
+        /// <summary>
+        /// Indicates whether the specified character is the delimiter that starts a name ('/')
+        /// or a comment ('%').
+        /// </summary>
+        /// <param name="ch">The character to test.</param>
+        /// <returns>true for '/' and '%'; false for every other character.</returns>
+        internal static bool IsNameOrCommentDelimiter(char ch)
+        {
+            // Unlike IsDelimiter, only these two characters are accepted.
+            bool startsName = ch == '/';
+            bool startsComment = ch == '%';
+            return startsName || startsComment;
+        }
+
+ ///
+ /// Gets the length of the PDF output.
+ ///
+ public int PdfLength
{
get { return _pdfLength; }
}
diff --git a/src/PdfSharp/Pdf.IO/Parser.cs b/src/PdfSharp/Pdf.IO/Parser.cs
index 07b353a9..94bc7d20 100644
--- a/src/PdfSharp/Pdf.IO/Parser.cs
+++ b/src/PdfSharp/Pdf.IO/Parser.cs
@@ -80,6 +80,22 @@ public int MoveToObject(PdfObjectID objectID)
return _lexer.Position = position;
}
+        /// <summary>
+        /// Tries to set PDF input stream position to the specified object; fails if its position is unknown.
+        /// </summary>
+        public bool TryMoveToObject(PdfObjectID objectID, out int position)
+        {
+            // A missing entry counts as "no position" (-1). NOTE(review): assumes the indexer may return null.
+            position = _document._irefTable[objectID]?.Position ?? -1;
+            if (position == -1)
+            {
+                position = _lexer.Position;
+                return false;
+            }
+            _lexer.Position = position;
+            return true;
+        }
+
public Symbol Symbol
{
get { return _lexer.Symbol; }
@@ -118,7 +134,8 @@ public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool incl
int generationNumber = objectID.GenerationNumber;
if (!fromObjecStream)
{
- MoveToObject(objectID);
+ if (!TryMoveToObject(objectID, out int position))
+ return null;
objectNumber = ReadInteger();
generationNumber = ReadInteger();
}
@@ -261,49 +278,35 @@ public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool incl
ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
break;
}
- symbol = ScanNextToken();
- if (symbol == Symbol.BeginStream)
+
+ int revert_pos = _lexer.Position;
+
+ ParserState state = SaveState();
+ TryScanNextToken(out symbol);
+ if (symbol == Symbol.BeginStream || symbol == Symbol.None)
{
+ if (symbol == Symbol.None)
+ {
+ // Failed to get a proper symbol
+ // probably missing "stream" token
+ RestoreState(state);
+ }
+
PdfDictionary dict = (PdfDictionary)pdfObject;
Debug.Assert(checkForStream, "Unexpected stream...");
-#if true_
- ReadStream(dict);
-#else
+
int length = GetStreamLength(dict);
byte[] bytes = _lexer.ReadStream(length);
-#if true_
- if (dict.Elements.GetString("/Filter") == "/FlateDecode")
- {
- if (dict.Elements["/Subtype"] == null)
- {
- try
- {
- byte[] decoded = Filtering.FlateDecode.Decode(bytes);
- if (decoded.Length == 0)
- goto End;
- string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
- if (pageContent.Length > 100)
- pageContent = pageContent.Substring(pageContent.Length - 100);
- pageContent.GetType();
- bytes = decoded;
- dict.Elements.Remove("/Filter");
- dict.Elements.SetInteger("/Length", bytes.Length);
- }
- catch
- {
- }
- }
- End: ;
- }
-#endif
+
PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
dict.Stream = stream;
- ReadSymbol(Symbol.EndStream);
- symbol = ScanNextToken();
-#endif
+
+ revert_pos = _lexer.Position;
+ while ((symbol = ScanNextToken()) == Symbol.EndStream);
}
- if (!fromObjecStream && symbol != Symbol.EndObj)
- ParserDiagnostics.ThrowParserException(PSSR.UnexpectedToken(_lexer.Token));
+ if (!fromObjecStream && symbol != Symbol.EndObj)
+ _lexer.Position = revert_pos;
+
return pdfObject;
}
@@ -322,7 +325,7 @@ private void ReadStream(PdfDictionary dict)
Debug.Assert(dict.Stream == null, "Dictionary already has a stream.");
dict.Stream = stream;
ReadSymbol(Symbol.EndStream);
- ScanNextToken();
+ while (ScanNextToken() == Symbol.EndStream);
}
// HACK: Solve problem more general.
@@ -339,11 +342,47 @@ private int GetStreamLength(PdfDictionary dict)
if (reference != null)
{
ParserState state = SaveState();
- object length = ReadObject(null, reference.ObjectID, false, false);
+ object pdf_obj = ReadObject(null, reference.ObjectID, false, false);
RestoreState(state);
- int len = ((PdfIntegerObject)length).Value;
- dict.Elements["/Length"] = new PdfInteger(len);
- return len;
+
+
+
+
+ int len = -1;
+ if (pdf_obj is PdfIntegerObject length_obj)
+ {
+ len = length_obj.Value;
+ }
+ // For whatever reason, ReadObject() did not return a valid PdfIntegerObject
+ else
+ {
+ // Read 1k chunks until we find an "endstream" symbol
+ string content = "";
+ int read_pos = _lexer.Position;
+ int se = -1;
+ while (true)
+ {
+ int read_len = Math.Min(_lexer.PdfLength - read_pos, 1024);
+ content += _lexer.ReadRawString(read_pos, read_len);
+ read_pos += 1024;
+
+ se = content.IndexOf("endstream", StringComparison.Ordinal);
+ if (se != -1)
+ {
+ len = se - 2; // By spec, the stream should start on a new line. remove crlf chars from the count.
+ break;
+ }
+
+ if (read_pos >= _lexer.PdfLength)
+ break;
+ }
+ }
+
+ if (len != -1)
+ {
+ dict.Elements["/Length"] = new PdfInteger(len);
+ return len;
+ }
}
throw new InvalidOperationException("Cannot retrieve stream length.");
}
@@ -537,10 +576,41 @@ private void ParseObject(Symbol stop)
//case Symbol.StartXRef:
//case Symbol.Eof:
default:
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
- SkipCharsUntil(stop);
- return;
- }
+ // Any Keyword can be treated as a literal string.
+ switch (stop)
+ {
+ case Symbol.EndArray:
+ // Arrays are space delimited.
+ while (true)
+ {
+ char ch = _lexer.AppendAndScanNextChar();
+ if (Lexer.IsWhiteSpace(ch) || ch == Chars.EOF || ch == Chars.BracketRight)
+ {
+ _stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.RawEncoding));
+ break;
+ }
+ }
+ break;
+ case Symbol.EndDictionary:
+ // Dictionaries are key value pairs where key must be a name.
+ while (true)
+ {
+ char ch = _lexer.AppendAndScanNextChar();
+ if (ch == Chars.Slash || ch == Chars.Greater)
+ {
+ _stack.Shift(new PdfString(_lexer.Token, PdfStringFlags.RawEncoding));
+ break;
+ }
+ }
+ break;
+ default:
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ SkipCharsUntil(stop);
+ break;
+ }
+
+ return;
+ }
}
ParserDiagnostics.ThrowParserException("Unexpected end of file."); // TODO L10N using PSSR.
}
@@ -549,6 +619,16 @@ private Symbol ScanNextToken()
{
return _lexer.ScanNextToken();
}
+
+        /// <summary>Scans the next token with the lexer and returns its symbol.</summary>
+        /// <param name="position">Receives the position at which the scanned token starts.</param>
+        private Symbol ScanNextToken(out int position)
+            => _lexer.ScanNextToken(out position);
+
+        /// <summary>Scans the next token with the lexer; the start position of the token is discarded.</summary>
+        /// <returns>false if the lexer could not scan a token; otherwise true.</returns>
+        private bool TryScanNextToken(out Symbol symbol)
+            => _lexer.TryScanNextToken(out symbol, out _);
private Symbol ScanNextToken(out string token)
{
@@ -1030,11 +1110,29 @@ internal PdfTrailer ReadTrailer()
if (idx == -1)
throw new Exception("The StartXRef table could not be found, the file cannot be opened.");
- ReadSymbol(Symbol.StartXRef);
- _lexer.Position = ReadInteger();
-
- // Read all trailers.
- while (true)
+ Symbol s = ReadSymbol(Symbol.StartXRef);
+ _lexer.Position = ReadInteger();
+
+ // Producer: iText1.3.1 by lowagie.com (based on itext-paulo-154)
+ // Problem: certificate data added to the start of file. Invalid startxref byte offset
+ // Fix: We could search for the a valid xref table but all byte offsets are probably incorrect.
+ // Probably best to just recreate the xref table.
+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.5.5
+
+ // Check for valid startxref
+ if (!IsValidXref())
+ {
+ PdfTrailer trailer;
+ bool bSuccess = TryRecreateXRefTableAndTrailer(out trailer, _document);
+ if (!bSuccess)
+ throw new Exception("Could not recreate the xref table or trailer.");
+
+ _document._trailer = trailer;
+ return _document._trailer;
+ }
+
+ // Read all trailers.
+ while (true)
{
PdfTrailer trailer = ReadXRefTableAndTrailer(_document._irefTable);
// 1st trailer seems to be the best.
@@ -1052,9 +1150,191 @@ internal PdfTrailer ReadTrailer()
}
///
- /// Reads cross reference table(s) and trailer(s).
+ /// Checks that the current _lexer location is a valid xref.
///
- private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
+ ///
+        private bool IsValidXref()
+        {
+            // This is a pure look-ahead: whatever happens, the lexer is put back where it was.
+            int savedPosition = _lexer.Position;
+            try
+            {
+                switch (ScanNextToken())
+                {
+                    case Symbol.XRef:
+                        // xref table
+                        return true;
+
+                    case Symbol.Integer:
+                        // An integer may be the start of an indirect object. Just because we have an integer
+                        // doesn't mean the startxref is valid, so require the complete "number number obj" header.
+                        return ScanNextToken() == Symbol.Integer && ScanNextToken() == Symbol.Obj;
+
+                    default:
+                        return false;
+                }
+            }
+            catch
+            {
+                // Any failure while scanning means there is no usable xref at this position.
+                return false;
+            }
+            finally
+            {
+                _lexer.Position = savedPosition;
+            }
+        }
+
+        /// <summary>
+        /// Rebuilds the cross reference table by scanning the file for object headers ("id generation obj")
+        /// and constructs a trailer from the objects that were found.
+        /// </summary>
+        /// <remarks>
+        /// Called when the startxref offset of the file does not lead to a valid cross reference section.
+        /// An object ID that is already contained in the table is not registered again.
+        /// Streams are skipped without being read.
+        /// </remarks>
+        /// <param name="trailer">Receives the reconstructed trailer; null if the method returns false.</param>
+        /// <param name="document">The document whose cross reference table is filled.</param>
+        /// <returns>false if no "%PDF-1." header or no "%%EOF" marker is found; otherwise true.</returns>
+        /// <exception cref="Exception">A stream is not terminated by an "endstream" keyword.</exception>
+        private bool TryRecreateXRefTableAndTrailer(out PdfTrailer trailer, PdfDocument document)
+        {
+            PdfCrossReferenceTable xrefTable = document._irefTable;
+            trailer = null;
+            int length = _lexer.PdfLength;
+
+            // because some pdf producers put random info before the header, we need to find a proper starting position.
+            // i.e. Producer: iText1.3.1 by lowagie.com (based on itext-paulo-154)
+            // The file is read forwards in 1k chunks until the header is found.
+            int startIdx = -1;
+            string contents = "";
+            for (int i = 0, pos = 0; startIdx == -1 && pos < length; i++, pos = 1024 * i)
+            {
+                int len = Math.Min(1024, length - pos);
+                contents = $"{contents}{_lexer.ReadRawString(pos, len)}";
+                startIdx = contents.IndexOf("%PDF-1.", StringComparison.Ordinal);
+            }
+
+            if (startIdx == -1)
+                return false;
+
+            // Don't look past the last %%EOF marker. The file is read backwards in 1k chunks.
+            int endIdx = -1;
+            contents = "";
+            for (int i = 1; endIdx == -1; i++)
+            {
+                int pos = length - (1024 * i);
+                int len = 1024;
+
+                if (pos < 0)
+                {
+                    // The chunk at the very start of the file is shorter than 1k.
+                    len = len + pos;
+                    pos = 0;
+                }
+
+                contents = $"{_lexer.ReadRawString(pos, len)}{contents}";
+                endIdx = contents.LastIndexOf("%%EOF", StringComparison.Ordinal);
+                if (endIdx != -1)
+                    endIdx = length - contents.Length + endIdx; // convert to an absolute file position
+
+                if (pos == 0)
+                    break;
+            }
+
+            if (endIdx == -1)
+                return false;
+
+            endIdx = endIdx + 5; // This should be where Eof char is
+
+            // Recreate the xref table.
+            //
+            // When symbol == Symbol.Obj
+            // [0] - generation
+            // [1] - id
+            TokenInfo[] tokenStack = new TokenInfo[2];
+
+            _lexer.Position = startIdx;
+            while (true)
+            {
+                Symbol symbol = ScanNextToken(out int position);
+                if (symbol == Symbol.Eof)
+                {
+                    // Check if it's the last EOF; an Eof symbol in front of it does not end the scan.
+                    if (_lexer.Position >= endIdx)
+                        break; // This is the end of the file.
+                }
+
+                // we need to skip over streams entirely
+                if (symbol == Symbol.BeginStream)
+                {
+                    // We're not reading any data from the object so we need to find endstream
+                    int pos = _lexer.Position;
+                    string trail = "";
+                    int trailPos = pos;
+                    while (true)
+                    {
+                        // look for endstream in 1k chunks.
+                        int trailLength = Math.Min(1024, length - trailPos);
+                        trail += _lexer.ReadRawString(trailPos, trailLength);
+                        int stop = trail.IndexOf("endstream", StringComparison.Ordinal);
+                        if (stop != -1)
+                        {
+                            // stop is relative to pos because trail starts at pos.
+                            _lexer.Position = stop + pos;
+                            break;
+                        }
+
+                        // Give up only after the whole remainder of the file has been searched; the last
+                        // chunk is usually shorter than 1k and must still be read before failing.
+                        trailPos = trailPos + trailLength;
+                        if (trailPos >= length)
+                        {
+                            // No endstream was found.
+                            throw new Exception("endstream not found.");
+                        }
+                    }
+                }
+
+                if (symbol == Symbol.Obj &&
+                    tokenStack[0].Symbol == Symbol.Integer &&
+                    tokenStack[1].Symbol == Symbol.Integer)
+                {
+                    // TODO:: Do we only need the most recent revision?
+                    PdfObjectID objectID = new PdfObjectID(tokenStack[1].Number, tokenStack[0].Number);
+                    if (!xrefTable.Contains(objectID))
+                        xrefTable.Add(new PdfReference(objectID, tokenStack[1].Position));
+                    // NOTE: Don't ReadObject here (the object value would never be set afterwards) and
+                    // don't SkipCharsUntil(Symbol.EndObj) (streams would cause exceptions).
+                }
+
+                // Shift the token history and remember the current token.
+                tokenStack[1] = tokenStack[0];
+                TokenInfo tokenInfo = new TokenInfo { Symbol = symbol, Position = position };
+                if (symbol == Symbol.Integer)
+                    tokenInfo.Number = _lexer.TokenToInteger;
+                tokenStack[0] = tokenInfo;
+            }
+
+            // Build the trailer for the document whose table was just recreated.
+            trailer = new PdfTrailer(document);
+            trailer.ConstructFromDocument(this);
+
+            return true;
+        }
+
+ struct TokenInfo // One scanned token, as remembered while the xref table is recreated.
+ {
+ public int Position; // Start position of the token as reported by ScanNextToken.
+ public Symbol Symbol; // Symbol returned by the lexer for the token.
+ public int Number; // Integer value of the token; only assigned when Symbol is Symbol.Integer.
+ }
+
+ ///
+ /// Reads cross reference table(s) and trailer(s).
+ ///
+ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
{
Debug.Assert(xrefTable != null);
@@ -1082,6 +1362,14 @@ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
// Skip unused entries.
if (token != "n")
continue;
+
+ // Mac OS X 10.12.6 Quartz PDFContext fails to mark 0 position entries as free.
+ // According to spec, we could skip anything less than 8 (e.g. '%PDF-1.n' where n is a digit between 0 and 7 must be the header of a file)
+ // but anything between 0 and 8 (1-7) could be the indication of a much larger problem.
+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.5.2
+ // Skip 0 position entries.
+ if (position == 0)
+ continue;
#if true
//!!!new 2018-03-14 begin
// Check if the object at the address has the correct ID and generation.
@@ -1129,7 +1417,7 @@ private PdfTrailer ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
}
return null;
}
-
+
///
/// Checks the x reference table entry. Returns true if everything is correct.
/// Return false if the keyword "obj" was found, but ID or Generation are incorrect.
diff --git a/src/PdfSharp/Pdf.IO/PdfReader.cs b/src/PdfSharp/Pdf.IO/PdfReader.cs
index 08a9f965..21eaef5b 100644
--- a/src/PdfSharp/Pdf.IO/PdfReader.cs
+++ b/src/PdfSharp/Pdf.IO/PdfReader.cs
@@ -279,7 +279,9 @@ public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMo
public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMode openmode, PdfPasswordProvider passwordProvider)
{
PdfDocument document;
+#if !DEBUG
try
+#endif
{
Lexer lexer = new Lexer(stream);
document = new PdfDocument(lexer);
@@ -439,7 +441,7 @@ public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMo
{
Debug.WriteLine(ex.Message);
// 4STLA rethrow exception to notify caller.
- throw;
+ //throw;
}
}
else
@@ -500,11 +502,13 @@ public static PdfDocument Open(Stream stream, string password, PdfDocumentOpenMo
document._irefTable.CheckConsistence();
}
}
+#if !DEBUG
catch (Exception ex)
{
Debug.WriteLine(ex.Message);
throw;
}
+#endif
return document;
}
diff --git a/src/PdfSharp/Pdf.IO/PdfWriter.cs b/src/PdfSharp/Pdf.IO/PdfWriter.cs
index 08071c5c..feafe404 100644
--- a/src/PdfSharp/Pdf.IO/PdfWriter.cs
+++ b/src/PdfSharp/Pdf.IO/PdfWriter.cs
@@ -245,7 +245,7 @@ public void Write(PdfName value)
case '[':
case ']':
case '#':
- break;
+ break;
default:
pdf.Append(name[idx]);
diff --git a/src/PdfSharp/Pdf/PdfDictionary.cs b/src/PdfSharp/Pdf/PdfDictionary.cs
index 428140a1..b24138af 100644
--- a/src/PdfSharp/Pdf/PdfDictionary.cs
+++ b/src/PdfSharp/Pdf/PdfDictionary.cs
@@ -652,8 +652,10 @@ public PdfRectangle GetRectangle(string key, bool create)
array.Elements.GetReal(2), array.Elements.GetReal(3));
this[key] = value;
}
- else
- value = (PdfRectangle)obj;
+ else if (obj is PdfRectangle rectangle)
+ {
+ value = rectangle;
+ }
return value;
}
diff --git a/src/PdfSharp/Pdf/PdfPages.cs b/src/PdfSharp/Pdf/PdfPages.cs
index de2ee441..eb612221 100644
--- a/src/PdfSharp/Pdf/PdfPages.cs
+++ b/src/PdfSharp/Pdf/PdfPages.cs
@@ -612,8 +612,9 @@ PdfDictionary[] GetKids(PdfReference iref, PdfPage.InheritedValues values, PdfDi
PdfPage.InheritValues(kid, values);
return new PdfDictionary[] { kid };
}
-
- if (string.IsNullOrEmpty(type))
+
+ // If it has kids, it's logically not going to be type page.
+ if (string.IsNullOrEmpty(type) && !kid.Elements.ContainsKey("/Kids"))
{
// Type is required. If type is missing, assume it is "/Page" and hope it will work.
// TODO Implement a "Strict" mode in PDFsharp and don't do this in "Strict" mode.