Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize resources while importing pages #105

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/PdfSharp/Pdf/PdfDictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Reflection;
using System.Text;
using PdfSharp.Drawing;
Expand Down Expand Up @@ -1507,7 +1508,12 @@ public int Count
/// </summary>
public void CopyTo(KeyValuePair<string, PdfItem>[] array, int arrayIndex)
{
    // Follows the ICollection<T>.CopyTo contract: every element of this collection is
    // written to 'array', the first one landing at 'arrayIndex'. Invalid arguments are
    // reported with the standard argument exceptions instead of being silently ignored.
    if (array == null)
        throw new ArgumentNullException(nameof(array));
    if (arrayIndex < 0)
        throw new ArgumentOutOfRangeException(nameof(arrayIndex), "The array index must not be negative.");

    // Snapshot the key/value pairs so that source and destination can be copied in one step.
    var elements = _elements.ToArray();

    // The destination must be able to hold all elements starting at arrayIndex;
    // a partial copy would hide the problem from the caller.
    if (array.Length - arrayIndex < elements.Length)
        throw new ArgumentException("The destination array is too small to hold all elements starting at the specified index.", nameof(array));

    // Source index 0 maps to destination index arrayIndex.
    Array.Copy(elements, 0, array, arrayIndex, elements.Length);
}

/// <summary>
Expand Down
213 changes: 212 additions & 1 deletion src/PdfSharp/Pdf/PdfObject.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
using System.Diagnostics;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.IO;
using System;
using System.Diagnostics;
using System.Linq;

namespace PdfSharp.Pdf
{
Expand Down Expand Up @@ -174,6 +177,7 @@ internal virtual PdfDocument Document
}
}
}

internal PdfDocument _document;

/// <summary>
Expand Down Expand Up @@ -411,16 +415,223 @@ internal static PdfObject ImportClosure(PdfImportedObjectTable importedObjectTab
return elements[0];
}

/// <summary>
/// Imports a resource object and its transitive closure to the specified document.
/// The root object (index 0 of the closure) is always cloned again, even if it was imported
/// before; when the root is an indirect dictionary it is passed to CleanUpResourceDictionary
/// together with <paramref name="usedResources"/> and is not registered in the imported object table.
/// </summary>
/// <param name="importedObjectTable">The imported object table of the owner for the external document.</param>
/// <param name="owner">The document that owns the cloned objects.</param>
/// <param name="externalObject">The root object to be cloned.</param>
/// <param name="usedResources">The resource names handed to CleanUpResourceDictionary; entries whose key is not
/// contained in this list are removed from the sub-dictionaries of the cloned root.</param>
/// <returns>The clone of the root object</returns>
internal static PdfObject ImportResourcesClosure(PdfImportedObjectTable importedObjectTable, PdfDocument owner, PdfObject externalObject, string[] usedResources)
{
    Debug.Assert(ReferenceEquals(importedObjectTable.Owner, owner), "importedObjectTable does not belong to the owner.");
    Debug.Assert(ReferenceEquals(importedObjectTable.ExternalDocument, externalObject.Owner),
        "The ExternalDocument of the importedObjectTable does not belong to the owner of object to be imported.");

    // Get transitive closure of external object.
    PdfObject[] elements = externalObject.Owner.Internals.GetClosure(externalObject);
    int count = elements.Length;

    // 1st loop. Already imported objects are reused and new ones are cloned.
    for (int idx = 0; idx < count; idx++)
    {
        PdfObject obj = elements[idx];
        Debug.Assert(!ReferenceEquals(obj.Owner, owner));

        // The root (idx == 0) never takes the reuse path, so it is cloned on every call.
        if (idx != 0 && importedObjectTable.Contains(obj.ObjectID))
        {
            // Case: External object was already imported.
            PdfReference iref = importedObjectTable[obj.ObjectID];
            Debug.Assert(iref != null);
            Debug.Assert(iref.Value != null);
            Debug.Assert(iref.Document == owner);
            // Replace external object by the already cloned counterpart.
            elements[idx] = iref.Value;
        }
        else
        {
            // Case: External object was not yet imported earlier and must be cloned.
            Debug.Assert(obj.Owner != owner);
            PdfObject clone = obj.Clone();
            Debug.Assert(clone.Reference == null);
            clone.Document = owner;
            if (obj.Reference != null)
            {
                // Case: The cloned object was an indirect object.
                // Add clone to new owner document.
                owner._irefTable.Add(clone);
                Debug.Assert(clone.Reference != null);

                if (idx == 0 && clone is PdfDictionary dict)
                {
                    // Root dictionary: strip the entries that are not listed in usedResources.
                    // The root is intentionally not added to importedObjectTable here.
                    CleanUpResourceDictionary(owner, dict, usedResources, importedObjectTable);
                }
                else
                {
                    // Any other indirect object is registered so later look-ups reuse this clone.
                    importedObjectTable.Add(obj.ObjectID, clone.Reference);
                }

                // Clone the children of the new object as well; references to objects of the
                // external document are replaced by references to clones in the owner document.
                DeepCopyItem(owner, importedObjectTable, clone);
            }
            else
            {
                // Case: The cloned object was a direct object.
                // Only the root object can be a direct object.
                Debug.Assert(idx == 0);
            }
            // Replace external object by its clone.
            elements[idx] = clone;
        }
    }

    // 2nd loop. Fix up indirect references that still refers to the external document.
    for (int idx = 0; idx < count; idx++)
    {
        PdfObject obj = elements[idx];
        Debug.Assert(owner != null);
        FixUpObject(importedObjectTable, importedObjectTable.Owner, obj);
    }

    // Return the imported root object.
    return elements[0];
}

/// <summary>
/// Deep-copies the children of an already cloned item. Dictionaries and arrays are forwarded
/// to the matching DeepCopyObjects overload; any other kind of item is left untouched.
/// </summary>
/// <param name="owner">The document that receives the copies.</param>
/// <param name="importedObjectTable">The table that maps external objects to their clones.</param>
/// <param name="item">The cloned item whose children are processed. Must not be a reference.</param>
private static void DeepCopyItem(PdfDocument owner, PdfImportedObjectTable importedObjectTable, PdfItem item)
{
    Debug.Assert(!(item is PdfReference), "irefs must be resolved before cloning!");

    if (item is PdfDictionary dictionary)
    {
        DeepCopyObjects(owner, importedObjectTable, dictionary);
    }
    else if (item is PdfArray elements)
    {
        DeepCopyObjects(owner, importedObjectTable, elements);
    }
}

/// <summary>
/// Replaces every entry of the dictionary by the result of CloneItem for that entry.
/// </summary>
/// <param name="owner">The document that receives the copies.</param>
/// <param name="importedObjectTable">The table that maps external objects to their clones.</param>
/// <param name="dict">The dictionary whose entries are replaced in place.</param>
private static void DeepCopyObjects(PdfDocument owner, PdfImportedObjectTable importedObjectTable, PdfDictionary dict)
{
    // The key list is fetched once, before any entry is overwritten.
    foreach (var key in dict.Elements.KeyNames)
    {
        dict.Elements[key] = CloneItem(owner, importedObjectTable, dict.Elements[key]);
    }
}

/// <summary>
/// Replaces every element of the array by the result of CloneItem for that element.
/// </summary>
/// <param name="owner">The document that receives the copies.</param>
/// <param name="importedObjectTable">The table that maps external objects to their clones.</param>
/// <param name="array">The array whose elements are replaced in place.</param>
private static void DeepCopyObjects(PdfDocument owner, PdfImportedObjectTable importedObjectTable, PdfArray array)
{
    int position = 0;
    while (position < array.Elements.Count)
    {
        array.Elements[position] = CloneItem(owner, importedObjectTable, array.Elements[position]);
        position++;
    }
}

/// <summary>
/// Produces the item that has to be stored in the owner document in place of
/// <paramref name="childElement"/>.
/// A direct item is cloned and deep-copied. A reference that already belongs to the owner is
/// returned unchanged. A reference to an external object is replaced by the reference of its
/// clone, which is created and registered in the imported object table on first use.
/// </summary>
/// <param name="owner">The document that receives the copies.</param>
/// <param name="importedObjectTable">The table that maps external objects to their clones.</param>
/// <param name="childElement">The item to be cloned or remapped.</param>
/// <returns>The item to store in the owner document.</returns>
private static PdfItem CloneItem(PdfDocument owner, PdfImportedObjectTable importedObjectTable, PdfItem childElement)
{
    if (!(childElement is PdfReference reference))
    {
        // Direct item: copy it and everything it contains.
        var copy = childElement.Clone();
        DeepCopyItem(owner, importedObjectTable, copy);
        return copy;
    }

    // The reference already points into the owner document; nothing to do.
    if (reference.Document == owner)
        return childElement;
    Debug.Assert(reference.Document != owner);

    // The external object was imported earlier; reuse its clone.
    if (importedObjectTable.Contains(reference.ObjectID))
        return importedObjectTable[reference.ObjectID];

    // First encounter: clone the external object, register the clone, then copy its children.
    var clone = reference.Value.Clone();
    clone.Document = owner;
    owner._irefTable.Add(clone);
    var importedReference = clone.Reference;
    importedObjectTable.Add(reference.ObjectID, clone.Reference);

    DeepCopyItem(owner, importedObjectTable, clone);
    return importedReference;
}

/// <summary>
/// Reduces a cloned resource dictionary to the resources listed in <paramref name="usedResources"/>.
/// Each entry of <paramref name="dictionary"/> that is a reference is replaced by a reference to a
/// fresh clone in the owner document. For each entry that is (or resolves to) a dictionary, the
/// child entries whose key is not contained in <paramref name="usedResources"/> are removed; the
/// remaining child entries are cloned, or remapped through the imported object table when they
/// are references.
/// </summary>
/// <param name="owner">The document that receives the copies.</param>
/// <param name="dictionary">The cloned resource dictionary that is modified in place.</param>
/// <param name="usedResources">The keys of the child entries to keep.</param>
/// <param name="importedObjectTable">The table that maps external objects to their clones.</param>
private static void CleanUpResourceDictionary(PdfDocument owner, PdfDictionary dictionary, string[] usedResources, PdfImportedObjectTable importedObjectTable)
{
    // Iterate over snapshots because the dictionaries are modified inside the loops.
    foreach (var category in dictionary.Elements.ToArray())
    {
        var categoryItem = category.Value;

        if (categoryItem is PdfReference categoryRef)
        {
            // Give the owner document its own copy of the referenced object.
            var categoryClone = categoryRef.Value.Clone();
            categoryClone.Document = owner;
            Debug.Assert(categoryClone.Reference == null);
            owner._irefTable.Add(categoryClone);
            dictionary.Elements[category.Key] = categoryClone.Reference;
            categoryItem = categoryClone;
        }

        // Only dictionaries are filtered; every other kind of entry is kept as is.
        if (!(categoryItem is PdfDictionary resources))
            continue;

        foreach (var entry in resources.Elements.ToArray())
        {
            if (!usedResources.Contains(entry.Key))
            {
                resources.Elements.Remove(entry.Key);
                continue;
            }

            var replacement = entry.Value;
            if (!(replacement is PdfReference externalRef))
            {
                // Direct value: a plain clone is stored.
                replacement = replacement.Clone();
            }
            else if (importedObjectTable.Contains(externalRef.ObjectID))
            {
                // Imported earlier: reuse the existing clone.
                replacement = importedObjectTable[externalRef.ObjectID];
            }
            else
            {
                // First encounter: clone, register and deep-copy the referenced object.
                Debug.Assert(externalRef.Document != owner);
                var resourceClone = externalRef.Value.Clone();
                resourceClone.Document = owner;
                owner._irefTable.Add(resourceClone);
                replacement = resourceClone.Reference;
                importedObjectTable.Add(externalRef.ObjectID, resourceClone.Reference);

                DeepCopyItem(owner, importedObjectTable, resourceClone);
            }

            resources.Elements[entry.Key] = replacement;
        }
    }
}

/// <summary>
/// Replace all indirect references to external objects by their cloned counterparts
/// owned by the importer document.
/// </summary>
static void FixUpObject(PdfImportedObjectTable iot, PdfDocument owner, PdfObject value)
private static void FixUpObject(PdfImportedObjectTable iot, PdfDocument owner, PdfObject value)
{
Debug.Assert(ReferenceEquals(iot.Owner, owner));

PdfDictionary dict;
PdfArray array;

if ((dict = value as PdfDictionary) != null)
{
// Case: The object is a dictionary.
Expand Down
81 changes: 71 additions & 10 deletions src/PdfSharp/Pdf/PdfPages.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,20 @@
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Collections;
#endregion PDFsharp - A .NET library for processing PDF

using PdfSharp.Events;
using PdfSharp.Pdf.IO;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.Annotations;
using PdfSharp.Pdf.IO;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace PdfSharp.Pdf
{
Expand Down Expand Up @@ -455,15 +457,16 @@ public void MovePage(int oldIndex, int newIndex)
/// of their transitive closure. Any reuse of already imported objects is not intended because
/// any modification of an imported page must not change another page.
/// </summary>
PdfPage ImportExternalPage(PdfPage importPage)
private PdfPage ImportExternalPage(PdfPage importPage)
{
if (importPage.Owner._openMode != PdfDocumentOpenMode.Import)
throw new InvalidOperationException("A PDF document must be opened with PdfDocumentOpenMode.Import to import pages from it.");

PdfPage page = new PdfPage(_document);

var usedResources = GetUsedContentResources(importPage);
// ReSharper disable AccessToStaticMemberViaDerivedType for a better code readability.
CloneElement(page, importPage, PdfPage.Keys.Resources, false);
CloneResourceElement(page, importPage, usedResources);
CloneElement(page, importPage, PdfPage.Keys.Contents, false);
CloneElement(page, importPage, PdfPage.Keys.MediaBox, true);
CloneElement(page, importPage, PdfPage.Keys.CropBox, true);
Expand All @@ -483,6 +486,23 @@ PdfPage ImportExternalPage(PdfPage importPage)
return page;
}

/// <summary>
/// Collects the PDF names (tokens starting with '/') that occur in the content of the page.
/// The result is used to decide which resources must be kept when the page is imported, so it
/// deliberately errs on the side of returning too many names rather than too few.
/// </summary>
/// <param name="page">The page whose /Contents entry is scanned.</param>
/// <returns>
/// The distinct names including their leading '/', or an empty array if the page has no
/// readable content. The /Contents entry may be a single stream or an array of streams.
/// </returns>
private string[] GetUsedContentResources(PdfPage page)
{
    var contentObject = page.Elements[PdfPage.Keys.Contents];
    if (contentObject is PdfReference iref)
        contentObject = iref.Value;

    var names = new HashSet<string>();

    if (contentObject is PdfArray contentArray)
    {
        // A page may split its content over several streams; all of them must be scanned,
        // otherwise resources used only by later parts would be reported as unused.
        for (int idx = 0; idx < contentArray.Elements.Count; idx++)
        {
            PdfItem part = contentArray.Elements[idx];
            if (part is PdfReference partRef)
                part = partRef.Value;

            CollectContentNames(part as PdfDictionary, names);
        }
    }
    else
    {
        CollectContentNames(contentObject as PdfDictionary, names);
    }

    return names.ToArray();
}

/// <summary>
/// Adds every name token found in the stream of <paramref name="content"/> to <paramref name="names"/>.
/// Does nothing if the dictionary is null or has no stream.
/// </summary>
private static void CollectContentNames(PdfDictionary content, ISet<string> names)
{
    if (content == null || content.Stream == null)
        return;

    // NOTE(review): relies on Stream.ToString() returning the content as text, exactly as the
    // previous implementation did - confirm that filtered (compressed) streams are decoded.
    string text = content.Stream.ToString();
    if (string.IsNullOrEmpty(text))
        return;

    // White-space and delimiter characters that terminate a name in PDF syntax.
    const string terminators = "\0\t\n\f\r ()<>[]{}/%";

    // Scan for '/' instead of splitting on blanks: a name may directly follow another token
    // (e.g. "q/Fm0 Do") and may be followed by any white-space character, not just ' ' or '\n'.
    int start = text.IndexOf('/');
    while (start >= 0)
    {
        int end = start + 1;
        while (end < text.Length && terminators.IndexOf(text[end]) < 0)
            end++;

        // Skip a lone '/' (empty name); it cannot be the key of a resource.
        if (end > start + 1)
            names.Add(text.Substring(start, end - start));

        start = end < text.Length ? text.IndexOf('/', end) : -1;
    }
}

/// <summary>
/// Helper function for ImportExternalPage.
/// </summary>
Expand Down Expand Up @@ -532,7 +552,48 @@ void CloneElement(PdfPage page, PdfPage importPage, string key, bool deepcopy)
}
}

static PdfReference RemapReference(PdfPage[] newPages, PdfPage[] impPages, PdfReference iref)
/// <summary>
/// Helper function for ImportExternalPage. Copies the resources entry of the imported page to
/// the new page. Object values are imported through ImportResourcesClosure, which receives
/// <paramref name="usedResources"/>; any other value is simply cloned.
/// </summary>
/// <param name="page">The new page that belongs to this document.</param>
/// <param name="importPage">The page of the external document that is being imported.</param>
/// <param name="usedResources">The resource names forwarded to ImportResourcesClosure.</param>
private void CloneResourceElement(PdfPage page, PdfPage importPage, string[] usedResources)
{
    Debug.Assert(page != null);
    Debug.Assert(page.Owner == _document);
    Debug.Assert(importPage.Owner != null);
    Debug.Assert(importPage.Owner != _document);

    var key = PdfPage.Keys.Resources;

    PdfItem item = importPage.Elements[key];
    if (item == null)
        return;

    var importedObjectTable = Owner.FormTable.GetImportedObjectTable(importPage);

    // An indirect entry is replaced by the object it refers to.
    if (item is PdfReference reference)
        item = reference.Value;

    if (!(item is PdfObject root))
    {
        // Simple items are just cloned.
        page.Elements[key] = item.Clone();
        return;
    }

    // A direct object may not have an owner yet; attach it to the external document.
    if (root.Owner == null)
        root.Document = importPage.Owner;
    root = ImportResourcesClosure(importedObjectTable, page.Owner, root, usedResources);

    // Store the reference when the imported root is indirect, otherwise the object itself.
    if (root.Reference != null)
        page.Elements[key] = root.Reference;
    else
        page.Elements[key] = root;
}

private static PdfReference RemapReference(PdfPage[] newPages, PdfPage[] impPages, PdfReference iref)
{
// Directs the iref to a one of the imported pages?
for (int idx = 0; idx < newPages.Length; idx++)
Expand Down