Skip to content

Commit

Permalink
First draft for incremental saving
Browse files Browse the repository at this point in the history
  • Loading branch information
packdat committed May 4, 2024
1 parent 91d71c5 commit 0e29203
Show file tree
Hide file tree
Showing 15 changed files with 337 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public PdfPages Pages
if (_pages == null)
{
_pages = (PdfPages?)Elements.GetValue(Keys.Pages, VCF.CreateIndirect) ?? NRT.ThrowOnNull<PdfPages>();
if (Owner.IsImported)
if (Owner.IsImported && Owner._openMode != PdfDocumentOpenMode.Append)
_pages.FlattenPageTree();
}
return _pages;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public PdfContent(PdfDictionary dict) // HACK PdfContent
: base(dict)
{
// A PdfContent dictionary is always unfiltered.
Decode();
Owner.IrefTable.IgnoreModify(Decode); // decode modifies the object, ignore that
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ public PdfContent AppendContent()
{
Debug.Assert(Owner != null);

SetModified();
if (Owner._openMode != PdfDocumentOpenMode.Append)
SetModified();
PdfContent content = new PdfContent(Owner);
Owner.IrefTable.Add(content);
Debug.Assert(content.Reference != null);
Expand All @@ -64,7 +65,8 @@ public PdfContent PrependContent()
{
Debug.Assert(Owner != null);

SetModified();
if (Owner._openMode != PdfDocumentOpenMode.Append)
SetModified();
PdfContent content = new PdfContent(Owner);
Owner.IrefTable.Add(content);
Debug.Assert(content.Reference != null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,47 @@ public PdfCrossReferenceTable(PdfDocument document)
public Dictionary<PdfObjectID, PdfReference> ObjectTable = [];

/// <summary>
/// Used to collect modified objects for incremental updates
/// </summary>
public Dictionary<PdfObjectID, PdfReference> ModifiedObjects = [];

/// <summary>
/// Gets or sets a value indicating whether this table is under construction.
/// It is true while reading a PDF file.
/// </summary>
internal bool IsUnderConstruction { get; set; }

internal bool ReadyForModification { get; set; }

/// <summary>
/// Records the given reference in <see cref="ModifiedObjects"/> so the object can be
/// collected for an incremental update.
/// </summary>
/// <remarks>
/// The call is a no-op when <paramref name="pdfReference"/> is null or while
/// <see cref="ReadyForModification"/> is false.
/// </remarks>
/// <param name="pdfReference">Reference of the object that changed; may be null.</param>
/// <exception cref="ArgumentException">
/// <paramref name="pdfReference"/> has an empty object ID.
/// </exception>
internal void MarkAsModified(PdfReference? pdfReference)
{
    if (pdfReference == null || !ReadyForModification)
        return;

    // paramName must name an actual parameter of this method; nameof(pdfReference.ObjectID)
    // would evaluate to "ObjectID", which is not one.
    if (pdfReference.ObjectID.IsEmpty)
        throw new ArgumentException("ObjectID must not be empty", nameof(pdfReference));

    // Indexer assignment: marking the same object repeatedly simply overwrites the entry.
    ModifiedObjects[pdfReference.ObjectID] = pdfReference;
}

/// <summary>
/// Runs <paramref name="action"/> with <see cref="ReadyForModification"/> set to false
/// and restores the previous value afterwards, even if the action throws.<br/>
/// Used to temporarily ignore modifications to objects
/// (i.e. when doing type-transformations that do not change the structure of the document).
/// </summary>
/// <param name="action">The work to execute while modification tracking is switched off.</param>
/// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
internal void IgnoreModify(Action action)
{
    // Fail with a clear argument error instead of a NullReferenceException inside the try block.
    if (action is null)
        throw new ArgumentNullException(nameof(action));

    var prev = ReadyForModification;
    ReadyForModification = false;
    try
    {
        action();
    }
    finally
    {
        // Restore the saved value rather than forcing true, so a state that was
        // already disabled before the call stays disabled.
        ReadyForModification = prev;
    }
}

/// <summary>
/// Adds a cross-reference entry to the table. Used when parsing the trailer.
/// </summary>
Expand All @@ -46,6 +82,9 @@ public void Add(PdfReference iref)
throw new InvalidOperationException("Object already in table.");

ObjectTable.Add(iref.ObjectID, iref);

if (ReadyForModification && _document.IsAppending)
ModifiedObjects[iref.ObjectID] = iref;
}

/// <summary>
Expand All @@ -65,6 +104,9 @@ public void Add(PdfObject value)
throw new InvalidOperationException("Object already in table.");

ObjectTable.Add(value.ObjectID, value.ReferenceNotNull);

if (ReadyForModification && _document.IsAppending)
ModifiedObjects[value.ObjectID] = value.Reference;
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ namespace PdfSharp.Pdf.Advanced
// Reference: 3.4.4 File Trailer / Page 96
class PdfTrailer : PdfDictionary
{
/// <summary>
/// Gets or sets the position of this trailer in the input-stream.<br/>
/// Only meaningful for loaded documents; will be zero for new documents.
/// </summary>
/// <remarks>
/// NOTE(review): the parser appears to assign the offset at which the cross-reference
/// section starts (the value read after 'startxref', or the offset of a previous trailer
/// it continues with), not the offset of the 'trailer' keyword itself. Confirm.
/// </remarks>
internal SizeType Position { get; set; }

/// <summary>
/// Initializes a new instance of PdfTrailer.
/// </summary>
Expand Down Expand Up @@ -211,8 +217,9 @@ internal void Finish()

Elements.Remove(Keys.Prev);

Debug.Assert(_document.IrefTable.IsUnderConstruction == false);
Debug.Assert(_document.IrefTable.IsUnderConstruction == false); // Why ??
_document.IrefTable.IsUnderConstruction = false;
_document.IrefTable.ReadyForModification = true;
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public PdfAnnotations Parent
/// </summary>
public PdfRectangle Rectangle
{
get => Elements.GetRectangle(Keys.Rect, true);
get => Elements.GetRectangle(Keys.Rect);
set
{
Elements.SetRectangle(Keys.Rect, value);
Expand Down
26 changes: 16 additions & 10 deletions src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1161,11 +1161,13 @@ internal PdfTrailer ReadTrailer()
// Read position behind 'startxref'.
_lexer.Position = ReadSize();

var xrefStart = _lexer.Position;

// Read all trailers.
PdfTrailer? newerTrailer = null;
while (true)
{
var trailer = ReadXRefTableAndTrailer(_document.IrefTable);
var trailer = ReadXRefTableAndTrailer(_document.IrefTable, xrefStart);

// Return the first found trailer, which is the one 'startxref' points to.
// This is the current trailer, even for incrementally updated files.
Expand All @@ -1183,6 +1185,7 @@ internal PdfTrailer ReadTrailer()

// Continue loading previous trailer and cache this one as the newerTrailer to add its previous trailer.
_lexer.Position = prev;
xrefStart = prev;
newerTrailer = trailer;
}
return _document.Trailer;
Expand All @@ -1191,7 +1194,7 @@ internal PdfTrailer ReadTrailer()
/// <summary>
/// Reads cross-reference table(s) and trailer(s).
/// </summary>
PdfTrailer? ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable)
PdfTrailer? ReadXRefTableAndTrailer(PdfCrossReferenceTable xrefTable, SizeType xrefStart)
{
Debug.Assert(xrefTable != null);

Expand Down Expand Up @@ -1260,7 +1263,10 @@ internal PdfTrailer ReadTrailer()
else if (symbol == Symbol.Trailer)
{
ReadSymbol(Symbol.BeginDictionary);
var trailer = new PdfTrailer(_document);
var trailer = new PdfTrailer(_document)
{
Position = xrefStart
};
ReadDictionary(trailer, false);
return trailer;
}
Expand All @@ -1275,8 +1281,8 @@ internal PdfTrailer ReadTrailer()
// Reference: 3.4.7 Cross-Reference Streams / Page 93
// TODO: We have not yet tested PDF files larger than 2 GiB because we have none and cannot produce one.

// The parsed integer is the object ID of the cross-reference stream object.
return ReadXRefStream(xrefTable);
// The parsed integer is the object ID of the cross-reference stream.
return ReadXRefStream(xrefTable, xrefStart);
}
return null;
}
Expand Down Expand Up @@ -1330,14 +1336,11 @@ bool CheckXRefTableEntry(SizeType position, int id, int generation, out int idCh
/// <summary>
/// Reads cross-reference stream(s).
/// </summary>
PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable, SizeType xrefStart)
{
// Read cross-reference stream.
//Debug.Assert(_lexer.Symbol == Symbol.Integer);

// NEEDED???
var xrefStart = _lexer.Position - _lexer.Token.Length;

int number = _lexer.TokenToInteger;
int generation = ReadInteger();
// According to specs, generation number "shall not" be "other than zero".
Expand All @@ -1357,7 +1360,10 @@ PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
ReadSymbol(Symbol.BeginDictionary);
var objectID = new PdfObjectID(number, generation);

var xrefStream = new PdfCrossReferenceStream(_document);
var xrefStream = new PdfCrossReferenceStream(_document)
{
Position = xrefStart
};

ReadDictionary(xrefStream, false);
ReadSymbol(Symbol.BeginStream);
Expand Down
33 changes: 24 additions & 9 deletions src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/PdfReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// See the LICENSE file in the solution root for more information.

using Microsoft.Extensions.Logging;
using Microsoft.VisualBasic;
using PdfSharp.Internal;
using PdfSharp.Logging;
using PdfSharp.Pdf.Advanced;
Expand Down Expand Up @@ -345,7 +346,8 @@ PdfDocument OpenFromStream(Stream stream, string? password, PdfDocumentOpenMode
throw new PdfReaderException(PSSR.InvalidPassword);
}
}
else if (validity == PasswordValidity.UserPassword && openMode == PdfDocumentOpenMode.Modify)
else if (validity == PasswordValidity.UserPassword
&& (openMode == PdfDocumentOpenMode.Modify || openMode == PdfDocumentOpenMode.Append))
{
if (passwordProvider != null)
{
Expand Down Expand Up @@ -445,16 +447,19 @@ void FinishReferences()
"All references saved in IrefTable should have been created when their referred PdfObject has been accessible.");

// Get and update object's references.
FinishItemReferences(iref.Value, _document, finishedObjects);
FinishItemReferences(iref.Value, iref, _document, finishedObjects);
}

// why setting it here AND in Trailer.Finish ??
_document.IrefTable.IsUnderConstruction = false;

// Fix references of trailer values and then objects and irefs are consistent.
_document.Trailer.Finish();

Debug.Assert(_document.IrefTable.ModifiedObjects.Count == 0, "There should be no modified objects");
}

void FinishItemReferences(PdfItem? pdfItem, PdfDocument document, HashSet<PdfObject> finishedObjects)
void FinishItemReferences(PdfItem? pdfItem, PdfReference itemReference, PdfDocument document, HashSet<PdfObject> finishedObjects)
{
// Only PdfObjects may contain further PdfReferences.
if (pdfItem is not PdfObject pdfObject)
Expand All @@ -478,10 +483,12 @@ void FinishItemReferences(PdfItem? pdfItem, PdfDocument document, HashSet<PdfObj
switch (pdfObject)
{
case PdfDictionary childDictionary:
FinishChildReferences(childDictionary, finishedObjects);
FinishChildReferences(childDictionary, childDictionary.Reference ?? itemReference, finishedObjects);
childDictionary.SetModified(false);
break;
case PdfArray childArray:
FinishChildReferences(childArray, finishedObjects);
FinishChildReferences(childArray, childArray.Reference ?? itemReference, finishedObjects);
childArray.SetModified(false);
break;
}
#else
Expand All @@ -493,8 +500,13 @@ void FinishItemReferences(PdfItem? pdfItem, PdfDocument document, HashSet<PdfObj
#endif
}

void FinishChildReferences(PdfDictionary dictionary, HashSet<PdfObject> finishedObjects)
void FinishChildReferences(PdfDictionary dictionary, PdfReference containingReference, HashSet<PdfObject> finishedObjects)
{
if (dictionary.ObjectNumber == 15)
GetType();
if (dictionary.Reference is null && dictionary.ContainingReference is null)
dictionary.ContainingReference = containingReference;

// Dictionary elements are modified inside loop. Avoid "Collection was modified; enumeration operation may not execute" error occurring in net 4.7.2.
// There is no way to access KeyValuePairs via index natively to use a for loop with.
// Instead, enumerate Keys and get value via Elements[key], which shall be O(1).
Expand All @@ -511,12 +523,15 @@ void FinishChildReferences(PdfDictionary dictionary, HashSet<PdfObject> finished
}

// Get and update item's references.
FinishItemReferences(item, _document, finishedObjects);
FinishItemReferences(item, containingReference, _document, finishedObjects);
}
}

void FinishChildReferences(PdfArray array, HashSet<PdfObject> finishedObjects)
void FinishChildReferences(PdfArray array, PdfReference containingReference, HashSet<PdfObject> finishedObjects)
{
if (array.Reference is null && array.ContainingReference is null)
array.ContainingReference = containingReference;

var elements = array.Elements;
for (var i = 0; i < elements.Count; i++)
{
Expand All @@ -531,7 +546,7 @@ void FinishChildReferences(PdfArray array, HashSet<PdfObject> finishedObjects)
}

// Get and update item's references.
FinishItemReferences(item, _document, finishedObjects);
FinishItemReferences(item, containingReference, _document, finishedObjects);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/PdfWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,8 @@ public void WriteEof(PdfDocument document, SizeType startxref)
WriteRaw(startxref.ToString(CultureInfo.InvariantCulture));
WriteRaw("\n%%EOF\n");
SizeType fileSize = (SizeType)_stream.Position;
if (_layout == PdfWriterLayout.Verbose)
// position check required for incremental updates to avoid overwriting the start of the file
if (_layout == PdfWriterLayout.Verbose && _commentPosition > 0)
{
TimeSpan duration = DateTime.Now - document._creation;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,10 @@ public enum PdfDocumentOpenMode
/// and is e.g. useful for browsing information about a collection of PDF documents in a user interface.
/// </summary>
InformationOnly, // TODO: not yet implemented

/// <summary>
/// Comparable to <see cref="Modify"/> but changes are appended to the document when saving
/// </summary>
Append
}
}
Loading

0 comments on commit 0e29203

Please sign in to comment.