Skip to content

Commit

Permalink
feat: allow asking for using xref streams, which opens the option to …
Browse files Browse the repository at this point in the history
…build very large files (#291)
  • Loading branch information
galkahana authored Dec 8, 2024
1 parent 015bd5e commit 88a7ba0
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 39 deletions.
76 changes: 54 additions & 22 deletions PDFWriter/DocumentContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ DocumentContext::DocumentContext()
mObjectsContext = NULL;
mParserExtender = NULL;
mModifiedDocumentIDExists = false;
SetWriteXrefAsXrefStream(false);
}

DocumentContext::~DocumentContext(void)
Expand All @@ -80,12 +81,26 @@ void DocumentContext::SetObjectsContext(ObjectsContext* inObjectsContext)
mPNGImageHandler.SetOperationsContexts(this, mObjectsContext);
#endif
mExtGStateRegistry.SetObjectsContext(mObjectsContext);
SetupXrefMaxWritePositionValidation();
}

// Pushes the current xref-writing mode down to the indirect objects registry,
// telling it whether object write positions should be checked against the
// maximum position a regular xref table can represent.
void DocumentContext::SetupXrefMaxWritePositionValidation()
{
    // No objects context has been attached yet, so there is nothing to configure.
    if(!mObjectsContext)
        return;

    // The 10 digits limit on write positions is only relevant for regular xref writing.
    // When an xref stream is written instead, the validation is turned off.
    const bool shouldValidate = !mWriteXrefAsXrefStream;
    mObjectsContext->GetInDirectObjectsRegistry().SetShouldValidateMaxWritePositionForXref(shouldValidate);
}

// Forwards the font embedding preference to the used fonts repository.
void DocumentContext::SetEmbedFonts(bool inEmbedFonts)
{
    mUsedFontsRepository.SetEmbedFonts(inEmbedFonts);
}

// Records whether the xref should be written as an xref stream, then re-syncs
// the max write position validation so that it matches the new mode.
void DocumentContext::SetWriteXrefAsXrefStream(bool inWriteXrefAsXrefStream)
{
    // The flag must be stored first: the validation setup below reads it.
    mWriteXrefAsXrefStream = inWriteXrefAsXrefStream;

    SetupXrefMaxWritePositionValidation();
}

void DocumentContext::SetOutputFileInformation(OutputFile* inOutputFile)
{
// just save the output file path for the ID generation in the end
Expand Down Expand Up @@ -222,13 +237,20 @@ EStatusCode DocumentContext::FinalizeNewPDF()
if (status != eSuccess)
break;

status = mObjectsContext->WriteXrefTable(xrefTablePosition);
if(status != eSuccess)
break;
if(mWriteXrefAsXrefStream) {
status = WriteXrefStream(xrefTablePosition);
if(status != eSuccess)
break;
} else {
status = mObjectsContext->WriteXrefTable(xrefTablePosition);
if(status != eSuccess)
break;

status = WriteTrailerDictionary();
if(status != eSuccess)
break;
status = WriteTrailerDictionary();
if(status != eSuccess)
break;

}

WriteXrefReference(xrefTablePosition);
WriteFinalEOF();
Expand Down Expand Up @@ -2343,8 +2365,31 @@ void DocumentContext::UnRegisterCopyingContext(PDFDocumentCopyingContext* inCopy
mCopyingContexts.erase(inCopyingContext);
}


// Tells whether a modified file must be finalized with an xref stream.
// This is the case when the original document itself uses one, which is
// detected by its trailer carrying a "Type" name entry whose value is "XRef".
// Returns false when there is no trailer, or when the trailer has no such entry.
bool DocumentContext::RequiresXrefStream(PDFParser* inModifiedFileParser)
{
    bool originalUsesXrefStream = false;

    if(inModifiedFileParser->GetTrailer())
    {
        PDFObjectCastPtr<PDFName> trailerType = inModifiedFileParser->GetTrailer()->QueryDirectObject("Type");

        // a missing (or non-name) Type entry means a regular xref table is in use
        originalUsesXrefStream = !trailerType ? false : (trailerType->GetValue() == "XRef");
    }

    return originalUsesXrefStream;
}


EStatusCode DocumentContext::SetupModifiedFile(PDFParser* inModifiedFileParser)
{
// determine if file requires xref stream, in which case set it up
if(RequiresXrefStream(inModifiedFileParser)) {
SetWriteXrefAsXrefStream(true); // it may already have been setup to be true earlier by the users request, but if not, and this file requires it, set it up now
}

// setup trailer and save original document ID

if(!inModifiedFileParser->GetTrailer())
Expand Down Expand Up @@ -2514,9 +2559,11 @@ EStatusCode DocumentContext::FinalizeModifiedPDF(PDFParser* inModifiedFileParser
status = CopyEncryptionDictionary(inModifiedFileParser);
if(status != eSuccess)
break;
if(RequiresXrefStream(inModifiedFileParser))
if(mWriteXrefAsXrefStream)
{
status = WriteXrefStream(xrefTablePosition);
if(status != eSuccess)
break;
}
else
{
Expand Down Expand Up @@ -2781,21 +2828,6 @@ EStatusCode DocumentContext::CopyEncryptionDictionary(PDFParser* inModifiedFileP
return eSuccess;
}

// Tells whether a modified file must be finalized with an xref stream.
// This is the case when the original document itself uses one, which is
// detected by its trailer carrying a "Type" name entry whose value is "XRef".
// Returns false when there is no trailer, or when the trailer has no such entry.
bool DocumentContext::RequiresXrefStream(PDFParser* inModifiedFileParser)
{
    bool originalUsesXrefStream = false;

    if(inModifiedFileParser->GetTrailer())
    {
        PDFObjectCastPtr<PDFName> trailerType = inModifiedFileParser->GetTrailer()->QueryDirectObject("Type");

        // a missing (or non-name) Type entry means a regular xref table is in use
        originalUsesXrefStream = !trailerType ? false : (trailerType->GetValue() == "XRef");
    }

    return originalUsesXrefStream;
}

EStatusCode DocumentContext::WriteXrefStream(LongFilePositionType& outXrefPosition)
{
Expand Down
3 changes: 3 additions & 0 deletions PDFWriter/DocumentContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ namespace PDFHummus
void SetObjectsContext(ObjectsContext* inObjectsContext);
void SetOutputFileInformation(OutputFile* inOutputFile);
void SetEmbedFonts(bool inEmbedFonts);
void SetWriteXrefAsXrefStream(bool inWriteXrefAsXrefStream);
PDFHummus::EStatusCode WriteHeader(EPDFVersion inPDFVersion);
PDFHummus::EStatusCode FinalizeNewPDF();
PDFHummus::EStatusCode FinalizeModifiedPDF(PDFParser* inModifiedFileParser,EPDFVersion inModifiedPDFVersion);
Expand Down Expand Up @@ -413,6 +414,7 @@ namespace PDFHummus
StringAndULongPairToHummusImageInformationMap mImagesInformation;
EncryptionHelper mEncryptionHelper;
ExtGStateRegistry mExtGStateRegistry;
bool mWriteXrefAsXrefStream;

void WriteHeaderComment(EPDFVersion inPDFVersion);
void Write4BinaryBytes();
Expand Down Expand Up @@ -461,5 +463,6 @@ namespace PDFHummus
bool RequiresXrefStream(PDFParser* inModifiedFileParser);
PDFHummus::EStatusCode WriteXrefStream(LongFilePositionType& outXrefPosition);
HummusImageInformation& GetImageInformationStructFor(const std::string& inImageFile,unsigned long inImageIndex);
void SetupXrefMaxWritePositionValidation();
};
}
29 changes: 23 additions & 6 deletions PDFWriter/IndirectObjectsReferenceRegistry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ using namespace PDFHummus;
// Constructor. Sets up the initial free object entry and turns on
// max write position validation by default.
IndirectObjectsReferenceRegistry::IndirectObjectsReferenceRegistry()
{
    SetupInitialFreeObject();

    // validation stays on unless explicitly turned off through the setter
    SetShouldValidateMaxWritePositionForXref(true);
}

void IndirectObjectsReferenceRegistry::SetupInitialFreeObject()
Expand All @@ -51,6 +52,11 @@ void IndirectObjectsReferenceRegistry::SetupInitialFreeObject()
mObjectsWritesRegistry.push_back(singleFreeObjectInformation);
}

void IndirectObjectsReferenceRegistry::SetShouldValidateMaxWritePositionForXref(bool inShouldValidate)
{
mShouldValidateMaxWritePositionForXref = inShouldValidate;
}

// Destructor. Performs no explicit cleanup.
IndirectObjectsReferenceRegistry::~IndirectObjectsReferenceRegistry()
{
}
Expand All @@ -70,6 +76,20 @@ ObjectIDType IndirectObjectsReferenceRegistry::AllocateNewObjectID()
return newObjectID;
}

// Checks that inWritePosition can be represented in an xref, provided that
// validation is turned on. With validation turned off any position is accepted.
// Returns eSuccess when the position is acceptable. Otherwise logs the
// offending position and returns eFailure.
EStatusCode IndirectObjectsReferenceRegistry::MaybeValidateMaxWritePositionForXref(LongFilePositionType inWritePosition)
{
    // largest write position that can be represented by 10 digits. anything larger will fail xref write
    const long long maxXrefWritePosition = 9999999999LL;

    const bool positionIsAcceptable =
        !mShouldValidateMaxWritePositionForXref || inWritePosition <= maxXrefWritePosition;
    if(positionIsAcceptable)
        return PDFHummus::eSuccess;

    TRACE_LOG1("IndirectObjectsReferenceRegistry::MaybeValidateMaxWritePositionForXref, Write position out of bounds. Trying to write an object at position that cannot be represented in Xref = %lld. probably means file got too long",inWritePosition);
    return PDFHummus::eFailure;
}


EStatusCode IndirectObjectsReferenceRegistry::MarkObjectAsWritten(ObjectIDType inObjectID,LongFilePositionType inWritePosition)
{
Expand All @@ -86,9 +106,8 @@ EStatusCode IndirectObjectsReferenceRegistry::MarkObjectAsWritten(ObjectIDType i
return PDFHummus::eFailure; // trying to mark as written an object that was already marked as such in the past. probably a mistake [till we have revisions]
}

if(inWritePosition > 9999999999LL) // if write position is larger than what can be represented by 10 digits, xref write will fail
if(MaybeValidateMaxWritePositionForXref(inWritePosition) != PDFHummus::eSuccess) // if write position is larger than what can be represented by 10 digits, xref write will fail
{
TRACE_LOG1("IndirectObjectsReferenceRegistry::MarkObjectAsWritten, Write position out of bounds. Trying to write an object at position that cannot be represented in Xref = %lld. probably means file got too long",inWritePosition);
return PDFHummus::eFailure;
}

Expand Down Expand Up @@ -155,12 +174,10 @@ PDFHummus::EStatusCode IndirectObjectsReferenceRegistry::MarkObjectAsUpdated(Obj
return PDFHummus::eFailure;
}

if(inNewWritePosition > 9999999999LL) // if write position is larger than what can be represented by 10 digits, xref write will fail
if(MaybeValidateMaxWritePositionForXref(inNewWritePosition) != PDFHummus::eSuccess) // if write position is larger than what can be represented by 10 digits, xref write will fail
{
TRACE_LOG1("IndirectObjectsReferenceRegistry::MarkObjectAsUpdated, Write position out of bounds. Trying to write an object at position that cannot be represented in Xref = %lld. probably means file got too long",inNewWritePosition);
return PDFHummus::eFailure;
}

}

mObjectsWritesRegistry[inObjectID].mIsDirty = true;
mObjectsWritesRegistry[inObjectID].mWritePosition = inNewWritePosition;
Expand Down
4 changes: 4 additions & 0 deletions PDFWriter/IndirectObjectsReferenceRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,16 @@ class IndirectObjectsReferenceRegistry
void Reset();

void SetupXrefFromModifiedFile(PDFParser* inModifiedFileParser);

void SetShouldValidateMaxWritePositionForXref(bool inShouldValidateMaxWritePositionForXref);

private:
ObjectWriteInformationVector mObjectsWritesRegistry;
bool mShouldValidateMaxWritePositionForXref;

void SetupInitialFreeObject();
void AppendExistingItem(ObjectWriteInformation::EObjectReferenceType inObjectReferenceType,
unsigned long inGenerationNumber,
LongFilePositionType inWritePosition);
PDFHummus::EStatusCode MaybeValidateMaxWritePositionForXref(LongFilePositionType inWritePosition);
};
25 changes: 15 additions & 10 deletions PDFWriter/PDFWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ PDFWriter::~PDFWriter(void)
{
}

EPDFVersion thisOrDefaultVersion(EPDFVersion inPDFVersion) {
return ePDFVersionUndefined == inPDFVersion ? ePDFVersion14 : inPDFVersion;
// Resolves the PDF version to use. An explicitly provided version is returned as is.
// When the version is undefined, the default is 1.4 if xref stream is not used,
// and 1.5 if it is (as this is the lowest version in which xref streams are supported).
EPDFVersion thisOrDefaultVersion(EPDFVersion inPDFVersion, bool inWriteXrefAsXrefStream)
{
    // a defined version always wins
    if(inPDFVersion != ePDFVersionUndefined)
        return inPDFVersion;

    return inWriteXrefAsXrefStream ? ePDFVersion15 : ePDFVersion14;
}

EStatusCode PDFWriter::StartPDF(
Expand All @@ -66,6 +67,7 @@ EStatusCode PDFWriter::StartPDF(
const LogConfiguration& inLogConfiguration,
const PDFCreationSettings& inPDFCreationSettings)
{
EPDFVersion pdfVersion = thisOrDefaultVersion(inPDFVersion, inPDFCreationSettings.WriteXrefAsXrefStream);
SetupLog(inLogConfiguration);
SetupCreationSettings(inPDFCreationSettings);

Expand All @@ -77,7 +79,7 @@ EStatusCode PDFWriter::StartPDF(
mDocumentContext.SetOutputFileInformation(&mOutputFile);

if (inPDFCreationSettings.DocumentEncryptionOptions.ShouldEncrypt) {
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, thisOrDefaultVersion(inPDFVersion));
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, pdfVersion);
if (!mDocumentContext.SupportsEncryption()) {
mOutputFile.CloseFile(); // close the file, to keep things clean
return eFailure;
Expand All @@ -86,7 +88,7 @@ EStatusCode PDFWriter::StartPDF(

mIsModified = false;

return mDocumentContext.WriteHeader(thisOrDefaultVersion(inPDFVersion));
return mDocumentContext.WriteHeader(pdfVersion);
}

EStatusCode PDFWriter::EndPDF()
Expand Down Expand Up @@ -174,6 +176,7 @@ void PDFWriter::SetupCreationSettings(const PDFCreationSettings& inPDFCreationSe
{
mObjectsContext.SetCompressStreams(inPDFCreationSettings.CompressStreams);
mDocumentContext.SetEmbedFonts(inPDFCreationSettings.EmbedFonts);
mDocumentContext.SetWriteXrefAsXrefStream(inPDFCreationSettings.WriteXrefAsXrefStream);
}

void PDFWriter::ReleaseLog()
Expand Down Expand Up @@ -555,18 +558,19 @@ EStatusCode PDFWriter::StartPDFForStream(IByteWriterWithPosition* inOutputStream
const LogConfiguration& inLogConfiguration,
const PDFCreationSettings& inPDFCreationSettings)
{
EPDFVersion pdfVersion = thisOrDefaultVersion(inPDFVersion, inPDFCreationSettings.WriteXrefAsXrefStream);
SetupLog(inLogConfiguration);
SetupCreationSettings(inPDFCreationSettings);
if (inPDFCreationSettings.DocumentEncryptionOptions.ShouldEncrypt) {
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, thisOrDefaultVersion(inPDFVersion));
mDocumentContext.SetupEncryption(inPDFCreationSettings.DocumentEncryptionOptions, pdfVersion);
if (!mDocumentContext.SupportsEncryption())
return eFailure;
}

mObjectsContext.SetOutputStream(inOutputStream);
mIsModified = false;

return mDocumentContext.WriteHeader(thisOrDefaultVersion(inPDFVersion));
return mDocumentContext.WriteHeader(pdfVersion);
}
EStatusCode PDFWriter::EndPDFForStream()
{
Expand Down Expand Up @@ -691,7 +695,7 @@ EStatusCode PDFWriter::ModifyPDF(const std::string& inModifiedFile,

// do setup for modification
mIsModified = true;
status = SetupStateFromModifiedFile(inModifiedFile, thisOrDefaultVersion(inPDFVersion), inPDFCreationSettings);
status = SetupStateFromModifiedFile(inModifiedFile, inPDFVersion, inPDFCreationSettings);
}
while (false);

Expand Down Expand Up @@ -724,13 +728,14 @@ EStatusCode PDFWriter::ModifyPDFForStream(

mIsModified = true;

return SetupStateFromModifiedStream(inModifiedSourceStream, thisOrDefaultVersion(inPDFVersion), inPDFCreationSettings);
return SetupStateFromModifiedStream(inModifiedSourceStream, inPDFVersion, inPDFCreationSettings);
}

EStatusCode PDFWriter::SetupStateFromModifiedStream(IByteReaderWithPosition* inModifiedSourceStream,
EPDFVersion inPDFVersion,
const PDFCreationSettings& inPDFCreationSettings)
{
EPDFVersion pdfVersion = thisOrDefaultVersion(inPDFVersion, inPDFCreationSettings.WriteXrefAsXrefStream);
EStatusCode status;
PDFParsingOptions parsingOptions;

Expand Down Expand Up @@ -761,7 +766,7 @@ EStatusCode PDFWriter::SetupStateFromModifiedStream(IByteReaderWithPosition* inM
}
}

mModifiedFileVersion = thisOrDefaultVersion(inPDFVersion);
mModifiedFileVersion = pdfVersion;
}
while (false);

Expand All @@ -778,7 +783,7 @@ EStatusCode PDFWriter::SetupStateFromModifiedFile(const std::string& inModifiedF
if(status != eSuccess)
break;

status = SetupStateFromModifiedStream(mModifiedFile.GetInputStream(), thisOrDefaultVersion(inPDFVersion), inPDFCreationSettings);
status = SetupStateFromModifiedStream(mModifiedFile.GetInputStream(), inPDFVersion, inPDFCreationSettings);
}
while(false);

Expand Down
8 changes: 7 additions & 1 deletion PDFWriter/PDFWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,16 @@ struct PDFCreationSettings
bool CompressStreams;
bool EmbedFonts;
EncryptionOptions DocumentEncryptionOptions;
bool WriteXrefAsXrefStream;

PDFCreationSettings(bool inCompressStreams, bool inEmbedFonts,EncryptionOptions inDocumentEncryptionOptions = EncryptionOptions::DefaultEncryptionOptions()):DocumentEncryptionOptions(inDocumentEncryptionOptions){
PDFCreationSettings(
bool inCompressStreams,
bool inEmbedFonts,
EncryptionOptions inDocumentEncryptionOptions = EncryptionOptions::DefaultEncryptionOptions(),
bool inWriteXrefAsXrefStream = false):DocumentEncryptionOptions(inDocumentEncryptionOptions){
CompressStreams = inCompressStreams;
EmbedFonts = inEmbedFonts;
WriteXrefAsXrefStream = inWriteXrefAsXrefStream;
}

};
Expand Down
1 change: 1 addition & 0 deletions PDFWriterTesting/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ create_test_sourcelist (Tests
UppercaseSequanceTest.cpp
WatermarkTest.cpp
WatermarkWithContextOpacityTest.cpp
XrefStreamsTest.cpp
)

# add the testing executable
Expand Down
Loading

0 comments on commit 88a7ba0

Please sign in to comment.