1.17.1
- Added new overloads of
TextExtractor.Extract
that let callers supply their own extraction result assembler. Example:
/// <summary>
/// Example caller-defined result type produced by a custom extraction result assembler.
/// </summary>
public class CustomResult
{
    /// <summary>The text passed to the assembler.</summary>
    public string Text { get; set; }

    /// <summary>Metadata values, keyed by metadata name.</summary>
    public IDictionary<string, string[]> Metadata { get; set; }
}
/// <summary>
/// Assembles a <see cref="CustomResult"/> from the supplied text and metadata.
/// </summary>
/// <param name="text">Text to store in <see cref="CustomResult.Text"/>.</param>
/// <param name="metadata">
/// Metadata whose <c>names()</c> become the dictionary keys and whose
/// <c>getValues(name)</c> results become the corresponding values.
/// </param>
/// <returns>A new <see cref="CustomResult"/> carrying the text and the name-to-values dictionary.</returns>
public static CustomResult CreateCustomResult(string text, Metadata metadata)
{
    // One entry per metadata name, mapped to whatever getValues returns for that name.
    var valuesByName = metadata.names().ToDictionary(
        key => key,
        key => metadata.getValues(key));

    var result = new CustomResult();
    result.Metadata = valuesByName;
    result.Text = text;
    return result;
}
/// <summary>
/// Extracts "file_with_authors.pdf" using <c>CreateCustomResult</c> as the result assembler
/// and checks that the "meta:author" metadata entry lists both authors in order.
/// </summary>
[Test]
public void should_extract_author_list_from_pdf()
{
    var extractor = new TextExtractor();

    var customResult = extractor.Extract("file_with_authors.pdf", CreateCustomResult);

    var authors = customResult.Metadata["meta:author"];
    authors.Should().ContainInOrder("Fred Jones, M. D.", "Donald Evans D. M.");
}