Skip to content

v1.17.1

Latest
Compare
Choose a tag to compare
@KevM KevM released this 03 Apr 14:19

1.17.1

  • Added new overloads of TextExtractor.Extract that let callers supply their own function for assembling the extraction result. Example:
/// <summary>
/// Example of a caller-defined result type: pairs a text string with a
/// name-to-values metadata lookup.
/// </summary>
public class CustomResult
{
    /// <summary>Metadata lookup; each name maps to an array of string values.</summary>
    public IDictionary<string, string[]> Metadata { get; set; }

    /// <summary>The text portion of the result.</summary>
    public string Text { get; set; }
}

/// <summary>
/// Assembles a <see cref="CustomResult"/> from the supplied text and metadata.
/// </summary>
/// <param name="text">Stored unchanged in <see cref="CustomResult.Text"/>.</param>
/// <param name="metadata">
/// Source of the metadata names (via <c>names()</c>) and the values for each name (via <c>getValues</c>).
/// </param>
/// <returns>A new <see cref="CustomResult"/> holding the text and a name-to-values dictionary.</returns>
public static CustomResult CreateCustomResult(string text, Metadata metadata)
{
    // Each metadata name becomes a key; its value is whatever getValues returns for that name.
    var valuesByName = metadata.names().ToDictionary(key => key, key => metadata.getValues(key));

    var result = new CustomResult();
    result.Metadata = valuesByName;
    result.Text = text;
    return result;
}

/// <summary>
/// Extracts "file_with_authors.pdf" with <c>CreateCustomResult</c> as the result assembler and
/// checks that the "meta:author" metadata entry contains both expected authors in order.
/// </summary>
[Test]
public void should_extract_author_list_from_pdf()
{
    var extractor = new TextExtractor();
    var customResult = extractor.Extract("file_with_authors.pdf", CreateCustomResult);

    var authors = customResult.Metadata["meta:author"];
    authors.Should().ContainInOrder("Fred Jones, M. D.", "Donald Evans D. M.");
}