diff --git a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java index f1ae137b9163..c9a865ec85b2 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/discovery/indexobject/IndexFactoryImpl.java @@ -118,20 +118,10 @@ protected void writeDocument(SolrInputDocument doc, FullTextContentStreams strea ParseContext tikaContext = new ParseContext(); // Use Apache Tika to parse the full text stream(s) + boolean extractionSucceeded = false; try (InputStream fullTextStreams = streams.getStream()) { tikaParser.parse(fullTextStreams, tikaHandler, tikaMetadata, tikaContext); - - // Write Tika metadata to "tika_meta_*" fields. - // This metadata is not very useful right now, - // but we'll keep it just in case it becomes more useful. - for (String name : tikaMetadata.names()) { - for (String value : tikaMetadata.getValues(name)) { - doc.addField("tika_meta_" + name, value); - } - } - - // Save (parsed) full text to "fulltext" field - doc.addField("fulltext", tikaHandler.toString()); + extractionSucceeded = true; } catch (SAXException saxe) { // Check if this SAXException is just a notice that this file was longer than the character limit. // Unfortunately there is not a unique, public exception type to catch here. This error is thrown @@ -141,6 +131,7 @@ protected void writeDocument(SolrInputDocument doc, FullTextContentStreams strea // log that we only indexed up to that configured limit log.info("Full text is larger than the configured limit (discovery.solr.fulltext.charLimit)." + " Only the first {} characters were indexed.", charLimit); + extractionSucceeded = true; } else { log.error("Tika parsing error. Could not index full text.", saxe); throw new IOException("Tika parsing error. Could not index full text.", saxe); @@ -148,11 +139,19 @@ protected void writeDocument(SolrInputDocument doc, FullTextContentStreams strea } catch (TikaException | IOException ex) { log.error("Tika parsing error. Could not index full text.", ex); throw new IOException("Tika parsing error. Could not index full text.", ex); - } finally { - // Add document to index - solr.add(doc); } - return; + if (extractionSucceeded) { + // Write Tika metadata to "tika_meta_*" fields. + // This metadata is not very useful right now, + // but we'll keep it just in case it becomes more useful. + for (String name : tikaMetadata.names()) { + for (String value : tikaMetadata.getValues(name)) { + doc.addField("tika_meta_" + name, value); + } + } + // Save (parsed) full text to "fulltext" field + doc.addField("fulltext", tikaHandler.toString()); + } } // Add document to index solr.add(doc);