From 2785ec7848c6ab5a72a76dc3fa992b39e49e5a04 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 18 Aug 2023 11:22:27 +0200 Subject: [PATCH] Throw Exception if bulk indexing fails (#475) --- app/controllers/Index.java | 11 ++++++++--- test/index/TestBadDocuments.java | 26 ++++++++++++++++++++++++++ test/index/corruptDocument.json | 2 ++ 3 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 test/index/TestBadDocuments.java create mode 100644 test/index/corruptDocument.json diff --git a/app/controllers/Index.java b/app/controllers/Index.java index e044c2d4..6e769c4b 100644 --- a/app/controllers/Index.java +++ b/app/controllers/Index.java @@ -4,11 +4,13 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; @@ -108,7 +110,7 @@ public static Result start() throws IOException { * @param pathToJson Path to the JSON file to index * @throws IOException if json file cannot be found */ - public static void initialize(String pathToJson) throws IOException { + public static void initialize(String pathToJson) throws IOException, ElasticsearchException { long minimumSize = Long.parseLong(Application.CONFIG.getString("index.file.minsize")); if (new File(pathToJson).length() >= minimumSize) { @@ -199,14 +201,17 @@ static void createEmptyIndex(final Client aClient, final String aIndexName, } static void indexData(final Client aClient, final 
String aPath, - final String aIndex) throws IOException { + final String aIndex) throws IOException, ElasticsearchException { final BulkRequestBuilder bulkRequest = aClient.prepareBulk(); try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(aPath), StandardCharsets.UTF_8))) { readData(bulkRequest, br, aClient, aIndex); } - bulkRequest.execute().actionGet(); + BulkResponse bulkResponse = bulkRequest.execute().actionGet(); + if (bulkResponse.hasFailures()) { + throw new ElasticsearchException("Bulk insert failed: " + bulkResponse.buildFailureMessage()); + } aClient.admin().indices().refresh(new RefreshRequest()).actionGet(); } diff --git a/test/index/TestBadDocuments.java b/test/index/TestBadDocuments.java new file mode 100644 index 00000000..dda30a79 --- /dev/null +++ b/test/index/TestBadDocuments.java @@ -0,0 +1,26 @@ +/* Copyright 2023, hbz. Licensed under the EPL 2.0 */ + +package index; + +import java.io.IOException; + +import controllers.Index; +import org.elasticsearch.ElasticsearchException; +import org.junit.Test; + +import static org.junit.Assert.fail; + +@SuppressWarnings("javadoc") +public class TestBadDocuments { + + @Test + public void logIndexFailure() throws IOException { + System.setProperty("config.resource", "test.conf"); + try { + Index.initialize("test/index/corruptDocument.json"); + fail("Expected an ElasticsearchException for the corrupt document"); + } catch (ElasticsearchException expected) { + // Expected: indexing the corrupt document must raise ElasticsearchException. + } + } +} diff --git a/test/index/corruptDocument.json b/test/index/corruptDocument.json new file mode 100644 index 00000000..49744ff5 --- /dev/null +++ b/test/index/corruptDocument.json @@ -0,0 +1,2 @@ +{"index":{"_index":"organisations","_type":"organisation","_id":""}} +{"rs":"130750039039","type":{"corrupt":true},"classification":{"id":"http://purl.org/lobid/libtype#n60","type":"Concept","label":{"de":"Zentrale Universitätsbibliothek","en":"Central University
Library"}},"@context":"http://lobid.org/organisations/context.jsonld","url":"http://www.uni-greifswald.de/bibliothek/html","provides":"http://www.ub.uni-greifswald.de:2324/","name":"Universitätsbibliothek Greifswald","containedIn":"http://sws.geonames.org/6551180/","location":[{"type":"Place","address":{"postalCode":"17489","streetAddress":"Felix-Hausdorff-Str. 10","addressLocality":"Greifswald","addressCountry":"DE","type":"PostalAddress"},"openingHoursSpecification":{"description":"Mo-Fr.: 8-24, Sa: 9-24 Uhr"}}],"id":"http://lobid.org/organisations/DE-9#!","isil":"DE-9","fundertype":{"id":"http://purl.org/lobid/fundertype#n02","type":"Concept","label":{"de":"Land","en":"Federal State"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n10","type":"Concept","label":{"de":"1.000.001 und mehr","en":"1,000,001 and more"}}},"dbsID":"AA009","sameAs":["http://www.wikidata.org/entity/Q2496314","http://ld.zdb-services.de/resource/organisations/DE-9"]}