Skip to content

Commit

Permalink
Throw Exception if bulk indexing fails (#475)
Browse files Browse the repository at this point in the history
  • Loading branch information
dr0i committed Aug 21, 2023
1 parent cb1ab1d commit 2785ec7
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
11 changes: 8 additions & 3 deletions app/controllers/Index.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
Expand Down Expand Up @@ -108,7 +110,7 @@ public static Result start() throws IOException {
* @param pathToJson Path to the JSON file to index
* @throws IOException if json file cannot be found
*/
public static void initialize(String pathToJson) throws IOException {
public static void initialize(String pathToJson) throws IOException, ElasticsearchException {
long minimumSize =
Long.parseLong(Application.CONFIG.getString("index.file.minsize"));
if (new File(pathToJson).length() >= minimumSize) {
Expand Down Expand Up @@ -199,14 +201,17 @@ static void createEmptyIndex(final Client aClient, final String aIndexName,
}

/**
 * Bulk-indexes the content of a file and refreshes the indices afterwards.
 *
 * Opens the file at {@code aPath} as UTF-8 text, hands the reader to
 * {@code readData} together with a bulk request prepared on {@code aClient},
 * executes that request and finally issues a refresh request.
 *
 * NOTE(review): this listing is a rendered diff, so the second signature line
 * and the {@code execute()} call each appear in two forms (without and with
 * the failure handling) - confirm against the actual file before editing.
 *
 * @param aClient client used to prepare the bulk request and to refresh
 * @param aPath path of the file to read
 * @param aIndex index name that is passed on to {@code readData}
 * @throws IOException presumably if the file cannot be opened or read
 * @throws ElasticsearchException if the bulk response reports failures
 */
static void indexData(final Client aClient, final String aPath,
final String aIndex) throws IOException {
final String aIndex) throws IOException, ElasticsearchException {
final BulkRequestBuilder bulkRequest = aClient.prepareBulk();
// try-with-resources closes the reader before the bulk request is executed.
try (BufferedReader br =
new BufferedReader(new InputStreamReader(new FileInputStream(aPath),
StandardCharsets.UTF_8))) {
readData(bulkRequest, br, aClient, aIndex);
}
bulkRequest.execute().actionGet();
BulkResponse bulkResponse = bulkRequest.execute().actionGet();
// Turn failures reported in the bulk response into an exception that
// carries the response's failure message.
if (bulkResponse.hasFailures()) {
throw new ElasticsearchException("Bulk insert failed: " + bulkResponse.buildFailureMessage());
}
aClient.admin().indices().refresh(new RefreshRequest()).actionGet();
}

Expand Down
26 changes: 26 additions & 0 deletions test/index/TestBadDocuments.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/* Copyright 2023, hbz. Licensed under the EPL 2.0 */

package index;

import java.io.IOException;

import controllers.Index;
import org.elasticsearch.ElasticsearchException;
import org.junit.Test;

import static org.junit.Assert.assertTrue;

@SuppressWarnings("javadoc")
public class TestBadDocuments {

@Test
public void logIndexFailure() {
System.setProperty("config.resource", "test.conf");
try {
Index.initialize("test/index/corruptDocument.json");
} catch (ElasticsearchException | IOException e) {
Class clazz = e.getClass();
assertTrue(e.getClass().getName() == "org.elasticsearch.ElasticsearchException");
}
}
}
2 changes: 2 additions & 0 deletions test/index/corruptDocument.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"index":{"_index":"organisations","_type":"organisation","_id":""}}
{"rs":"130750039039","type":{"corrupt":true},"classification":{"id":"http://purl.org/lobid/libtype#n60","type":"Concept","label":{"de":"Zentrale Universitätsbibliothek","en":"Central University Library"}},"@context":"http://lobid.org/organisations/context.jsonld","url":"http://www.uni-greifswald.de/bibliothek/html","provides":"http://www.ub.uni-greifswald.de:2324/","name":"Universitätsbibliothek Greifswald","containedIn":"http://sws.geonames.org/6551180/","location":[{"type":"Place","address":{"postalCode":"17489","streetAddress":"Felix-Hausdorff-Str. 10","addressLocality":"Greifswald","addressCountry":"DE","type":"PostalAddress"},"openingHoursSpecification":{"description":"Mo-Fr.: 8-24, Sa: 9-24 Uhr"}}],"id":"http://lobid.org/organisations/DE-9#!","isil":"DE-9","fundertype":{"id":"http://purl.org/lobid/fundertype#n02","type":"Concept","label":{"de":"Land","en":"Federal State"}},"collects":{"type":"Collection","extent":{"id":"http://purl.org/lobid/stocksize#n10","type":"Concept","label":{"de":"1.000.001 und mehr","en":"1,000,001 and more"}}},"dbsID":"AA009","sameAs":["http://www.wikidata.org/entity/Q2496314","http://ld.zdb-services.de/resource/organisations/DE-9"]}

0 comments on commit 2785ec7

Please sign in to comment.