diff --git a/app/transformation/TransformDbs.java b/app/transformation/TransformDbs.java index 6c08082f..97afe2dc 100644 --- a/app/transformation/TransformDbs.java +++ b/app/transformation/TransformDbs.java @@ -6,9 +6,6 @@ import org.metafacture.csv.CsvDecoder; import org.metafacture.json.JsonEncoder; import org.metafacture.io.LineReader; -import org.metafacture.triples.StreamToTriples; -import org.metafacture.triples.TripleFilter; -import org.metafacture.triples.TripleCollect; import org.metafacture.io.ObjectWriter; import org.metafacture.io.FileOpener; import org.metafacture.metafix.Metafix; @@ -23,22 +20,15 @@ public class TransformDbs { static void process(final String outputPath, String geoLookupServer) throws FileNotFoundException { final FileOpener opener = new FileOpener(); - StreamToTriples streamToTriples = new StreamToTriples(); - streamToTriples.setRedirect(true); opener.setEncoding("UTF-8"); final CsvDecoder decoder = new CsvDecoder(','); decoder.setHasHeader(true); - final TripleFilter tripleFilter = new TripleFilter(); - tripleFilter.setSubjectPattern(".+"); // Remove entries without id JsonEncoder encodeJson = new JsonEncoder(); encodeJson.setPrettyPrinting(true); opener// .setReceiver(new LineReader())// .setReceiver(decoder)// .setReceiver(new Metafix("conf/fix-dbs.fix"))// - .setReceiver(streamToTriples)// - .setReceiver(tripleFilter)// - .setReceiver(new TripleCollect())// .setReceiver(TransformAll.fixEnriched(geoLookupServer))// .setReceiver(encodeJson)// .setReceiver(TransformAll.esBulk())// diff --git a/app/transformation/TransformSigel.java b/app/transformation/TransformSigel.java index 2d729e76..e3375bf5 100644 --- a/app/transformation/TransformSigel.java +++ b/app/transformation/TransformSigel.java @@ -19,12 +19,9 @@ import org.metafacture.framework.helpers.DefaultObjectPipe; import org.metafacture.json.JsonEncoder; import org.metafacture.metafix.Metafix; -import org.metafacture.triples.StreamToTriples; import 
org.metafacture.biblio.pica.PicaXmlHandler; import org.metafacture.xml.XmlDecoder; -import org.metafacture.triples.TripleFilter; import org.metafacture.xml.XmlElementSplitter; -import org.metafacture.triples.TripleCollect; import org.metafacture.io.ObjectWriter; import org.metafacture.xml.XmlFilenameWriter; import org.metafacture.io.FileOpener; @@ -54,19 +51,12 @@ static void process(String startOfUpdates, int intervalSize, final String outputPath, String geoLookupServer) throws IOException { splitUpSigelDump(); final FileOpener splitFileOpener = new FileOpener(); - StreamToTriples streamToTriples = new StreamToTriples(); - streamToTriples.setRedirect(true); - final TripleFilter tripleFilter = new TripleFilter(); - tripleFilter.setSubjectPattern(".+"); // Remove entries without id JsonEncoder encodeJson = new JsonEncoder(); encodeJson.setPrettyPrinting(true); splitFileOpener// .setReceiver(new XmlDecoder())// .setReceiver(new PicaXmlHandler())// - .setReceiver(new Metafix("conf/fix-sigel.fix"))// - .setReceiver(streamToTriples)// - .setReceiver(tripleFilter)// - .setReceiver(new TripleCollect())// + .setReceiver(new Metafix("conf/fix-sigel.fix"))// the fix also rejects records that have neither inr nor isil .setReceiver(TransformAll.fixEnriched(geoLookupServer))// .setReceiver(encodeJson)// .setReceiver(TransformAll.esBulk())// diff --git a/conf/fix-dbs.fix b/conf/fix-dbs.fix index e455da33..8dc50bf9 100644 --- a/conf/fix-dbs.fix +++ b/conf/fix-dbs.fix @@ -3,7 +3,6 @@ do once("maps") end replace_all("inr"," ","") -copy_field("inr","_id") replace_all("isil"," ","") replace_all("isil","/","-") @@ -34,3 +33,7 @@ end vacuum() + +unless exists("inr") + reject() +end diff --git a/conf/fix-sigel.fix b/conf/fix-sigel.fix index cada2b56..ca5c623d 100644 --- a/conf/fix-sigel.fix +++ b/conf/fix-sigel.fix @@ -9,7 +9,6 @@ copy_field("008H.g","worldcatRegistryId") replace_all("@dbsId"," ","") replace_all("isil"," ","") if any_match("@dbsId","[A-Z]{2}\\d{3}") - copy_field("@dbsId","_id") 
copy_field("@dbsId","inr") else copy_field("isil","_id") @@ -71,3 +70,9 @@ end vacuum() + +unless exists("inr") + unless exists("isil") + reject() + end +end