Skip to content

Commit

Permalink
Change workflows and fixes so that no triples are needed
Browse files: browse the repository at this point in the history
  • Loading branch information
TobiasNx committed Aug 7, 2023
1 parent ae14c90 commit 6f850da
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 23 deletions.
10 changes: 0 additions & 10 deletions app/transformation/TransformDbs.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
import org.metafacture.csv.CsvDecoder;
import org.metafacture.json.JsonEncoder;
import org.metafacture.io.LineReader;
import org.metafacture.triples.StreamToTriples;
import org.metafacture.triples.TripleFilter;
import org.metafacture.triples.TripleCollect;
import org.metafacture.io.ObjectWriter;
import org.metafacture.io.FileOpener;
import org.metafacture.metafix.Metafix;
Expand All @@ -23,22 +20,15 @@
public class TransformDbs {
static void process(final String outputPath, String geoLookupServer) throws FileNotFoundException {
final FileOpener opener = new FileOpener();
StreamToTriples streamToTriples = new StreamToTriples();
streamToTriples.setRedirect(true);
opener.setEncoding("UTF-8");
final CsvDecoder decoder = new CsvDecoder(',');
decoder.setHasHeader(true);
final TripleFilter tripleFilter = new TripleFilter();
tripleFilter.setSubjectPattern(".+"); // Remove entries without id
JsonEncoder encodeJson = new JsonEncoder();
encodeJson.setPrettyPrinting(true);
opener//
.setReceiver(new LineReader())//
.setReceiver(decoder)//
.setReceiver(new Metafix("conf/fix-dbs.fix"))//
.setReceiver(streamToTriples)//
.setReceiver(tripleFilter)//
.setReceiver(new TripleCollect())//
.setReceiver(TransformAll.fixEnriched(geoLookupServer))//
.setReceiver(encodeJson)//
.setReceiver(TransformAll.esBulk())//
Expand Down
12 changes: 1 addition & 11 deletions app/transformation/TransformSigel.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,9 @@
import org.metafacture.framework.helpers.DefaultObjectPipe;
import org.metafacture.json.JsonEncoder;
import org.metafacture.metafix.Metafix;
import org.metafacture.triples.StreamToTriples;
import org.metafacture.biblio.pica.PicaXmlHandler;
import org.metafacture.xml.XmlDecoder;
import org.metafacture.triples.TripleFilter;
import org.metafacture.xml.XmlElementSplitter;
import org.metafacture.triples.TripleCollect;
import org.metafacture.io.ObjectWriter;
import org.metafacture.xml.XmlFilenameWriter;
import org.metafacture.io.FileOpener;
Expand Down Expand Up @@ -54,19 +51,12 @@ static void process(String startOfUpdates, int intervalSize,
final String outputPath, String geoLookupServer) throws IOException {
splitUpSigelDump();
final FileOpener splitFileOpener = new FileOpener();
StreamToTriples streamToTriples = new StreamToTriples();
streamToTriples.setRedirect(true);
final TripleFilter tripleFilter = new TripleFilter();
tripleFilter.setSubjectPattern(".+"); // Remove entries without id
JsonEncoder encodeJson = new JsonEncoder();
encodeJson.setPrettyPrinting(true);
splitFileOpener//
.setReceiver(new XmlDecoder())//
.setReceiver(new PicaXmlHandler())//
.setReceiver(new Metafix("conf/fix-sigel.fix"))//
.setReceiver(streamToTriples)//
.setReceiver(tripleFilter)//
.setReceiver(new TripleCollect())//
.setReceiver(new Metafix("conf/fix-sigel.fix"))//fix also kicks out all records without _id
.setReceiver(TransformAll.fixEnriched(geoLookupServer))//
.setReceiver(encodeJson)//
.setReceiver(TransformAll.esBulk())//
Expand Down
5 changes: 4 additions & 1 deletion conf/fix-dbs.fix
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ do once("maps")
end

replace_all("inr"," ","")
copy_field("inr","_id")

replace_all("isil"," ","")
replace_all("isil","/","-")
Expand Down Expand Up @@ -34,3 +33,7 @@ end


vacuum()

# Drop the whole record when it has no "inr" field.
unless exists("inr")
reject()
end
7 changes: 6 additions & 1 deletion conf/fix-sigel.fix
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ copy_field("008H.g","worldcatRegistryId")
replace_all("@dbsId"," ","")
replace_all("isil"," ","")
if any_match("@dbsId","[A-Z]{2}\\d{3}")
copy_field("@dbsId","_id")
copy_field("@dbsId","inr")
else
copy_field("isil","_id")
Expand Down Expand Up @@ -71,3 +70,9 @@ end


vacuum()

# Drop the whole record only when it has neither an "inr" nor an "isil" field;
# a record carrying at least one of the two is kept.
unless exists("inr")
unless exists("isil")
reject()
end
end

0 comments on commit 6f850da

Please sign in to comment.