From fdb0dc5d9b6003176efdc5a05ea839dbd2bce93c Mon Sep 17 00:00:00 2001 From: cbobed Date: Thu, 8 Apr 2021 19:13:58 +0200 Subject: [PATCH 1/4] Added the tool to gather and create the Janusgraph schema --- .../graphml/cli/JanusgraphSchemaGatherer.java | 390 ++++++++++++++++++ .../graphml/export/support/GraphMLUtils.java | 4 + 2 files changed, 394 insertions(+) create mode 100644 rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java diff --git a/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java new file mode 100644 index 0000000..2f7ca82 --- /dev/null +++ b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java @@ -0,0 +1,390 @@ +package uk.ac.rothamsted.kg.rdf2pg.graphml.cli; + +import java.io.BufferedOutputStream; + +/** + * TODO: comment me! + * + * @author cbobed + *
Date:
4 Apr 2021
+ * + */ + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Hashtable; + +import javax.xml.parsers.*; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import javax.xml.stream.events.XMLEvent; + +import org.xml.sax.*; +import org.xml.sax.helpers.DefaultHandler; + +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; +import uk.ac.rothamsted.kg.rdf2pg.cli.CliCommand; + +public class JanusgraphSchemaGatherer extends CliCommand { + + @Option ( + names = { "-i", "--input" }, + description = "GraphML file to extract the schema information from", + required = true + ) + protected String graphMLFilename = ""; + + @Option ( + names = { "-o", "--output"}, + description = "Output file to write the groovy scrip to", + required = true + ) + protected String groovyFilename = ""; + + @Option ( + names = { "-k", "--key"}, + description = "Name of the key property serving as ID to create an index on it if required", + required = false + ) + protected String key = ""; + + @Override + public final Integer call () throws Exception + { + + long start = System.currentTimeMillis(); + + HashSet vertexLabels = new HashSet<>(); + HashSet vertexProperties = new HashSet<>(); + HashSet edgeLabels = new HashSet<>(); + HashSet edgeProperties = new HashSet<>(); + boolean sanityCheck = true; + + gatherAllInformation(graphMLFilename, vertexLabels, vertexProperties, edgeLabels, edgeProperties); + sanityCheck = checkSanity(vertexLabels, vertexProperties, edgeLabels, edgeProperties); + if (sanityCheck) { + writeGroovyScript(groovyFilename, + vertexLabels, + vertexProperties, + edgeLabels, + edgeProperties, + !"".equalsIgnoreCase(key), + key); + } + // printSchemaInformation(vertexLabels, vertexProperties, edgeLabels, edgeProperties); + long end = System.currentTimeMillis(); + System.out.println("took aprox: "+((end-start)/1000)+" s. "); + + return sanityCheck?1:-1; + } + + public final Hashtable getAttributesAsHashtable (XMLStreamReader xmlr) { + Hashtable values = new Hashtable(); + for (int i=0; i vertexLabels, + HashSet vertexProperties, + HashSet edgeLabels, + HashSet edgeProperties ) throws XMLStreamException, FileNotFoundException + { + XMLInputFactory xmlif = XMLInputFactory.newInstance(); + XMLStreamReader xmlr = xmlif.createXMLStreamReader(filename, + new FileInputStream(filename)); + Hashtable attribs; + + // when XMLStreamReader is created, + // it is positioned at START_DOCUMENT event. + int eventType = xmlr.getEventType(); + + boolean insideNode = false; + boolean insideEdge = false; + // check if there are more events + // in the input stream + while(xmlr.hasNext()) { + xmlr.next(); + eventType = xmlr.getEventType(); + switch (eventType){ + case XMLEvent.START_ELEMENT: + if ("node".equalsIgnoreCase(xmlr.getLocalName()) ) { + insideNode = true; + attribs = getAttributesAsHashtable(xmlr); + if (attribs.containsKey("labelV")) { + vertexLabels.add(attribs.get("labelV")); + } + + } + else if ("edge".equalsIgnoreCase(xmlr.getLocalName())) { + insideEdge = true; + attribs = getAttributesAsHashtable(xmlr); + if (attribs.containsKey("labelE")) { + edgeLabels.add(attribs.get("labelE")); + } + } + else if ("key".equalsIgnoreCase(xmlr.getLocalName())) { + attribs = getAttributesAsHashtable(xmlr); + if (attribs.containsKey("for")) { + if ("node".equalsIgnoreCase(attribs.get("for"))) { + // it should be the same as id + vertexProperties.add(attribs.get("attr.name")); + vertexProperties.add(attribs.get("id")); + } + else if ("edge".equalsIgnoreCase(attribs.get("for"))) { + edgeProperties.add(attribs.get("attr.name")); + edgeProperties.add(attribs.get("id")); + } + } + } + else if ("data".equalsIgnoreCase(xmlr.getLocalName())) { + attribs = getAttributesAsHashtable(xmlr); + if (attribs.containsKey("key")) { + if (insideNode && "labelV".equalsIgnoreCase(attribs.get("key")) && xmlr.hasNext()) { + xmlr.next(); + vertexLabels.add(xmlr.getText()); + } + else if (insideNode && !"labelV".equalsIgnoreCase(attribs.get("key"))) { + vertexProperties.add(attribs.get("key")); + } + else if (insideEdge && "labelE".equalsIgnoreCase(attribs.get("key")) && xmlr.hasNext()) { + xmlr.next(); + edgeLabels.add(xmlr.getText()); + } + else if (insideEdge && !"labelE".equalsIgnoreCase(attribs.get("key"))) { + edgeProperties.add(attribs.get("key")); + } + } + } + break; + case XMLEvent.END_ELEMENT: + if (xmlr.getLocalName().equalsIgnoreCase("node")) { + insideNode = false; + } + else if (xmlr.getLocalName().equalsIgnoreCase("edge")) { + insideEdge = false; + } + break; + case XMLEvent.PROCESSING_INSTRUCTION: + break; + case XMLEvent.CHARACTERS: + break; + case XMLEvent.COMMENT: + case XMLEvent.START_DOCUMENT: + case XMLEvent.END_DOCUMENT: + case XMLEvent.ENTITY_REFERENCE: + break; + case XMLEvent.ATTRIBUTE: + System.out.println("attribute"); + break; + case XMLEvent.DTD: + break; + case XMLEvent.CDATA: + System.out.println("CDATA"); + break; + case XMLEvent.SPACE: + break; + } + + } + } + + public final boolean checkSanity (HashSet vertexLabels, + HashSet vertexProperties, + HashSet edgeLabels, + HashSet edgeProperties) + { + String VL_LABEL = "vertexLabels"; + String VP_LABEL = "vertexProperties"; + String EL_LABEL = "edgeLabels"; + // String EP_LABEL = "edgeProperties"; + + boolean everythingOK = true; + HashSet auxSet = null; + HashMap> collisions = new HashMap>(); + /* reserved keywords: vertex, element, edge, property, label, key */ + + everythingOK = containsNoReservedWord(vertexLabels); + if (!everythingOK) log.error("Vertex labels containing reserved words"); + + everythingOK &= containsNoReservedWord (vertexProperties); + if (!everythingOK) log.error("Vertex properties containing reserved words"); + + everythingOK &= containsNoReservedWord (edgeLabels); + if (!everythingOK) log.error("Edge labels containing reserved words"); + + everythingOK &= containsNoReservedWord(edgeProperties); + if (!everythingOK) log.error("Edge properties containing reserved words"); + + // we check the intersection of the different label names + // to avoid collisions + auxSet = new HashSet(vertexLabels); + auxSet.retainAll(vertexProperties); + collisions.put(VL_LABEL, auxSet); + everythingOK = auxSet.isEmpty(); + + auxSet = new HashSet(vertexLabels); + auxSet.retainAll(edgeLabels); + // we keep track of the potential collisions is everything is ok, this should be empty + collisions.get(VL_LABEL).addAll(auxSet); + everythingOK = auxSet.isEmpty(); + + auxSet = new HashSet(vertexLabels); + auxSet.retainAll(edgeLabels); + collisions.get(VL_LABEL).addAll(auxSet); + everythingOK = auxSet.isEmpty(); + + auxSet = new HashSet(vertexProperties); + auxSet.retainAll(edgeLabels); + collisions.put(VP_LABEL, auxSet); + everythingOK = auxSet.isEmpty(); + + auxSet = new HashSet(vertexProperties); + auxSet.retainAll(edgeProperties); + collisions.get(VP_LABEL).addAll(auxSet); + everythingOK = auxSet.isEmpty(); + + auxSet = new HashSet(edgeLabels); + auxSet.retainAll(edgeProperties); + collisions.put(EL_LABEL, auxSet); + everythingOK = auxSet.isEmpty(); + + if (!everythingOK) { + if (!collisions.get(VL_LABEL).isEmpty()) { + log.error("collisions with the vertex labels: "); + for (String lab: collisions.get(VL_LABEL)) { + log.error(lab); + } + } + if (!collisions.get(VP_LABEL).isEmpty()) { + log.error("collisions with the vertex properties: "); + for (String lab: collisions.get(VP_LABEL)) { + log.error(lab); + } + } + if (!collisions.get(EL_LABEL).isEmpty()) { + log.error("collisions with the edge labels: "); + for (String lab: collisions.get(EL_LABEL)) { + log.error(lab); + } + } + } + + return everythingOK; + } + + public final void writeGroovyScript (String filename, + HashSet vertexLabels, + HashSet vertexProperties, + HashSet edgeLabels, + HashSet edgeProperties, + boolean createKeyIndex, + String key) throws FileNotFoundException { + + try ( PrintStream out = new PrintStream ( + new BufferedOutputStream ( + new FileOutputStream (filename ) ) ) + ) + { + // we build the schema with the default settings + // some tunning might be required depending on the particular scenario + out.println("graph.tx().commit()"); + out.println("m = graph.openManagement()"); + for (String vl: vertexLabels) { + out.println("m.makeVertexLabel('"+vl+"').make()"); + } + for (String vp: vertexProperties) { + out.println("m.makePropertyKey('"+vp+"').dataType(String.class).make()"); + } + for (String el: edgeLabels) { + out.println("m.makeEdgeLabel('"+el+"').multiplicity(MULTI).make()"); + } + for (String ep: edgeProperties) { + out.println("m.makePropertyKey('"+ep+"').dataType(String.class).make()"); + } + out.println("m.commit()"); + out.println(""); + + if (createKeyIndex) { + out.println("graph.tx().commit()"); + out.println("m = graph.openManagement()"); + out.println("prop = m.getPropertyKey('"+key+"')"); + out.println("m.buildIndex('"+key+"Comp', Vertex.class).addKey(prop).buildCompositeIndex()"); + out.println("prop = m.getPropertyKey('"+key+"')"); + out.println("m.buildIndex('"+key+"Mixed', Vertex.class).addKey(prop).buildMixedIndex('search')"); + out.println("m.commit()"); + + //Wait for the index to become available + + out.println("ManagementSystem.awaitGraphIndexStatus(graph, '"+key+"Comp').call()"); + out.println("ManagementSystem.awaitGraphIndexStatus(graph, '"+key+"Mixed').call()"); + out.println("graph.tx().commit()"); + + out.println("m = graph.openManagement()"); + + out.println("idx = m.getGraphIndex('"+key+"Comp')"); + out.println("m.updateIndex(idx, SchemaAction.ENABLE_INDEX)"); + out.println("idx = m.getGraphIndex('"+key+"Mixed')"); + out.println("m.updateIndex(idx, SchemaAction.ENABLE_INDEX)"); + + out.println("m.commit()"); + out.println("graph.tx().commit()"); + } + out.flush(); + } + + + } + +public static boolean containsNoReservedWord(HashSet set) { + + return !( set.contains("vertex") || + set.contains("element") || + set.contains("edge") || + set.contains("property") || + set.contains("label") || + set.contains("key") ); + } + +public final static void printSchemaInformation(HashSet vertexLabels, + HashSet vertexProperties, + HashSet edgeLabels, + HashSet edgeProperties) { + System.out.println("----------------"); + System.out.println("Vertex labels::"); + System.out.println("----------------"); + for (String s: vertexLabels) { + System.out.println(s); + } + System.out.println("----------------"); + System.out.println("Vertex properties::"); + System.out.println("----------------"); + for (String s: vertexProperties) { + System.out.println(s); + } + System.out.println("----------------"); + System.out.println("Edge labels::"); + System.out.println("----------------"); + for (String s: edgeLabels) { + System.out.println(s); + } + System.out.println("----------------"); + System.out.println("Edge Properties::"); + System.out.println("----------------"); + for (String s: edgeProperties) { + System.out.println(s); + } + } + + +} + + diff --git a/rdf2graphml/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/export/support/GraphMLUtils.java b/rdf2graphml/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/export/support/GraphMLUtils.java index eede188..1297944 100644 --- a/rdf2graphml/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/export/support/GraphMLUtils.java +++ b/rdf2graphml/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/export/support/GraphMLUtils.java @@ -27,6 +27,10 @@ public class GraphMLUtils { /** Some constants related to GraphML **/ + /* In janusgraph, the following words are reserved ones: vertex, element, edge, property, label, key + they cannot be used for properties of either the nodes or the edges */ + + public static final String GRAPHML_TAG_HEADER = " \n" + " Date: Fri, 9 Apr 2021 08:35:02 +0200 Subject: [PATCH 2/4] Minor fixes to the tool for gathering the janusgraph schema --- .../graphml/cli/JanusgraphSchemaGatherer.java | 97 ++++++++++++------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java index 2f7ca82..067d79f 100644 --- a/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java +++ b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java @@ -24,12 +24,15 @@ import javax.xml.stream.XMLStreamReader; import javax.xml.stream.events.XMLEvent; +import org.springframework.context.annotation.AnnotationConfigApplicationContext; import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; +import picocli.CommandLine; import picocli.CommandLine.Command; import picocli.CommandLine.Option; import uk.ac.rothamsted.kg.rdf2pg.cli.CliCommand; +import uk.ac.rothamsted.kg.rdf2pg.cli.Rdf2PGCli; public class JanusgraphSchemaGatherer extends CliCommand { @@ -81,7 +84,7 @@ public final Integer call () throws Exception long end = System.currentTimeMillis(); System.out.println("took aprox: "+((end-start)/1000)+" s. "); - return sanityCheck?1:-1; + return sanityCheck?0:-1; } public final Hashtable getAttributesAsHashtable (XMLStreamReader xmlr) { @@ -344,46 +347,68 @@ public final void writeGroovyScript (String filename, } -public static boolean containsNoReservedWord(HashSet set) { - - return !( set.contains("vertex") || - set.contains("element") || - set.contains("edge") || - set.contains("property") || - set.contains("label") || - set.contains("key") ); - } - -public final static void printSchemaInformation(HashSet vertexLabels, - HashSet vertexProperties, - HashSet edgeLabels, - HashSet edgeProperties) { - System.out.println("----------------"); - System.out.println("Vertex labels::"); - System.out.println("----------------"); - for (String s: vertexLabels) { - System.out.println(s); + public static boolean containsNoReservedWord(HashSet set) { + + return !( set.contains("vertex") || + set.contains("element") || + set.contains("edge") || + set.contains("property") || + set.contains("label") || + set.contains("key") ); } - System.out.println("----------------"); - System.out.println("Vertex properties::"); - System.out.println("----------------"); - for (String s: vertexProperties) { - System.out.println(s); + + public final static void printSchemaInformation(HashSet vertexLabels, + HashSet vertexProperties, + HashSet edgeLabels, + HashSet edgeProperties) { + System.out.println("----------------"); + System.out.println("Vertex labels::"); + System.out.println("----------------"); + for (String s: vertexLabels) { + System.out.println(s); + } + System.out.println("----------------"); + System.out.println("Vertex properties::"); + System.out.println("----------------"); + for (String s: vertexProperties) { + System.out.println(s); + } + System.out.println("----------------"); + System.out.println("Edge labels::"); + System.out.println("----------------"); + for (String s: edgeLabels) { + System.out.println(s); + } + System.out.println("----------------"); + System.out.println("Edge Properties::"); + System.out.println("----------------"); + for (String s: edgeProperties) { + System.out.println(s); + } } - System.out.println("----------------"); - System.out.println("Edge labels::"); - System.out.println("----------------"); - for (String s: edgeLabels) { - System.out.println(s); + + + // The main (it's currently outside spring as it doesn't need any configuration) + public static void main ( String... args ) + { + int exitCode = 0; + + try { + var cli = new JanusgraphSchemaGatherer() ; + var cmd = new CommandLine ( cli); + exitCode = cmd.execute ( args ); } - System.out.println("----------------"); - System.out.println("Edge Properties::"); - System.out.println("----------------"); - for (String s: edgeProperties) { - System.out.println(s); + catch ( Throwable ex ) + { + ex.printStackTrace ( System.err ); + exitCode = 1; } + finally + { + System.exit ( exitCode ); + } } - + } From 9bb715b3c5c8df601f721b61f729fc21a0731aa7 Mon Sep 17 00:00:00 2001 From: cbobed Date: Fri, 9 Apr 2021 10:21:33 +0200 Subject: [PATCH 3/4] Minor functional changes to janusgraphSchemaGatherer --- .../graphml/cli/JanusgraphSchemaGatherer.java | 122 +++++++----------- 1 file changed, 45 insertions(+), 77 deletions(-) diff --git a/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java index 067d79f..680ec9a 100644 --- a/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java +++ b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java @@ -50,6 +50,13 @@ public class JanusgraphSchemaGatherer extends CliCommand { ) protected String groovyFilename = ""; + @Option ( + names = { "-d", "--disable-check"}, + description = "Disable collision checks", + required = false + ) + protected boolean disableCheck= false; + @Option ( names = { "-k", "--key"}, description = "Name of the key property serving as ID to create an index on it if required", @@ -70,16 +77,20 @@ public final Integer call () throws Exception boolean sanityCheck = true; gatherAllInformation(graphMLFilename, vertexLabels, vertexProperties, edgeLabels, edgeProperties); - sanityCheck = checkSanity(vertexLabels, vertexProperties, edgeLabels, edgeProperties); - if (sanityCheck) { - writeGroovyScript(groovyFilename, - vertexLabels, - vertexProperties, - edgeLabels, - edgeProperties, - !"".equalsIgnoreCase(key), - key); + if (!disableCheck) { + checkSanity(vertexLabels, vertexProperties, edgeLabels, edgeProperties); } + else + { + sanityCheck = true; + } + writeGroovyScript(groovyFilename, + vertexLabels, + vertexProperties, + edgeLabels, + edgeProperties, + !"".equalsIgnoreCase(key), + key); // printSchemaInformation(vertexLabels, vertexProperties, edgeLabels, edgeProperties); long end = System.currentTimeMillis(); System.out.println("took aprox: "+((end-start)/1000)+" s. "); @@ -200,88 +211,33 @@ else if (xmlr.getLocalName().equalsIgnoreCase("edge")) { } } - public final boolean checkSanity (HashSet vertexLabels, + public final void checkSanity (HashSet vertexLabels, HashSet vertexProperties, HashSet edgeLabels, HashSet edgeProperties) { - String VL_LABEL = "vertexLabels"; - String VP_LABEL = "vertexProperties"; - String EL_LABEL = "edgeLabels"; - // String EP_LABEL = "edgeProperties"; - - boolean everythingOK = true; + HashSet auxSet = null; - HashMap> collisions = new HashMap>(); /* reserved keywords: vertex, element, edge, property, label, key */ - everythingOK = containsNoReservedWord(vertexLabels); - if (!everythingOK) log.error("Vertex labels containing reserved words"); - - everythingOK &= containsNoReservedWord (vertexProperties); - if (!everythingOK) log.error("Vertex properties containing reserved words"); - - everythingOK &= containsNoReservedWord (edgeLabels); - if (!everythingOK) log.error("Edge labels containing reserved words"); - - everythingOK &= containsNoReservedWord(edgeProperties); - if (!everythingOK) log.error("Edge properties containing reserved words"); + + if (!containsNoReservedWord(vertexLabels)) log.warn("Vertex labels containing reserved words"); + if (!containsNoReservedWord (vertexProperties)) log.warn("Vertex properties containing reserved words"); + if (!containsNoReservedWord (edgeLabels)) log.warn("Edge labels containing reserved words"); + if (!containsNoReservedWord(edgeProperties)) log.error("Edge properties containing reserved words"); // we check the intersection of the different label names // to avoid collisions - auxSet = new HashSet(vertexLabels); - auxSet.retainAll(vertexProperties); - collisions.put(VL_LABEL, auxSet); - everythingOK = auxSet.isEmpty(); - - auxSet = new HashSet(vertexLabels); - auxSet.retainAll(edgeLabels); - // we keep track of the potential collisions is everything is ok, this should be empty - collisions.get(VL_LABEL).addAll(auxSet); - everythingOK = auxSet.isEmpty(); - auxSet = new HashSet(vertexLabels); - auxSet.retainAll(edgeLabels); - collisions.get(VL_LABEL).addAll(auxSet); - everythingOK = auxSet.isEmpty(); + checkCollisions(vertexLabels, vertexProperties, "vertex labels vs vertexProperties"); + checkCollisions(vertexLabels, edgeLabels, "vertex labels vs edgeLabels"); + checkCollisions(vertexLabels, edgeProperties, "vertex labels vs edgeProperties"); - auxSet = new HashSet(vertexProperties); - auxSet.retainAll(edgeLabels); - collisions.put(VP_LABEL, auxSet); - everythingOK = auxSet.isEmpty(); - - auxSet = new HashSet(vertexProperties); - auxSet.retainAll(edgeProperties); - collisions.get(VP_LABEL).addAll(auxSet); - everythingOK = auxSet.isEmpty(); - - auxSet = new HashSet(edgeLabels); - auxSet.retainAll(edgeProperties); - collisions.put(EL_LABEL, auxSet); - everythingOK = auxSet.isEmpty(); + checkCollisions(vertexProperties, edgeLabels, "vertex properties vs edgeLabels"); + checkCollisions(vertexProperties, edgeProperties, "vertex properties vs edgeProperties"); - if (!everythingOK) { - if (!collisions.get(VL_LABEL).isEmpty()) { - log.error("collisions with the vertex labels: "); - for (String lab: collisions.get(VL_LABEL)) { - log.error(lab); - } - } - if (!collisions.get(VP_LABEL).isEmpty()) { - log.error("collisions with the vertex properties: "); - for (String lab: collisions.get(VP_LABEL)) { - log.error(lab); - } - } - if (!collisions.get(EL_LABEL).isEmpty()) { - log.error("collisions with the edge labels: "); - for (String lab: collisions.get(EL_LABEL)) { - log.error(lab); - } - } - } + checkCollisions(edgeLabels, edgeProperties, "edge labels vs edgeProperties"); - return everythingOK; } public final void writeGroovyScript (String filename, @@ -357,6 +313,18 @@ public static boolean containsNoReservedWord(HashSet set) { set.contains("key") ); } + public void checkCollisions (HashSet set1, HashSet set2, String message) { + HashSet auxSet = new HashSet(set1); + auxSet.retainAll(set2); + if (!auxSet.isEmpty()) { + log.warn("Collisions with the "+message+": "); + for (String lab: auxSet){ + log.warn("-> "+lab); + } + } + } + + public final static void printSchemaInformation(HashSet vertexLabels, HashSet vertexProperties, HashSet edgeLabels, From d21f8de0cc9456ead6b6904d174b2fb98d15295f Mon Sep 17 00:00:00 2001 From: cbobed Date: Fri, 30 Apr 2021 16:46:51 +0200 Subject: [PATCH 4/4] Some options added and an improvement in the filtering Added an option to include the connection information Added a check to avoid duplicated property keys --- .../graphml/cli/JanusgraphSchemaGatherer.java | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java index 680ec9a..39fad35 100644 --- a/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java +++ b/rdf2graphml-cli/src/main/java/uk/ac/rothamsted/kg/rdf2pg/graphml/cli/JanusgraphSchemaGatherer.java @@ -64,6 +64,13 @@ public class JanusgraphSchemaGatherer extends CliCommand { ) protected String key = ""; + @Option ( + names = {"-c", "--include-connection"}, + description = "Include generic connection commands to the script", + required = false + ) + protected boolean includeConnection = false; + @Override public final Integer call () throws Exception { @@ -90,6 +97,7 @@ public final Integer call () throws Exception edgeLabels, edgeProperties, !"".equalsIgnoreCase(key), + includeConnection, key); // printSchemaInformation(vertexLabels, vertexProperties, edgeLabels, edgeProperties); long end = System.currentTimeMillis(); @@ -246,6 +254,7 @@ public final void writeGroovyScript (String filename, HashSet edgeLabels, HashSet edgeProperties, boolean createKeyIndex, + boolean includeConnection, String key) throws FileNotFoundException { try ( PrintStream out = new PrintStream ( @@ -253,6 +262,10 @@ public final void writeGroovyScript (String filename, new FileOutputStream (filename ) ) ) ) { + if (includeConnection) { + out.println(":remote connect tinkerpop.serverl conf/remote.yaml session"); + out.println(":remote console"); + } // we build the schema with the default settings // some tunning might be required depending on the particular scenario out.println("graph.tx().commit()"); @@ -267,7 +280,12 @@ public final void writeGroovyScript (String filename, out.println("m.makeEdgeLabel('"+el+"').multiplicity(MULTI).make()"); } for (String ep: edgeProperties) { - out.println("m.makePropertyKey('"+ep+"').dataType(String.class).make()"); + // avoiding the possible collisions in the propertyKeys not + // to break the uniqueness constraint + // TO BE ANALYZED: might be neeeded to be applied as well to Vertex and Edge labels + if (!vertexProperties.contains(ep)) { + out.println("m.makePropertyKey('"+ep+"').dataType(String.class).make()"); + } } out.println("m.commit()"); out.println(""); @@ -297,6 +315,9 @@ public final void writeGroovyScript (String filename, out.println("m.commit()"); out.println("graph.tx().commit()"); } + if (includeConnection) { + out.println(":exit"); + } out.flush(); }