Skip to content

Commit

Permalink
Added kaltura_id to columnFormats, removed kaltura types from attachm…
Browse files Browse the repository at this point in the history
…ent_names, added a template for attachment_names, and fixed a performance issue with searching the xml.
  • Loading branch information
cbeach47 committed Aug 6, 2018
1 parent f82c362 commit 4c88752
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 21 deletions.
3 changes: 2 additions & 1 deletion openequella-toolbox/blank.properties
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export.items.output=/my/export/file.csv
export.items.filter.dateCreated=2013-01-01

# CSV of metadata paths
# Keywords: UUID, Version, attachment_names, name, description
# Keywords: UUID, Version, attachment_names, name, description, kaltura_id
# All other columns are assumed to be xpaths. The script automatically adds the prefix '/xml/' and the suffix '/text()' to each of them.
export.items.columnFormat=my/metadata/title,UUID,Version,attachment_names,my/metadata/data1,my/metadata/data2,my/metadata/data3
export.items.attachment.path.template=/Attachments/@HASH/@UUID/@VERSION/@FILENAME
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public static enum ToolboxFunction {
public static final String EXPORT_ITEMS_OUTPUT_FILE = "export.items.output";
public static final String EXPORT_ITEMS_COLUMN_FORMAT = "export.items.columnFormat";
public static final String EXPORT_ITEMS_FILTER_DATE_CREATED = "export.items.filter.dateCreated";
public static final String EXPORT_ITEMS_ATTACHMENT_PATH_TEMPLATE = "export.items.attachment.path.template";


// general
public static final String OEQ_URL = "oeq.url";
Expand All @@ -72,13 +74,18 @@ public static enum ToolboxFunction {
private boolean validConfig = true;
private String filepath;

/**
 * Creates a configuration backed directly by the supplied properties.
 *
 * <p>Meant for testing purposes: unlike the file-based constructor, nothing is
 * loaded from disk and none of the {@code checkConfig} validation is run here.
 *
 * @param props the properties to use as the configuration store
 */
public Config(Properties props) {
    this.store = props;
}

public Config(String path) {
filepath = path;
try (InputStream input = new FileInputStream(path)) {
LOGGER.info("Using [{}] as the configuration file.", path);
store = new Properties();
store.load(input);

checkConfig(TOOLBOX_FUNCTION, true, true);
if(validConfig) {
switch (ToolboxFunction.valueOf(getConfig(TOOLBOX_FUNCTION))) {
Expand Down Expand Up @@ -165,6 +172,7 @@ private void checkConfigsExportItems() {
}
}
checkConfig(EXPORT_ITEMS_COLUMN_FORMAT, true, true);
checkConfig(EXPORT_ITEMS_ATTACHMENT_PATH_TEMPLATE, true, true);
}

private void checkConfig(String key, boolean displayValue, boolean required) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
Expand All @@ -28,6 +29,13 @@
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.logging.log4j.LogManager;
Expand All @@ -38,19 +46,24 @@
import org.apereo.openequella.tools.toolbox.utils.sorts.SortOpenEquellaItemByName;
import org.json.JSONArray;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;

public class ExportItemsDriver {
private static Logger LOGGER = LogManager.getLogger(ExportItemsDriver.class);

private Map<String, EquellaItem> cache = new HashMap<>();
public void execute(Config config) {
Long start = System.currentTimeMillis();
// Ensure openEQUELLA is accessible
OpenEquellaRestUtils oeru = new OpenEquellaRestUtils(config);
if(!oeru.gatherAccessToken()) {
LOGGER.error("Ending openEQUELLA run - unable to access {}", config.getConfig(Config.OEQ_URL));
return;
}

LOGGER.info("Duration to obtain access token: {}ms", (System.currentTimeMillis()-start));

// Create output file
File output = new File(config.getConfig(Config.EXPORT_ITEMS_OUTPUT_FILE));
if(output.exists()) {
Expand All @@ -75,7 +88,12 @@ public void execute(Config config) {
// Loop through search results and save to the output file
while(oeru.hasMoreResourcesToCache()) {
try {
start = System.currentTimeMillis();

List<EquellaItem> nextBatch = oeru.gatherItemsGeneral();
LOGGER.info("Duration to obtain batch of items: {}ms", (System.currentTimeMillis()-start));
start = System.currentTimeMillis();

for(EquellaItem ei : nextBatch) {
// filter by date created if the filter was specified
if(filterByDateCreated == null || !ei.getCreatedDate().before(filterByDateCreated)) {
Expand All @@ -92,25 +110,43 @@ public void execute(Config config) {
LOGGER.debug("{} - Filtering out since it's dateCreated ({}) is before the specified date {}", ei.getSignature(), Config.DATE_FORMAT_OEQ_API.format(ei.getCreatedDate()), config.getConfig(Config.EXPORT_ITEMS_FILTER_DATE_CREATED));
}
}
LOGGER.info("Duration to cache batch of items: {}ms", (System.currentTimeMillis()-start));
} catch (Exception e) {
LOGGER.error("Ending openEQUELLA run - error caching output file was not able to be created: {} - {}", output.getAbsolutePath(), e.getMessage());
return;
}
}
LOGGER.info("All items gathered. Printing out to CSV file [{}].", config.getConfig(Config.EXPORT_ITEMS_OUTPUT_FILE));

start = System.currentTimeMillis();

List<EquellaItem> records = new ArrayList<>(cache.values());
Collections.sort(records, new SortOpenEquellaItemByName());
int counter = 0;
LOGGER.info("Duration to prep cached items for printing: {}ms", (System.currentTimeMillis()-start));
start = System.currentTimeMillis();

int counter = 1;
for(EquellaItem ei : records) {
csvPrinter.printRecord(buildRecord(headers, ei));
if(counter++ % 10 == 0) {
LOGGER.info("Printing status: {} items complete.", counter);
List<String> record = buildRecord(headers, ei, config);
LOGGER.debug("Duration to build csv record: {}ms - {}", (System.currentTimeMillis()-start), ei.getSignature());
start = System.currentTimeMillis();

csvPrinter.printRecord(record);

LOGGER.debug("Duration to print record: {}ms", (System.currentTimeMillis()-start));
start = System.currentTimeMillis();

if(counter % 10 == 0) {
LOGGER.debug("Printing status: {} items complete.", counter);
}
counter++;
}
csvPrinter.flush();

csvPrinter.flush();
csvPrinter.close();
LOGGER.info("Duration to flush printer: {}ms", (System.currentTimeMillis()-start));

if(counter % 10 != 0) {
LOGGER.info("Printing status: {} items complete.", counter);
}
Expand All @@ -130,23 +166,34 @@ public void execute(Config config) {
}
}

private List<String> buildRecord(List<String> headers, EquellaItem ei) throws Exception {
// Similar to MigrationUtils.findFirstOccurrenceInXml(), but this parses the item's XML
// only once per record and also handles the 'reserved keyword' columns.
private List<String> buildRecord(List<String> headers, EquellaItem ei, Config config) throws Exception {
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(new InputSource(new StringReader(ei.getMetadata())));
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();

List<String> record = new ArrayList<>();
for(String header : headers) {
if(header.equalsIgnoreCase("uuid")) {
record.add(ei.getUuid());
} else if(header.equalsIgnoreCase("version")) {
record.add(ei.getVersion()+"");
} else if(header.equalsIgnoreCase("attachment_names")) {
record.add(parseAttachmentFilenames(ei));
record.add(parseAttachmentFilenames(ei, config));
} else if(header.equalsIgnoreCase("name")) {
record.add(ei.getName());
} else if(header.equalsIgnoreCase("description")) {
record.add(ei.getDescription());
} else if(header.equalsIgnoreCase("kaltura_id")) {
record.add(findFirstKalturaIdInAttachments(ei));
} else {
//Assume it's a metadata path
try {
record.add(MigrationUtils.findFirstOccurrenceInXml(ei.getMetadata(), "/xml/" + header + "/text()"));
XPathExpression expr = xpath.compile("/xml/" + header + "/text()");
record.add((String) expr.evaluate(doc, XPathConstants.STRING));
} catch (Exception e) {
e.printStackTrace();
throw new Exception("Unable to parse column format xpath "+header);
Expand All @@ -156,7 +203,7 @@ private List<String> buildRecord(List<String> headers, EquellaItem ei) throws Ex
return record;
}

public String parseAttachmentFilenames(EquellaItem ei) {
public String parseAttachmentFilenames(EquellaItem ei, Config config) {
JSONObject json = ei.getJson();
if(json.has(OpenEquellaRestUtils.KEY_ATTS)) {
StringBuilder sb = new StringBuilder();
Expand All @@ -169,14 +216,7 @@ public String parseAttachmentFilenames(EquellaItem ei) {
if(sb.length() != 0) {
sb.append(",");
}
sb.append(att.getString(OpenEquellaRestUtils.KEY_ATT_FILENAME));
break;
}
case OpenEquellaRestUtils.VAL_ATT_TYPE_KALTURA: {
if(sb.length() != 0) {
sb.append(",");
}
sb.append(att.getString(OpenEquellaRestUtils.KEY_ATT_TITLE));
sb.append(constructAttachmentPath(ei, att.getString(OpenEquellaRestUtils.KEY_ATT_FILENAME), config));
break;
}
default: {
Expand All @@ -190,6 +230,22 @@ public String parseAttachmentFilenames(EquellaItem ei) {
return "";
}
}

/**
 * Looks up the media ID of the first kaltura attachment on an item.
 *
 * <p>The item's JSON is scanned in attachment order; the first attachment whose
 * type equals {@code OpenEquellaRestUtils.VAL_ATT_TYPE_KALTURA} wins.
 *
 * @param ei the item whose JSON attachments are inspected
 * @return the media ID of the first kaltura attachment, or an empty string when
 *         the item has no attachments array or no kaltura attachment
 */
public String findFirstKalturaIdInAttachments(EquellaItem ei) {
    final JSONObject itemJson = ei.getJson();
    // Guard clause: nothing to search when the item carries no attachments.
    if (!itemJson.has(OpenEquellaRestUtils.KEY_ATTS)) {
        return "";
    }

    final JSONArray attachments = itemJson.getJSONArray(OpenEquellaRestUtils.KEY_ATTS);
    final int total = attachments.length();
    int idx = 0;
    while (idx < total) {
        final JSONObject attachment = attachments.getJSONObject(idx);
        final String type = attachment.getString(OpenEquellaRestUtils.KEY_ATT_TYPE);
        if (OpenEquellaRestUtils.VAL_ATT_TYPE_KALTURA.equals(type)) {
            return attachment.getString(OpenEquellaRestUtils.KEY_ATT_MEDIA_ID);
        }
        idx++;
    }
    return "";
}

private List<String> parseCSV(String csv) {
List<String> tokens = new ArrayList<>();
Expand All @@ -200,5 +256,25 @@ private List<String> parseCSV(String csv) {
return tokens;
}

/**
 * Builds the path of an attachment by filling in the configured path template.
 *
 * <p>The template is read from {@code Config.EXPORT_ITEMS_ATTACHMENT_PATH_TEMPLATE}.
 * The first occurrence of each of the following placeholders is substituted, in
 * this order; placeholders that are absent from the template are simply ignored:
 * <ul>
 *   <li>{@code @HASH} - the item UUID's {@code hashCode()} masked with 127</li>
 *   <li>{@code @UUID} - the item UUID</li>
 *   <li>{@code @VERSION} - the item version</li>
 *   <li>{@code @FILENAME} - the attachment filename</li>
 * </ul>
 *
 * <p>Substituted values are inserted literally. {@code String.replaceFirst} treats
 * its replacement argument as a regex replacement string, so an unquoted filename
 * containing {@code $} or {@code \} would either throw or be silently altered.
 *
 * @param ei      the item that owns the attachment
 * @param attName the attachment filename
 * @param config  the configuration holding the path template
 * @return the template with its placeholders filled in
 */
public String constructAttachmentPath(EquellaItem ei, String attName, Config config) {
    String path = config.getConfig(Config.EXPORT_ITEMS_ATTACHMENT_PATH_TEMPLATE);

    // The hash is a plain non-negative integer, so it needs no quoting.
    path = path.replaceFirst("@HASH", Integer.toString(ei.getUuid().hashCode() & 127));

    // quoteReplacement() escapes '$' and '\' so the values are used verbatim.
    path = path.replaceFirst("@UUID",
            java.util.regex.Matcher.quoteReplacement(ei.getUuid()));
    path = path.replaceFirst("@VERSION",
            java.util.regex.Matcher.quoteReplacement(ei.getVersion() + ""));
    path = path.replaceFirst("@FILENAME",
            java.util.regex.Matcher.quoteReplacement(attName));

    return path;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class OpenEquellaRestUtils {
public static final String KEY_ATT_SIZE = "size";
public static final String KEY_ATT_LINKS = "links";
public static final String KEY_ATT_LINKS_VIEW = "view";
public static final String KEY_ATT_MEDIA_ID = "mediaId";

public static final String VAL_ATT_TYPE_KALTURA = "kaltura";
public static final String VAL_ATT_TYPE_FILE = "file";
Expand Down Expand Up @@ -263,8 +264,7 @@ public List<EquellaItem> gatherItemsGeneral() throws Exception {
ei.setCreatedDate(Config.DATE_FORMAT_OEQ_API.parse(confirmAndGatherString(resourceObj, "createdDate")));
ei.setJson(resourceObj);

LOGGER.info(
"CACHED {}/{}: [{}] - {}", ei.getUuid(), ei.getVersion(), ei.getName(), resourceObj);
LOGGER.info("CACHED {}", ei.getSignature());
cachedItems.add(ei);
}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

import static org.junit.Assert.assertEquals;

import java.util.Properties;
import java.util.UUID;

import org.apereo.openequella.tools.toolbox.utils.EquellaItem;
import org.json.JSONArray;
import org.json.JSONObject;
Expand Down Expand Up @@ -47,8 +50,39 @@ public void testParseAttachmentFilenames() {
json.put("attachments", atts);
EquellaItem ei = new EquellaItem();
ei.setJson(json);
ei.setUuid("ec48a0e1-9643-4d50-840a-db26fd9fa15a");
ei.setVersion(1);
ExportItemsDriver eid = new ExportItemsDriver();
Properties props = new Properties();
props.put(Config.EXPORT_ITEMS_ATTACHMENT_PATH_TEMPLATE, "/Attachments/@HASH/@UUID/@VERSION/@FILENAME");
assertEquals("/Attachments/40/ec48a0e1-9643-4d50-840a-db26fd9fa15a/1/myfile.pdf", eid.parseAttachmentFilenames(ei, new Config(props)));

}

/**
 * Verifies that, for an item with two kaltura attachments and one file
 * attachment, findFirstKalturaIdInAttachments returns the mediaId of the
 * kaltura attachment that appears first in the attachments array.
 */
@Test
public void testFindFirstKalturaIdInAttachments() {
    JSONObject json = new JSONObject();
    JSONObject att1 = new JSONObject();
    att1.put("type", "kaltura");
    att1.put("description", "This is an interesting kaltura link description");
    att1.put("mediaId", "0_12345");
    JSONObject att2 = new JSONObject();
    att2.put("type", "file");
    att2.put("description", "This is an interesting file description");
    att2.put("filename", "myfile.pdf");
    JSONObject att3 = new JSONObject();
    att3.put("type", "kaltura");
    att3.put("description", "This is an interesting kaltura link description");
    att3.put("mediaId", "0_98765");
    JSONArray atts = new JSONArray();
    atts.put(att1);
    atts.put(att2);
    atts.put(att3);
    json.put("attachments", atts);
    EquellaItem ei = new EquellaItem();
    ei.setJson(json);
    ExportItemsDriver eid = new ExportItemsDriver();
    // The stale single-argument parseAttachmentFilenames(ei) assertion was dropped:
    // that method now requires a Config, and these fixtures define no title.
    assertEquals("0_12345", eid.findFirstKalturaIdInAttachments(ei));
}
}

0 comments on commit 4c88752

Please sign in to comment.