From 34db9f0a9af66d698583decbd9b3e48e5509b53e Mon Sep 17 00:00:00 2001 From: Fabien Carrion Date: Mon, 8 Dec 2014 15:04:27 -0600 Subject: [PATCH] Add a selector on text / inner html / outer html / attribute --- build.xml | 3 - src/be/ibridge/kettle/jsoup/JsoupInput.java | 37 +++- .../kettle/jsoup/JsoupInputDialog.java | 61 +++++-- .../ibridge/kettle/jsoup/JsoupInputField.java | 163 +++++++++++++++++- .../jsoup/messages/messages_en_US.properties | 8 + 5 files changed, 242 insertions(+), 30 deletions(-) diff --git a/build.xml b/build.xml index 7a17db5..72d87bc 100644 --- a/build.xml +++ b/build.xml @@ -6,7 +6,6 @@ - @@ -16,8 +15,6 @@ - - diff --git a/src/be/ibridge/kettle/jsoup/JsoupInput.java b/src/be/ibridge/kettle/jsoup/JsoupInput.java index bcf4eea..d1f4e4a 100644 --- a/src/be/ibridge/kettle/jsoup/JsoupInput.java +++ b/src/be/ibridge/kettle/jsoup/JsoupInput.java @@ -418,11 +418,38 @@ private Object[] buildRow() throws KettleException { Elements jsoupa=data.resultList.get(i); String nodevalue=null; if(jsoupa!=null) { - Object jo= (Object) jsoupa.get(data.recordnr); + Element jo= jsoupa.get(data.recordnr); if(jo!=null){ - nodevalue = jo.toString(); - } - } + + // Do Element Type + switch (field.getElementType()) { + case JsoupInputField.ELEMENT_TYPE_NODE: + // Do Result Type + switch (field.getResultType()) { + case JsoupInputField.RESULT_TYPE_TEXT: + nodevalue = jo.text(); + break; + case JsoupInputField.RESULT_TYPE_TYPE_OUTER_HTML: + nodevalue = jo.outerHtml(); + break; + case JsoupInputField.RESULT_TYPE_TYPE_INNER_HTML: + nodevalue = jo.html(); + break; + default: + nodevalue = jo.toString(); + break; + } + break; + case JsoupInputField.ELEMENT_TYPE_ATTRIBUT: + nodevalue = jo.attr(field.getAttribute()); + break; + default: + nodevalue = jo.toString(); + break; + } + } + } + // Do trimming switch (field.getTrimType()) { @@ -541,4 +568,4 @@ public void dispose(StepMetaInterface smi, StepDataInterface sdi) { data.resultList=null; super.dispose(smi, sdi); } -} \ No newline at end of file +} diff --git a/src/be/ibridge/kettle/jsoup/JsoupInputDialog.java b/src/be/ibridge/kettle/jsoup/JsoupInputDialog.java index 8db34b5..84a42ac 100644 --- a/src/be/ibridge/kettle/jsoup/JsoupInputDialog.java +++ b/src/be/ibridge/kettle/jsoup/JsoupInputDialog.java @@ -833,6 +833,20 @@ public void focusGained(org.eclipse.swt.events.FocusEvent e) ColumnInfo.COLUMN_TYPE_TEXT, false), new ColumnInfo( + BaseMessages.getString(PKG, "JsoupInputDialog.FieldsTable.Element.Column"), + ColumnInfo.COLUMN_TYPE_CCOMBO, + JsoupInputField.ElementTypeDesc, + true ), + new ColumnInfo( + BaseMessages.getString(PKG, "JsoupInputDialog.FieldsTable.ResultType.Column"), + ColumnInfo.COLUMN_TYPE_CCOMBO, + JsoupInputField.ResultTypeDesc, + true ), + new ColumnInfo( + BaseMessages.getString(PKG, "JsoupInputDialog.FieldsTable.Attribute.Column"), + ColumnInfo.COLUMN_TYPE_TEXT, + false ), + new ColumnInfo( BaseMessages.getString(PKG, "JsoupInputDialog.FieldsTable.Type.Column"), ColumnInfo.COLUMN_TYPE_CCOMBO, ValueMeta.getTypes(), @@ -1237,6 +1251,9 @@ public void getData(JsoupInputMeta in) TableItem item = wFields.table.getItem(i); String name = field.getName(); String xpath = field.getPath(); + String element = field.getElementTypeDesc(); + String resulttype = field.getResultTypeDesc(); + String attribute = field.getAttribute(); String type = field.getTypeDesc(); String format = field.getFormat(); String length = ""+field.getLength(); @@ -1249,15 +1266,18 @@ public void getData(JsoupInputMeta in) if (name !=null) item.setText( 1, name); if (xpath !=null) item.setText( 2, xpath); - if (type !=null) item.setText( 3, type ); - if (format !=null) item.setText( 4, format ); - if (length !=null && !"-1".equals(length )) item.setText( 5, length ); - if (prec !=null && !"-1".equals(prec )) item.setText( 6, prec ); - if (curr !=null) item.setText( 7, curr ); - if (decim !=null) item.setText( 8, decim ); - if (group !=null) item.setText( 9, group ); - if (trim !=null) item.setText( 10, trim ); - if (rep !=null) item.setText(11, rep ); + if (element != null) item.setText( 3, element); + if (resulttype != null) item.setText( 4, resulttype); + if (attribute != null) item.setText( 5, attribute); + if (type !=null) item.setText( 6, type ); + if (format !=null) item.setText( 7, format ); + if (length !=null && !"-1".equals(length )) item.setText( 8, length ); + if (prec !=null && !"-1".equals(prec )) item.setText( 9, prec ); + if (curr !=null) item.setText( 10, curr ); + if (decim !=null) item.setText( 11, decim ); + if (group !=null) item.setText( 12, group ); + if (trim !=null) item.setText( 13, trim ); + if (rep !=null) item.setText(14, rep ); } } @@ -1334,15 +1354,18 @@ private void getInfo(JsoupInputMeta in) throws KettleException field.setName( item.getText(1) ); field.setPath( item.getText(2) ); - field.setType( ValueMeta.getType(item.getText(3)) ); - field.setFormat( item.getText(4) ); - field.setLength( Const.toInt(item.getText(5), -1) ); - field.setPrecision( Const.toInt(item.getText(6), -1) ); - field.setCurrencySymbol( item.getText(7) ); - field.setDecimalSymbol( item.getText(8) ); - field.setGroupSymbol( item.getText(9) ); - field.setTrimType( JsoupInputField.getTrimTypeByDesc(item.getText(10)) ); - field.setRepeated( BaseMessages.getString(PKG, "System.Combo.Yes").equalsIgnoreCase(item.getText(11)) ); + field.setElementType( JsoupInputField.getElementTypeByDesc(item.getText(3)) ); + field.setResultType( JsoupInputField.getResultTypeByDesc(item.getText(4)) ); + field.setAttribute( item.getText(5) ); + field.setType( ValueMeta.getType(item.getText(6)) ); + field.setFormat( item.getText(7) ); + field.setLength( Const.toInt(item.getText(8), -1) ); + field.setPrecision( Const.toInt(item.getText(9), -1) ); + field.setCurrencySymbol( item.getText(10) ); + field.setDecimalSymbol( item.getText(11) ); + field.setGroupSymbol( item.getText(12) ); + field.setTrimType( JsoupInputField.getTrimTypeByDesc(item.getText(13)) ); + field.setRepeated( BaseMessages.getString(PKG, "System.Combo.Yes").equalsIgnoreCase(item.getText(14)) ); in.getInputFields()[i] = field; } @@ -1589,4 +1612,4 @@ private void addAdditionalFieldsTab() } -} \ No newline at end of file +} diff --git a/src/be/ibridge/kettle/jsoup/JsoupInputField.java b/src/be/ibridge/kettle/jsoup/JsoupInputField.java index 4b2b921..750624f 100644 --- a/src/be/ibridge/kettle/jsoup/JsoupInputField.java +++ b/src/be/ibridge/kettle/jsoup/JsoupInputField.java @@ -29,6 +29,28 @@ public class JsoupInputField implements Cloneable { private static Class PKG = JsoupInputMeta.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$ + public final static int ELEMENT_TYPE_NODE = 0; + public final static int ELEMENT_TYPE_ATTRIBUT = 1; + + public final static String ElementTypeCode[] = { "node", "attribute" }; + + public final static String ElementTypeDesc[] = { + BaseMessages.getString(PKG, "JsoupInputField.ElementType.Node"), + BaseMessages.getString(PKG, "JsoupInputField.ElementType.Attribute") + }; + + public final static int RESULT_TYPE_TEXT = 0; + public final static int RESULT_TYPE_TYPE_OUTER_HTML = 1; + public final static int RESULT_TYPE_TYPE_INNER_HTML = 2; + + public final static String ResultTypeCode[] = { "valueof", "outerhtml", "innerhtml" }; + + public final static String ResultTypeDesc[] = { + BaseMessages.getString(PKG, "JsoupInputField.ResultType.Text"), + BaseMessages.getString(PKG, "JsoupInputField.ResultType.OuterHtml"), + BaseMessages.getString(PKG, "JsoupInputField.ResultType.InnerHtml") + }; + public final static int TYPE_TRIM_NONE = 0; public final static int TYPE_TRIM_LEFT = 1; public final static int TYPE_TRIM_RIGHT = 2; @@ -46,6 +68,9 @@ public class JsoupInputField implements Cloneable private String name; private String path; + private int elementtype; + private int resulttype; + private String attribute; private int type; private int length; @@ -60,7 +85,10 @@ public class JsoupInputField implements Cloneable public JsoupInputField(String fieldname) { this.name = fieldname; - this.path = ""; + this.path = ""; + this.elementtype = ELEMENT_TYPE_NODE; + this.resulttype = RESULT_TYPE_TEXT; + this.attribute = ""; this.length = -1; this.type = ValueMetaInterface.TYPE_STRING; this.format = ""; @@ -84,6 +112,9 @@ public String getXML() retval.append(" ").append(Const.CR); retval.append(" ").append(XMLHandler.addTagValue("name", getName())); retval.append(" ").append(XMLHandler.addTagValue("path", getPath())); + retval.append(" ").append(XMLHandler.addTagValue("element_type", getElementTypeCode())); + retval.append(" ").append(XMLHandler.addTagValue("result_type", getResultTypeCode())); + retval.append(" ").append(XMLHandler.addTagValue("attribute", getAttribute())); retval.append(" ").append(XMLHandler.addTagValue("type", getTypeDesc())); retval.append(" ").append(XMLHandler.addTagValue("format", getFormat())); retval.append(" ").append(XMLHandler.addTagValue("currency", getCurrencySymbol())); @@ -103,6 +134,9 @@ public JsoupInputField(Node fnode) throws KettleValueException { setName( XMLHandler.getTagValue(fnode, "name") ); setPath( XMLHandler.getTagValue(fnode, "path") ); + setElementType( getElementTypeByCode(XMLHandler.getTagValue(fnode, "element_type")) ); + setResultType( getResultTypeByCode(XMLHandler.getTagValue(fnode, "result_type")) ); + setAttribute( XMLHandler.getTagValue(fnode, "attribute") ); setType( ValueMeta.getType(XMLHandler.getTagValue(fnode, "type")) ); setFormat( XMLHandler.getTagValue(fnode, "format") ); setCurrencySymbol( XMLHandler.getTagValue(fnode, "currency") ); @@ -127,6 +161,28 @@ public final static int getTrimTypeByCode(String tt) + public final static int getElementTypeByCode(String tt) + { + if (tt==null) return 0; + + /// Code to be removed later on as explained in the top of + // this file. + //////////////////////////////////////////////////////////////// + for (int i=0;i=trimTypeCode.length) return trimTypeCode[0]; return trimTypeCode[i]; } + public final static String getElementTypeCode(int i) + { + // To be changed to the new code once all are converted + if (i<0 || i>=ElementTypeCode.length) return ElementTypeCode[0]; + return ElementTypeCode[i]; + } + + public final static String getTrimTypeDesc(int i) { if (i<0 || i>=trimTypeDesc.length) return trimTypeDesc[0]; return trimTypeDesc[i]; } + public final static String getElementTypeDesc(int i) + { + if (i<0 || i>=ElementTypeDesc.length) return ElementTypeDesc[0]; + return ElementTypeDesc[i]; + } + public Object clone() { try @@ -225,22 +306,40 @@ public int getTrimType() return trimtype; } + public int getElementType() + { + return elementtype; + } public String getTrimTypeCode() { return getTrimTypeCode(trimtype); } + public String getElementTypeCode() + { + return getElementTypeCode(elementtype); + } + public String getTrimTypeDesc() { return getTrimTypeDesc(trimtype); } + public String getElementTypeDesc() + { + return getElementTypeDesc(elementtype); + } public void setTrimType(int trimtype) { this.trimtype= trimtype; } + public void setElementType(int element_type) + { + this.elementtype= element_type; + } + public String getGroupSymbol() { return groupSymbol; @@ -290,9 +389,67 @@ public void setRepeated(boolean repeat) { this.repeat = repeat; } - + + public String getAttribute() + { + return attribute; + } + + public void setAttribute(String attribute) + { + this.attribute = attribute; + } + public void flipRepeated() { repeat = !repeat; } - } \ No newline at end of file + + public final static int getResultTypeByDesc(String tt) + { + if (tt==null) return 0; + + for (int i=0;i=ResultTypeDesc.length) return ResultTypeDesc[0]; + return ResultTypeDesc[i]; + } + public int getResultType() + { + return resulttype; + } + public void setResultType(int resulttype) + { + this.resulttype= resulttype; + } + public final static int getResultTypeByCode(String tt) + { + if (tt==null) return 0; + + for (int i=0;i=ResultTypeCode.length) return ResultTypeCode[0]; + return ResultTypeCode[i]; + } + public String getResultTypeCode() + { + return getResultTypeCode(resulttype); + } +} diff --git a/src/be/ibridge/kettle/jsoup/messages/messages_en_US.properties b/src/be/ibridge/kettle/jsoup/messages/messages_en_US.properties index 19a3705..120a049 100644 --- a/src/be/ibridge/kettle/jsoup/messages/messages_en_US.properties +++ b/src/be/ibridge/kettle/jsoup/messages/messages_en_US.properties @@ -91,6 +91,9 @@ JsoupInputDialog.wSourceStreamField.Tooltip=Get source from previously defined f JsoupInputDialog.wlXMLField.Label=get source from a field JsoupInputDialog.FieldsTable.Length.Column=Length JsoupInputDialog.NumberRows.DialogTitle=Enter preview size +JsoupInputDialog.FieldsTable.Element.Column=Element +JsoupInputDialog.FieldsTable.ResultType.Column=Result type +JsoupInputDialog.FieldsTable.Attribute.Column=Attribute JsoupInputDialog.FieldsTable.Type.Column=Type JsoupInput.ErrorInStepRunning=Error running step\! {0} JsoupInput.Log.NrRecords=We found [{0}] records @@ -137,3 +140,8 @@ JsoupInput.Log.WeCanFindFile=We can find file [{0}] JsoupInputDialog.wOutputField.Label=Source from field JsoupInputDialog.FilesMissing.DialogMessage=No file was specified in list\! JsoupInputField.TrimType.Right=right +JsoupInputField.ElementType.Node=Node +JsoupInputField.ElementType.Attribute=Attribute +JsoupInputField.ResultType.Text=Text +JsoupInputField.ResultType.OuterHtml=Outer Html +JsoupInputField.ResultType.InnerHtml=Inner Html