diff --git a/karma-cleaning/pom.xml b/karma-cleaning/pom.xml deleted file mode 100644 index 8ef7ad11c..000000000 --- a/karma-cleaning/pom.xml +++ /dev/null @@ -1,140 +0,0 @@ - - 4.0.0 - - - edu.isi - webkarma - 0.0.1-SNAPSHOT - - - karma-cleaning - - - 0.9 - - - - - edu.isi - karma-util - ${project.version} - - - junit - junit - 4.11 - - - javax.mail - mail - 1.4 - - - org.apache.commons - commons-math3 - 3.0 - - - org.python - jython-standalone - - - log4j - log4j - 1.2.16 - - - org.slf4j - slf4j-api - 1.6.4 - - - org.slf4j - slf4j-log4j12 - 1.6.4 - - - xml-apis - xml-apis - 1.0.b2 - - - - de.micromata.jak - JavaAPIforKml - 2.2.0 - - - com.hp.hpl.jena - arq - 2.8.8 - - - com.hp.hpl.jena - jena - 2.6.4 - - - com.hp.hpl.jena - iri - 0.8 - - - com.hp.hpl.jena - tdb - 0.8.10 - - - net.sf.opencsv - opencsv - 2.3 - - - org.antlr - antlr - 3.2 - - - org.jdom - jdom - 1.1.2 - - - org.apache.poi - poi - 3.8-beta5 - - - org.apache.poi - poi-ooxml - 3.8-beta5 - - - org.apache.commons - commons-lang3 - 3.1 - - - tw.edu.ntu.csie - libsvm - 3.17 - - - org.perf4j - perf4j - 0.9.16 - - - commons-lang - commons-lang - 2.3 - - - org.apache.commons - commons-math - 2.2 - - - - diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/Checker.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/Checker.java deleted file mode 100644 index d65583c81..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/Checker.java +++ /dev/null @@ -1,54 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; - -import java.util.ArrayList; - -import edu.isi.karma.cleaning.features.RecordClassifier; -import edu.isi.karma.cleaning.features.RecordFeatureSet; - -/* - * check whether the transformed results are correct - */ -public class Checker { - RecordClassifier clf; - public Checker() - { - RecordFeatureSet rfs1 = new RecordFeatureSet(); - clf = new RecordClassifier(rfs1); - - } - public String binds(String[] exp) - { - String res = ""; - if(exp.length == 2) - { - res = String.format("bef:%s aft:%s", exp[0],exp[1]); - } - else - { - res = "NOEXP"; - } - return res; - } - public void train(ArrayList postive, ArrayList negative) - { - clf.init(); - for(String[] pos:postive) - { - String tmp = binds(pos); - clf.addTrainingData(tmp, "1"); - } - for(String[] neg:negative) - { - String tmp = binds(neg); - clf.addTrainingData(tmp, "-1"); - } - clf.learnClassifer(); - } - - public String test(String[] record) - { - String line = binds(record); - String label = clf.getLabel(line); - return label; - } -} diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/FormatFunc.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/FormatFunc.java deleted file mode 100644 index ec620ff21..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/FormatFunc.java +++ /dev/null @@ -1,95 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -import edu.isi.karma.cleaning.UtilTools; - -/* - * unseen formats detections: - * distance to the class center and find the largest distance as the threshold - * - * boundary formats: - * difference of distances to two classes are below 5% - * - * record = [id, org, tar, label] - * - */ - -public class FormatFunc implements VerificationFunc { - private int funid = 1; - private HashMap cmeans = new HashMap(); - private HashMap mean_var = new HashMap(); - private double[] dmetric= null; - public FormatFunc(ArrayList records, double[] dmetric) - { - dmetric = UtilTools.initArray(dmetric, 1.0); - this.dmetric = dmetric; - getMeanandDists(records, dmetric); - - } - //identify the mean vector of each cluster - private void getMeanandDists(ArrayList records, double[] dmetric) - { - HashMap> tmp = new HashMap>(); - for(TransRecord rec:records) - { - if(tmp.containsKey(rec.label)) - { - tmp.get(rec.label).add(rec); - } - else - { - ArrayList x = new ArrayList(); - x.add(rec); - tmp.put(rec.label, x); - } - } - // find the means - for(Map.Entry> stringArrayListEntry : tmp.entrySet()) - { - ArrayList tdata = stringArrayListEntry.getValue(); - if(!tdata.isEmpty() || tdata.get(0).features.length > 0) - { - ArrayList tcl = new ArrayList(); - for(int i =0; i< tdata.size(); i++) - { - tcl.add(tdata.get(i).features); - } - double[] tmean = UtilTools.sum(tcl); - tmean = UtilTools.produce(1.0/tdata.size(), tmean); - cmeans.put(stringArrayListEntry.getKey(), tmean); - // find the max distances - // strictly bigger or smaller than [mean-3*delta, mean+3*delta] - double d_mean = 0; - double d_mu = 0; - for(int i =0; i< tdata.size(); i++) - { - d_mean += UtilTools.distance(tdata.get(i).features, tmean, dmetric); - } - d_mean = d_mean*1.0/tdata.size(); - for(int i =0; i< tdata.size(); i++) - { - d_mu += Math.pow(UtilTools.distance(tdata.get(i).features, tmean, dmetric)-d_mean, 2); - } - d_mu = Math.sqrt(d_mu/tdata.size()); - double[] x = {d_mean,d_mu}; - mean_var.put(stringArrayListEntry.getKey(), x); - } - } - - //Prober.printFeatureandWeight(tmp, cmeans, dmetric); - } - - public String verify(TransRecord record) { - double dist = UtilTools.distance(record.features, cmeans.get(record.label), dmetric); - //difference STRICTLY bigger than 2 standard deviations [68, 95, 99.7] rule - if(Math.abs(dist - mean_var.get(record.label)[0]) > 2.0*mean_var.get(record.label)[1]) - { - return String.valueOf(this.funid); - } - return "0"; - } - -} diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/HypoTester.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/HypoTester.java deleted file mode 100644 index 89597bcf9..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/HypoTester.java +++ /dev/null @@ -1,7 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; -/* - * Test whether different correctnesses can pass the hypotest - */ -public class HypoTester { - -} diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/OutlierDetector.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/OutlierDetector.java deleted file mode 100644 index 43af4e9b9..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/OutlierDetector.java +++ /dev/null @@ -1,45 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; - -import java.util.ArrayList; -import java.util.Arrays; - -import libsvm.svm_parameter; -import edu.isi.karma.cleaning.features.RecordClassifier; -import edu.isi.karma.cleaning.features.RecordFeatureSet; - -public class OutlierDetector { - RecordClassifier clf; - public OutlierDetector() - { - RecordFeatureSet rfs1 = new RecordFeatureSet(); - clf = new RecordClassifier(rfs1, svm_parameter.ONE_CLASS); - } - - public void train(ArrayList tdata) - { - for(String line:tdata) - { - clf.addTrainingData(line, "1"); - } - clf.learnClassifer(); - } - public String getLabel(String input) - { - String label = clf.getLabel(input); - return label; - } - public static void main(String[] args) - { - OutlierDetector outDet = new OutlierDetector(); - String[] dat = {"A", "AA","B", "BB"}; - String[] tst = {"B", "b", "AAAAAAAAAAAA","."}; - ArrayList data = new ArrayList(Arrays.asList(dat)); - outDet.train(data); - for(String l:tst) - { - String out = outDet.getLabel(l); - System.out.println(l+": "+out); - } - } - -} diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/Recommander.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/Recommander.java deleted file mode 100644 index 3c213597b..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/Recommander.java +++ /dev/null @@ -1,26 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; - -import java.util.ArrayList; - -/* recommend - * 1 the outlier - 2 the points on the boundary - in the test dataset. -*/ -public class Recommander { - public Recommander() - { - - } - // - public ArrayList getOutliers() - { - ArrayList res = new ArrayList(); - return res; - } - public ArrayList getBoundaryPoints() - { - ArrayList res = new ArrayList(); - return res; - } -} diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/TransRecord.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/TransRecord.java deleted file mode 100644 index 0ac2ebfaf..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/TransRecord.java +++ /dev/null @@ -1,18 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; - -public class TransRecord { - public String Id = ""; - public String org = ""; - public String tar = ""; - public String label = ""; - public String correct = "f"; - public double[] features = null; - public TransRecord(String Id, String org, String tar, String lab, double[] feats) - { - this.Id = Id; - this.org = org; - this.tar = tar; - this.label = lab; - this.features = feats; - } -} diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/VerificationFunc.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/VerificationFunc.java deleted file mode 100644 index f4857c94b..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/VerificationFunc.java +++ /dev/null @@ -1,9 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; - - -public interface VerificationFunc { - //label a record - //label 0 correct, >=1 doubious - public String verify(TransRecord record); - -} diff --git a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/ViewFunc.java b/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/ViewFunc.java deleted file mode 100644 index 45d058d3c..000000000 --- a/karma-cleaning/src/main/java/edu/isi/karma/cleaning/Correctness/ViewFunc.java +++ /dev/null @@ -1,120 +0,0 @@ -package edu.isi.karma.cleaning.Correctness; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Vector; - -import edu.isi.karma.cleaning.GrammarTreeNode; -import edu.isi.karma.cleaning.Partition; -import edu.isi.karma.cleaning.ProgSynthesis; -import edu.isi.karma.cleaning.ProgramRule; -import edu.isi.karma.cleaning.Template; -import edu.isi.karma.cleaning.Traces; - -public class ViewFunc implements VerificationFunc { - private HashMap data = new HashMap(); - private String contextId; - public ViewFunc(ArrayList records, ProgSynthesis ps, ProgramRule pr, String contextId) - { - this.contextId = contextId; - Vector pars = ps.myprog.partitions; - HashMap> resHashMap = cluster(records); - for(Partition p: pars) - { - handlePartition(resHashMap.get(p.label), p.trace, pr.getStringRule(p.label)); - } - } - public HashMap> cluster(ArrayList reds) - { - HashMap> res = new HashMap>(); - for(TransRecord r: reds) - { - if(res.containsKey(r.label)) - { - res.get(r.label).add(r); - } - else { - ArrayList line = new ArrayList(); - line.add(r); - res.put(r.label, line); - } - } - return res; - } - public void handlePartition(ArrayList records, Traces trace, String prog) { - - if (identifyIncorrRecord(records, prog)) { - return; - } else { - identifyRecord(records, trace); - } - } - - // detect the records which prog failed on - public boolean identifyIncorrRecord(ArrayList records, - String prog) { - ProgramRule pr = new ProgramRule(prog, contextId); - boolean res = true; - for (TransRecord r : records) { - String orgString = r.org; - String tar = pr.transform(orgString); - if (tar.compareTo(r.tar) != 0) { - res = false; - data.put(r.org, "1"); - } - } - return res; - } - - public void identifyRecord(ArrayList records, Traces trace) { - ArrayList