The Tarantual fault localization implementation is based on the following reference implmentation.
WARNING: UNDER NO CIRCUMSTANCE IS FILE TO BE CHANGED!!
/*******************************************************************************
* Copyright (c) 2010 James A. Jones [email protected]
******************************************************************************/
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Externalizable;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.StringTokenizer;
import java.util.concurrent.locks.ReentrantLock;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* Calculates suspiciousness and confidence values according to the Tarantula
* fault-localization technique.
*
* The usage mode is to create a coverage matrix that specifies which program
* elements are executed by which test cases. In terms of this implementation, I
* call each program element a statement ("stmt"), but they can refer to any
* program element or event that could happen or not within an execution. This
* coverage matrix is encoded as a two-dimensional array (or, equivalently in
* Java means an array of arrays) where the first dimension is indexed by the
* test case number and the second dimension is indexed by the statement number
* (or any program event identifier).
*
* The algorithm is also informed by a few other arrays:
*
* F[] is indexed by test case number. F specifies which test cases
* fail�true=fail, false=pass.
*
* L[] is indexed by test case number. L specifies which test cases should be
* used. This array was created to allow a large universe of test cases to be
* loaded and several subsets of test cases could be included or excluded from
* the total set. This allows easy and fast experimentation with various subset
* test suites.
*
* S[] is indexed by program element number. S specifies the bug identifier for
* that line. This array is optional. This is for experimentation purposes where
* multiple faults are placed in the subject program.
*
* C[] is indexed by program element number. C specifies which
* statements/program elements are coverable and which are not (for example, a
* blank line or a comment line).
*
* B[] is indexed by test case number. B specifies which test cases' coverage
* are problematic. This was created as a work-around to problems that I had
* with coverage reports that were incomplete due to segmentation faults in the
* subject program. Specifying a test case as a "bad coverage" test case means
* that it will not be used for calculation regardless of whether this test case
* is "live" (according to the L array). You can explicitly set this or you can
* leave this alone. The calculateBadTestCoverage() method sets this array for
* you based on whether a test case has absolutely no coverage � no program
* elements being executed.
*
* Once these arrays are set, the compute() method should be called, which then
* calls a series of other calculate... methods. Instead you can directly call
* the other calculate... methods. After compute() has been called, the results
* can be found in the suspiciousness[] and confidence[] arrays (which are each
* indexed by program element number). For both suspiciousness[] and
* confidence[] arrays, a -1 is used for program elements that can't be assigned
* (probably because of an uncoverable entity).
*
* @author James A. Jones [email protected]
*
*/
public class TarantulaSuspiciousnessCalculation implements Externalizable {
private boolean[][] M; // coverage matrix -- [test][stmt]
private boolean[] F; // failing test cases -- [test]
private boolean[] L; // live test cases -- [test]
private int[] S; // fault numbers -- [stmt]
private boolean[] C; // coverable statements -- [stmt]
private boolean[] B; /*
* bad coverage (no coverage information, usually due to
* a segmentation fault) -- [test]
*/
private int numOrigTests;
private int numStmts;
private int numFaults;
private int totalLiveFail;
private int totalLivePass;
private int totalOrigFail;
private int totalOrigPass;
private int[] passOnStmt; /*
* p(s), for every s and considering liveness of
* test cases -- [stmt]
*/
private int[] failOnStmt; /*
* f(s), for every s and considering liveness of
* test cases -- [stmt]
*/
private double[] passRatio; // p(s)/total_live_p, for every s -- [stmt]
private double[] failRatio; // f(s)/total_live_f, for every f -- [stmt]
private double[] suspiciousness; // -- [stmt]
private double[] confidence; // -- [stmt]
private String directory; /* the directory to store the coverage matrix file */
public TarantulaSuspiciousnessCalculation() {
}
public TarantulaSuspiciousnessCalculation(boolean[][] M) {
this.M = M;
numOrigTests = M.length;
numStmts = M[0].length;
// initialize so that all test cases are live
L = new boolean[numOrigTests];
for (int i = 0; i < L.length; i++) {
L[i] = true;
}
}
public void setDirectory(String dirStr) {
directory = dirStr;
}
public void setC(boolean[] C) {
this.C = C;
}
public void setF(boolean[] F) {
this.F = F;
calculateOrigFailAndPass();
}
public void setL(boolean[] L) {
this.L = L;
}
public void setS(int[] S) {
this.S = S;
numFaults = 0;
for (int i = 0; i < S.length; i++) {
if (S[i] > numFaults)
numFaults = S[i];
}
}
public boolean[][] getM() {
return M;
}
public boolean[] getL() {
return L;
}
public boolean[] getF() {
return F;
}
public int[] getPassOnStmt() {
return passOnStmt;
}
public int[] getFailOnStmt() {
return failOnStmt;
}
public void compute() {
calculateBadTestCoverage();
calculateTotalLiveFailAndPass();
calculatePassOnStmtAndFailOnStmt();
calculatePassRatioAndFailRatio();
calculateSuspiciousnessAndConfidence();
}
public double[] getSuspiciousness() {
return suspiciousness;
}
public double[] getConfidence() {
return confidence;
}
public boolean isTestPassed(int zeroBasedTestNumber) {
return !F[zeroBasedTestNumber];
}
public boolean isStmtCoverable(int zeroBasedLineNumber) {
return C[zeroBasedLineNumber];
}
public int getOrigNumTests() {
return numOrigTests;
}
public int getOrigTotalFail() {
return totalOrigFail;
}
public int getOrigTotalPass() {
return totalOrigPass;
}
public int getLiveNumTests() {
return totalLiveFail + totalLivePass;
}
public int getLiveTotalFail() {
return totalLiveFail;
}
public int getLiveTotalPass() {
return totalLivePass;
}
public int getNumStmts() {
return numStmts;
}
public int getNumFaults() {
return numFaults;
}
public int getFaultNum(int zeroBasedLineNum) {
return S[zeroBasedLineNum];
}
public int getNumUncoverableStmts() {
int c = 0;
for (int i = 0; i < C.length; i++) {
if (!C[i])
c++;
}
return c;
}
private void calculateTotalLiveFailAndPass() {
totalLiveFail = 0;
totalLivePass = 0;
for (int i = 0; i < numOrigTests; i++) {
if (L[i]) {
if (!B[i]) {
if (F[i])
totalLiveFail++;
else
totalLivePass++;
}
}
}
// System.out.println("livepass="+ totalLivePass + "\tlivefail=" +
// totalLiveFail + "\n");
}
private boolean isBCalculated = false;
private void calculateBadTestCoverage() {
if (isBCalculated)
return;
B = new boolean[numOrigTests];
for (int i = 0; i < numOrigTests; i++) {
B[i] = true;
for (int j = 0; j < numStmts; j++) {
if (M[i][j]) { /*
* if there is a statement covered for this test
* case
*/
B[i] = false; // this is not a bad test case
break; // no need to look further at this test case
}
}
}
isBCalculated = true;
}
private void calculateOrigFailAndPass() {
totalOrigFail = 0;
totalOrigPass = 0;
for (int i = 0; i < numOrigTests; i++) {
if (F[i])
totalOrigFail++;
else
totalOrigPass++;
}
// System.out.println("origpass="+ totalOrigPass + "\torigfail=" +
// totalOrigFail + "\n");
}
private void calculatePassOnStmtAndFailOnStmt() {
passOnStmt = new int[numStmts];
failOnStmt = new int[numStmts];
// first only consider live test cases
for (int i = 0; i < numOrigTests; i++) {
if (!B[i]) { // if this isn't a dead test case (seg fault)
if (L[i]) { // if this test case is live
for (int j = 0; j < numStmts; j++) {
if (C[j]) {
if (M[i][j]) {
if (F[i])
failOnStmt[j]++;
else
passOnStmt[j]++;
}
}
}
}
}
}
}
private void calculatePassRatioAndFailRatio() {
passRatio = new double[numStmts];
failRatio = new double[numStmts];
for (int i = 0; i < numStmts; i++) {
// System.out.print("line " + i + "\t");
if (totalLivePass == 0) {
passRatio[i] = 0d;
} else {
passRatio[i] = (double) passOnStmt[i] / (double) totalLivePass;
// System.out.print("numPass=" + passOnStmt[i] + "\t");
}
if (totalLiveFail == 0) {
failRatio[i] = 0d;
} else {
failRatio[i] = (double) failOnStmt[i] / (double) totalLiveFail;
// System.out.print("numFail=" + failOnStmt[i] + "\t");
}
// System.out.println();
}
}
private void calculateSuspiciousnessAndConfidence() {
suspiciousness = new double[numStmts];
confidence = new double[numStmts];
for (int i = 0; i < numStmts; i++) {
if ((totalLiveFail == 0) && (totalLivePass == 0)) {
suspiciousness[i] = -1d;
confidence[i] = -1d;
} else if ((failRatio[i] == 0d) && (passRatio[i] == 0d)) {
suspiciousness[i] = -1d;
confidence[i] = -1d;
} else {
suspiciousness[i] = failRatio[i]
/ (failRatio[i] + passRatio[i]);
confidence[i] = Math.max(failRatio[i], passRatio[i]);
}
}
}
/** ******** Externalizable required methods ******************* */
public void writeExternal(ObjectOutput obj_out) throws IOException {
String pwd = null;
if (directory == null)
pwd = System.getProperty("user.dir");
else
pwd = directory;
// FileWriter unzipped_out = new FileWriter(pwd + "/cov_matrix.txt");
// Reference to our zip file
FileWriter out = null;
String covMatrixFileName = pwd + "/cov_matrix";
try {
out = new FileWriter(covMatrixFileName);
} catch (IOException ioe) {
ioe.printStackTrace();
}
// write a comment that describes the file format
out.write("0 Coverage matrix file\n");
out.write("0 Record descriptions:\n");
out.write("0 0 comment\n");
out
.write("0 1 <number of test cases> <number of sourcefiles> <number of bugs>\n");
out.write("0 2 <source file name> <number of statements>\n");
out
.write("0 3 <test case num (0-based)> <coverage vector> (1=covered, 0=uncovered)\n");
out.write("0 4 <vector of pass/fail info (1=failed, 0=passed)\n");
out
.write("0 5 <vector of liveness of test cases (1=live, 0=dead)\n");
out
.write("0 6 <vector of fault numbers (0=nonfaulty, 1,2,3...F=fault numbers (16 faults max))\n");
out
.write("0 7 <vector of coverability of statements (1=coverable, 0=uncoverable)\n");
out.write("0\n");
// write the number test cases, number of source files, number of bugs
out.write(("1 " + numOrigTests + " 1 " + numFaults + "\n"));
// for each source file
{
// write source file name and number of statements
out.write(("2 space.c " + numStmts + "\n"));
// write the coverage matrix
for (int i = 0; i < M.length; i++) {
out.write(("3 " + i + " "));
boolean[] vector = M[i];
for (int j = 0; j < vector.length; j++) {
if (vector[j])
out.write("1");
else
out.write("0");
}
out.write("\n");
}
}
// write the pass/fail info
out.write("4 ");
for (int i = 0; i < F.length; i++) {
if (F[i])
out.write("1");
else
out.write("0");
}
out.write("\n");
// write the liveness of test cases info
out.write("5 ");
for (int i = 0; i < L.length; i++) {
if (L[i])
out.write("1");
else
out.write("0");
}
out.write("\n");
// write the bug number info
out.write("6 ");
for (int i = 0; i < S.length; i++) {
if ((S[i] >= 0) && (S[i] <= 9)) {
out.write(("" + S[i]));
} else if (S[i] == 10) {
out.write("a");
} else if (S[i] == 11) {
out.write("b");
} else if (S[i] == 12) {
out.write("c");
} else if (S[i] == 13) {
out.write("d");
} else if (S[i] == 14) {
out.write("e");
} else if (S[i] == 15) {
out.write("f");
} else {
System.err.println("Bug number out of range: " + S[i]);
System.exit(1);
}
}
out.write("\n");
// write the coverability of each statement
out.write("7 ");
for (int i = 0; i < C.length; i++) {
if (C[i])
out.write("1");
else
out.write("0");
}
out.write("\n");
try {
out.close();
// new CmdExec("/bin/rm -f " + covMatrixFileName + ".gz",
// new ReentrantLock());
// new CmdExec("/sw/bin/gzip " + covMatrixFileName,
// new ReentrantLock());
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
public void writeExternal1(ObjectOutput obj_out) throws IOException {
String pwd = null;
if (directory == null)
pwd = System.getProperty("user.dir");
else
pwd = directory;
// FileWriter unzipped_out = new FileWriter(pwd + "/cov_matrix.txt");
// Reference to our zip file
FileOutputStream dest = new FileOutputStream(pwd + "/cov_matrix.gz");
// Wrap our destination zipfile with a ZipOutputStream
GZIPOutputStream out = new GZIPOutputStream(new BufferedOutputStream(
dest));
// write a comment that describes the file format
out.write("0 Coverage matrix file\n".getBytes());
out.write("0 Record descriptions:\n".getBytes());
out.write("0 0 comment\n".getBytes());
out
.write("0 1 <number of test cases> <number of sourcefiles> <number of bugs>\n"
.getBytes());
out.write("0 2 <source file name> <number of statements>\n"
.getBytes());
out
.write("0 3 <test case num (0-based)> <coverage vector> (1=covered, 0=uncovered)\n"
.getBytes());
out.write("0 4 <vector of pass/fail info (1=failed, 0=passed)\n"
.getBytes());
out
.write("0 5 <vector of liveness of test cases (1=live, 0=dead)\n"
.getBytes());
out
.write("0 6 <vector of fault numbers (0=nonfaulty, 1,2,3...F=fault numbers (16 faults max))\n"
.getBytes());
out
.write("0 7 <vector of coverability of statements (1=coverable, 0=uncoverable)\n"
.getBytes());
out.write("0\n".getBytes());
// write the number test cases, number of source files, number of bugs
out.write(("1 " + numOrigTests + " 1 " + numFaults + "\n").getBytes());
// for each source file
{
// write source file name and number of statements
out.write(("2 space.c " + numStmts + "\n").getBytes());
// write the coverage matrix
for (int i = 0; i < M.length; i++) {
out.write(("3 " + i + " ").getBytes());
boolean[] vector = M[i];
for (int j = 0; j < vector.length; j++) {
if (vector[j])
out.write("1".getBytes());
else
out.write("0".getBytes());
}
out.write("\n".getBytes());
}
}
// write the pass/fail info
out.write("4 ".getBytes());
for (int i = 0; i < F.length; i++) {
if (F[i])
out.write("1".getBytes());
else
out.write("0".getBytes());
}
out.write("\n".getBytes());
// write the liveness of test cases info
out.write("5 ".getBytes());
for (int i = 0; i < L.length; i++) {
if (L[i])
out.write("1".getBytes());
else
out.write("0".getBytes());
}
out.write("\n".getBytes());
// write the bug number info
out.write("6 ".getBytes());
for (int i = 0; i < S.length; i++) {
if ((S[i] >= 0) && (S[i] <= 9)) {
out.write(("" + S[i]).getBytes());
} else if (S[i] == 10) {
out.write("a".getBytes());
} else if (S[i] == 11) {
out.write("b".getBytes());
} else if (S[i] == 12) {
out.write("c".getBytes());
} else if (S[i] == 13) {
out.write("d".getBytes());
} else if (S[i] == 14) {
out.write("e".getBytes());
} else if (S[i] == 15) {
out.write("f".getBytes());
} else {
System.err.println("Bug number out of range: " + S[i]);
System.exit(1);
}
}
out.write("\n".getBytes());
// write the coverability of each statement
out.write("7 ".getBytes());
for (int i = 0; i < C.length; i++) {
if (C[i])
out.write("1".getBytes());
else
out.write("0".getBytes());
}
out.write("\n".getBytes());
out.close();
}
public void readExternal(ObjectInput obj_in) throws IOException,
ClassNotFoundException {
final int BUFFER = 2048;
String pwd = null;
if (directory == null)
pwd = System.getProperty("user.dir");
else
pwd = directory;
// read in the coverage matrix from zip file to a string
FileInputStream dest = new FileInputStream(pwd + "/cov_matrix.gz");
GZIPInputStream in = new GZIPInputStream(new BufferedInputStream(dest));
StringBuffer strBuf = new StringBuffer();
while (true) {
byte[] b = new byte[BUFFER];
int returnVal = in.read(b, 0, BUFFER);
String newStr = new String(b);
int index = newStr.indexOf('\0');
String newerStr = null;
if (index == -1)
newerStr = newStr;
else
newerStr = newStr.substring(0, index);
strBuf.append(newerStr);
if (returnVal == -1)
break;
}
in.close();
// parse the coverage matrix string
String covMatrixStr = strBuf.toString();
StringTokenizer lineTokenizer = new StringTokenizer(covMatrixStr, "\n");
while (lineTokenizer.hasMoreTokens()) {
String line = lineTokenizer.nextToken();
// parse the record id
StringTokenizer t = new StringTokenizer(line, " ");
if (!t.hasMoreTokens())
continue;
String recordIDStr = t.nextToken();
int recordID = Integer.parseInt(recordIDStr);
switch (recordID) {
case 0:
break;
case 1:
String numTestsStr = t.nextToken();
numOrigTests = Integer.parseInt(numTestsStr);
String numSourceFilesStr = t.nextToken();
String numFaultsStr = t.nextToken();
numFaults = Integer.parseInt(numFaultsStr);
// initialize coverage matrix
M = new boolean[numOrigTests][];
break;
case 2:
String sourceFileName = t.nextToken();
String numStmtsStr = t.nextToken();
numStmts = Integer.parseInt(numStmtsStr);
break;
case 3:
String testCaseNumStr = t.nextToken();
int testCaseNum = Integer.parseInt(testCaseNumStr);
M[testCaseNum] = new boolean[numStmts];
String covVectorStr = t.nextToken();
char[] covVector = covVectorStr.toCharArray();
for (int i = 0; i < covVector.length; i++) {
M[testCaseNum][i] = (covVector[i] == '1') ? true : false;
}
break;
case 4:
F = new boolean[numOrigTests];
String failVecStr = t.nextToken();
char[] failVec = failVecStr.toCharArray();
for (int i = 0; i < failVec.length; i++) {
F[i] = (failVec[i] == '1') ? true : false;
}
calculateOrigFailAndPass();
break;
case 5:
L = new boolean[numOrigTests];
String liveVecStr = t.nextToken();
char[] liveVec = liveVecStr.toCharArray();
for (int i = 0; i < liveVec.length; i++) {
L[i] = (liveVec[i] == '1') ? true : false;
}
break;
case 6:
S = new int[numStmts];
String faultNumsStr = t.nextToken();
char[] faultNums = faultNumsStr.toCharArray();
for (int i = 0; i < faultNums.length; i++) {
S[i] = Integer.parseInt(String.valueOf(faultNums[i]));
}
break;
case 7:
C = new boolean[numStmts];
String coverVecStr = t.nextToken();
char[] coverVec = coverVecStr.toCharArray();
for (int i = 0; i < coverVec.length; i++) {
C[i] = (coverVec[i] == '1') ? true : false;
}
break;
default:
System.err
.println("Warning: Don't know record type, ignoring: "
+ line);
break;
}
}
compute();
}
}