-
Notifications
You must be signed in to change notification settings - Fork 1
/
generating_results.py
375 lines (283 loc) · 12.4 KB
/
generating_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
import polars as pl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import FuncFormatter
import numpy as np
pd.set_option('display.max_columns', 200)
import warnings
warnings.filterwarnings('ignore')
import re
import csv
import os
import re
import itertools
from collections import Counter
from fpdf import FPDF
# Input CSV listing the scenarios to collect: project;merge_commit;class;method;...
file_name = 'revList.csv'
# One DataFrame per scenario, concatenated at the end
found_files = []
with open(file_name, mode='r', newline='', encoding='utf-8') as file:
    csv_reader = csv.reader(file, delimiter=';')
    # Skip the header row
    next(csv_reader)
    for row in csv_reader:
        project = row[0]
        merge_commit = row[1]
        class_name = row[2]
        method = row[3]
        # left_modification = row[4]
        # has_build = row[5]
        # left_deletion = row[6]
        # right_modification = row[7]
        # right_deletion = row[8]
        # realistic_case_path = row[9]
        # Split the fully-qualified class name into package path + simple name
        parts = class_name.split('.')
        class_name = parts.pop()
        class_path = '/'.join(parts)
        path_file = f"joana/reports/{project}/{merge_commit}/{class_path}/executionSummary.csv"
        if os.path.exists(path_file):
            df = pd.read_csv(path_file, sep=';')
            # Add context columns so rows stay identifiable after concatenation
            df['project'] = project
            df['merge commit'] = merge_commit
            df['class'] = class_name
            df['original method'] = method
        else:
            # No execution summary for this scenario: keep a placeholder row
            # so the scenario is still visible in the merged output.
            data = {
                'project': project,
                'merge commit': merge_commit,
                'class': class_name,
                'original method': [method]
            }
            df = pd.DataFrame(data)
        # Reorder columns so the context columns come first
        context_cols = ['project', 'merge commit', 'class', 'original method']
        columns_order = context_cols + [col for col in df.columns if col not in context_cols]
        df = df[columns_order]
        found_files.append(df)
if found_files:
    # Concatenate all DataFrames, keeping all rows and columns
    merged_file = pd.concat(found_files, ignore_index=True, sort=False)
    # 'Method' is absent when every scenario lacked a summary file;
    # errors='ignore' avoids a KeyError in that case.
    merged_file = merged_file.drop(columns=['Method'], errors='ignore')
    # Save the result to a new CSV file with ';' as separator
    merged_file.to_csv('merged_file.csv', index=False, sep=';')
    print("Files merged successfully! File saved as 'merged_file.csv'.")
else:
    print("No file was found!")
class ReportAnalysis:
    """Compare analysis execution results against the LOI ground truth.

    Reads the merged execution results and the ground-truth CSV, classifies
    every row as TP/TN/FP/FN, writes the per-row classification to
    'results.csv', and renders the summary metrics plus a confusion matrix
    into 'results.pdf'.
    """

    def __init__(self, path_result, path_ground_truth):
        # latin-1 + on_bad_lines='skip' tolerate malformed rows in the reports
        self.soot_results = pd.read_csv(path_result, sep=';', encoding='latin-1', on_bad_lines='skip', low_memory=False)
        self.loi = pd.read_csv(path_ground_truth, sep=';', encoding='latin-1', on_bad_lines='skip', low_memory=False)
        self.generate_results()

    def get_method_name(self, method_declaration):
        """Extract the simple method name from a declaration such as
        'pkg.Cls.method(int,int)'.

        Returns the name (str) when found, otherwise None.
        """
        match = re.search(r'\.([a-zA-Z_][a-zA-Z0-9_]*)\(', method_declaration)
        if match:
            method_name = match.group(1)
            print("Method name:", method_name)
            return method_name
        print("Method name not found.")
        return None

    def get_loi(self, project, class_name, method, merge_commit):
        """Look up the ground-truth (LOI, original sample) pair for one scenario.

        Matching is by exact project and merge commit, and by substring on the
        class name and the extracted method name. Returns ("", "") when no
        ground-truth row matches.
        """
        # Read the ground truth once and cache it; this method is called once
        # per result row, so re-reading the CSV each time is wasteful.
        if not hasattr(self, '_loi_df'):
            self._loi_df = pd.read_csv("LOI.csv", delimiter=';')
        df = self._loi_df
        # Locate the last dot and the opening parenthesis to isolate the
        # simple method name from the full declaration.
        dot_index = method.rfind('.')
        paren_index = method.find('(')
        method_name = ""
        if dot_index != -1 and paren_index != -1:
            method_name = method[dot_index + 1:paren_index]
        value_LOI = ("", "")
        for project_l, class_name_l, method_l, merge_commit_l, LOI, original_sample in zip(
                df['Project'],
                df['Class Name'],
                df['Method or field declaration changed by the two merged branches'],
                df['Merge Commit'],
                df['Locally Observable Interference'],
                df['Original Sample'],
        ):
            if (project_l == project and
                    class_name in class_name_l and
                    method_name in method_l and
                    merge_commit_l == merge_commit):
                value_LOI = (LOI, original_sample)
                break  # stop at the first match
        return value_LOI

    def calculate_matrix_loi(self, columns):
        """Classify every result row against the ground truth.

        Writes 'results.csv' (merged file + LOI + Original Sample + result)
        and returns the list of per-row classifications. `columns` is unused
        but kept for backward compatibility with existing callers.
        """
        results = []
        loi_list = []
        original_sample_list = []
        info_LOI = ['project', 'class', 'original method', 'merge commit']
        # Strip stray whitespace from column names before lookups
        self.soot_results.columns = self.soot_results.columns.str.strip()
        for index, row in self.soot_results.iterrows():
            value = row['HasSourcedAndSink']
            values_LOI = [row[column] for column in info_LOI if column in row]
            (loi_actual, original_sample) = self.get_loi(*values_LOI)
            loi_list.append(loi_actual)
            original_sample_list.append(original_sample)
            result = "-"
            # str() guards against non-string cells (e.g. NaN floats) coming
            # back from either the results or the ground truth.
            predicted = str(value)
            actual = str(loi_actual)
            if "Yes" in predicted and "Yes" in actual:
                result = "TRUE POSITIVE"
            elif "No" in predicted and "No" in actual:
                result = "TRUE NEGATIVE"
            elif "No" in predicted and "Yes" in actual:
                result = "FALSE NEGATIVE"
            elif "Yes" in predicted and "No" in actual:
                result = "FALSE POSITIVE"
            results.append(result)
        df = pd.read_csv('merged_file.csv', sep=';')
        df['LOI'] = loi_list
        df['Original Sample'] = original_sample_list
        df['result'] = results
        # Persist the annotated results alongside the merged data
        new_csv_path = 'results.csv'
        df.to_csv(new_csv_path, sep=';', index=False)
        return results

    def generate_results(self):
        """Compute precision/recall/F1/accuracy and emit 'results.pdf'
        containing the metrics and a confusion-matrix image."""
        print("Generating results...")
        list_columns = self.soot_results.columns.tolist()
        result_matrix = self.calculate_matrix_loi(list_columns)
        counts = Counter(result_matrix)
        FP = counts.get('FALSE POSITIVE', 0)
        FN = counts.get('FALSE NEGATIVE', 0)
        TP = counts.get('TRUE POSITIVE', 0)
        TN = counts.get('TRUE NEGATIVE', 0)
        # Guard every ratio against a zero denominator
        sensitivity = 0 if (TP + FN) == 0 else TP / (TP + FN)
        precision = 0 if (TP + FP) == 0 else TP / (TP + FP)
        f1_score = 0 if (2 * TP + FP + FN) == 0 else 2 * TP / (2 * TP + FP + FN)
        accuracy = 0 if (FP + TP + TN + FN) == 0 else (TP + TN) / (FP + TP + TN + FN)
        df = pd.read_csv("results.csv", sep=';', encoding='latin-1', on_bad_lines='skip', low_memory=False)
        # Rows classified "-" could not be matched/decided
        fail_results = df['result'].eq("-").sum()
        total = len(df)
        # Build the PDF report
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=15)
        pdf.cell(200, 10, txt="Results for execution", ln=1, align='C')
        pdf.cell(200, 10, txt=("Precision: " + str(round(precision, 2))), ln=2, align='L')
        pdf.cell(200, 10, txt=("Recall: " + str(round(sensitivity, 2))), ln=2, align='L')
        pdf.cell(200, 10, txt=("F1 Score: " + str(round(f1_score, 2))), ln=2, align='L')
        pdf.cell(200, 10, txt=("Accuracy: " + str(round(accuracy, 2))), ln=2, align='L')
        pdf.cell(200, 10, txt=("False Positives: " + str(FP)), ln=2, align='L')
        pdf.cell(200, 10, txt=("False Negatives: " + str(FN)), ln=2, align='L')
        pdf.cell(200, 10, txt=("True Positives: " + str(TP)), ln=2, align='L')
        pdf.cell(200, 10, txt=("True Negatives: " + str(TN)), ln=2, align='L')
        pdf.cell(200, 10, txt=(f"Total produced: {total - fail_results} out of {total} units"), ln=2, align='L')
        # Render the confusion matrix as an image and embed it in the PDF
        cm = np.array([[TP, FP], [FN, TN]])
        target_names = ['Actually Positive', ' Actually Negative']
        target_names2 = ['Predicted Positive', ' Predicted Negative']
        plt.figure(figsize=(8, 6))
        plt.imshow(cm, interpolation='nearest', cmap=plt.get_cmap('Blues'))
        plt.title("Confusion Matrix", fontsize=16)
        plt.colorbar()
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, fontsize=16)
        plt.yticks(tick_marks, target_names2, fontsize=16)
        # Cells above half the max count get light text for contrast
        thresh = cm.max() / 2
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, "{:,}".format(cm[i, j]),
                     horizontalalignment="center",
                     color="yellow" if cm[i, j] > thresh else "black", fontsize=23)
        plt.tight_layout()
        plt.savefig("confusion_matrix.jpg")
        pdf.image("confusion_matrix.jpg", x=None, y=None, w=160, h=110, type='jpg', link='confusion_matrix.jpg')
        pdf.output("results.pdf")
        print("Results in results.pdf")
path_ground_truth = "LOI.csv"
path_result = 'merged_file.csv'
print("Reading analyses execution results...")
# Runs the full comparison and writes results.csv / results.pdf
ReportAnalysis(path_result, path_ground_truth)
# Collect per-unit execution times, converting ms -> s and skipping
# non-numeric placeholder values such as '-' or ' -'.
soot_results = pd.read_csv(path_result, sep=';', encoding='latin-1', on_bad_lines='skip', low_memory=False)
soot_results.columns = soot_results.columns.str.strip()
list_time_seconds = []
for index, row in soot_results.iterrows():
    value = row['Time (ms)']
    try:
        # strip() so padded placeholders like ' -' are also filtered here
        if str(value).strip() not in ['', '-', 'NaN', 'nan']:
            list_time_seconds.append(float(value) / 1000)
    except ValueError:
        # Any other non-numeric cell is simply skipped
        continue
# Helper class that renders the collected timing data via plot_by_variable
class Plotter:
    """Render a horizontal raincloud plot (boxplot + violin + jittered
    scatter) for a single series of timing values."""

    def __init__(self):
        pass

    def plot_by_variable(self, leg1, leg_x, time_list1):
        """Draw `time_list1` as a raincloud row labelled `leg1`, with the
        x-axis titled `leg_x`; saves the figure to rain_cloud_time_sdg.jpg
        and shows it."""
        fig, ax = plt.subplots(figsize=(10, 4))
        series = [time_list1]
        # Layer 1: boxplot, drawn horizontally at y=1
        box = ax.boxplot(series, patch_artist=True, vert=False, positions=[1], widths=0.6)
        for body in box['boxes']:
            body.set_facecolor('yellowgreen')
            body.set_alpha(0.4)
        # Layer 2: violin outline over the same data
        violin = ax.violinplot(series, points=500, showmeans=True, showextrema=True, showmedians=False, vert=False)
        for shape in violin['bodies']:
            shape.set_color('thistle')
            shape.set_alpha(0.6)
        # Layer 3: individual points, jittered horizontally around y=1
        points = series[0]
        baseline = np.full(len(points), 1)
        jitter = np.random.uniform(low=-0.1, high=0.1, size=len(points))
        ax.scatter(points + jitter, baseline, s=15, c='tomato', alpha=0.7, edgecolor='k')
        # Axis cosmetics
        ax.set_yticks([1])
        ax.grid(False)
        ax.set_yticklabels([leg1])
        ax.set_xlabel(leg_x)
        ax.set_title("Results by time (s)")
        plt.tight_layout()
        plt.savefig("rain_cloud_time_sdg.jpg", dpi=300)
        plt.show()
# Render the raincloud of per-unit analysis times collected above
plotter = Plotter()
plotter.plot_by_variable("SDG", "Values (seconds)", list_time_seconds)