-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgetSurrogateRes.py
146 lines (133 loc) · 7 KB
/
getSurrogateRes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import json
import os
import sys
# The two functions below and their implementation are borrowed from the ancestor labelling in label.py
def CheckAncestoralNodes(callstack):
    """Return the distinct call-frame keys reachable from a script call stack.

    When ``callstack["type"]`` is ``"script"``, the stack stored under
    ``callstack["stack"]`` is handed to ``rec_stack_checker`` and every
    distinct key it records is returned as a list (order is unspecified,
    since the keys are gathered in a set). Any other type yields an empty
    list.
    """
    if callstack["type"] == "script":
        # The helper fills this set in place with one key per call frame.
        seen_frames = set()
        rec_stack_checker(callstack["stack"], seen_frames)
        return [*seen_frames]

    # Non-script initiators carry no script frames to report.
    return []
def rec_stack_checker(stack, unique_scripts):
    """Record every call frame of ``stack`` and of its chain of parents.

    Each entry of ``stack["callFrames"]`` is added to ``unique_scripts`` as
    the string ``url@functionName@lineNumber@columnNumber``. The walk then
    moves to ``stack["parent"]`` for as long as that key is present, so the
    frames of every ancestor stack are recorded too. ``unique_scripts`` is
    modified in place and nothing is returned.
    """
    level = stack
    while True:
        for frame in level["callFrames"]:
            frame_key = "@".join(
                (
                    frame["url"],
                    frame["functionName"],
                    str(frame["lineNumber"]),
                    str(frame["columnNumber"]),
                )
            )
            unique_scripts.add(frame_key)

        # Stop once the current level has no parent stack to climb to.
        if "parent" not in level:
            break
        level = level["parent"]
# Crawl output roots. The report labels below treat "server/output" as the
# crawl taken after surrogates were applied and "server/output_surr" as the
# crawl taken before (it also holds the surrogate-generation logs).
# NOTE(review): roles inferred from the report labels - confirm with the crawler.
_AFTER_DIR = "server/output"
_BEFORE_DIR = "server/output_surr"

# Counters copied verbatim from each site's surrogate_logs.json.
_SURROGATE_LOG_KEYS = (
    "script_not_in_request_file",
    "inline_script",
    "replace_function_call_fail",
    "success",
)

# (printed label, statistic key) pairs, in output order.
_REPORT = (
    ("Average tracking requests after surrogate/website", "after_tracking_requests"),
    ("Average tracking requets before surrogate/website", "before_tracking_requests"),
    ("Average functional requests after surrogate/website", "after_functional_requests"),
    ("Average functional requests before surrogate/website", "before_functional_requests"),
    ("Average tracking functions after surrogate/website", "after_tracking_functions"),
    ("Average tracking functions before surrogate/website", "before_tracking_functions"),
    ("Average script_not_in_request_file", "script_not_in_request_file"),
    ("Average inline_script", "inline_script"),
    ("Average replace_function_call_fail", "replace_function_call_fail"),
    ("Average replace_function_call_success", "success"),
    ("Average storage access after surrogate", "after_storage_access"),
    ("Average storage access before surrogate", "before_storage_access"),
)


def _count_lines(path):
    """Return the number of lines in the text file at ``path``."""
    with open(path, "r") as handle:
        return sum(1 for _ in handle)


def _frame_url(frame_key):
    """Return the URL part of a ``url@functionName@line@column`` frame key."""
    # Split from the right: the URL itself may contain "@" characters.
    return frame_key.rsplit("@", 3)[0]


def _summarise_requests(path):
    """Tally the labelled requests stored in ``path``.

    Every line of the file is parsed as a JSON array of request records. A
    record counts as a tracking request when any of its ``easylistflag``,
    ``easyprivacylistflag`` or ``ancestorflag`` fields equals 1 and its call
    stack type is ``"script"``; every other record counts as functional.
    For tracking requests the distinct call frames are collected, leaving
    out frames whose URL is the top-level page itself (inline scripts).

    Returns a dict with ``tracking_requests``, ``functional_requests`` and
    ``tracking_functions`` (the number of distinct non-inline frames).
    """
    tracking_functions = set()
    tracking_requests = 0
    functional_requests = 0

    with open(path) as handle:
        # The file can be large, so it is streamed one line at a time.
        for line in handle:
            for record in json.loads(line):
                flagged = (
                    record["easylistflag"] == 1
                    or record["easyprivacylistflag"] == 1
                    or record["ancestorflag"] == 1
                )
                if not (flagged and record["call_stack"]["type"] == "script"):
                    functional_requests += 1
                    continue

                tracking_requests += 1
                page_url = "https://" + record["top_level_url"] + "/"
                for frame_key in CheckAncestoralNodes(record["call_stack"]):
                    # Compare only the URL part of the key; comparing the
                    # whole key would never match the page URL.
                    if _frame_url(frame_key) != page_url:
                        tracking_functions.add(frame_key)

    return {
        "tracking_requests": tracking_requests,
        "functional_requests": functional_requests,
        "tracking_functions": len(tracking_functions),
    }


def _collect_site(site):
    """Gather every statistic for one site directory.

    Returns a dict keyed by the statistic names used in ``_REPORT``. Raises
    if the surrogate log or either ``label_request.json`` is missing or
    malformed, so the caller can skip the site as a whole.
    """
    before_root = os.path.join(_BEFORE_DIR, site)
    after_root = os.path.join(_AFTER_DIR, site)

    with open(os.path.join(before_root, "surrogate_logs.json")) as handle:
        logs = json.load(handle)
    stats = {key: logs[key] for key in _SURROGATE_LOG_KEYS}

    # Storage logs are optional. They are counted only when both crawls
    # have one, so the before/after comparison stays like-for-like.
    try:
        before_storage = _count_lines(os.path.join(before_root, "cookie_storage.json"))
        after_storage = _count_lines(os.path.join(after_root, "cookie_storage.json"))
    except (OSError, UnicodeDecodeError):
        before_storage = after_storage = 0
    stats["before_storage_access"] = before_storage
    stats["after_storage_access"] = after_storage

    for prefix, root in (("after", after_root), ("before", before_root)):
        summary = _summarise_requests(os.path.join(root, "label_request.json"))
        for name, value in summary.items():
            stats[prefix + "_" + name] = value
    return stats


def main():
    """Print per-website averages comparing the crawls before and after surrogates.

    Only sites that have at least one entry in their ``surrogate`` directory
    are considered. A site whose files cannot be read or parsed is skipped
    as a whole (with a note on stderr), so every printed figure is computed
    over the same set of sites, whose size is printed as ``total-websites``.
    """
    totals = {key: 0 for _, key in _REPORT}
    site_count = 0

    for site in os.listdir(_AFTER_DIR):
        surrogate_dir = os.path.join(_BEFORE_DIR, site, "surrogate")
        try:
            # Sites without generated surrogates are not part of the study.
            if not os.path.isdir(surrogate_dir) or not os.listdir(surrogate_dir):
                continue
            stats = _collect_site(site)
        except Exception as err:
            # Best effort: one broken site must not abort the whole report,
            # but the skip is made visible instead of being swallowed.
            print("skipping", site + ":", repr(err), file=sys.stderr)
            continue

        site_count += 1
        for key in totals:
            totals[key] += stats[key]

    print("total-websites", site_count)
    for label, key in _REPORT:
        # Labels promise averages, so divide the sums by the site count.
        average = totals[key] / site_count if site_count else 0
        print(label, average)


if __name__ == "__main__":
    main()