# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
"""
This script can be used to generate a report of the amount of
machine time used during all backfills between a start and end
date.
"""
import argparse
import os
import json
import re
import shutil
import signal
import threading
import time
import urllib
try:
from urllib.parse import urlencode
from urllib.request import urlopen, urlretrieve
except ImportError:
from urllib import urlencode, urlretrieve
from urllib2 import urlopen
DEBUG = True
TOTAL_REQUESTS = 0
MAX_REQUESTS = 50
OVERRIDE = False
TREEHERDER_LINK = "https://treeherder.mozilla.org/#/jobs?repo={}&tier=1%2C2%2C3&revision={}&searchStr={}"
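# Formatted with (repo branch, revision, search string) when reporting
# long-running tasks below.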
BACKFILL_CACHE = os.path.join(os.path.expanduser("~"), ".backfill-cache")
"""
`where` clause will be created in the script.
It will be similar to this:
"where": {"and": [
{"eq":{"job.type.symbol":"Bk"}},
{"gte": {"date": STARTTIME},
{"lt": {"date": ENDTIME},
]}
All TIME values must follow the standards laid out in:
https://github.com/mozilla/ActiveData/blob/dev/docs/jx_time.md
"""
AD_BACKFILL_QUERY = {
"from": "treeherder",
"where": None,
"select": [
"build.revision",
"job.details.url",
"repo.branch.name",
"run.taskcluster.id",
],
"limit": 10000,
}
"""
This query is used to determine the owners of the backfill
request so that we can filter backfills based on owners.
To get specific tasks, this condition will be added to the
query: {"in":{"task.id": [<BACKFILL_TASK_IDS>]}},
"""
AD_BK_OWNER_QUERY = {
"from": "task",
"where": {
"and": [
{"eq": {"treeherder.symbol": "Bk"}},
{"in": {"task.tags.name": ["action.context.clientId"]}},
]
},
"select": ["task.tags.value", "task.id"],
"limit": 10000,
}
"""
The `where` clause will be created in the script.
It will be similar to this:
"where": {"and": [
# Make sure action.duration is positive
{"gt":{"action.duration":0}},
{"in": {"run.taskcluster.id": [TASKIDS]}}
]}
"""
AD_TIME_QUERY = {
"from": "treeherder",
"where": None,
"select": [
{"name": "action.duration", "value": "action.duration"},
# The rest of these are used to provide
# additional information to the user.
{"name": "build.revision", "value": "build.revision"},
{"name": "repo.branch.name", "value": "repo.branch.name"},
{"name": "run.key", "value": "run.key"},
{"name": "job.type.name", "value": "job.type.name"},
{"name": "job.type.group.symbol", "value": "job.type.group.symbol"},
{"name": "job.type.symbol", "value": "job.type.symbol"},
],
"limit": 10000,
}
def backfill_parser():
"""
Parser for the backfill generation script.
"""
    parser = argparse.ArgumentParser(
        description="This tool can be used to generate a report of how much "
        "machine time is being consumed by backfills."
    )
parser.add_argument(
"--start-date",
type=str,
default="",
help="The start date for where to start looking for backfilled jobs. "
"Defaults to 1 year back.",
)
    parser.add_argument(
        "--end-date",
        type=str,
        default="",
        help="The end date for where to stop looking for backfilled jobs. "
        "Defaults to now.",
    )
parser.add_argument(
"--branches",
type=str,
nargs="+",
default=["autoland"],
help="The branch to find backfilled jobs in.",
)
parser.add_argument(
"--owners",
type=str,
nargs="+",
default=[],
help="The owners to search for in backfilled tasks.",
)
parser.add_argument(
"--symbols",
type=str,
nargs="+",
default=[],
help="The task group symbols to search for.",
)
parser.add_argument(
"--talos",
action="store_true",
default=False,
help="Set this to search for talos backfilled tasks.",
)
parser.add_argument(
"--raptor",
action="store_true",
default=False,
help="Set this to search for raptor backfilled tasks.",
)
parser.add_argument(
"--browsertime",
action="store_true",
default=False,
help="Set this to search for browsertime backfilled tasks.",
)
parser.add_argument(
"--awsy",
action="store_true",
default=False,
help="Set this to search for AWSY backfilled tasks.",
)
parser.add_argument(
"--task-name-regex",
type=str,
default="",
help="A regular expression used to find a particular set of tasks (using run.key).",
)
parser.add_argument(
"--additional-conditions",
type=str,
nargs="+",
default=[],
help="Additional conditions for an ActiveData `where` clause. Used when finding the "
"backfilled task times. Expected a dict per entry in this command, i.e. "
'{"eq": {"job.type.group.symbol": "Btime"}}',
)
parser.add_argument(
"--find-long-tasks",
action="store_true",
default=False,
help="Outputs all long running tasks, along with their treeherder links. "
"A long running task is defined as one that exceeds x2 the run time of the "
"average task.",
)
parser.add_argument(
"--no-cache",
action="store_true",
default=False,
help="This will disable caching the downloaded data for future runs.",
)
parser.add_argument(
"--clobber-cache",
action="store_true",
default=False,
help="This will delete the current cache.",
)
return parser
def debug(msg):
"""Helper function for debug prints"""
if DEBUG:
print(msg)
def get_json(url, params=None):
"""
Gets a JSON artifact from a given URL.
"""
if params is not None:
url += "?" + urlencode(params)
r = urlopen(url).read().decode("utf-8")
return json.loads(r)
def open_json(path):
"""
Opens a JSON file and returns the data.
"""
data = {}
with open(path, "r") as f:
data = json.load(f)
return data
def write_json(data, path):
"""
Writes the given data at the given path.
"""
with open(path, "w") as f:
json.dump(data, f)
def query_activedata(query_json):
"""
Used to run queries on active data.
"""
active_data_url = "http://activedata.allizom.org/query"
req = urllib.request.Request(active_data_url)
req.add_header("Content-Type", "application/json")
jsondata = json.dumps(query_json)
jsondataasbytes = jsondata.encode("utf-8")
req.add_header("Content-Length", len(jsondataasbytes))
print("Querying Active-data...")
response = urllib.request.urlopen(req, jsondataasbytes)
print("Status:" + str(response.getcode()))
data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"]
return data
def get_owner_information(owners, taskids):
"""
Uses the given task IDs to determine the owner or
person who created them.
"""
filter_by_owners = {}
AD_BK_OWNER_QUERY["where"]["and"].append(
{"in": {"task.id": taskids}},
)
owner_data = query_activedata(AD_BK_OWNER_QUERY)
for c, taskid in enumerate(owner_data["task.id"]):
possible_owners = [o for o in owner_data["task.tags.value"][c] if o]
if not possible_owners:
# Missing owner information
continue
        # There should only ever be one owner. If
        # any of the requested owners matches it,
        # then we keep this task and download
        # artifacts from it.
task_owner = possible_owners[0]
for owner in owners:
if owner in task_owner:
filter_by_owners[taskid] = True
break
return filter_by_owners
def generate_backfill_report(
start_date="",
end_date="",
task_name_regex="",
talos=False,
raptor=False,
browsertime=False,
awsy=False,
symbols=[],
branches=["autoland"],
find_long_tasks=False,
owners=[],
additional_conditions=[],
no_cache=False,
clobber_cache=False,
):
"""
This generation works as follows:
(i): Find all backfill tasks between the given dates.
If no dates are given, then we look over the past year.
If only a start date is given, then we look from then to now.
If only an end date is given, then we look from 1 year ago up
to the end date.
(ii): Using the backfill tasks that were found, download all
the to-run-<PUSH_ID>.json files and label-to-taskid-<PUSH_ID>.json
files.
(iii): For each to-run file, find the tests that are
being retriggered and their taskid. Then, obtain the sum
of the runtime for all these taskids.
"""
if clobber_cache and os.path.exists(BACKFILL_CACHE):
shutil.rmtree(BACKFILL_CACHE)
if no_cache:
print("Not caching downloaded data")
else:
print("Downloaded data will be cached here: %s" % BACKFILL_CACHE)
os.makedirs(BACKFILL_CACHE, exist_ok=True)
conditions = [
{"eq": {"job.type.symbol": "Bk"}},
{"in": {"repo.branch.name": branches}},
]
where_clause = {"and": conditions}
# Setup the time range
if end_date:
conditions.append({"lt": {"action.start_time": {"date": str(end_date)}}})
if start_date:
conditions.append({"gte": {"action.start_time": {"date": str(start_date)}}})
else:
# Restrict to 1 year back
print("Setting start-date as 1 year ago. This query will take some time...")
conditions.append({"gte": {"action.start_time": {"date": "today-year"}}})
if start_date or end_date:
print(
"Date specifications detected. "
"Ensure that they follow these guidelines: "
"https://github.com/mozilla/ActiveData/blob/dev/docs/jx_time.md"
)
# Query active data for the backfilled tasks
AD_BACKFILL_QUERY["where"] = where_clause
debug(json.dumps(AD_BACKFILL_QUERY, indent=4))
data = query_activedata(AD_BACKFILL_QUERY)
if "build.revision" not in data:
print("No backfill tasks found for the given time range")
return
debug("Analyzing backfills performed on the revisions: %s" % data["build.revision"])
# Find the tasks that are specific to the requested owners
filter_by_owners = {}
if owners:
# Get the owners of the backfills if needed
print("Getting backfill task owner information...")
filter_by_owners = get_owner_information(owners, data["run.taskcluster.id"])
# Go through all the URL groupings and match up data from each PUSHID
alltaskids = []
total_groups = len(data["job.details.url"])
    matcher = re.compile(r"-([\d]+)\.json")
# cache_mappings = {}
# cache_mapping_path = os.path.join(BACKFILL_CACHE, "mappings.json")
# if os.path.exists(cache_mapping_path):
# with open(cache_mapping_path, "r") as f:
# cache_mappings = json.load(f)
for c, url_grouping in enumerate(data["job.details.url"]):
if not url_grouping:
continue
if filter_by_owners and data["run.taskcluster.id"][c] not in filter_by_owners:
continue
print(
"\nProcessing %s from %s (%s/%s)"
% (
data["build.revision"][c],
data["repo.branch.name"][c],
(c + 1),
total_groups,
)
)
push_data = {}
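        # push_data maps each PUSH_ID found in the artifact URLs to a dict of
        # {"to-run": {...}, "label-to-taskid": {...}}, where each entry holds
        # the artifact "url", its downloaded "data", and (when caching is
        # enabled) a "cache-file" path.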
# Gather groupings
for url in url_grouping:
if not url:
continue
matches = matcher.findall(url)
if not matches:
continue
# Only one match should be found
if len(matches) > 1:
print("Bad URL found: %s" % url)
continue
pushid = matches[0]
if pushid not in push_data:
push_data[pushid] = {}
fname = url.split("/")[-1]
orig_fname = fname
if "label-to-taskid" in fname:
fname = "label-to-taskid"
elif "to-run-" in fname:
fname = "to-run"
else:
# We don't care about these files
continue
push_data[pushid][fname] = {"url": url, "data": None}
if not no_cache:
# Setup the cache file name
cache_file = "%s_%s" % (data["run.taskcluster.id"][c], orig_fname)
if not cache_file.endswith(".json"):
cache_file = cache_file + ".json"
push_data[pushid][fname]["cache-file"] = os.path.join(
BACKFILL_CACHE, cache_file
)
# Setup a signal handler for simple timeouts
def handler(signum, frame):
raise Exception("Timed out.")
signal.signal(signal.SIGALRM, handler)
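        # Note: Python delivers signal handlers in the main thread, so a
        # SIGALRM set by a download thread surfaces in the main thread (e.g.
        # during the join() calls below) rather than inside the worker thread.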
def download(url, storage):
"""Downloads a JSON through a thread"""
global TOTAL_REQUESTS
global MAX_REQUESTS
global OVERRIDE
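            # Crude throttle: block until fewer than MAX_REQUESTS downloads
            # are in flight (OVERRIDE skips the wait entirely).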
while TOTAL_REQUESTS >= MAX_REQUESTS and not OVERRIDE:
time.sleep(0.5)
TOTAL_REQUESTS += 1
print("Downloading %s" % url)
try:
# Timeout after 20 seconds
signal.alarm(20)
storage["data"] = get_json(url)
if "cache-file" in storage:
write_json(storage["data"], storage["cache-file"])
except Exception:
pass
TOTAL_REQUESTS -= 1
# Download all the artifacts - batch them in case
# we are looking very far back.
threads = []
for _, push_files in push_data.items():
for file, file_info in push_files.items():
if not no_cache:
cached = file_info["cache-file"]
if os.path.exists(cached):
file_info["data"] = open_json(cached)
continue
t = threading.Thread(
target=download, args=(file_info["url"], file_info)
)
t.daemon = True
t.start()
threads.append(t)
for t in threads:
try:
t.join()
except Exception:
pass
# Cancel the timeout alarm
signal.alarm(0)
# Get all of the TASKIDs of the backfilled jobs
taskids = []
for pid, push_files in push_data.items():
tasks_running = push_files["to-run"]["data"]
labeled_tasks = push_files["label-to-taskid"]["data"]
if not tasks_running or not labeled_tasks:
print("Skipping push %s, could not obtain required artifacts" % pid)
continue
# Artifacts don't exist - skip them
if "code" in tasks_running or "code" in labeled_tasks:
print("Artifacts don't exist in push %s" % pid)
continue
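            # The to-run artifact lists the labels of tasks scheduled by this
            # backfill; map each label to its task ID via label-to-taskid.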
taskids.extend([labeled_tasks[taskname] for taskname in tasks_running])
alltaskids.extend(taskids)
conditions = [
{"gt": {"action.duration": 0}},
{"in": {"run.taskcluster.id": alltaskids}},
]
# Setup additional settings
if talos:
symbols.append("T")
if raptor:
symbols.append("Rap")
if browsertime:
symbols.append("Btime")
if awsy:
symbols.append("SY")
if symbols:
conditions.append({"in": {"job.type.group.symbol": symbols}})
    if task_name_regex:
        conditions.append({"regex": {"run.key": task_name_regex}})
if additional_conditions:
conditions.extend(additional_conditions)
where_clause = {"and": conditions}
AD_TIME_QUERY["where"] = where_clause
debug(json.dumps(AD_TIME_QUERY, indent=4))
data = query_activedata(AD_TIME_QUERY)
if "action.duration" not in data:
print("No backfilled tasks found matching the given criteria")
return
if DEBUG:
print("\nAll times:")
print(data["action.duration"])
print("")
    total = sum(data["action.duration"])
    avgtime = total / len(data["action.duration"])
print("Average task time: %s" % avgtime)
    if find_long_tasks:
        print("Searching for tasks that exceed 2x this value...")
printed = False
for c, i in enumerate(data["action.duration"]):
if i > avgtime * 2:
if not printed:
print("Long running tasks:")
printed = True
url = TREEHERDER_LINK.format(
data["repo.branch.name"][c],
data["build.revision"][c],
data["job.type.name"][c],
)
print("Test %s: %s" % (data["run.key"][c], url))
print(" Time: %s\n" % i)
print("Total runtime of backfilled tasks: %s hours" % (int(total) / 3600))
def main():
    args = backfill_parser().parse_args()
    generate_backfill_report(
start_date=args.start_date,
end_date=args.end_date,
task_name_regex=args.task_name_regex,
owners=args.owners,
talos=args.talos,
raptor=args.raptor,
browsertime=args.browsertime,
awsy=args.awsy,
symbols=args.symbols,
branches=args.branches,
find_long_tasks=args.find_long_tasks,
additional_conditions=args.additional_conditions,
no_cache=args.no_cache,
clobber_cache=args.clobber_cache,
)
if __name__ == "__main__":
main()