-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathmeasure_start_up.py
executable file
·316 lines (251 loc) · 14.5 KB
/
measure_start_up.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import argparse
import datetime
import os
import re
import subprocess
import time
DESC = """Measures start up durations using multiple methodologies.
IMPORTANT: each methodology provides a different picture of start up. If you're
not sure which one to use, please ask the perf team!
IMPORTANT: some tests require manual test set up! Read the output carefully.
This system is temporary until mozperftest is ready (e.g. less noisy). As such,
we try to keep this simple and avoid implementing all the things they do.
"""
DEFAULT_ITER_COUNT = 25
PROD_FENIX = 'fenix'
PROD_FOCUS = 'focus'
PRODUCTS = [PROD_FENIX, PROD_FOCUS]
PROD_TO_CHANNEL_TO_PKGID = {}
PROD_TO_CHANNEL_TO_PKGID[PROD_FENIX] = {
'nightly': 'org.mozilla.fenix',
'beta': 'org.mozilla.firefox.beta',
'release': 'org.mozilla.firefox',
'debug': 'org.mozilla.fenix.debug'
}
PROD_TO_CHANNEL_TO_PKGID[PROD_FOCUS] = {
'nightly': 'org.mozilla.focus.nightly',
'beta': 'org.mozilla.focus.beta', # only present since post-fenix update.
'release': 'org.mozilla.focus',
'debug': 'org.mozilla.focus.debug'
}
TEST_COLD_MAIN_FF = 'cold_main_first_frame'
TEST_COLD_MAIN_RESTORE = 'cold_main_session_restore'
TEST_COLD_VIEW_FF = 'cold_view_first_frame'
TEST_COLD_VIEW_NAV_START = 'cold_view_nav_start'
TESTS = [TEST_COLD_MAIN_FF, TEST_COLD_MAIN_RESTORE, TEST_COLD_VIEW_FF, TEST_COLD_VIEW_NAV_START]
TEST_URI = 'https://example.com'
def parse_args():
parser = argparse.ArgumentParser(description=DESC, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"release_channel", choices=PROD_TO_CHANNEL_TO_PKGID[PROD_FENIX].keys(), help="the release channel to measure"
)
parser.add_argument(
"test_name", choices=TESTS, help="""the measurement methodology to use. Options:
- {cold_main_ff}: click the app icon & get duration to first frame from 'am start -W'
- {cold_main_restore}: click the app icon & get duration from logcat: START proc to PageStart
- {cold_view_ff}: send a VIEW intent & get duration to first frame from 'am start -W'
- {cold_view_nav_start}: send a VIEW intent & get duration from logcat: START proc to PageStart
Measurements to first frame are a reimplementation of
https://medium.com/androiddevelopers/testing-app-startup-performance-36169c27ee55
See https://wiki.mozilla.org/Performance/Fenix#Terminology for descriptions of cold/warm/hot and main/view""".format(
cold_main_ff=TEST_COLD_MAIN_FF, cold_main_restore=TEST_COLD_MAIN_RESTORE,
cold_view_ff=TEST_COLD_VIEW_FF, cold_view_nav_start=TEST_COLD_VIEW_NAV_START,
))
parser.add_argument("path", help="the path to save the measurement results; will abort if file exists")
# We ordinarily wouldn't specify a default because it may cause the user to get results
# from a product they didn't intend but this script lives in the fenix repo so having fenix
# as a default would be less confusing for users.
parser.add_argument("-p", "--product", default=PROD_FENIX, choices=PRODUCTS,
help="which product to get measurements from")
parser.add_argument("-c", "--iter-count", default=DEFAULT_ITER_COUNT, type=int,
help="the number of iterations to run. defaults to {}".format(DEFAULT_ITER_COUNT))
parser.add_argument("-f", "--force", action="store_true",
help="overwrite the given path rather than stopping on file existence")
parser.add_argument("--no-startup-cache", action="store_true",
help="skips delay in the warm up run to ensure the start up cache is filled")
return parser.parse_args()
def validate_args(args):
# This helps prevent us from accidentally overwriting previous measurements.
if not args.force:
if os.path.exists(args.path):
raise Exception("Given `path` unexpectedly exists: pick a new path or use --force to overwrite.")
def get_adb_shell_args():
return ['adb', 'shell']
def get_activity_manager_args():
return get_adb_shell_args() + ['am']
def force_stop(pkg_id):
args = get_activity_manager_args() + ['force-stop', pkg_id]
subprocess.run(args, check=True)
def disable_startup_profiling():
# Startup profiling sets the app to the "debug-app" which executes extra code to
# read a config file off disk that triggers the profiling. Removing the app as a
# debug app should address that issue but isn't a perfect clean up.
args = get_activity_manager_args() + ['clear-debug-app']
subprocess.run(args, check=True)
def get_component_name_for_intent(pkg_id, intent):
resolve_component_args = (get_adb_shell_args()
+ ['cmd', 'package', 'resolve-activity', '--brief']
+ intent + [pkg_id])
proc = subprocess.run(resolve_component_args, capture_output=True)
stdout = proc.stdout.splitlines()
assert len(stdout) == 2, 'expected 2 lines. Got: {}'.format(stdout)
return stdout[1]
def get_start_cmd(test_name, pkg_id, product):
intent_action_prefix = 'android.intent.action.{}'
if test_name in [TEST_COLD_MAIN_FF, TEST_COLD_MAIN_RESTORE]:
intent = [
'-a', intent_action_prefix.format('MAIN'),
'-c', 'android.intent.category.LAUNCHER',
]
elif test_name in [TEST_COLD_VIEW_FF, TEST_COLD_VIEW_NAV_START]:
intent = [
'-a', intent_action_prefix.format('VIEW'),
'-d', TEST_URI
]
# You can't launch an app without an pkg_id/activity pair. Instead of
# hard-coding the activity, which could break on app updates, we ask the
# system to resolve it for us.
component_name = get_component_name_for_intent(pkg_id, intent)
cmd = get_activity_manager_args() + [
'start-activity', # this would change to `start` on older API levels like GS5.
'-W', # wait for app launch to complete before returning
'-n', component_name
] + intent
# If it's focus, skip onboarding: this is not stateful so must be sent for every cold start intent.
# fenix is handled elsewhere (e.g. backfill.py) because it is stateful so it only needs to be sent once.
# Note: adding this extra unconditionally means there is no way to test onboarding perf. We can change
# that when we actually want to test that.
if product == PROD_FOCUS:
cmd += ['--ez', 'performancetest', 'true']
return cmd
def measure(test_name, product, pkg_id, start_cmd_args, iter_count, warmup_delay_seconds):
# Startup profiling may accidentally be left enabled and throw off the results.
# To prevent this, we disable it.
disable_startup_profiling()
# After an (re)installation, we've observed the app starts up more slowly than subsequent runs.
# As such, we start it once beforehand to let it settle.
force_stop(pkg_id)
subprocess.run(start_cmd_args, check=True, capture_output=True) # capture_output so no print to stdout.
time.sleep(warmup_delay_seconds)
measurements = []
for _ in range(0, iter_count):
force_stop(pkg_id)
time.sleep(1)
# This is only necessary for nav start tests (to ensure logcat only contains the result from the current run).
# However, it's not known to be disruptive to other tests (to first frame) so we leave it in.
subprocess.run(['adb', 'logcat', '-c'], check=True)
proc = subprocess.run(start_cmd_args, check=True, capture_output=True) # expected to wait for app to start.
measurements.append(get_measurement(test_name, product, pkg_id, proc.stdout))
return measurements
def get_measurement(test_name, product, pkg_id, stdout):
if test_name in [TEST_COLD_MAIN_FF, TEST_COLD_VIEW_FF]:
measurement = get_measurement_from_am_start_log(stdout)
elif test_name in [TEST_COLD_VIEW_NAV_START, TEST_COLD_MAIN_RESTORE]:
# We must sleep until:
# - the Navigation::Start event occurs. If we don't, the script will fail.
# - the content process start up scripts are cached. If we don't, the cache won't be populated and we won't be
# measuring perf accurately. This can take up to 14s from proc start until ScriptPreloader::CacheWriteComplete
# is called on a Moto G5, as of Feb 11 2022. There is an explicit 10s sleep in the code:
# https://searchfox.org/mozilla-central/rev/fc4d4a8d01b0e50d20c238acbb1739ccab317ebc/js/xpconnect/loader/ScriptPreloader.cpp#769).
time.sleep(17)
proc = subprocess.run(['adb', 'logcat', '-d'], check=True, capture_output=True)
measurement = get_measurement_from_nav_start_logcat(product, pkg_id, proc.stdout)
return measurement
def get_measurement_from_am_start_log(stdout):
# Sample output:
# Starting: Intent { cmp=org.mozilla.fenix/.App }
# Status: ok
# Activity: org.mozilla.fenix/.App
# ThisTime: 5662
# TotalTime: 5662
# WaitTime: 5680
# Complete
total_time_prefix = b'TotalTime: '
lines = stdout.split(b'\n')
matching_lines = [line for line in lines if line.startswith(total_time_prefix)]
if len(matching_lines) != 1:
raise Exception('Each run should only have one {} but this unexpectedly had more: '.format(total_time_prefix) +
matching_lines)
duration = int(matching_lines[0][len(total_time_prefix):])
return duration
def get_measurement_from_nav_start_logcat(product, pkg_id, logcat_bytes):
# Relevant lines:
# 05-18 14:32:47.366 1759 6003 I ActivityManager: START u0 {act=android.intent.action.VIEW dat=https://example.com/... typ=text/html flg=0x10000000 cmp=org.mozilla.fenix/.IntentReceiverActivity} from uid 2000 # noqa
# 05-18 14:32:47.402 1759 6003 I ActivityManager: Start proc 9007:org.mozilla.fenix/u0a170 for activity org.mozilla.fenix/.IntentReceiverActivity # noqa
# 05-18 14:32:50.809 9007 9007 I GeckoSession: handleMessage GeckoView:PageStart uri=
# 05-18 14:32:50.821 9007 9007 I GeckoSession: handleMessage GeckoView:PageStop uri=null
def line_to_datetime(line):
date_str = ' '.join(line.split(' ')[:2]) # e.g. "05-18 14:32:47.366"
# strptime needs microseconds. logcat outputs millis so we append zeroes
date_str_with_micros = date_str + '000'
return datetime.datetime.strptime(date_str_with_micros, '%m-%d %H:%M:%S.%f')
def get_proc_start_datetime():
# This regex may not work on older versions of Android: we don't care
# yet because supporting older versions isn't in our requirements.
proc_start_re = re.compile(r'ActivityManager: Start proc \d+:{}/'.format(pkg_id))
proc_start_lines = [line for line in lines if proc_start_re.search(line)]
assert len(proc_start_lines) == 1
return line_to_datetime(proc_start_lines[0])
def get_page_start_datetime():
page_start_re = re.compile('GeckoSession: handleMessage GeckoView:PageStart uri=')
page_start_lines = [line for line in lines if page_start_re.search(line)]
page_start_line_count = len(page_start_lines)
page_start_assert_msg = 'found len=' + str(page_start_line_count)
# In focus versions <= v8.8.2, it logs 3 PageStart lines and these include actual uris.
# We need to handle our assertion differently due to the different line count.
#
# In focus versions >= v8.8.3, this measurement is broken because the logcat were removed.
is_old_version_of_focus = 'about:blank' in page_start_lines[0] and product == PROD_FOCUS
if is_old_version_of_focus:
assert page_start_line_count == 3, page_start_assert_msg # Lines: about:blank, target URL, target URL.
else:
assert page_start_line_count == 2, page_start_assert_msg # Lines: about:blank, target URL.
return line_to_datetime(page_start_lines[1]) # 2nd PageStart is for target URL.
logcat = logcat_bytes.decode('UTF-8') # Easier to work with and must for strptime.
lines = logcat.split('\n')
# We measure the time from process start, rather than the earlier START
# activity line, because I assume we have no control over the duration
# before our process starts. If we wanted to put in more time, we could
# double-check this assumption by seeing what values `am start -W` returns
# compared to the time stamps.
#
# For total_seconds(), values < 1s are expressed in decimal (e.g. .001 is 1ms).
elapsed_seconds = (get_page_start_datetime() - get_proc_start_datetime()).total_seconds()
elapsed_millis = round(elapsed_seconds * 1000)
return elapsed_millis
def save_measurements(path, measurements):
with open(path, 'w') as f:
for measurement in measurements:
f.write(str(measurement) + '\n')
def print_preface_text(test_name):
print("To analyze the results, use this script (we recommend using the median):" +
"\nhttps://github.com/mozilla-mobile/perf-tools/blob/master/analyze_durations.py")
if test_name in [TEST_COLD_MAIN_FF]:
print("\nWARNING: you may wish to clear the onboarding experience manually.")
elif test_name in [TEST_COLD_VIEW_FF, TEST_COLD_VIEW_NAV_START]:
print("\nWARNING: you may wish to reduce the number of open tabs when starting this test")
print("as this test may leave many additional tabs open which could impact the results.")
elif test_name in [TEST_COLD_MAIN_RESTORE]:
print("\nWARNING: ensure at least one tab is opened when starting this test.")
def get_warmup_delay_seconds(no_startup_cache):
# We've been told the start up cache is populated ~60s after first start up. As such, it's likely
# most users start the app with it so, if we want to measure the representative user experience,
# we should measure start up with the start up cache populated. We wait to ensure the cache is
# populated during the warm up run. If the args say we shouldn't wait, we only wait a short
# duration that is roughly visual completeness.
return 5 if no_startup_cache else 60
def main():
args = parse_args()
validate_args(args)
pkg_id = PROD_TO_CHANNEL_TO_PKGID[args.product][args.release_channel]
start_cmd = get_start_cmd(args.test_name, pkg_id, args.product)
print_preface_text(args.test_name)
measurements = measure(args.test_name, args.product, pkg_id, start_cmd, args.iter_count,
get_warmup_delay_seconds(args.no_startup_cache))
save_measurements(args.path, measurements)
if __name__ == '__main__':
main()