-
Notifications
You must be signed in to change notification settings - Fork 55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
performance: calling python from wasm is too slow #140
Comments
here is the code import time
import wasmtime as wa
from math import gcd as math_gcd
from functools import partial
from wasmtime import Store, Module, Instance
def python_gcd(x, y):
while y:
x, y = y, x % y
return abs(x)
store = Store()
module = Module(
store.engine,
"""
(module
(func $log_i (import "env" "log_i") (param i32))
(func $math_gcd (import "env" "math_gcd") (param i32 i32) (result i32))
(func $python_gcd (import "env" "python_gcd") (param i32 i32) (result i32))
(func $gcd (export "gcd") (param i32 i32) (result i32)
(local i32)
block ;; label = @1
block ;; label = @2
local.get 0
br_if 0 (;@2;)
local.get 1
local.set 2
br 1 (;@1;)
end
loop ;; label = @2
local.get 1
local.get 0
local.tee 2
i32.rem_u
local.set 0
local.get 2
local.set 1
local.get 0
br_if 0 (;@2;)
end
end
local.get 2
)
(func $wasm_gcd_loop (export "wasm_gcd_loop") (param $n i32) (param $a i32) (param $b i32) (result i32)
(local $i i32)
(local $r i32)
;; i=0
i32.const 0
local.set $i
block $y_block
loop $y_loop
;; if (i>=n) break
local.get $i
local.get $n
i32.ge_s
br_if $y_block
;; loop body
local.get $a
local.get $b
call $gcd
local.set $r
;;;; log_i(i)
;;local.get $i
;;call $log_i
;;;; log_i(r)
;;local.get $r
;;call $log_i
;; ++i
local.get $i
i32.const 1
i32.add
local.set $i
br $y_loop
end
end
local.get $r
)
(func $math_gcd_loop (export "math_gcd_loop") (param $n i32) (param $a i32) (param $b i32) (result i32)
(local $i i32)
(local $r i32)
;; i=0
i32.const 0
local.set $i
block $y_block
loop $y_loop
;; if (i>=n) break
local.get $i
local.get $n
i32.ge_s
br_if $y_block
;; loop body
local.get $a
local.get $b
call $math_gcd
local.set $r
;;;; log_i(i)
;;local.get $i
;;call $log_i
;;;; log_i(r)
;;local.get $r
;;call $log_i
;; ++i
local.get $i
i32.const 1
i32.add
local.set $i
br $y_loop
end
end
local.get $r
)
(func $python_gcd_loop (export "python_gcd_loop") (param $n i32) (param $a i32) (param $b i32) (result i32)
(local $i i32)
(local $r i32)
;; i=0
i32.const 0
local.set $i
block $y_block
loop $y_loop
;; if (i>=n) break
local.get $i
local.get $n
i32.ge_s
br_if $y_block
;; loop body
local.get $a
local.get $b
call $python_gcd
local.set $r
;;;; log_i(i)
;;local.get $i
;;call $log_i
;;;; log_i(r)
;;local.get $r
;;call $log_i
;; ++i
local.get $i
i32.const 1
i32.add
local.set $i
br $y_loop
end
end
local.get $r
)
)
""",
)
def log_i(i):
print("** log_i: ", i)
linker = wa.Linker(store.engine)
linker.define_func("env", "log_i", wa.FuncType([wa.ValType.i32()], []), log_i)
linker.define_func("env", "math_gcd", wa.FuncType([wa.ValType.i32(), wa.ValType.i32()], [wa.ValType.i32()]), math_gcd)
linker.define_func("env", "python_gcd", wa.FuncType([wa.ValType.i32(), wa.ValType.i32()], [wa.ValType.i32()]), python_gcd)
instance = linker.instantiate(store, module)
gcd_store = instance.exports(store)["gcd"]
wasm_gcd = partial(gcd_store, store)
wasm_gcd_loop = partial(instance.exports(store)["wasm_gcd_loop"], store)
math_gcd_loop = partial(instance.exports(store)["math_gcd_loop"], store)
python_gcd_loop = partial(instance.exports(store)["python_gcd_loop"], store)
a = 16516842
b = 154654684
N = 1_000
print(math_gcd(a, b), python_gcd(a, b), wasm_gcd(a, b))
print(wasm_gcd_loop(N, a, b))
print(math_gcd_loop(N, a, b))
print(python_gcd_loop(N, a, b))
by_name = locals()
print("*** python loop benchmark: ")
for name in "math_gcd", "python_gcd", "wasm_gcd":
gcdf = by_name[name]
start_time = time.perf_counter()
for _ in range(N):
g = gcdf(a, b)
total_time = time.perf_counter() - start_time
print(total_time, "\t\t", name)
print("*** wasm loop benchmark: ")
for name in "wasm_gcd_loop", "math_gcd_loop", "python_gcd_loop":
gcdf = by_name[name]
start_time = time.perf_counter()
g = gcdf(N, a, b)
total_time = time.perf_counter() - start_time
print(total_time, "\t\t", name)
from cProfile import Profile
pr=Profile()
pr.enable()
print(math_gcd_loop(N, a, b))
pr.disable()
pr.print_stats()
pr.print_stats('cumulative')
pr.print_stats('calls') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
as mentioned in parent performance ticket
#96 (comment)
we have a bottleneck when python is called from inside wasm
here is a minimum reproducible example
unlike #96 which is to be solved by #139
the loop here lives inside wasm and the functions are in python, C (python's math) and wasm
this issue is not solved by #139
experiment setup
I've logged i and logged gcd to make sure loop is working fine using
log_i()
I've removed the logging
I've imported C gcd from python's math package
I've imported python gcd
and did a loop inside wasm calling the 3 versions
results
conclusion
when wasm loop call's wasm function it was too fast, it was 100x faster than calling C's gcd
since C's gcd is faster than wasm gcd as shown in other benchmark, then this bottleneck is wrapping python part.
profiling
here is how we profile calling C from wasm
using the production 7.0.0, the obvious bootleneck is in:
_func.py:172(trampoline)
_value.py:129(_convert)
by time
The text was updated successfully, but these errors were encountered: