Skip to content

Commit

Permalink
feat(hlapi): add erc20 bench
Browse files Browse the repository at this point in the history
This adds benchmarks of both latency and throughput
of 4 variants of the erc20 transfer.

It also prints the PBS count of each version.

- "whitepaper" is the variant written in the fhevm whitepaper
- "no_cmux" is similar to whitepaper, but uses a "boolean multiplication"
  instead of a cmux
- "overflow" uses an overflowing_sub to remove the need for comparison
- "safe" uses both overflowing_sub and overflowing_add to make sure both
  that the sender has enough money and that the transfer won't overflow
  the receiver's money

"overflow" has the lowest latency, and second best throughput
"no_cmux" has the second lowest latency and the best throughput
  • Loading branch information
tmontaigu committed Oct 3, 2024
1 parent 54a08af commit bd28d22
Show file tree
Hide file tree
Showing 4 changed files with 309 additions and 7 deletions.
6 changes: 6 additions & 0 deletions tfhe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,12 @@ path = "benches/high_level_api/bench.rs"
harness = false
required-features = ["integer", "internal-keycache"]

[[bench]]
name = "hlapi-erc20"
path = "benches/high_level_api/erc20.rs"
harness = false
required-features = ["integer", "internal-keycache"]

[[bench]]
name = "keygen"
path = "benches/keygen/bench.rs"
Expand Down
8 changes: 2 additions & 6 deletions tfhe/benches/high_level_api/bench.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::ops::*;

use criterion::{black_box, Criterion};
use rand::prelude::*;
use std::fmt::Write;
use std::ops::*;
use tfhe::prelude::*;
use tfhe::shortint::parameters::*;
use tfhe::{
Expand Down Expand Up @@ -33,11 +34,6 @@ where
let rhs = FheType::encrypt(rng.gen(), client_key);

let mut name = String::with_capacity(255);

write!(name, "add({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| b.iter(|| black_box(&lhs + &rhs)));
name.clear();

write!(name, "overflowing_add({type_name}, {type_name})").unwrap();
bench_group.bench_function(&name, |b| {
b.iter(|| black_box((&lhs).overflowing_add(&rhs)))
Expand Down
301 changes: 301 additions & 0 deletions tfhe/benches/high_level_api/erc20.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
use criterion::measurement::WallTime;
use criterion::{BenchmarkGroup, Criterion, Throughput};
use rand::prelude::*;
use rand::thread_rng;
use rayon::prelude::*;
use std::ops::{Add, Mul, Sub};
use tfhe::prelude::*;
use tfhe::shortint::parameters::*;
use tfhe::{set_server_key, ClientKey, CompressedServerKey, ConfigBuilder, FheBool, FheUint64};

/// Transfer as written in the original FHEvm white-paper.
///
/// A comparison checks that the sender holds at least `amount`, and the
/// result of that comparison drives two cmuxes (`if_then_else`) that pick
/// between the updated and the untouched balances.
///
/// Returns `(new_from_amount, new_to_amount)`. When the sender does not have
/// enough funds both returned balances equal the input balances.
///
/// Only the reference operators (`&FheType + &FheType`, `&FheType - &FheType`)
/// are required: the body never adds two owned values.
fn transfer_whitepaper<FheType>(
    from_amount: &FheType,
    to_amount: &FheType,
    amount: &FheType,
) -> (FheType, FheType)
where
    FheType: for<'a> FheOrd<&'a FheType>,
    FheBool: IfThenElse<FheType>,
    for<'a> &'a FheType: Add<Output = FheType> + Sub<Output = FheType>,
{
    let has_enough_funds = from_amount.ge(amount);

    // Both candidate balances are always computed; the encrypted condition
    // then selects which one is kept.
    let credited = to_amount + amount;
    let new_to_amount = has_enough_funds.if_then_else(&credited, to_amount);

    let debited = from_amount - amount;
    let new_from_amount = has_enough_funds.if_then_else(&debited, from_amount);

    (new_from_amount, new_to_amount)
}

/// Transfer that keeps the comparison but replaces the cmuxes with a
/// 'boolean' multiplication, so it is faster than the white-paper variant.
///
/// The comparison result is cast to `FheType` and multiplied with `amount`;
/// the product is then added to the receiver and subtracted from the sender
/// unconditionally. This relies on the cast of a false condition yielding
/// zero, so that an uncovered transfer moves nothing.
///
/// Returns `(new_from_amount, new_to_amount)`.
///
/// No `IfThenElse` capability is required, since no cmux is performed here.
fn transfer_no_cmux<FheType>(
    from_amount: &FheType,
    to_amount: &FheType,
    amount: &FheType,
) -> (FheType, FheType)
where
    FheType: CastFrom<FheBool> + for<'a> FheOrd<&'a FheType>,
    for<'a> &'a FheType:
        Add<Output = FheType> + Sub<Output = FheType> + Mul<FheType, Output = FheType>,
{
    let has_enough_funds = from_amount.ge(amount);

    // Amount actually moved: `amount` scaled by the casted condition.
    let effective_amount = amount * FheType::cast_from(has_enough_funds);

    let new_to_amount = to_amount + &effective_amount;
    let new_from_amount = from_amount - &effective_amount;

    (new_from_amount, new_to_amount)
}

/// Transfer that uses an overflowing subtraction to remove the need for a
/// comparison; it also uses the 'boolean' multiplication for the receiver.
///
/// `overflowing_sub` yields the sender's tentative new balance together with
/// a flag that is treated as "the sender did not have enough funds". The
/// sender's balance is selected with a cmux on that flag, while the receiver
/// gets `amount` multiplied by the negated flag cast to `FheType`.
///
/// Returns `(new_from_amount, new_to_amount)`.
///
/// No ordering (`FheOrd`) capability is required, since no comparison is
/// performed here.
fn transfer_overflow<FheType>(
    from_amount: &FheType,
    to_amount: &FheType,
    amount: &FheType,
) -> (FheType, FheType)
where
    FheType: CastFrom<FheBool>,
    FheBool: IfThenElse<FheType>,
    for<'a> &'a FheType: Add<FheType, Output = FheType>
        + OverflowingSub<&'a FheType, Output = FheType>
        + Mul<FheType, Output = FheType>,
{
    let (new_from, did_not_have_enough) = from_amount.overflowing_sub(amount);

    // Keep the original balance when the subtraction overflowed.
    let new_from_amount = did_not_have_enough.if_then_else(from_amount, &new_from);

    let had_enough_funds = !did_not_have_enough;
    let new_to_amount = to_amount + (amount * FheType::cast_from(had_enough_funds));

    (new_from_amount, new_to_amount)
}

/// Transfer guarded on both sides: it uses `overflowing_sub` to detect that
/// the sender lacks funds and `overflowing_add` to detect that the receiver's
/// balance would overflow.
///
/// If either flag is raised, both balances are left as they were; otherwise
/// the debited and credited balances are returned.
///
/// Returns `(new_from_amount, new_to_amount)`.
fn transfer_safe<FheType>(
    from_amount: &FheType,
    to_amount: &FheType,
    amount: &FheType,
) -> (FheType, FheType)
where
    for<'a> &'a FheType: OverflowingSub<&'a FheType, Output = FheType>
        + OverflowingAdd<&'a FheType, Output = FheType>,
    FheBool: IfThenElse<FheType>,
{
    let (debited, sender_underflowed) = from_amount.overflowing_sub(amount);
    let (credited, receiver_overflowed) = to_amount.overflowing_add(amount);

    // A single combined flag reverts both sides of the transfer together.
    let must_revert = sender_underflowed | receiver_overflowed;

    (
        must_revert.if_then_else(from_amount, &debited),
        must_revert.if_then_else(to_amount, &credited),
    )
}

/// Prints the PBS count of a single call to `transfer_func`.
///
/// Three random `u64` values are encrypted with `client_key` to serve as the
/// sender balance, receiver balance and transferred amount. The PBS counter
/// is reset right before the call and read right after it, and the result is
/// printed labelled with `fn_name` and `type_name`.
#[cfg(feature = "pbs-stats")]
fn print_transfer_pbs_counts<FheType, F>(
    client_key: &ClientKey,
    type_name: &str,
    fn_name: &str,
    transfer_func: F,
) where
    FheType: FheEncrypt<u64, ClientKey>,
    F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType),
{
    let mut rng = thread_rng();
    let mut encrypt_random = || FheType::encrypt(rng.gen::<u64>(), client_key);

    let from_amount = encrypt_random();
    let to_amount = encrypt_random();
    let amount = encrypt_random();

    // Reset after encrypting so that only the transfer itself is counted.
    tfhe::reset_pbs_count();
    let _ = transfer_func(&from_amount, &to_amount, &amount);
    let count = tfhe::get_pbs_count();

    println!("ERC20 transfer/{fn_name}::{type_name}: {count} PBS");
}

/// Registers a latency benchmark for one transfer implementation.
///
/// The benchmark is named `{fn_name}::{type_name}` inside the group `c`.
/// Its inputs (sender balance, receiver balance, amount) are random `u64`
/// values encrypted with `client_key` outside of the timed closure; the timed
/// closure performs exactly one call to `transfer_func` and discards the
/// result.
fn bench_transfer_latency<FheType, F>(
    c: &mut BenchmarkGroup<'_, WallTime>,
    client_key: &ClientKey,
    type_name: &str,
    fn_name: &str,
    transfer_func: F,
) where
    FheType: FheEncrypt<u64, ClientKey>,
    F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType),
{
    let bench_id = format!("{fn_name}::{type_name}");

    c.bench_function(&bench_id, |bencher| {
        let mut rng = thread_rng();
        let mut encrypt_random = || FheType::encrypt(rng.gen::<u64>(), client_key);

        let from_amount = encrypt_random();
        let to_amount = encrypt_random();
        let amount = encrypt_random();

        bencher.iter(|| {
            let _ = transfer_func(&from_amount, &to_amount, &amount);
        })
    });
}

/// Registers throughput benchmarks for one transfer implementation.
///
/// For each batch size in `[10, 100, 500]` a benchmark named
/// `{fn_name}::{type_name}::{num_elems}` is added to `group`, with the group
/// throughput set to that number of elements. The three input vectors
/// (sender balances, receiver balances, amounts) hold random `u64` values
/// encrypted with `client_key` outside of the timed closure; the timed
/// closure runs `transfer_func` on every triple through rayon parallel
/// iterators and discards the results.
fn bench_transfer_throughput<FheType, F>(
    group: &mut BenchmarkGroup<'_, WallTime>,
    client_key: &ClientKey,
    type_name: &str,
    fn_name: &str,
    transfer_func: F,
) where
    FheType: FheEncrypt<u64, ClientKey> + Send + Sync,
    F: for<'a> Fn(&'a FheType, &'a FheType, &'a FheType) -> (FheType, FheType) + Sync,
{
    let mut rng = thread_rng();

    for num_elems in [10, 100, 500] {
        group.throughput(Throughput::Elements(num_elems));
        let bench_id = format!("{fn_name}::{type_name}::{num_elems}");

        group.bench_with_input(bench_id, &num_elems, |bencher, &batch_size| {
            // Builds one vector of `batch_size` freshly encrypted random values.
            let mut encrypt_batch = || -> Vec<FheType> {
                (0..batch_size)
                    .map(|_| FheType::encrypt(rng.gen::<u64>(), client_key))
                    .collect()
            };

            let from_amounts = encrypt_batch();
            let to_amounts = encrypt_batch();
            let amounts = encrypt_batch();

            bencher.iter(|| {
                from_amounts
                    .par_iter()
                    .zip(to_amounts.par_iter())
                    .zip(amounts.par_iter())
                    .for_each(|((from_amount, to_amount), amount)| {
                        let _ = transfer_func(from_amount, to_amount, amount);
                    })
            })
        });
    }
}

/// Entry point of the ERC20 benchmark.
///
/// Picks the parameter set according to the `gpu` feature, generates a client
/// key and the matching server key, installs the server key, then runs (in
/// this order) the optional PBS count report, the latency benchmarks and the
/// throughput benchmarks for the four `FheUint64` transfer variants.
fn main() {
    // Label attached to every report line and benchmark id below.
    const TYPE_NAME: &str = "FheUint64";

    #[cfg(not(feature = "gpu"))]
    let parameters = PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64;
    #[cfg(feature = "gpu")]
    let parameters = PARAM_GPU_MULTI_BIT_MESSAGE_2_CARRY_2_GROUP_3_KS_PBS;

    let config = ConfigBuilder::with_custom_parameters(parameters).build();
    let client_key = ClientKey::generate(config);
    let compressed_server_key = CompressedServerKey::new(&client_key);

    #[cfg(not(feature = "gpu"))]
    let server_key = compressed_server_key.decompress();
    #[cfg(feature = "gpu")]
    let server_key = compressed_server_key.decompress_to_gpu();

    // Install a copy of the server key through `rayon::broadcast`, then move
    // the original into the current thread.
    // NOTE(review): presumably needed because the server key is set per
    // thread and the throughput benchmark runs on the rayon pool — confirm.
    rayon::broadcast(|_| set_server_key(server_key.clone()));
    set_server_key(server_key);

    let mut criterion = Criterion::default().sample_size(10).configure_from_args();

    // PBS counts for FheUint64.
    // One run per variant is enough: every input is encrypted, so the PBS
    // count is always the same.
    #[cfg(feature = "pbs-stats")]
    {
        print_transfer_pbs_counts(
            &client_key,
            TYPE_NAME,
            "whitepaper",
            transfer_whitepaper::<FheUint64>,
        );
        print_transfer_pbs_counts(
            &client_key,
            TYPE_NAME,
            "no_cmux",
            transfer_no_cmux::<FheUint64>,
        );
        print_transfer_pbs_counts(
            &client_key,
            TYPE_NAME,
            "overflow",
            transfer_overflow::<FheUint64>,
        );
        print_transfer_pbs_counts(
            &client_key,
            TYPE_NAME,
            "safe",
            transfer_safe::<FheUint64>,
        );
    }

    // Latency of a single FheUint64 transfer.
    {
        let mut latency_group = criterion.benchmark_group("ERC20 latency");

        bench_transfer_latency(
            &mut latency_group,
            &client_key,
            TYPE_NAME,
            "whitepaper",
            transfer_whitepaper::<FheUint64>,
        );
        bench_transfer_latency(
            &mut latency_group,
            &client_key,
            TYPE_NAME,
            "no_cmux",
            transfer_no_cmux::<FheUint64>,
        );
        bench_transfer_latency(
            &mut latency_group,
            &client_key,
            TYPE_NAME,
            "overflow",
            transfer_overflow::<FheUint64>,
        );
        bench_transfer_latency(
            &mut latency_group,
            &client_key,
            TYPE_NAME,
            "safe",
            transfer_safe::<FheUint64>,
        );

        latency_group.finish();
    }

    // Throughput of batched FheUint64 transfers.
    {
        let mut throughput_group = criterion.benchmark_group("ERC20 throughput");

        bench_transfer_throughput(
            &mut throughput_group,
            &client_key,
            TYPE_NAME,
            "whitepaper",
            transfer_whitepaper::<FheUint64>,
        );
        bench_transfer_throughput(
            &mut throughput_group,
            &client_key,
            TYPE_NAME,
            "no_cmux",
            transfer_no_cmux::<FheUint64>,
        );
        bench_transfer_throughput(
            &mut throughput_group,
            &client_key,
            TYPE_NAME,
            "overflow",
            transfer_overflow::<FheUint64>,
        );
        bench_transfer_throughput(
            &mut throughput_group,
            &client_key,
            TYPE_NAME,
            "safe",
            transfer_safe::<FheUint64>,
        );

        throughput_group.finish();
    }

    criterion.final_summary();
}
1 change: 0 additions & 1 deletion tfhe/src/integer/gpu/server_key/radix/mul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ impl CudaServerKey {
stream: &CudaStreams,
) {
let num_blocks = ct_left.as_ref().d_blocks.lwe_ciphertext_count().0 as u32;

match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => {
unchecked_mul_integer_radix_kb_assign_async(
Expand Down

0 comments on commit bd28d22

Please sign in to comment.