Skip to content

Commit

Permalink
lazy utf-8 validation
Browse files Browse the repository at this point in the history
Makes text::Reader a wrapper around &[u8] and defers
utf-8 validation until `to_str()` or `to_string()` is called.

This means that users will only need to pay the utf8 validation
cost if they actually need it. This also allows users to access
(broken) non-utf8 Text data. Previously such data was inaccessible.
  • Loading branch information
dwrensha committed Sep 2, 2023
1 parent 5a54a64 commit 2dc3cdd
Show file tree
Hide file tree
Showing 22 changed files with 365 additions and 206 deletions.
4 changes: 2 additions & 2 deletions benchmark/carsales.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ const MAKES: [&str; 5] = ["Toyota", "GM", "Ford", "Honda", "Tesla"];
const MODELS: [&str; 6] = ["Camry", "Prius", "Volt", "Accord", "Leaf", "Model S"];

pub fn random_car(rng: &mut FastRand, mut car: car::Builder) {
car.set_make(MAKES[rng.next_less_than(MAKES.len() as u32) as usize]);
car.set_model(MODELS[rng.next_less_than(MODELS.len() as u32) as usize]);
car.set_make(MAKES[rng.next_less_than(MAKES.len() as u32) as usize].into());
car.set_model(MODELS[rng.next_less_than(MODELS.len() as u32) as usize].into());

car.set_color(
(rng.next_less_than(Color::Silver as u32 + 1) as u16)
Expand Down
6 changes: 3 additions & 3 deletions benchmark/catrank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl crate::TestCase for CatRank {
snippet.push_str(WORDS[rng.next_less_than(WORDS.len() as u32) as usize]);
}

result.set_snippet(&snippet);
result.set_snippet(snippet[..].into());
}

good_count
Expand All @@ -102,10 +102,10 @@ impl crate::TestCase for CatRank {
let result = results.get(i);
let mut score = result.get_score();
let snippet = result.get_snippet()?;
if snippet.contains(" cat ") {
if snippet.to_str()?.contains(" cat ") {
score *= 10000.0;
}
if snippet.contains(" dog ") {
if snippet.to_str()?.contains(" dog ") {
score /= 10000.0;
}
scored_results.push(ScoredResult { score, result });
Expand Down
16 changes: 8 additions & 8 deletions capnp-futures/test/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,32 +35,32 @@ mod tests {
{
let mut alice = people.reborrow().get(0);
alice.set_id(123);
alice.set_name("Alice");
alice.set_email("[email protected]");
alice.set_name("Alice".into());
alice.set_email("[email protected]".into());
{
let mut alice_phones = alice.reborrow().init_phones(1);
alice_phones.reborrow().get(0).set_number("555-1212");
alice_phones.reborrow().get(0).set_number("555-1212".into());
alice_phones
.reborrow()
.get(0)
.set_type(person::phone_number::Type::Mobile);
}
alice.get_employment().set_school("MIT");
alice.get_employment().set_school("MIT".into());
}

{
let mut bob = people.get(1);
bob.set_id(456);
bob.set_name("Bob");
bob.set_email("[email protected]");
bob.set_name("Bob".into());
bob.set_email("[email protected]".into());
{
let mut bob_phones = bob.reborrow().init_phones(2);
bob_phones.reborrow().get(0).set_number("555-4567");
bob_phones.reborrow().get(0).set_number("555-4567".into());
bob_phones
.reborrow()
.get(0)
.set_type(person::phone_number::Type::Home);
bob_phones.reborrow().get(1).set_number("555-7654");
bob_phones.reborrow().get(1).set_number("555-7654".into());
bob_phones
.reborrow()
.get(1)
Expand Down
7 changes: 5 additions & 2 deletions capnp-rpc/examples/hello-world/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,14 @@ pub async fn main() -> Result<(), Box<dyn std::error::Error>> {
tokio::task::spawn_local(rpc_system);

let mut request = hello_world.say_hello_request();
request.get().init_request().set_name(&msg);
request.get().init_request().set_name(msg[..].into());

let reply = request.send().promise.await?;

println!("received: {}", reply.get()?.get_reply()?.get_message()?);
println!(
"received: {}",
reply.get()?.get_reply()?.get_message()?.to_str()?
);
Ok(())
})
.await
Expand Down
4 changes: 2 additions & 2 deletions capnp-rpc/examples/hello-world/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ impl hello_world::Server for HelloWorldImpl {
mut results: hello_world::SayHelloResults,
) -> Promise<(), ::capnp::Error> {
let request = pry!(pry!(params.get()).get_request());
let name = pry!(request.get_name());
let name = pry!(pry!(request.get_name()).to_str());
let message = format!("Hello, {name}!");

results.get().init_reply().set_message(&message);
results.get().init_reply().set_message(message[..].into());

Promise::ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion capnp-rpc/examples/pubsub/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl subscriber::Server<::capnp::text::Owned> for SubscriberImpl {
) -> Promise<(), ::capnp::Error> {
println!(
"message from publisher: {}",
pry!(pry!(params.get()).get_message())
pry!(pry!(pry!(params.get()).get_message()).to_str())
);
Promise::ok(())
}
Expand Down
5 changes: 4 additions & 1 deletion capnp-rpc/examples/pubsub/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,10 @@ pub async fn main() -> Result<(), Box<dyn std::error::Error>> {
subscriber.requests_in_flight += 1;
let mut request = subscriber.client.push_message_request();
request.get().set_message(
&format!("system time is: {:?}", ::std::time::SystemTime::now())[..])?;
(&format!("system time is: {:?}", ::std::time::SystemTime::now())
[..])
.into(),
)?;
let subscribers2 = subscribers1.clone();
tokio::task::spawn_local(request.send().promise.map(
move |r| match r {
Expand Down
10 changes: 7 additions & 3 deletions capnp-rpc/src/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ fn to_pipeline_ops(
}

fn from_error(error: &Error, mut builder: exception::Builder) {
builder.set_reason(&error.to_string());
builder.set_reason(error.to_string()[..].into());
let typ = match error.kind {
::capnp::ErrorKind::Failed => exception::Type::Failed,
::capnp::ErrorKind::Overloaded => exception::Type::Overloaded,
Expand All @@ -379,10 +379,14 @@ fn remote_exception_to_error(exception: exception::Reader) -> Error {
(Ok(exception::Type::Unimplemented), Ok(reason)) => {
(::capnp::ErrorKind::Unimplemented, reason)
}
_ => (::capnp::ErrorKind::Failed, "(malformed error)"),
_ => (::capnp::ErrorKind::Failed, "(malformed error)".into()),
};
let reason_str = match reason.to_str() {
Ok(s) => s,
Err(_) => "<malformed utf-8 in error reason>",
};
Error {
extra: format!("remote exception: {reason}"),
extra: format!("remote exception: {reason_str}"),
kind,
}
}
Expand Down
12 changes: 6 additions & 6 deletions capnp-rpc/test/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ impl test_interface::Server for TestInterface {
}
{
let mut results = results.get();
results.set_x("foo");
results.set_x("foo".into());
}
Promise::ok(())
}
Expand Down Expand Up @@ -190,7 +190,7 @@ impl test_interface::Server for TestExtends {
}
{
let mut results = results.get();
results.set_x("bar");
results.set_x("bar".into());
}
Promise::ok(())
}
Expand Down Expand Up @@ -259,7 +259,7 @@ impl test_pipeline::Server for TestPipeline {
return Err(Error::failed("expected x to equal 'foo'".to_string()));
}

results.get().set_s("bar");
results.get().set_s("bar".into());

// TODO implement better casting
results
Expand Down Expand Up @@ -352,7 +352,7 @@ impl test_more_stuff::Server for TestMoreStuff {
if response?.get()?.get_x()? != "foo" {
return Err(Error::failed("expected x to equal 'foo'".to_string()));
}
results.get().set_s("bar");
results.get().set_s("bar".into());
Ok(())
}))
}
Expand All @@ -372,7 +372,7 @@ impl test_more_stuff::Server for TestMoreStuff {
if response?.get()?.get_x()? != "foo" {
return Err(Error::failed("expected x to equal 'foo'".to_string()));
}
results.get().set_s("bar");
results.get().set_s("bar".into());
Ok(())
})
}))
Expand Down Expand Up @@ -438,7 +438,7 @@ impl test_more_stuff::Server for TestMoreStuff {
if response?.get()?.get_x()? != "foo" {
Err(Error::failed("expected X to equal 'foo'".to_string()))
} else {
results.get().set_s("bar");
results.get().set_s("bar".into());
Ok(())
}
}))
Expand Down
4 changes: 2 additions & 2 deletions capnp-rpc/test/reconnect_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl test_interface::Server for TestInterfaceImpl {
);
{
let mut results = results.get();
results.set_x(&s);
results.set_x(s[..].into());
}
if let Some(fut) = self.inner.borrow().block.as_ref() {
Promise::from_future(fut.clone())
Expand All @@ -91,7 +91,7 @@ where
F: Future<Output = capnp::Result<Response<test_interface::foo_results::Owned>>>,
{
match pool.run_until(fut) {
Ok(resp) => Ok(resp.get()?.get_x()?.to_string()),
Ok(resp) => Ok(resp.get()?.get_x()?.to_string()?),
Err(err) => Err(err),
}
}
Expand Down
28 changes: 14 additions & 14 deletions capnp-rpc/test/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub fn init_test_message(mut builder: test_all_types::Builder) {
builder.set_u_int64_field(12345678901234567890);
builder.set_float32_field(1234.5);
builder.set_float64_field(-123e45);
builder.set_text_field("foo");
builder.set_text_field("foo".into());
builder.set_data_field(b"bar");
{
let mut sub_builder = builder.reborrow().init_struct_field();
Expand All @@ -50,14 +50,14 @@ pub fn init_test_message(mut builder: test_all_types::Builder) {
sub_builder.set_u_int64_field(345678901234567890);
sub_builder.set_float32_field(-1.25e-10);
sub_builder.set_float64_field(345f64);
sub_builder.set_text_field("baz");
sub_builder.set_text_field("baz".into());
sub_builder.set_data_field(b"qux");
{
let mut sub_sub_builder = sub_builder.reborrow().init_struct_field();
sub_sub_builder.set_text_field("nested");
sub_sub_builder.set_text_field("nested".into());
sub_sub_builder
.init_struct_field()
.set_text_field("really nested");
.set_text_field("really nested".into());
}
sub_builder.set_enum_field(TestEnum::Baz);

Expand Down Expand Up @@ -105,15 +105,15 @@ pub fn init_test_message(mut builder: test_all_types::Builder) {
struct_list
.reborrow()
.get(0)
.set_text_field("x structlist 1");
.set_text_field("x structlist 1".into());
struct_list
.reborrow()
.get(1)
.set_text_field("x structlist 2");
.set_text_field("x structlist 2".into());
struct_list
.reborrow()
.get(2)
.set_text_field("x structlist 3");
.set_text_field("x structlist 3".into());
}

let mut enum_list = sub_builder.reborrow().init_enum_list(3);
Expand Down Expand Up @@ -149,7 +149,7 @@ check_test_message_impl(($typ:ident) => (
assert_eq!(12345678901234567890, reader.reborrow().get_u_int64_field());
assert_eq!(1234.5, reader.reborrow().get_float32_field());
assert_eq!(-123e45, reader.reborrow().get_float64_field());
assert_eq!("foo", &*reader.reborrow().get_text_field().unwrap());
assert_eq!("foo", reader.reborrow().get_text_field().unwrap());
assert_eq!(b"bar", &*reader.reborrow().get_data_field().unwrap());
{
let mut sub_reader = reader.get_struct_field().unwrap();
Expand All @@ -165,12 +165,12 @@ check_test_message_impl(($typ:ident) => (
assert_eq!(345678901234567890, sub_reader.reborrow().get_u_int64_field());
assert_eq!(-1.25e-10, sub_reader.reborrow().get_float32_field());
assert_eq!(345f64, sub_reader.reborrow().get_float64_field());
assert_eq!("baz", &*sub_reader.reborrow().get_text_field().unwrap());
assert_eq!("baz", sub_reader.reborrow().get_text_field().unwrap());
assert_eq!(b"qux", &*sub_reader.reborrow().get_data_field().unwrap());
{
let mut sub_sub_reader = sub_reader.reborrow().get_struct_field().unwrap();
assert_eq!("nested", &*sub_sub_reader.reborrow().get_text_field().unwrap());
assert_eq!("really nested", &*sub_sub_reader.get_struct_field().unwrap()
assert_eq!("nested", sub_sub_reader.reborrow().get_text_field().unwrap());
assert_eq!("really nested", sub_sub_reader.get_struct_field().unwrap()
.get_text_field().unwrap());
}
assert!(Ok(TestEnum::Baz) == sub_reader.reborrow().get_enum_field());
Expand Down Expand Up @@ -218,9 +218,9 @@ check_test_message_impl(($typ:ident) => (
{
let mut struct_list = sub_reader.reborrow().get_struct_list().unwrap();
assert_eq!(3, struct_list.len());
assert_eq!("x structlist 1", &*struct_list.reborrow().get(0).get_text_field().unwrap());
assert_eq!("x structlist 2", &*struct_list.reborrow().get(1).get_text_field().unwrap());
assert_eq!("x structlist 3", &*struct_list.reborrow().get(2).get_text_field().unwrap());
assert_eq!("x structlist 1", struct_list.reborrow().get(0).get_text_field().unwrap());
assert_eq!("x structlist 2", struct_list.reborrow().get(1).get_text_field().unwrap());
assert_eq!("x structlist 3", struct_list.reborrow().get(2).get_text_field().unwrap());
}

{
Expand Down
28 changes: 17 additions & 11 deletions capnp/src/private/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1730,8 +1730,7 @@ mod wire_helpers {

SegmentAnd {
segment_id,
value: text::Builder::new(slice::from_raw_parts_mut(ptr, size as usize), 0)
.expect("empty text builder should be valid utf-8"),
value: text::Builder::new(slice::from_raw_parts_mut(ptr, size as usize)),
}
}

Expand All @@ -1740,12 +1739,16 @@ mod wire_helpers {
arena: &'a mut dyn BuilderArena,
reff: *mut WirePointer,
segment_id: u32,
value: &str,
value: crate::text::Reader<'_>,
) -> SegmentAnd<text::Builder<'a>> {
let value_bytes = value.as_bytes();
// TODO make sure the string is not longer than 2 ** 29.
let mut allocation = init_text_pointer(arena, reff, segment_id, value_bytes.len() as u32);
allocation.value.push_str(value);
allocation
.value
.reborrow()
.as_bytes_mut()
.copy_from_slice(value_bytes);
allocation
}

Expand All @@ -1758,7 +1761,7 @@ mod wire_helpers {
) -> Result<text::Builder<'a>> {
let ref_target = if (*reff).is_null() {
match default {
None => return text::Builder::new(&mut [], 0),
None => return Ok(text::Builder::new(&mut [])),
Some(d) => {
let (new_ref_target, new_reff, new_segment_id) = copy_message(
arena,
Expand Down Expand Up @@ -1793,10 +1796,10 @@ mod wire_helpers {
}

// Subtract 1 from the size for the NUL terminator.
text::Builder::new(
Ok(text::Builder::with_pos(
slice::from_raw_parts_mut(ptr, (count - 1) as usize),
count - 1,
)
(count - 1) as usize,
))
}

#[inline]
Expand Down Expand Up @@ -2615,7 +2618,7 @@ mod wire_helpers {
) -> Result<text::Reader<'a>> {
if (*reff).is_null() {
match default {
None => return Ok(""),
None => return Ok("".into()),
Some(d) => {
reff = d.as_ptr() as *const WirePointer;
arena = &super::NULL_ARENA;
Expand Down Expand Up @@ -2661,7 +2664,10 @@ mod wire_helpers {
));
}

text::new_reader(slice::from_raw_parts(str_ptr, size as usize - 1))
Ok(text::Reader(slice::from_raw_parts(
str_ptr,
size as usize - 1,
)))
}

#[inline]
Expand Down Expand Up @@ -3282,7 +3288,7 @@ impl<'a> PointerBuilder<'a> {
}
}

pub fn set_text(&mut self, value: &str) {
pub fn set_text(&mut self, value: crate::text::Reader<'_>) {
unsafe {
wire_helpers::set_text_pointer(self.arena, self.pointer, self.segment_id, value);
}
Expand Down
2 changes: 1 addition & 1 deletion capnp/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ impl StructSchema {
/// Looks up a field by name. Returns `None` if no matching field is found.
pub fn find_field_by_name(&self, name: &str) -> Result<Option<Field>> {
for field in self.get_fields()? {
if field.get_proto().get_name()? == name {
if field.get_proto().get_name()?.as_bytes() == name.as_bytes() {
return Ok(Some(field));
}
}
Expand Down
Loading

0 comments on commit 2dc3cdd

Please sign in to comment.