Skip to content

Commit

Permalink
add an implementation of serialized_length_from_bytes() where the inp…
Browse files Browse the repository at this point in the history
…ut is not trusted. i.e. back-references are validated. Rename the existing function to indicate that the input is trusted (i.e. not fully validated)
  • Loading branch information
arvidn committed Dec 19, 2023
1 parent 0cfbd41 commit a868f47
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 45 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,7 @@ criterion = "0.5.1"
[[bench]]
name = "run-program"
harness = false

[[bench]]
name = "deserialize"
harness = false
Binary file added benches/block_af9c3d98.bin
Binary file not shown.
58 changes: 58 additions & 0 deletions benches/deserialize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use clvmr::allocator::Allocator;
use clvmr::serde::node_from_bytes;
use clvmr::serde::node_from_bytes_backrefs;
use clvmr::serde::serialized_length_from_bytes;
use clvmr::serde::serialized_length_from_bytes_trusted;
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
use std::include_bytes;
use std::time::Instant;

/// Benchmarks the CLVM deserialization entry points against the bundled
/// `block_af9c3d98.bin` input, in the Criterion group "deserialize".
///
/// The two length-only functions need no per-iteration setup, so they are
/// timed with `Bencher::iter`, which measures the closure and black-boxes its
/// return value.
///
/// The two allocator-based parsers must start every iteration from the same
/// allocator state. `Bencher::iter` would include the checkpoint restore in
/// the measurement (it times the whole closure and ignores a returned
/// `Duration`), so those cases use `Bencher::iter_custom` and report only the
/// time spent inside the parse call itself.
fn deserialize_benchmark(c: &mut Criterion) {
    use std::time::Duration;

    let block = include_bytes!("block_af9c3d98.bin");

    let mut group = c.benchmark_group("deserialize");
    group.sample_size(10);
    group.sampling_mode(SamplingMode::Flat);

    group.bench_function("serialized_length_from_bytes", |b| {
        b.iter(|| serialized_length_from_bytes(block))
    });

    group.bench_function("serialized_length_from_bytes_trusted", |b| {
        b.iter(|| serialized_length_from_bytes_trusted(block))
    });

    let mut a = Allocator::new();
    let iter_checkpoint = a.checkpoint();

    group.bench_function("node_from_bytes_backrefs", |b| {
        b.iter_custom(|iters| {
            let mut total = Duration::ZERO;
            for _ in 0..iters {
                // untimed setup: roll the allocator back to its initial state
                a.restore_checkpoint(&iter_checkpoint);
                let start = Instant::now();
                let _ = node_from_bytes_backrefs(&mut a, block);
                total += start.elapsed();
            }
            total
        })
    });

    group.bench_function("node_from_bytes", |b| {
        b.iter_custom(|iters| {
            let mut total = Duration::ZERO;
            for _ in 0..iters {
                // untimed setup: roll the allocator back to its initial state
                a.restore_checkpoint(&iter_checkpoint);
                let start = Instant::now();
                let _ = node_from_bytes(&mut a, block);
                total += start.elapsed();
            }
            total
        })
    });

    group.finish();
}

criterion_group!(deserialize, deserialize_benchmark);
criterion_main!(deserialize);
1 change: 0 additions & 1 deletion benches/run-program.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,6 @@ fn run_program_benchmark(c: &mut Criterion) {
] {
a.restore_checkpoint(&test_case_checkpoint);

println!("benchmark/{test}.hex");
let prg = read_to_string(format!("benchmark/{test}.hex"))
.expect("failed to load benchmark program");
let prg = hex::decode(prg.trim()).expect("invalid hex in benchmark program");
Expand Down
6 changes: 6 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ path = "fuzz_targets/serialized_length.rs"
test = false
doc = false

[[bin]]
name = "fuzz_serialized_length_trusted"
path = "fuzz_targets/serialized_length_trusted.rs"
test = false
doc = false

[[bin]]
name = "fuzz_deserialize"
path = "fuzz_targets/deserialize.rs"
Expand Down
29 changes: 24 additions & 5 deletions fuzz/fuzz_targets/serialized_length.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,31 @@
#![no_main]
use clvmr::serde::node_from_bytes_backrefs;
use clvmr::serde::node_to_bytes;
use clvmr::serde::serialized_length_from_bytes;
use clvmr::Allocator;
use libfuzzer_sys::fuzz_target;

// Fuzz target for `serialized_length_from_bytes`: the same input is handed to
// both `serialized_length_from_bytes` and `node_from_bytes_backrefs`, and the
// target panics when one of them accepts the input while the other rejects it.
//
// NOTE(review): this span is taken from a rendered diff, so lines of the
// previous implementation (the `let _len = match ... { Err(_) => return, Ok(r) => r }`
// form) appear interleaved with the new one — confirm against the actual file.
fuzz_target!(|data: &[u8]| {
let _len = match serialized_length_from_bytes(data) {
Err(_) => {
return;
let len = serialized_length_from_bytes(data);

// a fresh allocator to hold the tree built by the full deserializer
let mut allocator = Allocator::new();
let program = node_from_bytes_backrefs(&mut allocator, data);

// both functions must agree on whether the input is acceptable
match (len, program) {
(Ok(_), Ok(_)) => {
// this is expected
}
(Err(_), Err(_)) => {
// this is expected
}
(Ok(len), Err(e)) => {
panic!("discrepancy between serialized_length and node_from_bytes_backrefs().\n {len}\n{e}");
}
(Err(e), Ok(program)) => {
// include the re-serialized program in the panic message for diagnosis
panic!(
"discrepancy between serialized_length and node_from_bytes_backrefs().\n {e}\n{:?}",
node_to_bytes(&allocator, program)
);
}
Ok(r) => r,
};
}
});
12 changes: 12 additions & 0 deletions fuzz/fuzz_targets/serialized_length_trusted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#![no_main]
use clvmr::serde::serialized_length_from_bytes_trusted;
use libfuzzer_sys::fuzz_target;

// Fuzz target for `serialized_length_from_bytes_trusted`: feeds the raw fuzzer
// input to the function. The result is deliberately ignored in both the `Ok`
// and the `Err` case; nothing is asserted about the returned length.
fuzz_target!(|data: &[u8]| {
    let _ = serialized_length_from_bytes_trusted(data);
});
4 changes: 3 additions & 1 deletion src/serde/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ pub use de_br::node_from_bytes_backrefs;
pub use de_tree::{parse_triples, ParsedTriple};
pub use ser::node_to_bytes;
pub use ser_br::node_to_bytes_backrefs;
pub use tools::{serialized_length_from_bytes, tree_hash_from_stream};
pub use tools::{
serialized_length_from_bytes, serialized_length_from_bytes_trusted, tree_hash_from_stream,
};
196 changes: 158 additions & 38 deletions src/serde/tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const MAX_SINGLE_BYTE: u8 = 0x7f;
const BACK_REFERENCE: u8 = 0xfe;
const CONS_BOX_MARKER: u8 = 0xff;

pub fn serialized_length_from_bytes(b: &[u8]) -> io::Result<u64> {
pub fn serialized_length_from_bytes_trusted(b: &[u8]) -> io::Result<u64> {
let mut f = Cursor::new(b);
let mut ops_counter = 1;
let mut b = [0; 1];
Expand Down Expand Up @@ -107,6 +107,70 @@ pub fn tree_hash_from_stream(f: &mut Cursor<&[u8]>) -> io::Result<[u8; 32]> {
Ok(values.pop().unwrap())
}

/// Checks that `b` begins with a valid CLVM serialization and returns the
/// number of bytes that serialization occupies.
///
/// While scanning, the shape of the tree is rebuilt inside a scratch
/// `Allocator` (every atom is stood in for by the null node) so that each
/// back-reference path can be resolved against the items parsed so far.
///
/// # Errors
///
/// Fails when the buffer ends before the serialization is complete, when an
/// atom's declared size reaches past the end of the buffer, when a
/// back-reference path cannot be traversed, or when the allocator refuses to
/// create another pair.
pub fn serialized_length_from_bytes(b: &[u8]) -> io::Result<u64> {
    use crate::serde::parse_atom::parse_path;
    use crate::traverse_path::traverse_path;
    use crate::{allocator::SExp, Allocator};

    let mut cursor = Cursor::new(b);
    let mut lead = [0_u8; 1];

    // Scratch allocator: only the tree structure matters, atom contents are
    // never stored.
    let mut tree = Allocator::new();
    let nil = tree.null();
    // `stack` is a linked list of pairs used as a stack of parsed items; the
    // most recently parsed item is at the head.
    let mut stack = nil;
    let mut pending = vec![ParseOp::SExp];

    while let Some(op) = pending.pop() {
        match op {
            ParseOp::SExp => {
                cursor.read_exact(&mut lead)?;
                match lead[0] {
                    CONS_BOX_MARKER => {
                        // parse two sub-expressions, then join them
                        pending.push(ParseOp::Cons);
                        pending.push(ParseOp::SExp);
                        pending.push(ParseOp::SExp);
                    }
                    BACK_REFERENCE => {
                        // resolve the path against everything parsed so far
                        let path = parse_path(&mut cursor)?;
                        let target = traverse_path(&tree, path, stack)?.1;
                        stack = tree.new_pair(target, stack)?;
                    }
                    byte if byte == 0x80 || byte <= MAX_SINGLE_BYTE => {
                        // the single byte already read is the entire atom
                        // (or it is the NIL marker)
                        stack = tree.new_pair(nil, stack)?;
                    }
                    byte => {
                        // size-prefixed atom: skip its payload, then make
                        // sure the skip did not run off the end of the input
                        let blob_size = decode_size(&mut cursor, byte)?;
                        cursor.seek(SeekFrom::Current(blob_size as i64))?;
                        let available = cursor.get_ref().len() as u64;
                        if cursor.position() > available {
                            return Err(bad_encoding());
                        }
                        stack = tree.new_pair(nil, stack)?;
                    }
                }
            }
            ParseOp::Cons => {
                // pop the two most recent items off the stack ...
                let SExp::Pair(second, below) = tree.sexp(stack) else {
                    return Err(bad_encoding());
                };
                let SExp::Pair(first, remainder) = tree.sexp(below) else {
                    return Err(bad_encoding());
                };

                // ... and push the pair (first-parsed . second-parsed)
                let joined = tree.new_pair(first, second)?;
                stack = tree.new_pair(joined, remainder)?;
            }
        }
    }

    // a successful parse leaves at least one item on the stack
    if let SExp::Pair(_, _) = tree.sexp(stack) {
        Ok(cursor.position())
    } else {
        Err(bad_encoding())
    }
}

#[test]
fn test_tree_hash_max_single_byte() {
let mut ctx = Sha256::new();
Expand Down Expand Up @@ -212,50 +276,106 @@ fn test_tree_hash_tree_large_atom() {
);
}

/// Table-driven checks of `serialized_length_from_bytes`: a set of buffers
/// with their expected serialized length, and a set of buffers that must be
/// rejected with the "bad encoding" error.
#[test]
fn test_serialized_length_from_bytes() {
    // (buffer, expected length); bytes after the first object are not counted
    let accepted: [(&[u8], u64); 4] = [
        (&[0x7f, 0x00, 0x00, 0x00], 1),
        (&[0x80, 0x00, 0x00, 0x00], 1),
        (&[0xff, 0x00, 0x00, 0x00], 3),
        (&[0xff, 0x01, 0xff, 0x80, 0x80, 0x00], 5),
    ];
    for (input, expected) in accepted {
        assert_eq!(serialized_length_from_bytes(input).unwrap(), expected);
    }

    // buffers that must fail with the bad-encoding error
    let rejected: [&[u8]; 3] = [
        &[0x8f, 0xff],
        &[0b11001111, 0xff],
        &[0b11001111, 0xff, 0, 0],
    ];
    for input in rejected {
        let e = serialized_length_from_bytes(input).unwrap_err();
        assert_eq!(e.kind(), bad_encoding().kind());
        assert_eq!(e.to_string(), "bad encoding");
    }

    // a 0x8f prefix followed by 15 bytes is consumed in full
    let full_atom = [0x8f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
    assert_eq!(serialized_length_from_bytes(&full_atom).unwrap(), 16);
}

#[cfg(test)]
mod test {
use super::*;
use crate::serde::node_from_bytes_backrefs;
use crate::Allocator;
use rstest::rstest;

/// Table-driven checks of `serialized_length_from_bytes_trusted`, including an
/// input with an invalid back-reference that this variant still accepts.
#[test]
fn test_serialized_length_from_bytes_trusted() {
    // (buffer, expected length); bytes after the first object are not counted
    let accepted: [(&[u8], u64); 5] = [
        (&[0x7f, 0x00, 0x00, 0x00], 1),
        (&[0x80, 0x00, 0x00, 0x00], 1),
        (&[0xff, 0x00, 0x00, 0x00], 3),
        (&[0xff, 0x01, 0xff, 0x80, 0x80, 0x00], 5),
        // this is an invalid back-ref
        // but it's not validated
        (&[0xff, 0x01, 0xff, 0xfe, 0x10, 0x80, 0x00], 6),
    ];
    for (input, expected) in accepted {
        assert_eq!(
            serialized_length_from_bytes_trusted(input).unwrap(),
            expected
        );
    }

    // buffers that must fail with the bad-encoding error
    let rejected: [&[u8]; 3] = [
        &[0x8f, 0xff],
        &[0b11001111, 0xff],
        &[0b11001111, 0xff, 0, 0],
    ];
    for input in rejected {
        let e = serialized_length_from_bytes_trusted(input).unwrap_err();
        assert_eq!(e.kind(), bad_encoding().kind());
        assert_eq!(e.to_string(), "bad encoding");
    }

    // a 0x8f prefix followed by 15 bytes is consumed in full
    let full_atom = [0x8f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
    assert_eq!(
        serialized_length_from_bytes_trusted(&full_atom).unwrap(),
        16
    );
}

/// Table-driven checks of the validating `serialized_length_from_bytes`,
/// including an input whose back-reference must be rejected.
#[test]
fn test_serialized_length_from_bytes() {
    use std::io::ErrorKind;

    // (buffer, expected length); bytes after the first object are not counted
    let accepted: [(&[u8], u64); 4] = [
        (&[0x7f, 0x00, 0x00, 0x00], 1),
        (&[0x80, 0x00, 0x00, 0x00], 1),
        (&[0xff, 0x00, 0x00, 0x00], 3),
        (&[0xff, 0x01, 0xff, 0x80, 0x80, 0x00], 5),
    ];
    for (input, expected) in accepted {
        assert_eq!(serialized_length_from_bytes(input).unwrap(), expected);
    }

    // this is an invalid back-ref
    let bad_backref = [0xff, 0x01, 0xff, 0xfe, 0x10, 0x80, 0x00];
    let e = serialized_length_from_bytes(&bad_backref).unwrap_err();
    assert_eq!(e.kind(), ErrorKind::Other);
    assert_eq!(e.to_string(), "path into atom");

    // buffers that must fail with the bad-encoding error
    let rejected: [&[u8]; 3] = [
        &[0x8f, 0xff],
        &[0b11001111, 0xff],
        &[0b11001111, 0xff, 0, 0],
    ];
    for input in rejected {
        let e = serialized_length_from_bytes(input).unwrap_err();
        assert_eq!(e.kind(), bad_encoding().kind());
        assert_eq!(e.to_string(), "bad encoding");
    }

    // a 0x8f prefix followed by 15 bytes is consumed in full
    let full_atom = [0x8f, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
    assert_eq!(serialized_length_from_bytes(&full_atom).unwrap(), 16);
}

#[rstest]
// ("foobar" "foobar")
#[case("ff86666f6f626172ff86666f6f62617280")]
Expand Down

0 comments on commit a868f47

Please sign in to comment.