From 59027fd9d1c78c4256a3089f078b4595d8d42f03 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 24 Jan 2019 18:24:39 -0800 Subject: [PATCH 1/4] Switch wasm emission to a custom encoder This commit moves emission of the wasm module away from the `parity-wasm` crate to instead using custom code within this crate. Similar to parsing with `wasmparser`, this is motivated twofold: * First, we want the ability to record binary offsets of where functions and instructions are located. This allows us to encode dwarf debug information eventually. * Second, this avoids a "lowering to a different IR" problem where we will be able to implement more efficient emission than if we go to parity-wasm first. Ideally this would all be separated to an external crate and/or maybe even sharing `wasmparser` types or something like that, but for now it should be relatively easy enough to inline it and with the spec tests we can have a pretty high degree of confidence it's not full of bugs at least. Some other changes included here are: * Functions are now serialized in parallel * The handling of mapping a local id to an index is now done in a per-function fashion rather than through `IdsToIndices`. This way the maps can be built in parallel and then aggregated at the end into the one global map serially. 
--- Cargo.toml | 4 +- examples/round-trip.rs | 9 + src/const_value.rs | 27 +- src/emit.rs | 105 +++- src/encode.rs | 102 ++++ src/ir/mod.rs | 30 ++ src/lib.rs | 1 + src/module/data.rs | 93 +++- src/module/elements.rs | 138 +++-- src/module/exports.rs | 73 ++- src/module/functions/local_function/emit.rs | 562 +++++++++----------- src/module/functions/local_function/mod.rs | 50 +- src/module/functions/mod.rs | 132 +++-- src/module/globals.rs | 55 +- src/module/imports.rs | 57 +- src/module/memories.rs | 87 +-- src/module/mod.rs | 171 +++--- src/module/producers.rs | 47 +- src/module/tables.rs | 57 +- src/module/types.rs | 36 +- src/passes/used.rs | 1 + src/passes/validate.rs | 4 + src/ty.rs | 51 +- 23 files changed, 1071 insertions(+), 821 deletions(-) create mode 100644 examples/round-trip.rs create mode 100644 src/encode.rs diff --git a/Cargo.toml b/Cargo.toml index 478c53b3..bebc5635 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,11 +7,9 @@ version = "0.1.0" [dependencies] failure = "0.1.2" id-arena = { version = "2.1.0", features = ['rayon'] } -parity-wasm = "0.35.6" -petgraph = "0.4.13" log = "0.4" -wasmparser = "0.26" rayon = "1.0.3" +wasmparser = "0.26" [dependencies.walrus-derive] path = "./walrus-derive" diff --git a/examples/round-trip.rs b/examples/round-trip.rs new file mode 100644 index 00000000..8ad75f83 --- /dev/null +++ b/examples/round-trip.rs @@ -0,0 +1,9 @@ +// A small example which is primarily used to help benchmark walrus right now. + +fn main() { + env_logger::init(); + let a = std::env::args().nth(1).unwrap(); + let m = walrus::module::Module::from_file(&a).unwrap(); + m.emit_wasm().unwrap(); +} + diff --git a/src/const_value.rs b/src/const_value.rs index 34626081..5567c89e 100644 --- a/src/const_value.rs +++ b/src/const_value.rs @@ -1,12 +1,11 @@ //! 
Handling wasm constant values -use crate::emit::IdsToIndices; +use crate::emit::{Emit, EmitContext}; use crate::error::Result; use crate::ir::Value; use crate::module::globals::GlobalId; use crate::parse::IndicesToIds; use failure::bail; -use parity_wasm::elements::{self, Instruction}; /// A constant which is produced in WebAssembly, typically used in global /// initializers or element/data offsets. @@ -37,18 +36,18 @@ impl Const { reader.ensure_end()?; Ok(val) } +} - pub(crate) fn emit_instructions(&self, indices: &IdsToIndices) -> elements::InitExpr { - let mut instrs = Vec::with_capacity(2); - instrs.push(match *self { - Const::Value(Value::I32(n)) => Instruction::I32Const(n), - Const::Value(Value::I64(n)) => Instruction::I64Const(n), - Const::Value(Value::F32(n)) => Instruction::F32Const(n.to_bits()), - Const::Value(Value::F64(n)) => Instruction::F64Const(n.to_bits()), - Const::Value(Value::V128(_n)) => unimplemented!(), - Const::Global(id) => Instruction::GetGlobal(indices.get_global_index(id)), - }); - instrs.push(Instruction::End); - elements::InitExpr::new(instrs) +impl Emit for Const { + fn emit(&self, cx: &mut EmitContext) { + match *self { + Const::Value(val) => val.emit(&mut cx.encoder), + Const::Global(id) => { + let idx = cx.indices.get_global_index(id); + cx.encoder.byte(0x23); // global.get + cx.encoder.u32(idx); + } + } + cx.encoder.byte(0x0b); // end } } diff --git a/src/emit.rs b/src/emit.rs index da9450f3..daf20ab6 100644 --- a/src/emit.rs +++ b/src/emit.rs @@ -1,8 +1,8 @@ //! Traits and code for emitting high-level structures as low-level, raw wasm //! structures. E.g. translating from globally unique identifiers down to the -//! raw wasm structure's index spaces. Currently "raw wasm structures" are -//! `parity_wasm::elements` types. +//! raw wasm structure's index spaces. 
+use crate::encode::{Encoder, MAX_U32_LENGTH}; use crate::ir::LocalId; use crate::module::data::DataId; use crate::module::elements::ElementId; @@ -13,14 +13,19 @@ use crate::module::tables::TableId; use crate::module::Module; use crate::passes::Used; use crate::ty::TypeId; -use parity_wasm::elements; use std::collections::HashMap; +use std::ops::{Deref, DerefMut}; pub struct EmitContext<'a> { pub module: &'a Module, pub used: &'a Used, pub indices: &'a mut IdsToIndices, - pub dst: &'a mut elements::Module, + pub encoder: Encoder<'a>, +} + +pub struct SubContext<'a, 'cx> { + cx: &'cx mut EmitContext<'a>, + write_size_to: usize, } /// Anything that can be lowered to raw wasm structures. @@ -29,6 +34,12 @@ pub trait Emit { fn emit(&self, cx: &mut EmitContext); } +impl<'a, T: ?Sized + Emit> Emit for &'a T { + fn emit(&self, cx: &mut EmitContext) { + T::emit(self, cx) + } +} + /// Maps our high-level identifiers to the raw indices they end up emitted at. /// /// As we lower to raw wasm structures, we cement various constructs' locations @@ -42,10 +53,10 @@ pub struct IdsToIndices { types: HashMap, funcs: HashMap, globals: HashMap, - locals: HashMap, memories: HashMap, elements: HashMap, data: HashMap, + pub locals: HashMap>, } macro_rules! define_get_push_index { @@ -81,24 +92,74 @@ define_get_push_index!(get_memory_index, push_memory, MemoryId, memories); define_get_push_index!(get_element_index, push_element, ElementId, elements); define_get_push_index!(get_data_index, push_data, DataId, data); -impl IdsToIndices { - /// Get the index for the given identifier. - #[inline] - pub fn get_local_index(&self, id: LocalId) -> u32 { - self.locals.get(&id).cloned().expect( - "Should never try and get the index for an identifier that has not already had \ - its index set. 
This means that either we are attempting to get the index of \ - an unused identifier, or that we are emitting sections in the wrong order.", - ) +impl<'a> EmitContext<'a> { + pub fn start_section<'b>(&'b mut self, id: Section) -> SubContext<'a, 'b> { + self.subsection(id as u8) + } + + pub fn subsection<'b>(&'b mut self, id: u8) -> SubContext<'a, 'b> { + self.encoder.byte(id); + let start = self.encoder.reserve_u32(); + SubContext { + cx: self, + write_size_to: start, + } + } + + pub fn custom_section<'b>(&'b mut self, name: &str) -> SubContext<'a, 'b> { + let mut cx = self.start_section(Section::Custom); + cx.encoder.str(name); + return cx; } - /// Adds the given identifier to this set, assigning it the next - /// available index. - #[inline] - pub fn set_local_index(&mut self, id: LocalId, index: u32) { - assert!( - self.locals.insert(id, index).is_none(), - "cannot set local index twice" - ); + pub fn list(&mut self, list: T) + where + T: IntoIterator, + T::IntoIter: ExactSizeIterator, + T::Item: Emit, + { + let list = list.into_iter(); + self.encoder.usize(list.len()); + for item in list { + item.emit(self); + } } } + +impl<'a> Deref for SubContext<'a, '_> { + type Target = EmitContext<'a>; + + fn deref(&self) -> &EmitContext<'a> { + &self.cx + } +} + +impl<'a> DerefMut for SubContext<'a, '_> { + fn deref_mut(&mut self) -> &mut EmitContext<'a> { + &mut self.cx + } +} + +impl Drop for SubContext<'_, '_> { + fn drop(&mut self) { + let amt = self.cx.encoder.pos() - self.write_size_to - MAX_U32_LENGTH; + assert!(amt <= u32::max_value() as usize); + self.cx.encoder.u32_at(self.write_size_to, amt as u32); + } +} + +pub enum Section { + Custom = 0, + Type = 1, + Import = 2, + Function = 3, + Table = 4, + Memory = 5, + Global = 6, + Export = 7, + Start = 8, + Element = 9, + Code = 10, + Data = 11, + DataCount = 12, +} diff --git a/src/encode.rs b/src/encode.rs new file mode 100644 index 00000000..d52ef065 --- /dev/null +++ b/src/encode.rs @@ -0,0 +1,102 @@ +pub const 
MAX_U32_LENGTH: usize = 5; + +#[derive(Debug)] +pub struct Encoder<'a> { + dst: &'a mut Vec, +} + +impl<'data> Encoder<'data> { + pub fn new(dst: &'data mut Vec) -> Encoder<'data> { + Encoder { dst } + } + + pub fn byte(&mut self, byte: u8) { + self.dst.push(byte); + } + + pub fn bytes(&mut self, bytes: &[u8]) { + self.usize(bytes.len()); + self.raw(bytes); + } + + pub fn str(&mut self, data: &str) { + self.bytes(data.as_bytes()) + } + + pub fn usize(&mut self, amt: usize) { + assert!(amt <= u32::max_value() as usize); + self.u32(amt as u32) + } + + pub fn u32(&mut self, mut amt: u32) { + while amt >= (1 << 7) { + self.byte((amt as u8) & 0x7f | 0x80); + amt >>= 7; + } + self.byte(amt as u8); + } + + pub fn i32(&mut self, val: i32) { + self.i64(val as i64); + } + + pub fn i64(&mut self, mut val: i64) { + let mut done = false; + while !done { + let mut byte = (val as i8) & 0x7f; + val >>= 7; + if (val == 0 && (byte & 0x40 == 0)) || (val == -1 && (byte & 0x40 != 0)) { + done = true; + } else { + byte |= 0x80u8 as i8; + } + self.byte(byte as u8); + } + } + + pub fn f32(&mut self, val: f32) { + let bits = val.to_bits(); + for i in 0..4 { + self.byte((bits >> (i * 8)) as u8); + } + } + + pub fn f64(&mut self, val: f64) { + let bits = val.to_bits(); + for i in 0..8 { + self.byte((bits >> (i * 8)) as u8); + } + } + + pub fn raw(&mut self, raw: &[u8]) { + self.dst.extend_from_slice(raw); + } + + /// Reserves `bytes` bytes of space, returning the position at which the + /// reservation starts + pub fn reserve(&mut self, bytes: usize) -> usize { + let start = self.dst.len(); + for _ in 0..bytes { + self.byte(0); + } + return start; + } + + /// Reserves space to write a uleb128 `u32`, returning the postition at + /// hwich it can be written. 
+ pub fn reserve_u32(&mut self) -> usize { + self.reserve(MAX_U32_LENGTH) + } + + pub fn pos(&self) -> usize { + self.dst.len() + } + + pub fn u32_at(&mut self, pos: usize, mut amt: u32) { + for i in 0..MAX_U32_LENGTH { + let flag = if i == MAX_U32_LENGTH - 1 { 0 } else { 0x80 }; + self.dst[pos + i] = (amt as u8) & 0x7f | flag; + amt >>= 7; + } + } +} diff --git a/src/ir/mod.rs b/src/ir/mod.rs index 29816225..a17e9e41 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -7,6 +7,7 @@ pub mod matcher; use crate::dot::Dot; +use crate::encode::Encoder; use crate::module::functions::FunctionId; use crate::module::functions::{DisplayExpr, DotExpr}; use crate::module::globals::GlobalId; @@ -348,6 +349,35 @@ pub enum Value { V128(u128), } +impl Value { + pub(crate) fn emit(&self, encoder: &mut Encoder) { + match *self { + Value::I32(n) => { + encoder.byte(0x41); // i32.const + encoder.i32(n); + } + Value::I64(n) => { + encoder.byte(0x42); // i64.const + encoder.i64(n); + } + Value::F32(n) => { + encoder.byte(0x43); // f32.const + encoder.f32(n); + } + Value::F64(n) => { + encoder.byte(0x44); // f64.const + encoder.f64(n); + } + Value::V128(n) => { + encoder.raw(&[0xfd, 0x02]); // v128.const + for i in 0..16 { + encoder.byte((n >> (i * 8)) as u8); + } + } + } + } +} + impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { diff --git a/src/lib.rs b/src/lib.rs index db457d53..b600afe4 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ mod arena_set; pub mod const_value; pub mod dot; mod emit; +mod encode; pub mod error; pub mod ir; pub mod module; diff --git a/src/module/data.rs b/src/module/data.rs index ed1b2bae..2899d619 100644 --- a/src/module/data.rs +++ b/src/module/data.rs @@ -1,15 +1,15 @@ //! Data segments within a wasm module. 
use crate::const_value::Const; -use crate::emit::{Emit, EmitContext}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::ir::Value; use crate::module::Module; use crate::parse::IndicesToIds; +use crate::passes::Used; use crate::ty::ValType; use failure::{bail, ResultExt}; use id_arena::{Arena, Id}; -use parity_wasm::elements; /// A passive element segment identifier pub type DataId = Id; @@ -17,7 +17,16 @@ pub type DataId = Id; /// A passive data segment #[derive(Debug)] pub struct Data { - value: Vec, + id: DataId, + /// The payload of this passive data segment + pub value: Vec, +} + +impl Data { + /// Returns the id of this passive data segment + pub fn id(&self) -> DataId { + self.id + } } /// All passive data sections of a wasm module, used to initialize memories via @@ -42,6 +51,27 @@ impl ModuleData { pub fn iter(&self) -> impl Iterator { self.arena.iter().map(|(_, f)| f) } + + pub(crate) fn iter_used<'a>(&'a self, used: &'a Used) -> impl Iterator + 'a { + self.iter().filter(move |data| used.data.contains(&data.id)) + } + + // Note that this is in accordance with the upstream bulk memory proposal to + // WebAssembly and isn't currently part of the WebAssembly standard. 
+ pub(crate) fn emit_data_count(&self, cx: &mut EmitContext) { + let mut count = 0; + + // Assign indices before we start translating functions to ensure that + // references to data have all been assigned + for data in self.iter_used(cx.used) { + cx.indices.push_data(data.id()); + count += 1; + } + + if count != 0 { + cx.start_section(Section::DataCount).encoder.usize(count); + } + } } impl Module { @@ -51,6 +81,7 @@ impl Module { section: wasmparser::DataSectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse data section"); for (i, segment) in section.into_iter().enumerate() { let segment = segment?; @@ -86,44 +117,52 @@ impl Module { impl Emit for ModuleData { fn emit(&self, cx: &mut EmitContext) { - let mut segments = Vec::new(); - + log::debug!("emit data section"); // Sort table ids for a deterministic emission for now, eventually we // may want some sort of sorting heuristic here. let mut active = cx .module .memories .iter() - .filter(|t| cx.used.memories.contains(&t.id())) - .map(|m| (m.id(), m)) + .filter(|m| cx.used.memories.contains(&m.id())) + .flat_map(|memory| memory.emit_data().map(move |data| (memory.id(), data))) .collect::>(); active.sort_by_key(|pair| pair.0); + let passive = self + .arena + .iter() + .filter(|(id, _seg)| cx.used.data.contains(id)) + .count(); - for (_memory_id, memory) in active { - segments.extend(memory.emit_data(cx.indices)); + if active.len() == 0 && passive == 0 { + return; } - // After all the active segments are added add passive segments next. We - // may want to sort this more intelligently in the future. Othrewise - // emitting a segment here is in general much simpler than above as we - // know there are no holes. 
- for (id, segment) in self.arena.iter() { - if !cx.used.data.contains(&id) { - continue; + let mut cx = cx.start_section(Section::Data); + cx.encoder.usize(active.len() + passive); + + // The encodings here are with respect to the bulk memory proposal, but + // should be backwards compatible with the current stable WebAssembly + // spec so long as only memory 0 is used. + for (id, (offset, data)) in active { + let index = cx.indices.get_memory_index(id); + if index == 0 { + cx.encoder.byte(0x00); + } else { + cx.encoder.byte(0x02); + cx.encoder.u32(index); } - cx.indices.push_data(id); - segments.push(elements::DataSegment::new( - 0, - None, - segment.value.clone(), - true, - )); + offset.emit(&mut cx); + cx.encoder.bytes(data); } - if segments.len() > 0 { - let elements = elements::DataSection::with_entries(segments); - let elements = elements::Section::Data(elements); - cx.dst.sections_mut().push(elements); + // After all the active segments are added add passive segments next. We + // may want to sort this more intelligently in the future. Otherwise + // emitting a segment here is in general much simpler than above as we + // know there are no holes. + for data in self.iter_used(cx.used) { + cx.encoder.byte(0x01); + cx.encoder.bytes(&data.value); } } } diff --git a/src/module/elements.rs b/src/module/elements.rs index f558e45d..09630554 100644 --- a/src/module/elements.rs +++ b/src/module/elements.rs @@ -1,7 +1,7 @@ //! Table elements within a wasm module. 
use crate::const_value::Const; -use crate::emit::{Emit, EmitContext}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::ir::Value; use crate::module::functions::FunctionId; @@ -11,7 +11,6 @@ use crate::parse::IndicesToIds; use crate::ty::ValType; use failure::{bail, ResultExt}; use id_arena::{Arena, Id}; -use parity_wasm::elements; /// A passive element segment identifier pub type ElementId = Id; @@ -53,6 +52,7 @@ impl Module { section: wasmparser::ElementSectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse element section"); for (i, segment) in section.into_iter().enumerate() { let segment = segment?; // TODO: get support for passive segments in wasmparser @@ -102,8 +102,7 @@ impl Module { impl Emit for ModuleElements { fn emit(&self, cx: &mut EmitContext) { - let mut segments = Vec::new(); - + log::debug!("emit element section"); // Sort table ids for a deterministic emission for now, eventually we // may want some sort of sorting heuristic here. let mut active = cx @@ -118,62 +117,91 @@ impl Emit for ModuleElements { active.sort_by_key(|pair| pair.0); // Append segments as we find them for all table initializers. We can - // skip initializers for unused tables, and othrewise we just want to + // skip initializers for unused tables, and otherwise we just want to // create an initializer for each contiguous chunk of function indices. 
- for (table_id, table) in active { - let table_index = cx.indices.get_table_index(table_id); - - let mut add = |offset: usize, members: Vec| { - let code = vec![ - elements::Instruction::I32Const(offset as i32), - elements::Instruction::End, - ]; - let init = elements::InitExpr::new(code); - segments.push(elements::ElementSegment::new( - table_index, - Some(init), - members, - false, - )); - }; - + let mut chunks = Vec::new(); + for (table_id, table) in active.iter() { let mut offset = 0; - let mut cur = Vec::new(); + let mut len = 0; for (i, item) in table.elements.iter().enumerate() { - match item { - Some(item) => { - if cur.len() == 0 { - offset = i; - } - cur.push(cx.indices.get_func_index(*item)); + if item.is_some() { + if len == 0 { + offset = i; } - None => { - if cur.len() > 0 { - add(offset, cur); - } - cur = Vec::new(); + len += 1; + } else { + if len > 0 { + chunks.push((table_id, table, offset, len)); } + len = 0; } } - if cur.len() > 0 { - add(offset, cur); + if len > 0 { + chunks.push((table_id, table, offset, len)); + } + } + + let passive = self + .arena + .iter() + .filter(|(id, _)| cx.used.elements.contains(id)) + .count(); + let relative = active + .iter() + .map(|(_, table)| table.relative_elements.len()) + .sum::(); + let total = passive + relative + chunks.len(); + + if total == 0 { + return; + } + let mut cx = cx.start_section(Section::Element); + cx.encoder.usize(total); + + // Emits the leading data for describing a table's index + // + // Note that much of this is in accordance with the + // currently-in-progress bulk-memory proposal for WebAssembly. 
+ let active_table_header = |cx: &mut EmitContext, index: u32| { + if index == 0 { + cx.encoder.byte(0x00); + } else { + cx.encoder.byte(0x02); + cx.encoder.u32(index); } + }; + + // Emit all contiguous chunks of functions pointers that are located at + // constant offsets + for (&id, table, offset, len) in chunks { + let table_index = cx.indices.get_table_index(id); + active_table_header(&mut cx, table_index); + Const::Value(Value::I32(offset as i32)).emit(&mut cx); + cx.encoder.usize(len); + for item in table.elements[offset..][..len].iter() { + let index = cx.indices.get_func_index(item.unwrap()); + cx.encoder.u32(index); + } + } + // Emit all chunks of function pointers that are located at relative + // global offsets. + for (id, table) in active.iter() { + let table_index = cx.indices.get_table_index(*id); for (global, list) in table.relative_elements.iter() { - let init = Const::Global(*global).emit_instructions(cx.indices); - let members = list.iter().map(|i| cx.indices.get_func_index(*i)).collect(); - segments.push(elements::ElementSegment::new( - table_index, - Some(init), - members, - false, - )); + active_table_header(&mut cx, table_index); + Const::Global(*global).emit(&mut cx); + cx.encoder.usize(list.len()); + for func in list { + let index = cx.indices.get_func_index(*func); + cx.encoder.u32(index); + } } } // After all the active segments are added add passive segments next. We - // may want to sort this more intelligently in the future. Othrewise + // may want to sort this more intelligently in the future. Otherwise // emitting a segment here is in general much simpler than above as we // know there are no holes. 
for (id, segment) in self.arena.iter() { @@ -181,19 +209,13 @@ impl Emit for ModuleElements { continue; } cx.indices.push_element(id); - - let members = segment - .members - .iter() - .map(|id| cx.indices.get_func_index(*id)) - .collect(); - segments.push(elements::ElementSegment::new(0, None, members, true)); - } - - if segments.len() > 0 { - let elements = elements::ElementSection::with_entries(segments); - let elements = elements::Section::Element(elements); - cx.dst.sections_mut().push(elements); + drop((id, segment)); + // TODO: sync this with the upstream spec + panic!( + "encoding a passive element segment requires either \ + `ref.null` or `ref.func` encodings, which aren't \ + currently implemented" + ); } } } diff --git a/src/module/exports.rs b/src/module/exports.rs index f986941a..f3aba4f5 100644 --- a/src/module/exports.rs +++ b/src/module/exports.rs @@ -3,13 +3,12 @@ use super::globals::GlobalId; use super::memories::MemoryId; use super::tables::TableId; -use crate::emit::{Emit, EmitContext, IdsToIndices}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::module::functions::FunctionId; use crate::module::Module; use crate::parse::IndicesToIds; use id_arena::{Arena, Id}; -use parity_wasm::elements; /// The id of an export. pub type ExportId = Id; @@ -29,11 +28,6 @@ impl Export { pub fn id(&self) -> ExportId { self.id } - - fn entry(&self, indices: &IdsToIndices) -> elements::ExportEntry { - let internal = self.item.internal(indices); - elements::ExportEntry::new(self.name.clone(), internal) - } } /// An exported item. 
@@ -49,29 +43,6 @@ pub enum ExportItem { Global(GlobalId), } -impl ExportItem { - fn internal(&self, indices: &IdsToIndices) -> elements::Internal { - match *self { - ExportItem::Function(f) => { - let idx = indices.get_func_index(f); - elements::Internal::Function(idx) - } - ExportItem::Table(t) => { - let idx = indices.get_table_index(t); - elements::Internal::Table(idx) - } - ExportItem::Memory(m) => { - let idx = indices.get_memory_index(m); - elements::Internal::Memory(idx) - } - ExportItem::Global(g) => { - let idx = indices.get_global_index(g); - elements::Internal::Global(idx) - } - } - } -} - /// The set of exports in a module. #[derive(Debug, Default)] pub struct ModuleExports { @@ -103,6 +74,7 @@ impl Module { section: wasmparser::ExportSectionReader, ids: &IndicesToIds, ) -> Result<()> { + log::debug!("parse export section"); use wasmparser::ExternalKind::*; for entry in section { @@ -126,22 +98,41 @@ impl Module { impl Emit for ModuleExports { fn emit(&self, cx: &mut EmitContext) { + log::debug!("emit export section"); // NB: exports are always considered used. They are the roots that the // used analysis searches out from. 
- let mut exports = vec![]; - - for (_id, exp) in self.arena.iter() { - let export = exp.entry(cx.indices); - exports.push(export); - } - - if exports.is_empty() { + if self.arena.len() == 0 { return; } - let exports = elements::ExportSection::with_entries(exports); - let exports = elements::Section::Export(exports); - cx.dst.sections_mut().push(exports); + let mut cx = cx.start_section(Section::Export); + cx.encoder.usize(self.arena.len()); + + for (_id, export) in self.arena.iter() { + cx.encoder.str(&export.name); + match export.item { + ExportItem::Function(id) => { + let index = cx.indices.get_func_index(id); + cx.encoder.byte(0x00); + cx.encoder.u32(index); + } + ExportItem::Table(id) => { + let index = cx.indices.get_table_index(id); + cx.encoder.byte(0x01); + cx.encoder.u32(index); + } + ExportItem::Memory(id) => { + let index = cx.indices.get_memory_index(id); + cx.encoder.byte(0x02); + cx.encoder.u32(index); + } + ExportItem::Global(id) => { + let index = cx.indices.get_global_index(id); + cx.encoder.byte(0x03); + cx.encoder.u32(index); + } + } + } } } diff --git a/src/module/functions/local_function/emit.rs b/src/module/functions/local_function/emit.rs index be68d290..3a7d71d4 100644 --- a/src/module/functions/local_function/emit.rs +++ b/src/module/functions/local_function/emit.rs @@ -1,21 +1,23 @@ use crate::emit::IdsToIndices; +use crate::encode::Encoder; +use std::collections::HashMap; use crate::ir::*; use crate::module::functions::LocalFunction; -use parity_wasm::elements; +use crate::ty::ValType; -pub(crate) fn run(func: &LocalFunction, indices: &IdsToIndices) -> Vec { +pub(crate) fn run(func: &LocalFunction, indices: &IdsToIndices, local_indices: &HashMap, encoder: &mut Encoder) { let mut v = Emit { func, indices, id: func.entry_block().into(), blocks: vec![], - instructions: vec![], + encoder, + local_indices, }; v.visit(func.entry_block()); - v.instructions } -struct Emit<'a> { +struct Emit<'a, 'b> { // The function we are visiting. 
func: &'a LocalFunction, @@ -24,16 +26,17 @@ struct Emit<'a> { // Needed so we can map locals to their indices. indices: &'a IdsToIndices, + local_indices: &'a HashMap, // Stack of blocks that we are currently emitting instructions for. A branch // is only valid if its target is one of these blocks. blocks: Vec, // The instruction sequence we are building up to emit. - instructions: Vec, + encoder: &'a mut Encoder<'b>, } -impl Emit<'_> { +impl Emit<'_, '_> { fn visit(&mut self, e: E) where E: Into, @@ -48,38 +51,34 @@ impl Emit<'_> { self.id = id; match &self.func.exprs[id] { - Const(e) => self.visit_const(e), + Const(e) => e.value.emit(self.encoder), Block(e) => self.visit_block(e), BrTable(e) => self.visit_br_table(e), IfElse(e) => self.visit_if_else(e), Drop(e) => { self.visit(e.expr); - self.emit(elements::Instruction::Drop) + self.encoder.byte(0x1a); // drop } Return(e) => { for x in e.values.iter() { self.visit(*x); } - self.emit(elements::Instruction::Return); + self.encoder.byte(0x0f); // return } MemorySize(e) => { let idx = self.indices.get_memory_index(e.memory); - // TODO: should upstream a fix to parity-wasm to accept 32-bit - // indices for memories. - assert!(idx < 256); - self.emit(elements::Instruction::CurrentMemory(idx as u8)) + self.encoder.byte(0x3f); // memory.size + self.encoder.u32(idx); } MemoryGrow(e) => { self.visit(e.pages); let idx = self.indices.get_memory_index(e.memory); - // TODO: should upstream a fix to parity-wasm to accept 32-bit - // indices for memories. 
- assert!(idx < 256); - self.emit(elements::Instruction::GrowMemory(idx as u8)) + self.encoder.byte(0x40); // memory.grow + self.encoder.u32(idx); } Binop(e) => { @@ -87,169 +86,171 @@ impl Emit<'_> { self.visit(e.lhs); self.visit(e.rhs); - self.emit(match e.op { - I32Eq => elements::Instruction::I32Eq, - I32Ne => elements::Instruction::I32Ne, - I32LtS => elements::Instruction::I32LtS, - I32LtU => elements::Instruction::I32LtU, - I32GtS => elements::Instruction::I32GtS, - I32GtU => elements::Instruction::I32GtU, - I32LeS => elements::Instruction::I32LeS, - I32LeU => elements::Instruction::I32LeU, - I32GeS => elements::Instruction::I32GeS, - I32GeU => elements::Instruction::I32GeU, - - I64Eq => elements::Instruction::I64Eq, - I64Ne => elements::Instruction::I64Ne, - I64LtS => elements::Instruction::I64LtS, - I64LtU => elements::Instruction::I64LtU, - I64GtS => elements::Instruction::I64GtS, - I64GtU => elements::Instruction::I64GtU, - I64LeS => elements::Instruction::I64LeS, - I64LeU => elements::Instruction::I64LeU, - I64GeS => elements::Instruction::I64GeS, - I64GeU => elements::Instruction::I64GeU, - - F32Eq => elements::Instruction::F32Eq, - F32Ne => elements::Instruction::F32Ne, - F32Lt => elements::Instruction::F32Lt, - F32Gt => elements::Instruction::F32Gt, - F32Le => elements::Instruction::F32Le, - F32Ge => elements::Instruction::F32Ge, - - F64Eq => elements::Instruction::F64Eq, - F64Ne => elements::Instruction::F64Ne, - F64Lt => elements::Instruction::F64Lt, - F64Gt => elements::Instruction::F64Gt, - F64Le => elements::Instruction::F64Le, - F64Ge => elements::Instruction::F64Ge, - - I32Add => elements::Instruction::I32Add, - I32Sub => elements::Instruction::I32Sub, - I32Mul => elements::Instruction::I32Mul, - I32DivS => elements::Instruction::I32DivS, - I32DivU => elements::Instruction::I32DivU, - I32RemS => elements::Instruction::I32RemS, - I32RemU => elements::Instruction::I32RemU, - I32And => elements::Instruction::I32And, - I32Or => 
elements::Instruction::I32Or, - I32Xor => elements::Instruction::I32Xor, - I32Shl => elements::Instruction::I32Shl, - I32ShrS => elements::Instruction::I32ShrS, - I32ShrU => elements::Instruction::I32ShrU, - I32Rotl => elements::Instruction::I32Rotl, - I32Rotr => elements::Instruction::I32Rotr, - - I64Add => elements::Instruction::I64Add, - I64Sub => elements::Instruction::I64Sub, - I64Mul => elements::Instruction::I64Mul, - I64DivS => elements::Instruction::I64DivS, - I64DivU => elements::Instruction::I64DivU, - I64RemS => elements::Instruction::I64RemS, - I64RemU => elements::Instruction::I64RemU, - I64And => elements::Instruction::I64And, - I64Or => elements::Instruction::I64Or, - I64Xor => elements::Instruction::I64Xor, - I64Shl => elements::Instruction::I64Shl, - I64ShrS => elements::Instruction::I64ShrS, - I64ShrU => elements::Instruction::I64ShrU, - I64Rotl => elements::Instruction::I64Rotl, - I64Rotr => elements::Instruction::I64Rotr, - - F32Add => elements::Instruction::F32Add, - F32Sub => elements::Instruction::F32Sub, - F32Mul => elements::Instruction::F32Mul, - F32Div => elements::Instruction::F32Div, - F32Min => elements::Instruction::F32Min, - F32Max => elements::Instruction::F32Max, - F32Copysign => elements::Instruction::F32Copysign, - - F64Add => elements::Instruction::F64Add, - F64Sub => elements::Instruction::F64Sub, - F64Mul => elements::Instruction::F64Mul, - F64Div => elements::Instruction::F64Div, - F64Min => elements::Instruction::F64Min, - F64Max => elements::Instruction::F64Max, - F64Copysign => elements::Instruction::F64Copysign, - }) + let opcode = match e.op { + I32Eq => 0x46, + I32Ne => 0x47, + I32LtS => 0x48, + I32LtU => 0x49, + I32GtS => 0x4a, + I32GtU => 0x4b, + I32LeS => 0x4c, + I32LeU => 0x4d, + I32GeS => 0x4e, + I32GeU => 0x4f, + + I64Eq => 0x51, + I64Ne => 0x52, + I64LtS => 0x53, + I64LtU => 0x54, + I64GtS => 0x55, + I64GtU => 0x56, + I64LeS => 0x57, + I64LeU => 0x58, + I64GeS => 0x59, + I64GeU => 0x5a, + + F32Eq => 0x5b, + 
F32Ne => 0x5c, + F32Lt => 0x5d, + F32Gt => 0x5e, + F32Le => 0x5f, + F32Ge => 0x60, + + F64Eq => 0x61, + F64Ne => 0x62, + F64Lt => 0x63, + F64Gt => 0x64, + F64Le => 0x65, + F64Ge => 0x66, + + I32Add => 0x6a, + I32Sub => 0x6b, + I32Mul => 0x6c, + I32DivS => 0x6d, + I32DivU => 0x6e, + I32RemS => 0x6f, + I32RemU => 0x70, + I32And => 0x71, + I32Or => 0x72, + I32Xor => 0x73, + I32Shl => 0x74, + I32ShrS => 0x75, + I32ShrU => 0x76, + I32Rotl => 0x77, + I32Rotr => 0x78, + + I64Add => 0x7c, + I64Sub => 0x7d, + I64Mul => 0x7e, + I64DivS => 0x7f, + I64DivU => 0x80, + I64RemS => 0x81, + I64RemU => 0x82, + I64And => 0x83, + I64Or => 0x84, + I64Xor => 0x85, + I64Shl => 0x86, + I64ShrS => 0x87, + I64ShrU => 0x88, + I64Rotl => 0x89, + I64Rotr => 0x8a, + + F32Add => 0x92, + F32Sub => 0x93, + F32Mul => 0x94, + F32Div => 0x95, + F32Min => 0x96, + F32Max => 0x97, + F32Copysign => 0x98, + + F64Add => 0xa0, + F64Sub => 0xa1, + F64Mul => 0xa2, + F64Div => 0xa3, + F64Min => 0xa4, + F64Max => 0xa5, + F64Copysign => 0xa6, + }; + self.encoder.byte(opcode); } Unop(e) => { use UnaryOp::*; self.visit(e.expr); - self.emit(match e.op { - I32Eqz => elements::Instruction::I32Eqz, - I32Clz => elements::Instruction::I32Clz, - I32Ctz => elements::Instruction::I32Ctz, - I32Popcnt => elements::Instruction::I32Popcnt, - - I64Eqz => elements::Instruction::I64Eqz, - I64Clz => elements::Instruction::I64Clz, - I64Ctz => elements::Instruction::I64Ctz, - I64Popcnt => elements::Instruction::I64Popcnt, - - F32Abs => elements::Instruction::F32Abs, - F32Neg => elements::Instruction::F32Neg, - F32Ceil => elements::Instruction::F32Ceil, - F32Floor => elements::Instruction::F32Floor, - F32Trunc => elements::Instruction::F32Trunc, - F32Nearest => elements::Instruction::F32Nearest, - F32Sqrt => elements::Instruction::F32Sqrt, - - F64Abs => elements::Instruction::F64Abs, - F64Neg => elements::Instruction::F64Neg, - F64Ceil => elements::Instruction::F64Ceil, - F64Floor => elements::Instruction::F64Floor, - F64Trunc => 
elements::Instruction::F64Trunc, - F64Nearest => elements::Instruction::F64Nearest, - F64Sqrt => elements::Instruction::F64Sqrt, - - I32WrapI64 => elements::Instruction::I32WrapI64, - I32TruncSF32 => elements::Instruction::I32TruncSF32, - I32TruncUF32 => elements::Instruction::I32TruncUF32, - I32TruncSF64 => elements::Instruction::I32TruncSF64, - I32TruncUF64 => elements::Instruction::I32TruncUF64, - I64ExtendSI32 => elements::Instruction::I64ExtendSI32, - I64ExtendUI32 => elements::Instruction::I64ExtendUI32, - I64TruncSF32 => elements::Instruction::I64TruncSF32, - I64TruncUF32 => elements::Instruction::I64TruncUF32, - I64TruncSF64 => elements::Instruction::I64TruncSF64, - I64TruncUF64 => elements::Instruction::I64TruncUF64, - - F32ConvertSI32 => elements::Instruction::F32ConvertSI32, - F32ConvertUI32 => elements::Instruction::F32ConvertUI32, - F32ConvertSI64 => elements::Instruction::F32ConvertSI64, - F32ConvertUI64 => elements::Instruction::F32ConvertUI64, - F32DemoteF64 => elements::Instruction::F32DemoteF64, - F64ConvertSI32 => elements::Instruction::F64ConvertSI32, - F64ConvertUI32 => elements::Instruction::F64ConvertUI32, - F64ConvertSI64 => elements::Instruction::F64ConvertSI64, - F64ConvertUI64 => elements::Instruction::F64ConvertUI64, - F64PromoteF32 => elements::Instruction::F64PromoteF32, - - I32ReinterpretF32 => elements::Instruction::I32ReinterpretF32, - I64ReinterpretF64 => elements::Instruction::I64ReinterpretF64, - F32ReinterpretI32 => elements::Instruction::F32ReinterpretI32, - F64ReinterpretI64 => elements::Instruction::F64ReinterpretI64, - - I32Extend8S => elements::Instruction::I32Extend8S, - I32Extend16S => elements::Instruction::I32Extend16S, - I64Extend8S => elements::Instruction::I64Extend8S, - I64Extend16S => elements::Instruction::I64Extend16S, - I64Extend32S => elements::Instruction::I64Extend32S, - }) + let opcode = match e.op { + I32Eqz => 0x45, + I32Clz => 0x67, + I32Ctz => 0x68, + I32Popcnt => 0x69, + + I64Eqz => 0x50, + I64Clz => 
0x79, + I64Ctz => 0x7a, + I64Popcnt => 0x7b, + + F32Abs => 0x8b, + F32Neg => 0x8c, + F32Ceil => 0x8d, + F32Floor => 0x8e, + F32Trunc => 0x8f, + F32Nearest => 0x90, + F32Sqrt => 0x91, + + F64Abs => 0x99, + F64Neg => 0x9a, + F64Ceil => 0x9b, + F64Floor => 0x9c, + F64Trunc => 0x9d, + F64Nearest => 0x9e, + F64Sqrt => 0x9f, + + I32WrapI64 => 0xa7, + I32TruncSF32 => 0xa8, + I32TruncUF32 => 0xa9, + I32TruncSF64 => 0xaa, + I32TruncUF64 => 0xab, + I64ExtendSI32 => 0xac, + I64ExtendUI32 => 0xad, + I64TruncSF32 => 0xae, + I64TruncUF32 => 0xaf, + I64TruncSF64 => 0xb0, + I64TruncUF64 => 0xb1, + + F32ConvertSI32 => 0xb2, + F32ConvertUI32 => 0xb3, + F32ConvertSI64 => 0xb4, + F32ConvertUI64 => 0xb5, + F32DemoteF64 => 0xb6, + F64ConvertSI32 => 0xb7, + F64ConvertUI32 => 0xb8, + F64ConvertSI64 => 0xb9, + F64ConvertUI64 => 0xba, + F64PromoteF32 => 0xbb, + + I32ReinterpretF32 => 0xbc, + I64ReinterpretF64 => 0xbd, + F32ReinterpretI32 => 0xbe, + F64ReinterpretI64 => 0xbf, + + I32Extend8S => 0xc0, + I32Extend16S => 0xc1, + I64Extend8S => 0xc2, + I64Extend16S => 0xc3, + I64Extend32S => 0xc4, + }; + self.encoder.byte(opcode); } Select(e) => { self.visit(e.alternative); self.visit(e.consequent); self.visit(e.condition); - self.emit(elements::Instruction::Select) + self.encoder.byte(0x1b); // select } Unreachable(_) => { - self.emit(elements::Instruction::Unreachable); + self.encoder.byte(0x00); // unreachable } Br(e) => { @@ -257,7 +258,8 @@ impl Emit<'_> { self.visit(*x); } let target = self.branch_target(e.block); - self.emit(elements::Instruction::Br(target)) + self.encoder.byte(0x0c); // br + self.encoder.u32(target); } BrIf(e) => { @@ -266,7 +268,8 @@ impl Emit<'_> { } self.visit(e.condition); let target = self.branch_target(e.block); - self.emit(elements::Instruction::BrIf(target)) + self.encoder.byte(0x0d); // br_if + self.encoder.u32(target); } Call(e) => { @@ -274,7 +277,8 @@ impl Emit<'_> { self.visit(*x); } let idx = self.indices.get_func_index(e.func); - 
self.emit(elements::Instruction::Call(idx)) + self.encoder.byte(0x10); // call + self.encoder.u32(idx); } CallIndirect(e) => { @@ -284,36 +288,42 @@ impl Emit<'_> { self.visit(e.func); let idx = self.indices.get_type_index(e.ty); let table = self.indices.get_table_index(e.table); - assert!(table < 256); // TODO: update parity-wasm to accept u32 - self.emit(elements::Instruction::CallIndirect(idx, table as u8)) + self.encoder.byte(0x11); // call_indirect + self.encoder.u32(idx); + self.encoder.u32(table); } LocalGet(e) => { - let idx = self.indices.get_local_index(e.local); - self.emit(elements::Instruction::GetLocal(idx)) + let idx = self.local_indices[&e.local]; + self.encoder.byte(0x20); // local.get + self.encoder.u32(idx); } LocalSet(e) => { self.visit(e.value); - let idx = self.indices.get_local_index(e.local); - self.emit(elements::Instruction::SetLocal(idx)) + let idx = self.local_indices[&e.local]; + self.encoder.byte(0x21); // local.set + self.encoder.u32(idx); } LocalTee(e) => { self.visit(e.value); - let idx = self.indices.get_local_index(e.local); - self.emit(elements::Instruction::TeeLocal(idx)) + let idx = self.local_indices[&e.local]; + self.encoder.byte(0x22); // local.tee + self.encoder.u32(idx); } GlobalGet(e) => { let idx = self.indices.get_global_index(e.global); - self.emit(elements::Instruction::GetGlobal(idx)) + self.encoder.byte(0x23); // global.get + self.encoder.u32(idx); } GlobalSet(e) => { self.visit(e.value); let idx = self.indices.get_global_index(e.global); - self.emit(elements::Instruction::SetGlobal(idx)) + self.encoder.byte(0x24); // global.set + self.encoder.u32(idx); } Load(e) => { @@ -321,41 +331,25 @@ impl Emit<'_> { self.visit(e.address); // parity-wasm doesn't have support for multiple memories yet assert_eq!(self.indices.get_memory_index(e.memory), 0); - let (align, offset) = (e.arg.align, e.arg.offset); - let align = align.trailing_zeros(); - let arg = elements::MemArg { - align: align as u8, - offset, - }; - 
self.emit(match e.kind { - I32 => elements::Instruction::I32Load(align, offset), - I64 => elements::Instruction::I64Load(align, offset), - F32 => elements::Instruction::F32Load(align, offset), - F64 => elements::Instruction::F64Load(align, offset), - V128 => elements::Instruction::V128Load(arg), - I32_8 { sign_extend: true } => elements::Instruction::I32Load8S(align, offset), - I32_8 { sign_extend: false } => elements::Instruction::I32Load8U(align, offset), - I32_16 { sign_extend: true } => { - elements::Instruction::I32Load16S(align, offset) - } - I32_16 { sign_extend: false } => { - elements::Instruction::I32Load16U(align, offset) - } - I64_8 { sign_extend: true } => elements::Instruction::I64Load8S(align, offset), - I64_8 { sign_extend: false } => elements::Instruction::I64Load8U(align, offset), - I64_16 { sign_extend: true } => { - elements::Instruction::I64Load16S(align, offset) - } - I64_16 { sign_extend: false } => { - elements::Instruction::I64Load16U(align, offset) - } - I64_32 { sign_extend: true } => { - elements::Instruction::I64Load32S(align, offset) - } - I64_32 { sign_extend: false } => { - elements::Instruction::I64Load32U(align, offset) - } - }); + match e.kind { + I32 => self.encoder.byte(0x28), // i32.load + I64 => self.encoder.byte(0x29), // i64.load + F32 => self.encoder.byte(0x2a), // f32.load + F64 => self.encoder.byte(0x2b), // f64.load + V128 => self.encoder.raw(&[0xfd, 0x00]), + I32_8 { sign_extend: true } => self.encoder.byte(0x2c), + I32_8 { sign_extend: false } => self.encoder.byte(0x2d), + I32_16 { sign_extend: true } => self.encoder.byte(0x2e), + I32_16 { sign_extend: false } => self.encoder.byte(0x2f), + I64_8 { sign_extend: true } => self.encoder.byte(0x30), + I64_8 { sign_extend: false } => self.encoder.byte(0x31), + I64_16 { sign_extend: true } => self.encoder.byte(0x32), + I64_16 { sign_extend: false } => self.encoder.byte(0x33), + I64_32 { sign_extend: true } => self.encoder.byte(0x34), + I64_32 { sign_extend: false } => 
self.encoder.byte(0x35), + } + self.encoder.u32(e.arg.align.trailing_zeros()); + self.encoder.u32(e.arg.offset); } Store(e) => { @@ -364,34 +358,26 @@ impl Emit<'_> { self.visit(e.value); // parity-wasm doesn't have support for multiple memories yet assert_eq!(self.indices.get_memory_index(e.memory), 0); - let (align, offset) = (e.arg.align, e.arg.offset); - let align = align.trailing_zeros(); - let arg = elements::MemArg { - align: align as u8, - offset, - }; - self.emit(match e.kind { - I32 => elements::Instruction::I32Store(align, offset), - I64 => elements::Instruction::I64Store(align, offset), - F32 => elements::Instruction::F32Store(align, offset), - F64 => elements::Instruction::F64Store(align, offset), - V128 => elements::Instruction::V128Store(arg), - I32_8 => elements::Instruction::I32Store8(align, offset), - I32_16 => elements::Instruction::I32Store16(align, offset), - I64_8 => elements::Instruction::I64Store8(align, offset), - I64_16 => elements::Instruction::I64Store16(align, offset), - I64_32 => elements::Instruction::I64Store32(align, offset), - }); + match e.kind { + I32 => self.encoder.byte(0x36), // i32.store + I64 => self.encoder.byte(0x37), // i64.store + F32 => self.encoder.byte(0x38), // f32.store + F64 => self.encoder.byte(0x39), // f64.store + V128 => self.encoder.raw(&[0xfd, 0x01]), // v128.store + I32_8 => self.encoder.byte(0x3a), // i32.store8 + I32_16 => self.encoder.byte(0x3b), // i32.store16 + I64_8 => self.encoder.byte(0x3c), // i64.store8 + I64_16 => self.encoder.byte(0x3d), // i64.store16 + I64_32 => self.encoder.byte(0x3e), // i64.store32 + } + self.encoder.u32(e.arg.align.trailing_zeros()); + self.encoder.u32(e.arg.offset); } } self.id = old; } - fn emit(&mut self, i: elements::Instruction) { - self.instructions.push(i); - } - fn branch_target(&self, block: BlockId) -> u32 { self.blocks.iter().rev().position(|b| *b == block).expect( "attempt to branch to invalid block; bad transformation pass introduced bad branching?", @@ -401,17 
+387,15 @@ impl Emit<'_> { fn visit_block(&mut self, e: &Block) { self.blocks.push(Block::new_id(self.id)); - let block_ty = match e.results.len() { - 0 => elements::BlockType::NoResult, - 1 => elements::BlockType::Value(e.results[0].into()), - _ => panic!( - "multiple return values not supported yet; write a transformation to rewrite them" - ), - }; - match e.kind { - BlockKind::Block => self.emit(elements::Instruction::Block(block_ty)), - BlockKind::Loop => self.emit(elements::Instruction::Loop(block_ty)), + BlockKind::Block => { + self.encoder.byte(0x02); // block + self.block_type(&e.results); + } + BlockKind::Loop => { + self.encoder.byte(0x03); // loop + self.block_type(&e.results); + } BlockKind::FunctionEntry | BlockKind::IfElse => {} } @@ -421,7 +405,7 @@ impl Emit<'_> { match e.kind { BlockKind::Block | BlockKind::Loop | BlockKind::FunctionEntry => { - self.emit(elements::Instruction::End) + self.encoder.byte(0x0b); // end } BlockKind::IfElse => {} } @@ -429,52 +413,20 @@ impl Emit<'_> { self.blocks.pop(); } - fn visit_const(&mut self, e: &Const) { - self.emit(match e.value { - Value::I32(i) => elements::Instruction::I32Const(i), - Value::I64(i) => elements::Instruction::I64Const(i), - Value::F32(i) => elements::Instruction::F32Const(i.to_bits()), - Value::F64(i) => elements::Instruction::F64Const(i.to_bits()), - Value::V128(i) => elements::Instruction::V128Const(Box::new([ - (i >> 0) as u8, - (i >> 8) as u8, - (i >> 16) as u8, - (i >> 24) as u8, - (i >> 32) as u8, - (i >> 40) as u8, - (i >> 48) as u8, - (i >> 56) as u8, - (i >> 64) as u8, - (i >> 72) as u8, - (i >> 80) as u8, - (i >> 88) as u8, - (i >> 96) as u8, - (i >> 104) as u8, - (i >> 112) as u8, - (i >> 120) as u8, - ])), - }) - } - fn visit_if_else(&mut self, e: &IfElse) { - let block_ty = { - let consequent = self.func.block(e.consequent); - match consequent.results.len() { - 0 => elements::BlockType::NoResult, - 1 => elements::BlockType::Value(consequent.results[0].into()), - _ => panic!( - 
"multiple return values not yet supported; write a transformation to \ - rewrite them into single value returns" - ), - } - }; - self.visit(e.condition); - self.emit(elements::Instruction::If(block_ty)); - let _ = self.visit(e.consequent); - self.emit(elements::Instruction::Else); - let _ = self.visit(e.alternative); - self.emit(elements::Instruction::End) + + self.encoder.byte(0x04); // if + let consequent = self.func.block(e.consequent); + self.block_type(&consequent.results); + + self.visit(e.consequent); + + // TODO: don't emit `else` for empty else blocks + self.encoder.byte(0x05); // else + self.visit(e.alternative); + + self.encoder.byte(0x0b); // end } fn visit_br_table(&mut self, e: &BrTable) { @@ -482,15 +434,25 @@ impl Emit<'_> { self.visit(*x); } self.visit(e.which); - let table = e - .blocks - .iter() - .map(|b| self.branch_target(*b)) - .collect::>() - .into_boxed_slice(); + + self.encoder.byte(0x0e); // br_table + self.encoder.usize(e.blocks.len()); + for b in e.blocks.iter() { + let target = self.branch_target(*b); + self.encoder.u32(target); + } let default = self.branch_target(e.default); - self.emit(elements::Instruction::BrTable(Box::new( - elements::BrTableData { table, default }, - ))) + self.encoder.u32(default); + } + + fn block_type(&mut self, ty: &[ValType]) { + match ty.len() { + 0 => self.encoder.byte(0x40), + 1 => ty[0].emit(self.encoder), + _ => panic!( + "multiple return values not yet supported; write a transformation to \ + rewrite them into single value returns" + ), + } } } diff --git a/src/module/functions/local_function/mod.rs b/src/module/functions/local_function/mod.rs index 7c80bf18..11549661 100644 --- a/src/module/functions/local_function/mod.rs +++ b/src/module/functions/local_function/mod.rs @@ -8,6 +8,7 @@ use self::context::FunctionContext; use super::FunctionId; use crate::dot::Dot; use crate::emit::IdsToIndices; +use crate::encode::Encoder; use crate::error::{ErrorKind, Result}; use crate::ir::matcher::{ConstMatcher, 
Matcher}; use crate::ir::*; @@ -17,8 +18,7 @@ use crate::parse::IndicesToIds; use crate::ty::{TypeId, ValType}; use failure::{bail, Fail, ResultExt}; use id_arena::{Arena, Id}; -use parity_wasm::elements; -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashSet, HashMap}; use std::fmt; use std::mem; use wasmparser::{Operator, OperatorsReader}; @@ -145,11 +145,7 @@ impl LocalFunction { } /// Emit this function's compact locals declarations. - pub(crate) fn emit_locals( - &self, - locals: &ModuleLocals, - indices: &mut IdsToIndices, - ) -> Vec { + pub(crate) fn emit_locals(&self, module: &Module, encoder: &mut Encoder) -> HashMap { struct LocalsVisitor<'a> { func: &'a LocalFunction, locals: &'a ModuleLocals, @@ -176,47 +172,47 @@ impl LocalFunction { } } + // Collect all used locals along with their types let mut v = LocalsVisitor { func: self, - locals, + locals: &module.locals, seen: HashSet::new(), ty_to_locals: BTreeMap::new(), args: self.args.iter().cloned().collect(), }; self.entry_block().visit(&mut v); - // First up allocate indices to the arguments of the function. These - // arguments get the first few indexes in the local index space, and are - // unconditionally used. + let mut local_map = HashMap::with_capacity(v.seen.len()); + + // Allocate an index to all the function arguments, as these are all + // unconditionally used and are implicit locals in wasm. let mut idx = 0; for &arg in self.args.iter() { - indices.set_local_index(arg, idx); + local_map.insert(arg, idx); idx += 1; } - // Next up assign chunks of locals all at once as we see them. 
- let mut ret = Vec::with_capacity(5); - for (ty, locals) in v.ty_to_locals { - let element_ty = match ty { - ValType::I32 => elements::ValueType::I32, - ValType::I64 => elements::ValueType::I64, - ValType::F32 => elements::ValueType::F32, - ValType::F64 => elements::ValueType::F64, - ValType::V128 => elements::ValueType::V128, - }; - ret.push(elements::Local::new(locals.len() as u32, element_ty)); + // Assign an index to all remaining locals + for (_, locals) in v.ty_to_locals.iter() { for l in locals { - indices.set_local_index(l, idx); + local_map.insert(*l, idx); idx += 1; } } - return ret; + // Use our type map to emit a compact representation of all locals now + encoder.usize(v.ty_to_locals.len()); + for (ty, locals) in v.ty_to_locals.iter() { + encoder.usize(locals.len()); + ty.emit(encoder); + } + + local_map } /// Emit this function's instruction sequence. - pub(crate) fn emit_instructions(&self, indices: &IdsToIndices) -> Vec { - emit::run(self, indices) + pub(crate) fn emit_instructions(&self, indices: &IdsToIndices, local_indices: &HashMap, dst: &mut Encoder) { + emit::run(self, indices, local_indices, dst) } } diff --git a/src/module/functions/mod.rs b/src/module/functions/mod.rs index 97ce3bc8..da3646a0 100644 --- a/src/module/functions/mod.rs +++ b/src/module/functions/mod.rs @@ -3,16 +3,17 @@ mod local_function; use crate::dot::Dot; -use crate::emit::{Emit, EmitContext}; +use crate::emit::{Emit, EmitContext, Section}; +use crate::encode::Encoder; use crate::error::Result; use crate::module::imports::ImportId; use crate::module::Module; use crate::parse::IndicesToIds; +use crate::passes::Used; use crate::ty::TypeId; use crate::ty::ValType; use failure::bail; use id_arena::{Arena, Id}; -use parity_wasm::elements; use rayon::prelude::*; use std::cmp; use std::fmt; @@ -186,6 +187,34 @@ impl ModuleFunctions { pub fn par_iter(&self) -> impl ParallelIterator { self.arena.par_iter().map(|(_, f)| f) } + + pub(crate) fn iter_used<'a>( + &'a self, + used: &'a 
Used, + ) -> impl Iterator + 'a { + self.iter().filter(move |f| used.funcs.contains(&f.id)) + } + + pub(crate) fn emit_func_section(&self, cx: &mut EmitContext) { + log::debug!("emit function section"); + let functions = used_local_functions(cx); + if functions.len() == 0 { + return; + } + let mut cx = cx.start_section(Section::Function); + cx.encoder.usize(functions.len()); + for (id, function, _size) in functions { + let index = cx.indices.get_type_index(function.ty); + cx.encoder.u32(index); + + // Assign an index to all local defined functions before we start + // translating them. While translating they may refer to future + // functions, so we'll need to have an index for it by that point. + // We're guaranteed the function section is emitted before the code + // section so we should be covered here. + cx.indices.push_func(id); + } + } } impl Module { @@ -196,6 +225,7 @@ impl Module { section: wasmparser::FunctionSectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse function section"); for func in section { let ty = ids.get_type(func?)?; let id = self.funcs.arena.next_id(); @@ -213,6 +243,7 @@ impl Module { function_section_count: u32, indices: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse code section"); let amt = section.get_count(); if amt != function_section_count { bail!("code and function sections must have same number of entries") @@ -293,63 +324,62 @@ impl Module { } } -impl Emit for ModuleFunctions { - fn emit(&self, cx: &mut EmitContext) { - // Extract all local functions because imported ones were already - // emitted as part of the import sectin. Find the size of each local - // function. Sort imported functions in order so that we can get their - // index in the function index space. 
- let mut functions = Vec::new(); - for (id, f) in &self.arena { - if !cx.used.funcs.contains(&id) { - continue; - } - match &f.kind { - FunctionKind::Local(l) => functions.push((id, l, l.size())), - FunctionKind::Import(_) => {} - FunctionKind::Uninitialized(_) => unreachable!(), - } +fn used_local_functions<'a>(cx: &mut EmitContext<'a>) -> Vec<(FunctionId, &'a LocalFunction, u64)> { + // Extract all local functions because imported ones were already + // emitted as part of the import sectin. Find the size of each local + // function. Sort imported functions in order so that we can get their + // index in the function index space. + let mut functions = Vec::new(); + for (id, f) in &cx.module.funcs.arena { + if !cx.used.funcs.contains(&id) { + continue; } - - // Sort local functions from largest to smallest; we will emit them in - // this order. This helps load times, since wasm engines generally use - // the function as their level of granularity for parallelism. We want - // larger functions compiled before smaller ones because they will take - // longer to compile. - functions.sort_by_key(|(id, _, size)| (cmp::Reverse(*size), *id)); - - let mut funcs = Vec::with_capacity(functions.len()); - let mut codes = Vec::with_capacity(functions.len()); - - // Assign an index to all local defined functions before we start - // translating them. While translating they may refer to future - // functions, so we'll need to have an index for it by that point. - for (id, _func, _size) in functions.iter() { - cx.indices.push_func(*id); + match &f.kind { + FunctionKind::Local(l) => functions.push((id, l, l.size())), + FunctionKind::Import(_) => {} + FunctionKind::Uninitialized(_) => unreachable!(), } + } - for (_id, func, _size) in functions { - debug_assert!(cx.used.types.contains(&func.ty)); - let ty_idx = cx.indices.get_type_index(func.ty); - funcs.push(elements::Func::new(ty_idx)); + // Sort local functions from largest to smallest; we will emit them in + // this order. 
This helps load times, since wasm engines generally use + // the function as their level of granularity for parallelism. We want + // larger functions compiled before smaller ones because they will take + // longer to compile. + functions.sort_by_key(|(id, _, size)| (cmp::Reverse(*size), *id)); - let locals = func.emit_locals(&cx.module.locals, cx.indices); - let instructions = func.emit_instructions(cx.indices); - let instructions = elements::Instructions::new(instructions); - codes.push(elements::FuncBody::new(locals, instructions)); - } + functions +} - assert_eq!(funcs.len(), codes.len()); - if codes.is_empty() { +impl Emit for ModuleFunctions { + fn emit(&self, cx: &mut EmitContext) { + log::debug!("emit code section"); + let functions = used_local_functions(cx); + if functions.len() == 0 { return; } - let funcs = elements::FunctionSection::with_entries(funcs); - let funcs = elements::Section::Function(funcs); - cx.dst.sections_mut().push(funcs); + let mut cx = cx.start_section(Section::Code); + cx.encoder.usize(functions.len()); - let codes = elements::CodeSection::with_bodies(codes); - let codes = elements::Section::Code(codes); - cx.dst.sections_mut().push(codes); + // Functions can typically take awhile to serialize, so serialize + // everything in parallel. Afterwards we'll actually place all the + // functions together. 
+ let bytes = functions + .into_par_iter() + .map(|(id, func, _size)| { + let mut wasm = Vec::new(); + let mut encoder = Encoder::new(&mut wasm); + let local_indices = func.emit_locals(cx.module, &mut encoder); + func.emit_instructions(cx.indices, &local_indices, &mut encoder); + (wasm, id, local_indices) + }) + .collect::>(); + + cx.indices.locals.reserve(bytes.len()); + for (wasm, id, local_indices) in bytes { + cx.encoder.bytes(&wasm); + cx.indices.locals.insert(id, local_indices); + } } } diff --git a/src/module/globals.rs b/src/module/globals.rs index 6949ee98..3ae10322 100644 --- a/src/module/globals.rs +++ b/src/module/globals.rs @@ -1,14 +1,13 @@ //! Globals within a wasm module. use crate::const_value::Const; -use crate::emit::{Emit, EmitContext}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::module::imports::ImportId; use crate::module::Module; use crate::parse::IndicesToIds; use crate::ty::ValType; use id_arena::{Arena, Id}; -use parity_wasm::elements; /// The id of a global. pub type GlobalId = Id; @@ -46,6 +45,13 @@ impl Global { } } +impl Emit for Global { + fn emit(&self, cx: &mut EmitContext) { + Emit::emit(&self.ty, cx); + cx.encoder.byte(self.mutable as u8); + } +} + /// The set of globals in each function in this module. 
#[derive(Debug, Default)] pub struct ModuleGlobals { @@ -100,6 +106,7 @@ impl Module { section: wasmparser::GlobalSectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse global section"); for g in section { let g = g?; let id = self.globals.add_local( @@ -115,30 +122,36 @@ impl Module { impl Emit for ModuleGlobals { fn emit(&self, cx: &mut EmitContext) { - let mut globals = Vec::with_capacity(cx.used.globals.len()); - - for (id, global) in &self.arena { - if !cx.used.globals.contains(&id) { - continue; + log::debug!("emit global section"); + fn get_local<'a>(cx: &EmitContext, global: &'a Global) -> Option<&'a Const> { + // If it's imported we already emitted this in the import section + if !cx.used.globals.contains(&global.id) { + return None; } - let init = match &global.kind { - GlobalKind::Import(_) => continue, // emitted in import section - GlobalKind::Local(init) => init, - }; - - cx.indices.push_global(id); + match &global.kind { + GlobalKind::Import(_) => None, + GlobalKind::Local(local) => Some(local), + } + } - let init_expr = init.emit_instructions(cx.indices); + let globals = self + .arena + .iter() + .filter(|(_id, global)| get_local(cx, global).is_some()) + .count(); - let ty = elements::GlobalType::new(global.ty.into(), global.mutable); - let global = elements::GlobalEntry::new(ty, init_expr); - globals.push(global); + if globals == 0 { + return; } - if !globals.is_empty() { - let globals = elements::GlobalSection::with_entries(globals); - let globals = elements::Section::Global(globals); - cx.dst.sections_mut().push(globals); + let mut cx = cx.start_section(Section::Global); + cx.encoder.usize(globals); + for (id, global) in self.arena.iter() { + if let Some(local) = get_local(&cx, global) { + cx.indices.push_global(id); + global.emit(&mut cx); + local.emit(&mut cx); + } } } } diff --git a/src/module/imports.rs b/src/module/imports.rs index 3cf7ff37..fa32866e 100644 --- a/src/module/imports.rs +++ b/src/module/imports.rs @@ 
-1,7 +1,7 @@ //! A wasm module's imports. use crate::arena_set::ArenaSet; -use crate::emit::{Emit, EmitContext}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::module::functions::FunctionId; use crate::module::globals::GlobalId; @@ -11,7 +11,6 @@ use crate::module::Module; use crate::parse::IndicesToIds; use crate::ty::ValType; use id_arena::Id; -use parity_wasm::elements; /// The id of an import. pub type ImportId = Id; @@ -70,6 +69,7 @@ impl Module { section: wasmparser::ImportSectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse import section"); for entry in section { let entry = entry?; let import = self.imports.arena.next_id(); @@ -124,6 +124,7 @@ impl Module { impl Emit for ModuleImports { fn emit(&self, cx: &mut EmitContext) { + log::debug!("emit import section"); let mut imports = Vec::new(); for (_id, import) in self.arena.iter() { @@ -136,42 +137,42 @@ impl Emit for ModuleImports { if !used { continue; } + imports.push(import); + } + if imports.len() == 0 { + return; + } - let external = match import.kind { + let mut cx = cx.start_section(Section::Import); + cx.encoder.usize(imports.len()); + + for import in imports { + cx.encoder.str(&import.module); + cx.encoder.str(&import.name); + match import.kind { ImportKind::Function(id) => { + cx.encoder.byte(0x00); cx.indices.push_func(id); let ty = cx.module.funcs.get(id).ty(); - elements::External::Function(cx.indices.get_type_index(ty)) + let idx = cx.indices.get_type_index(ty); + cx.encoder.u32(idx); } - ImportKind::Global(id) => { - cx.indices.push_global(id); - let global = cx.module.globals.get(id); - let global = elements::GlobalType::new(global.ty.into(), global.mutable); - elements::External::Global(global) + ImportKind::Table(id) => { + cx.encoder.byte(0x01); + cx.indices.push_table(id); + cx.module.tables.get(id).emit(&mut cx); } ImportKind::Memory(id) => { + cx.encoder.byte(0x02); cx.indices.push_memory(id); - let memory = 
cx.module.memories.get(id); - let memory = - elements::MemoryType::new(memory.initial, memory.maximum, memory.shared); - elements::External::Memory(memory) + cx.module.memories.get(id).emit(&mut cx); } - ImportKind::Table(id) => { - cx.indices.push_table(id); - let table = cx.module.tables.get(id); - let table = elements::TableType::new(table.initial, table.maximum); - elements::External::Table(table) + ImportKind::Global(id) => { + cx.encoder.byte(0x03); + cx.indices.push_global(id); + cx.module.globals.get(id).emit(&mut cx); } - }; - let entry = - elements::ImportEntry::new(import.module.clone(), import.name.clone(), external); - imports.push(entry); - } - - if !imports.is_empty() { - let imports = elements::ImportSection::with_entries(imports); - let imports = elements::Section::Import(imports); - cx.dst.sections_mut().push(imports); + } } } } diff --git a/src/module/memories.rs b/src/module/memories.rs index 4c42a881..37317be9 100644 --- a/src/module/memories.rs +++ b/src/module/memories.rs @@ -1,7 +1,7 @@ //! Memories used in a wasm module. use crate::const_value::Const; -use crate::emit::{Emit, EmitContext, IdsToIndices}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::ir::Value; use crate::module::globals::GlobalId; @@ -9,7 +9,6 @@ use crate::module::imports::ImportId; use crate::module::Module; use crate::parse::IndicesToIds; use id_arena::{Arena, Id}; -use parity_wasm::elements; /// The id of a memory. 
pub type MemoryId = Id; @@ -47,31 +46,34 @@ impl Memory { self.id } - pub(crate) fn emit_data<'a>( - &'a self, - indices: &'a IdsToIndices, - ) -> impl Iterator + 'a { - let index = indices.get_memory_index(self.id); - let absolute = self.data.absolute.iter().map(move |(pos, data)| { - elements::DataSegment::new( - index, - Some(Const::Value(Value::I32(*pos as i32)).emit_instructions(indices)), - data.to_vec(), - false, - ) - }); - let relative = self.data.relative.iter().map(move |(id, data)| { - elements::DataSegment::new( - index, - Some(Const::Global(*id).emit_instructions(indices)), - data.to_vec(), - false, - ) - }); + pub(crate) fn emit_data(&self) -> impl Iterator { + let absolute = self + .data + .absolute + .iter() + .map(move |(pos, data)| (Const::Value(Value::I32(*pos as i32)), &data[..])); + let relative = self + .data + .relative + .iter() + .map(move |(id, data)| (Const::Global(*id), &data[..])); absolute.chain(relative) } } +impl Emit for Memory { + fn emit(&self, cx: &mut EmitContext) { + if let Some(max) = self.maximum { + cx.encoder.byte(if self.shared { 0x03 } else { 0x01 }); + cx.encoder.u32(self.initial); + cx.encoder.u32(max); + } else { + cx.encoder.byte(0x00); + cx.encoder.u32(self.initial); + } + } +} + /// The set of memories in this module. 
#[derive(Debug, Default)] pub struct ModuleMemories { @@ -139,6 +141,7 @@ impl Module { section: wasmparser::MemorySectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse memory section"); for m in section { let m = m?; let id = self @@ -152,25 +155,29 @@ impl Module { impl Emit for ModuleMemories { fn emit(&self, cx: &mut EmitContext) { - let mut memories = Vec::with_capacity(cx.used.memories.len()); - - for (id, mem) in &self.arena { - if !cx.used.memories.contains(&id) { - continue; - } - if mem.import.is_some() { - continue; // already emitted in the import section - } - - cx.indices.push_memory(id); - let memory = elements::MemoryType::new(mem.initial, mem.maximum, mem.shared); - memories.push(memory); + log::debug!("emit memory section"); + let emitted = |cx: &EmitContext, memory: &Memory| { + // If it's imported we already emitted this in the import section + cx.used.memories.contains(&memory.id) && memory.import.is_none() + }; + + let memories = self + .arena + .iter() + .filter(|(_id, memory)| emitted(cx, memory)) + .count(); + + if memories == 0 { + return; } - if !memories.is_empty() { - let memories = elements::MemorySection::with_entries(memories); - let memories = elements::Section::Memory(memories); - cx.dst.sections_mut().push(memories); + let mut cx = cx.start_section(Section::Memory); + cx.encoder.usize(memories); + for (id, memory) in self.arena.iter() { + if emitted(&cx, memory) { + cx.indices.push_memory(id); + memory.emit(&mut cx); + } } } } diff --git a/src/module/mod.rs b/src/module/mod.rs index 33bfce3e..f384fa3f 100644 --- a/src/module/mod.rs +++ b/src/module/mod.rs @@ -12,7 +12,8 @@ pub mod producers; pub mod tables; pub mod types; -use crate::emit::{Emit, EmitContext, IdsToIndices}; +use crate::emit::{Emit, EmitContext, IdsToIndices, Section}; +use crate::encode::Encoder; use crate::error::Result; use crate::module::data::ModuleData; use crate::module::elements::ModuleElements; @@ -28,7 +29,6 @@ use 
crate::module::types::ModuleTypes; use crate::parse::IndicesToIds; use crate::passes; use failure::{bail, ResultExt}; -use parity_wasm::elements as parity; use std::fs; use std::path::Path; @@ -81,67 +81,56 @@ impl Module { let section = parser.read()?; match section.code { wasmparser::SectionCode::Data => { - log::debug!("parsing data section"); let reader = section.get_data_section_reader()?; ret.parse_data(reader, &mut indices) .context("failed to parse data section")?; } wasmparser::SectionCode::Type => { - log::debug!("parsing type section"); let reader = section.get_type_section_reader()?; ret.parse_types(reader, &mut indices) .context("failed to parse type section")?; } wasmparser::SectionCode::Import => { - log::debug!("parsing import section"); let reader = section.get_import_section_reader()?; ret.parse_imports(reader, &mut indices) .context("failed to parse import section")?; } wasmparser::SectionCode::Table => { - log::debug!("parsing table section"); let reader = section.get_table_section_reader()?; ret.parse_tables(reader, &mut indices) .context("failed to parse table section")?; } wasmparser::SectionCode::Memory => { - log::debug!("parsing memory section"); let reader = section.get_memory_section_reader()?; ret.parse_memories(reader, &mut indices) .context("failed to parse memory section")?; } wasmparser::SectionCode::Global => { - log::debug!("parsing global section"); let reader = section.get_global_section_reader()?; ret.parse_globals(reader, &mut indices) .context("failed to parse global section")?; } wasmparser::SectionCode::Export => { - log::debug!("parsing export section"); let reader = section.get_export_section_reader()?; ret.parse_exports(reader, &mut indices) .context("failed to parse export section")?; } wasmparser::SectionCode::Element => { - log::debug!("parsing element section"); let reader = section.get_element_section_reader()?; ret.parse_elements(reader, &mut indices) .context("failed to parse element section")?; } 
wasmparser::SectionCode::Start => { - log::debug!("parsing start section"); let idx = section.get_start_section_content()?; ret.start = Some(indices.get_func(idx)?); } wasmparser::SectionCode::Function => { - log::debug!("parsing function section"); let reader = section.get_function_section_reader()?; function_section_size = Some(reader.get_count()); ret.declare_local_functions(reader, &mut indices) .context("failed to parse function section")?; } wasmparser::SectionCode::Code => { - log::debug!("parsing code section"); let function_section_size = match function_section_size.take() { Some(i) => i, None => bail!("cannot have a code section without function section"), @@ -151,7 +140,6 @@ impl Module { .context("failed to parse code section")?; } wasmparser::SectionCode::Custom { name, kind: _ } => { - log::debug!("parsing custom section `{}`", name); let result = match name { "producers" => { let reader = section.get_binary_reader(); @@ -162,6 +150,7 @@ impl Module { .map_err(failure::Error::from) .and_then(|r| ret.parse_name_section(r, &indices)), _ => { + log::debug!("parsing custom section `{}`", name); let mut reader = section.get_binary_reader(); let len = reader.bytes_remaining(); let payload = reader.read_bytes(len)?; @@ -187,7 +176,6 @@ impl Module { .add_processed_by("walrus", env!("CARGO_PKG_VERSION")); // TODO: probably run this in a different location - log::debug!("validating module"); crate::passes::validate::run(&ret)?; log::debug!("parse complete"); @@ -206,67 +194,46 @@ impl Module { /// Emit this module into an in-memory wasm buffer. 
pub fn emit_wasm(&self) -> Result> { + log::debug!("start emit"); let roots = self.exports.iter(); let used = passes::Used::new(self, roots.map(|e| e.id())); let indices = &mut IdsToIndices::default(); - let mut module = parity::Module::new(Vec::new()); + let mut wasm = Vec::new(); + wasm.extend(&[0x00, 0x61, 0x73, 0x6d]); // magic + wasm.extend(&[0x01, 0x00, 0x00, 0x00]); // version let mut cx = EmitContext { module: self, indices, used: &used, - dst: &mut module, + encoder: Encoder::new(&mut wasm), }; self.types.emit(&mut cx); self.imports.emit(&mut cx); + self.funcs.emit_func_section(&mut cx); self.tables.emit(&mut cx); self.memories.emit(&mut cx); self.globals.emit(&mut cx); - self.funcs.emit(&mut cx); self.exports.emit(&mut cx); if let Some(start) = self.start { let idx = cx.indices.get_func_index(start); - cx.dst.sections_mut().push(parity::Section::Start(idx)); + cx.start_section(Section::Start).encoder.u32(idx); } self.elements.emit(&mut cx); + self.data.emit_data_count(&mut cx); + self.funcs.emit(&mut cx); self.data.emit(&mut cx); - emit_module_name_section(&mut cx); - emit_function_name_section(&mut cx); - emit_local_name_section(&mut cx); - - let producers = parity::CustomSection::new("producers".to_string(), self.producers.emit()); - module - .sections_mut() - .push(parity::Section::Custom(producers)); - + emit_name_section(&mut cx); + self.producers.emit(&mut cx); for section in self.custom.iter() { - let section = parity::CustomSection::new(section.name.clone(), section.value.clone()); - module.sections_mut().push(parity::Section::Custom(section)); + log::debug!("emitting custom section {}", section.name); + cx.custom_section(§ion.name).encoder.raw(§ion.value); } - module.sections_mut().sort_by_key(|s| match s { - parity::Section::Type(_) => 1, - parity::Section::Import(_) => 2, - parity::Section::Function(_) => 3, - parity::Section::Table(_) => 4, - parity::Section::Memory(_) => 5, - parity::Section::Global(_) => 6, - parity::Section::Export(_) => 7, 
- parity::Section::Start(_) => 8, - parity::Section::Element(_) => 9, - parity::Section::Code(_) => 10, - parity::Section::Data(_) => 11, - - parity::Section::Custom(_) - | parity::Section::Unparsed { .. } - | parity::Section::Reloc(_) - | parity::Section::Name(_) => 12, - }); - let buffer = - parity::serialize(module).context("failed to serialize wasm module to file")?; - Ok(buffer) + log::debug!("emission finished"); + Ok(wasm) } /// Returns an iterator over all functions in this module @@ -279,6 +246,7 @@ impl Module { names: wasmparser::NameSectionReader, indices: &IndicesToIds, ) -> Result<()> { + log::debug!("parse name section"); for name in names { match name? { wasmparser::Name::Module(m) => { @@ -311,56 +279,69 @@ impl Module { } } -fn emit_module_name_section(cx: &mut EmitContext) { - let name = match &cx.module.name { - Some(name) => name, - None => return, - }; - let section = parity::ModuleNameSection::new(name.clone()); - let section = parity::NameSection::Module(section); - cx.dst.sections_mut().push(parity::Section::Name(section)); -} +fn emit_name_section(cx: &mut EmitContext) { + log::debug!("emit name section"); + let mut funcs = cx + .module + .funcs + .iter_used(cx.used) + .filter_map(|func| func.name.as_ref().map(|name| (func, name))) + .map(|(func, name)| (cx.indices.get_func_index(func.id()), name)) + .collect::>(); + funcs.sort_by_key(|p| p.0); // sort by index -fn emit_function_name_section(cx: &mut EmitContext) { - let mut map = parity::NameMap::default(); - for id in cx.used.funcs.iter() { - let name = match &cx.module.funcs.get(*id).name { - Some(name) => name, - None => continue, - }; - map.insert(cx.indices.get_func_index(*id), name.clone()); - } - if map.len() > 0 { - let mut section = parity::FunctionNameSection::default(); - *section.names_mut() = map; - let section = parity::NameSection::Function(section); - cx.dst.sections_mut().push(parity::Section::Name(section)); + let mut locals = cx + .module + .funcs + .iter_used(cx.used) 
+ .filter_map(|func| cx.used.locals.get(&func.id()).map(|l| (func, l))) + .filter_map(|(func, locals)| { + let local_names = locals + .iter() + .filter_map(|id| { + let name = cx.module.locals.get(*id).name.as_ref()?; + let index = cx.indices.locals.get(&func.id())?.get(id)?; + Some((*index, name)) + }) + .collect::>(); + if local_names.len() == 0 { + None + } else { + Some((cx.indices.get_func_index(func.id()), local_names)) + } + }) + .collect::>(); + locals.sort_by_key(|p| p.0); // sort by index + + if cx.module.name.is_none() && funcs.len() == 0 && locals.len() == 0 { + return; } -} -fn emit_local_name_section(cx: &mut EmitContext) { - let mut map = parity::IndexMap::default(); - for id in cx.used.funcs.iter() { - let mut locals = parity::NameMap::default(); + let mut cx = cx.custom_section("name"); + if let Some(name) = &cx.module.name { + cx.subsection(0).encoder.str(name); + } - if let Some(set) = cx.used.locals.get(id) { - for local_id in set { - let name = match &cx.module.locals.get(*local_id).name { - Some(name) => name, - None => continue, - }; - locals.insert(cx.indices.get_local_index(*local_id), name.clone()); - } + if funcs.len() > 0 { + let mut cx = cx.subsection(1); + cx.encoder.usize(funcs.len()); + for (index, name) in funcs { + cx.encoder.u32(index); + cx.encoder.str(name); } + } - if locals.len() > 0 { - map.insert(cx.indices.get_func_index(*id), locals); + if locals.len() > 0 { + let mut cx = cx.subsection(2); + cx.encoder.usize(locals.len()); + for (index, mut map) in locals { + cx.encoder.u32(index); + cx.encoder.usize(map.len()); + map.sort_by_key(|p| p.0); // sort by index + for (index, name) in map { + cx.encoder.u32(index); + cx.encoder.str(name); + } } } - if map.len() > 0 { - let mut section = parity::LocalNameSection::default(); - *section.local_names_mut() = map; - let section = parity::NameSection::Local(section); - cx.dst.sections_mut().push(parity::Section::Name(section)); - } } diff --git a/src/module/producers.rs 
b/src/module/producers.rs index 605297dd..5fd1973a 100644 --- a/src/module/producers.rs +++ b/src/module/producers.rs @@ -3,10 +3,10 @@ //! Specified upstream at //! https://github.com/WebAssembly/tool-conventions/blob/master/ProducersSection.md +use crate::emit::{Emit, EmitContext}; use crate::error::Result; use crate::module::Module; use failure::bail; -use parity_wasm::elements::*; /// Representation of the wasm custom section `producers` #[derive(Debug, Default)] @@ -27,29 +27,6 @@ struct Value { } impl ModuleProducers { - /// Serialize this producers section into its binary format - pub fn emit(&self) -> Vec { - // re-serialize these fields back into the custom section - let mut dst = Vec::new(); - VarUint32::from(self.fields.len() as u32) - .serialize(&mut dst) - .unwrap(); - - for field in self.fields.iter() { - field.name.clone().serialize(&mut dst).unwrap(); - VarUint32::from(field.values.len() as u32) - .serialize(&mut dst) - .unwrap(); - - for value in field.values.iter() { - value.name.clone().serialize(&mut dst).unwrap(); - value.version.clone().serialize(&mut dst).unwrap(); - } - } - - dst - } - /// Adds a new `language` (versioned) to the producers section pub fn add_language(&mut self, language: &str, version: &str) { self.field("language", language, version); @@ -97,6 +74,7 @@ impl Module { &mut self, mut data: wasmparser::BinaryReader, ) -> Result<()> { + log::debug!("parse producers section"); for _ in 0..data.read_var_u32()? 
{ let name = data.read_string()?.to_string(); let cnt = data.read_var_u32()?; @@ -116,3 +94,24 @@ impl Module { Ok(()) } } + +impl Emit for ModuleProducers { + fn emit(&self, cx: &mut EmitContext) { + log::debug!("emit producers section"); + cx.custom_section("producers").list(&self.fields); + } +} + +impl Emit for Field { + fn emit(&self, cx: &mut EmitContext) { + cx.encoder.str(&self.name); + cx.list(&self.values); + } +} + +impl Emit for Value { + fn emit(&self, cx: &mut EmitContext) { + cx.encoder.str(&self.name); + cx.encoder.str(&self.version); + } +} diff --git a/src/module/tables.rs b/src/module/tables.rs index 66e1f627..df054da5 100644 --- a/src/module/tables.rs +++ b/src/module/tables.rs @@ -1,6 +1,6 @@ //! Tables within a wasm module. -use crate::emit::{Emit, EmitContext}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::module::functions::FunctionId; use crate::module::globals::GlobalId; @@ -8,7 +8,6 @@ use crate::module::imports::ImportId; use crate::module::Module; use crate::parse::IndicesToIds; use id_arena::{Arena, Id}; -use parity_wasm::elements; /// The id of a table. pub type TableId = Id; @@ -55,6 +54,21 @@ impl Table { } } +impl Emit for Table { + fn emit(&self, cx: &mut EmitContext) { + match self.kind { + TableKind::Function(_) => { + cx.encoder.byte(0x70); // the `anyfunc` type + } + } + cx.encoder.byte(self.maximum.is_some() as u8); + cx.encoder.u32(self.initial); + if let Some(m) = self.maximum { + cx.encoder.u32(m); + } + } +} + /// The set of tables in this module. 
#[derive(Debug, Default)] pub struct ModuleTables { @@ -119,6 +133,7 @@ impl Module { section: wasmparser::TableSectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parse table section"); for t in section { let t = t?; let id = self.tables.add_local( @@ -137,25 +152,29 @@ impl Module { impl Emit for ModuleTables { fn emit(&self, cx: &mut EmitContext) { - let mut tables = Vec::with_capacity(cx.used.tables.len()); - - for (id, table) in &self.arena { - if !cx.used.tables.contains(&id) { - continue; - } - if table.import.is_some() { - continue; // already emitted in the imports section - } - - cx.indices.push_table(id); - let table = elements::TableType::new(table.initial, table.maximum); - tables.push(table); + log::debug!("emit table section"); + let emitted = |cx: &EmitContext, table: &Table| { + // If it's imported we already emitted this in the import section + cx.used.tables.contains(&table.id) && table.import.is_none() + }; + + let tables = self + .arena + .iter() + .filter(|(_id, table)| emitted(cx, table)) + .count(); + + if tables == 0 { + return; } - if !tables.is_empty() { - let tables = elements::TableSection::with_entries(tables); - let tables = elements::Section::Table(tables); - cx.dst.sections_mut().push(tables); + let mut cx = cx.start_section(Section::Table); + cx.encoder.usize(tables); + for (id, table) in self.arena.iter() { + if emitted(&cx, table) { + cx.indices.push_table(id); + table.emit(&mut cx); + } } } } diff --git a/src/module/types.rs b/src/module/types.rs index 95a3a564..29bfd024 100644 --- a/src/module/types.rs +++ b/src/module/types.rs @@ -1,12 +1,11 @@ //! Types in a wasm module. use crate::arena_set::ArenaSet; -use crate::emit::{Emit, EmitContext}; +use crate::emit::{Emit, EmitContext, Section}; use crate::error::Result; use crate::module::Module; use crate::parse::IndicesToIds; use crate::ty::{Type, TypeId, ValType}; -use parity_wasm::elements; /// The set of de-duplicated types within a module. 
#[derive(Debug, Default)] @@ -38,6 +37,7 @@ impl Module { section: wasmparser::TypeSectionReader, ids: &mut IndicesToIds, ) -> Result<()> { + log::debug!("parsing type section"); for ty in section { let fun_ty = ty?; let id = self.types.arena.next_id(); @@ -63,36 +63,20 @@ impl Module { impl Emit for ModuleTypes { fn emit(&self, cx: &mut EmitContext) { - let mut types = Vec::with_capacity(cx.used.types.len()); + log::debug!("emitting type section"); + let ntypes = cx.used.types.len(); + if ntypes == 0 { + return; + } + let mut cx = cx.start_section(Section::Type); + cx.encoder.usize(ntypes); for (id, ty) in self.arena.iter() { if !cx.used.types.contains(&id) { continue; } cx.indices.push_type(id); - - let params: Vec = - ty.params().iter().cloned().map(Into::into).collect(); - - let ret: Vec = - ty.results().iter().cloned().map(Into::into).collect(); - assert!( - ret.len() <= 1, - "multiple return values not supported yet; \ - write a legalization pass to rewrite them into single value returns \ - and store extra return values in globals." - ); - let ret = if ret.is_empty() { None } else { Some(ret[0]) }; - - types.push(elements::Type::Function(elements::FunctionType::new( - params, ret, - ))); - } - - if !types.is_empty() { - let types = elements::TypeSection::with_types(types); - let types = elements::Section::Type(types); - cx.dst.sections_mut().push(types); + ty.emit(&mut cx); } } } diff --git a/src/passes/used.rs b/src/passes/used.rs index 8f065156..c88672fd 100644 --- a/src/passes/used.rs +++ b/src/passes/used.rs @@ -42,6 +42,7 @@ impl Used { where R: IntoIterator, { + log::debug!("starting to calculate used set"); let mut used = Used::default(); let mut stack = UsedStack { used: &mut used, diff --git a/src/passes/validate.rs b/src/passes/validate.rs index 6446a43b..5ae16a2c 100644 --- a/src/passes/validate.rs +++ b/src/passes/validate.rs @@ -18,6 +18,7 @@ use std::collections::HashSet; /// Validate a wasm module, returning an error if it fails to validate. 
pub fn run(module: &Module) -> Result<()> { + log::debug!("validating module"); // TODO: should a config option be added to lift these restrictions? They're // only here for the spec tests... if module.tables.iter().count() > 1 { @@ -85,6 +86,9 @@ pub fn run(module: &Module) -> Result<()> { } fn validate_memory(m: &Memory) -> Result<()> { + if m.shared && m.maximum.is_none() { + bail!("shared memories must have a maximum size"); + } validate_limits(m.initial, m.maximum, u32::from(u16::max_value()) + 1) .context("when validating a memory")?; Ok(()) diff --git a/src/ty.rs b/src/ty.rs index 82468921..5eed32b4 100644 --- a/src/ty.rs +++ b/src/ty.rs @@ -1,8 +1,9 @@ //! WebAssembly function and value types. +use crate::emit::{Emit, EmitContext}; +use crate::encode::Encoder; use crate::error::Result; use id_arena::Id; -use parity_wasm::elements; use std::fmt; use std::hash; @@ -66,6 +67,14 @@ impl Type { } } +impl Emit for Type { + fn emit(&self, cx: &mut EmitContext) { + cx.encoder.byte(0x60); + cx.list(self.params.iter()); + cx.list(self.results.iter()); + } +} + /// A value type. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum ValType { @@ -81,30 +90,6 @@ pub enum ValType { V128, } -impl<'a> From<&'a elements::ValueType> for ValType { - fn from(x: &'a elements::ValueType) -> ValType { - match x { - elements::ValueType::I32 => ValType::I32, - elements::ValueType::I64 => ValType::I64, - elements::ValueType::F32 => ValType::F32, - elements::ValueType::F64 => ValType::F64, - elements::ValueType::V128 => ValType::V128, - } - } -} - -impl From for elements::ValueType { - fn from(x: ValType) -> elements::ValueType { - match x { - ValType::I32 => elements::ValueType::I32, - ValType::I64 => elements::ValueType::I64, - ValType::F32 => elements::ValueType::F32, - ValType::F64 => elements::ValueType::F64, - ValType::V128 => elements::ValueType::V128, - } - } -} - impl ValType { /// Construct a vector of `ValType`s from a parity-wasm `BlockType`. 
pub fn from_block_ty(block_ty: wasmparser::Type) -> Result> { @@ -124,6 +109,16 @@ impl ValType { _ => failure::bail!("not a value type"), } } + + pub(crate) fn emit(&self, encoder: &mut Encoder) { + match self { + ValType::I32 => encoder.byte(0x7f), + ValType::I64 => encoder.byte(0x7e), + ValType::F32 => encoder.byte(0x7d), + ValType::F64 => encoder.byte(0x7c), + ValType::V128 => encoder.byte(0x7b), + } + } } impl fmt::Display for ValType { @@ -141,3 +136,9 @@ impl fmt::Display for ValType { ) } } + +impl Emit for ValType { + fn emit(&self, cx: &mut EmitContext) { + self.emit(&mut cx.encoder); + } +} From 125d2c0d2903a88d3ab3447de76680153f85b17c Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 25 Jan 2019 12:43:18 -0800 Subject: [PATCH 2/4] Avoid collecting used locals twice Turns out we forgot that `Used` was already collecting used locals, so there's no need to do it again in `emit_locals`! --- src/module/functions/local_function/mod.rs | 65 ++++++++----------- src/module/functions/mod.rs | 2 +- .../tests/round_trip/used-local-in-local.wat | 4 +- 3 files changed, 29 insertions(+), 42 deletions(-) diff --git a/src/module/functions/local_function/mod.rs b/src/module/functions/local_function/mod.rs index 11549661..f1d8bf2e 100644 --- a/src/module/functions/local_function/mod.rs +++ b/src/module/functions/local_function/mod.rs @@ -12,11 +12,11 @@ use crate::encode::Encoder; use crate::error::{ErrorKind, Result}; use crate::ir::matcher::{ConstMatcher, Matcher}; use crate::ir::*; -use crate::module::locals::ModuleLocals; use crate::module::Module; use crate::parse::IndicesToIds; use crate::ty::{TypeId, ValType}; use failure::{bail, Fail, ResultExt}; +use crate::passes::Used; use id_arena::{Arena, Id}; use std::collections::{BTreeMap, HashSet, HashMap}; use std::fmt; @@ -145,44 +145,31 @@ impl LocalFunction { } /// Emit this function's compact locals declarations. 
- pub(crate) fn emit_locals(&self, module: &Module, encoder: &mut Encoder) -> HashMap { - struct LocalsVisitor<'a> { - func: &'a LocalFunction, - locals: &'a ModuleLocals, - seen: HashSet, - // NB: Use `BTreeMap` to make compilation deterministic - ty_to_locals: BTreeMap>, - args: HashSet, - } - - impl<'expr> Visitor<'expr> for LocalsVisitor<'expr> { - fn local_function(&self) -> &'expr LocalFunction { - self.func - } - - fn visit_local_id(&mut self, &id: &LocalId) { - if !self.seen.insert(id) { - return; // already seen? no more work to do - } - if self.args.contains(&id) { - return; // is this an argument? we'll handle that separately - } - let ty = self.locals.get(id).ty(); - self.ty_to_locals.entry(ty).or_insert(Vec::new()).push(id); + pub(crate) fn emit_locals(&self, id: FunctionId, module: &Module, used: &Used, encoder: &mut Encoder) -> HashMap { + let mut used_locals = Vec::new(); + if let Some(locals) = used.locals.get(&id) { + used_locals = locals.iter().cloned().collect(); + // Sort to ensure we assign local indexes deterministically, and + // everything is distinct so we can use a faster unstable sort. + used_locals.sort_unstable(); + } + + // NB: Use `BTreeMap` to make compilation deterministic by emitting + // types in the same order + let mut ty_to_locals = BTreeMap::new(); + let args = self.args.iter().cloned().collect::>(); + + // Partition all locals by their type as we'll create at most one entry + // for each type. Skip all arguments to the function because they're + // handled separately. 
+ for local in used_locals.iter() { + if !args.contains(local) { + let ty = module.locals.get(*local).ty(); + ty_to_locals.entry(ty).or_insert_with(Vec::new).push(*local); } } - // Collect all used locals along with their types - let mut v = LocalsVisitor { - func: self, - locals: &module.locals, - seen: HashSet::new(), - ty_to_locals: BTreeMap::new(), - args: self.args.iter().cloned().collect(), - }; - self.entry_block().visit(&mut v); - - let mut local_map = HashMap::with_capacity(v.seen.len()); + let mut local_map = HashMap::with_capacity(used_locals.len()); // Allocate an index to all the function arguments, as these are all // unconditionally used and are implicit locals in wasm. @@ -193,7 +180,7 @@ impl LocalFunction { } // Assign an index to all remaining locals - for (_, locals) in v.ty_to_locals.iter() { + for (_, locals) in ty_to_locals.iter() { for l in locals { local_map.insert(*l, idx); idx += 1; @@ -201,8 +188,8 @@ impl LocalFunction { } // Use our type map to emit a compact representation of all locals now - encoder.usize(v.ty_to_locals.len()); - for (ty, locals) in v.ty_to_locals.iter() { + encoder.usize(ty_to_locals.len()); + for (ty, locals) in ty_to_locals.iter() { encoder.usize(locals.len()); ty.emit(encoder); } diff --git a/src/module/functions/mod.rs b/src/module/functions/mod.rs index da3646a0..292fce7b 100644 --- a/src/module/functions/mod.rs +++ b/src/module/functions/mod.rs @@ -370,7 +370,7 @@ impl Emit for ModuleFunctions { .map(|(id, func, _size)| { let mut wasm = Vec::new(); let mut encoder = Encoder::new(&mut wasm); - let local_indices = func.emit_locals(cx.module, &mut encoder); + let local_indices = func.emit_locals(id, cx.module, cx.used, &mut encoder); func.emit_instructions(cx.indices, &local_indices, &mut encoder); (wasm, id, local_indices) }) diff --git a/walrus-tests/tests/round_trip/used-local-in-local.wat b/walrus-tests/tests/round_trip/used-local-in-local.wat index 26130aea..662fd7e3 100644 --- 
a/walrus-tests/tests/round_trip/used-local-in-local.wat +++ b/walrus-tests/tests/round_trip/used-local-in-local.wat @@ -9,5 +9,5 @@ ;; CHECK: (func $foo ;; NEXT: (local i32 i32) -;; NEXT: local.get 1 -;; NEXT: local.set 0) +;; NEXT: local.get 0 +;; NEXT: local.set 1) From 44d04ba5fcaebb0d8f238c701ba23d5af520f56b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 28 Jan 2019 08:47:52 -0800 Subject: [PATCH 3/4] Fix a... spuriously (?) failing test --- walrus-tests/tests/round_trip/used-local-in-local.wat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/walrus-tests/tests/round_trip/used-local-in-local.wat b/walrus-tests/tests/round_trip/used-local-in-local.wat index 662fd7e3..26130aea 100644 --- a/walrus-tests/tests/round_trip/used-local-in-local.wat +++ b/walrus-tests/tests/round_trip/used-local-in-local.wat @@ -9,5 +9,5 @@ ;; CHECK: (func $foo ;; NEXT: (local i32 i32) -;; NEXT: local.get 0 -;; NEXT: local.set 1) +;; NEXT: local.get 1 +;; NEXT: local.set 0) From e16a6c1bb4a45ac618bf0265f605dff31997d864 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 28 Jan 2019 08:50:31 -0800 Subject: [PATCH 4/4] Use the `leb128` crate where we can --- Cargo.toml | 1 + src/encode.rs | 26 +++++++------------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bebc5635..2fae61ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ version = "0.1.0" [dependencies] failure = "0.1.2" id-arena = { version = "2.1.0", features = ['rayon'] } +leb128 = "0.2.3" log = "0.4" rayon = "1.0.3" wasmparser = "0.26" diff --git a/src/encode.rs b/src/encode.rs index d52ef065..f2c96b2c 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -28,30 +28,16 @@ impl<'data> Encoder<'data> { self.u32(amt as u32) } - pub fn u32(&mut self, mut amt: u32) { - while amt >= (1 << 7) { - self.byte((amt as u8) & 0x7f | 0x80); - amt >>= 7; - } - self.byte(amt as u8); + pub fn u32(&mut self, amt: u32) { + leb128::write::unsigned(&mut 
self.dst, amt.into()).unwrap(); } pub fn i32(&mut self, val: i32) { - self.i64(val as i64); + leb128::write::signed(&mut self.dst, val.into()).unwrap(); } - pub fn i64(&mut self, mut val: i64) { - let mut done = false; - while !done { - let mut byte = (val as i8) & 0x7f; - val >>= 7; - if (val == 0 && (byte & 0x40 == 0)) || (val == -1 && (byte & 0x40 != 0)) { - done = true; - } else { - byte |= 0x80u8 as i8; - } - self.byte(byte as u8); - } + pub fn i64(&mut self, val: i64) { + leb128::write::signed(&mut self.dst, val).unwrap(); } pub fn f32(&mut self, val: f32) { @@ -92,6 +78,8 @@ impl<'data> Encoder<'data> { self.dst.len() } + // TODO: don't write this code here, use upstream once + // gimli-rs/leb128#6 is implemented pub fn u32_at(&mut self, pos: usize, mut amt: u32) { for i in 0..MAX_U32_LENGTH { let flag = if i == MAX_U32_LENGTH - 1 { 0 } else { 0x80 };