Skip to content

Commit

Permalink
Merge pull request #30 from alexcrichton/manually-serialize
Browse files Browse the repository at this point in the history
Switch wasm emission to a custom encoder
  • Loading branch information
alexcrichton authored Jan 28, 2019
2 parents 1fc4423 + e16a6c1 commit ce985f8
Show file tree
Hide file tree
Showing 23 changed files with 1,078 additions and 852 deletions.
5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ version = "0.1.0"
[dependencies]
failure = "0.1.2"
id-arena = { version = "2.1.0", features = ['rayon'] }
parity-wasm = "0.35.6"
petgraph = "0.4.13"
leb128 = "0.2.3"
log = "0.4"
wasmparser = "0.26"
rayon = "1.0.3"
wasmparser = "0.26"

[dependencies.walrus-derive]
path = "./walrus-derive"
Expand Down
9 changes: 9 additions & 0 deletions examples/round-trip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// A small example which is primarily used to help benchmark walrus right now.

/// Loads the wasm file named by the first command-line argument with
/// `Module::from_file` and re-emits it with `emit_wasm`, discarding the
/// emitted result.
///
/// # Panics
///
/// Panics with a usage hint when no path argument is given, and with a short
/// description when loading or emitting the module fails.
fn main() {
    env_logger::init();

    // A missing argument is a usage error, so say how to invoke the example
    // instead of panicking with a bare `None` unwrap.
    let path = std::env::args()
        .nth(1)
        .expect("usage: round-trip <input.wasm>");

    let module = walrus::module::Module::from_file(&path).expect("failed to load wasm module");
    module.emit_wasm().expect("failed to emit wasm module");
}

27 changes: 13 additions & 14 deletions src/const_value.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
//! Handling wasm constant values

use crate::emit::IdsToIndices;
use crate::emit::{Emit, EmitContext};
use crate::error::Result;
use crate::ir::Value;
use crate::module::globals::GlobalId;
use crate::parse::IndicesToIds;
use failure::bail;
use parity_wasm::elements::{self, Instruction};

/// A constant which is produced in WebAssembly, typically used in global
/// initializers or element/data offsets.
Expand Down Expand Up @@ -37,18 +36,18 @@ impl Const {
reader.ensure_end()?;
Ok(val)
}
}

pub(crate) fn emit_instructions(&self, indices: &IdsToIndices) -> elements::InitExpr {
let mut instrs = Vec::with_capacity(2);
instrs.push(match *self {
Const::Value(Value::I32(n)) => Instruction::I32Const(n),
Const::Value(Value::I64(n)) => Instruction::I64Const(n),
Const::Value(Value::F32(n)) => Instruction::F32Const(n.to_bits()),
Const::Value(Value::F64(n)) => Instruction::F64Const(n.to_bits()),
Const::Value(Value::V128(_n)) => unimplemented!(),
Const::Global(id) => Instruction::GetGlobal(indices.get_global_index(id)),
});
instrs.push(Instruction::End);
elements::InitExpr::new(instrs)
/// Encodes a constant as an initializer expression: one instruction that
/// produces the value, followed by `end`.
impl Emit for Const {
    fn emit(&self, cx: &mut EmitContext) {
        const GLOBAL_GET: u8 = 0x23;
        const END: u8 = 0x0b;

        match *self {
            Const::Global(global) => {
                // Resolve the index first so nothing is written if the lookup fails.
                let index = cx.indices.get_global_index(global);
                cx.encoder.byte(GLOBAL_GET);
                cx.encoder.u32(index);
            }
            Const::Value(value) => value.emit(&mut cx.encoder),
        }

        cx.encoder.byte(END);
    }
}
105 changes: 83 additions & 22 deletions src/emit.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
//! Traits and code for emitting high-level structures as low-level, raw wasm
//! structures. E.g. translating from globally unique identifiers down to the
//! raw wasm structure's index spaces. Currently "raw wasm structures" are
//! `parity_wasm::elements` types.
//! raw wasm structure's index spaces.

use crate::encode::{Encoder, MAX_U32_LENGTH};
use crate::ir::LocalId;
use crate::module::data::DataId;
use crate::module::elements::ElementId;
Expand All @@ -13,14 +13,19 @@ use crate::module::tables::TableId;
use crate::module::Module;
use crate::passes::Used;
use crate::ty::TypeId;
use parity_wasm::elements;
use std::collections::HashMap;
use std::ops::{Deref, DerefMut};

/// Shared state handed to every `Emit::emit` call.
pub struct EmitContext<'a> {
/// Borrowed `Module`; presumably the module being emitted — confirm.
pub module: &'a Module,
/// Borrowed `Used` value from `crate::passes`.
pub used: &'a Used,
/// Mutable id-to-index map consulted when encoding references to items.
pub indices: &'a mut IdsToIndices,
// NOTE(review): both `dst` and `encoder` appear in this listing; it may
// interleave pre- and post-change lines — confirm which output field is live.
pub dst: &'a mut elements::Module,
/// Encoder that `Emit` implementations write their bytes through.
pub encoder: Encoder<'a>,
}

/// A guard around an `EmitContext` for a section (or subsection) whose byte
/// size is not known until its contents have been written.
///
/// It dereferences to the wrapped context. When dropped, it writes the number
/// of bytes emitted after the reserved slot into that slot.
pub struct SubContext<'a, 'cx> {
// The wrapped context that all output goes through.
cx: &'cx mut EmitContext<'a>,
// Encoder position of the reserved `u32` slot that receives the size on drop.
write_size_to: usize,
}

/// Anything that can be lowered to raw wasm structures.
Expand All @@ -29,6 +34,12 @@ pub trait Emit {
fn emit(&self, cx: &mut EmitContext);
}

/// Forwarding impl: a shared reference to anything emittable is emittable too,
/// and simply emits the referent.
impl<'a, T> Emit for &'a T
where
    T: ?Sized + Emit,
{
    fn emit(&self, cx: &mut EmitContext) {
        (**self).emit(cx)
    }
}

/// Maps our high-level identifiers to the raw indices they end up emitted at.
///
/// As we lower to raw wasm structures, we cement various constructs' locations
Expand All @@ -42,10 +53,10 @@ pub struct IdsToIndices {
types: HashMap<TypeId, u32>,
funcs: HashMap<FunctionId, u32>,
globals: HashMap<GlobalId, u32>,
locals: HashMap<LocalId, u32>,
memories: HashMap<MemoryId, u32>,
elements: HashMap<ElementId, u32>,
data: HashMap<DataId, u32>,
pub locals: HashMap<FunctionId, HashMap<LocalId, u32>>,
}

macro_rules! define_get_push_index {
Expand Down Expand Up @@ -81,24 +92,74 @@ define_get_push_index!(get_memory_index, push_memory, MemoryId, memories);
define_get_push_index!(get_element_index, push_element, ElementId, elements);
define_get_push_index!(get_data_index, push_data, DataId, data);

impl IdsToIndices {
/// Get the index for the given identifier.
#[inline]
pub fn get_local_index(&self, id: LocalId) -> u32 {
self.locals.get(&id).cloned().expect(
"Should never try and get the index for an identifier that has not already had \
its index set. This means that either we are attempting to get the index of \
an unused identifier, or that we are emitting sections in the wrong order.",
)
impl<'a> EmitContext<'a> {
/// Begins the section identified by `id`, using the enum's discriminant as
/// the id byte.
///
/// Returns the guard produced by `subsection`.
pub fn start_section<'b>(&'b mut self, id: Section) -> SubContext<'a, 'b> {
    let section_id = id as u8;
    self.subsection(section_id)
}

/// Writes the `id` byte, reserves room for a `u32` size, and returns a guard
/// that remembers where that reserved slot starts.
pub fn subsection<'b>(&'b mut self, id: u8) -> SubContext<'a, 'b> {
    self.encoder.byte(id);
    let write_size_to = self.encoder.reserve_u32();
    SubContext {
        cx: self,
        write_size_to,
    }
}

/// Begins a custom section and writes `name` as its first field.
///
/// Returns the section guard so the caller can append the payload.
pub fn custom_section<'b>(&'b mut self, name: &str) -> SubContext<'a, 'b> {
    let mut section = self.start_section(Section::Custom);
    section.encoder.str(name);
    section
}

/// Adds the given identifier to this set, assigning it the next
/// available index.
#[inline]
pub fn set_local_index(&mut self, id: LocalId, index: u32) {
assert!(
self.locals.insert(id, index).is_none(),
"cannot set local index twice"
);
/// Emits a counted list: the number of items first, then each item in
/// iteration order.
///
/// The iterator must report an exact length because the count is written
/// before any item is emitted.
pub fn list<T>(&mut self, list: T)
where
    T: IntoIterator,
    T::IntoIter: ExactSizeIterator,
    T::Item: Emit,
{
    let entries = list.into_iter();
    let count = entries.len();
    self.encoder.usize(count);
    for entry in entries {
        entry.emit(self);
    }
}
}

/// Read-only access to the wrapped `EmitContext`.
impl<'a> Deref for SubContext<'a, '_> {
    type Target = EmitContext<'a>;

    fn deref(&self) -> &Self::Target {
        &*self.cx
    }
}

impl<'a> DerefMut for SubContext<'a, '_> {
fn deref_mut(&mut self) -> &mut EmitContext<'a> {
&mut self.cx
}
}

/// On drop, writes the size of everything emitted after the reserved slot
/// back into that slot.
impl Drop for SubContext<'_, '_> {
    fn drop(&mut self) {
        let encoder = &mut self.cx.encoder;
        // Bytes written since the reservation, excluding the slot itself.
        let amt = encoder.pos() - self.write_size_to - MAX_U32_LENGTH;
        assert!(amt <= u32::max_value() as usize);
        encoder.u32_at(self.write_size_to, amt as u32);
    }
}

/// Identifiers of the sections that can be emitted.
///
/// Each variant's discriminant is the id byte written when the section is
/// started, so `Section::Code as u8` is `10`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Section {
    /// Custom section (id 0).
    Custom = 0,
    /// Type section (id 1).
    Type = 1,
    /// Import section (id 2).
    Import = 2,
    /// Function section (id 3).
    Function = 3,
    /// Table section (id 4).
    Table = 4,
    /// Memory section (id 5).
    Memory = 5,
    /// Global section (id 6).
    Global = 6,
    /// Export section (id 7).
    Export = 7,
    /// Start section (id 8).
    Start = 8,
    /// Element section (id 9).
    Element = 9,
    /// Code section (id 10).
    Code = 10,
    /// Data section (id 11).
    Data = 11,
    /// Data count section (id 12).
    DataCount = 12,
}
90 changes: 90 additions & 0 deletions src/encode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/// Number of bytes occupied by a `u32` written in the fixed-width, padded
/// LEB128 form produced by [`Encoder::u32_at`].
pub const MAX_U32_LENGTH: usize = 5;

/// Append-only writer of binary primitives into a borrowed byte buffer.
#[derive(Debug)]
pub struct Encoder<'a> {
    dst: &'a mut Vec<u8>,
}

impl<'data> Encoder<'data> {
    /// Creates an encoder that appends to `dst`.
    pub fn new(dst: &'data mut Vec<u8>) -> Encoder<'data> {
        Encoder { dst }
    }

    /// Appends a single byte.
    pub fn byte(&mut self, byte: u8) {
        self.dst.push(byte);
    }

    /// Appends `bytes` preceded by its length as an unsigned LEB128 `u32`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes.len()` does not fit in a `u32`.
    pub fn bytes(&mut self, bytes: &[u8]) {
        self.usize(bytes.len());
        self.raw(bytes);
    }

    /// Appends the UTF-8 bytes of `data`, length-prefixed like [`Encoder::bytes`].
    pub fn str(&mut self, data: &str) {
        self.bytes(data.as_bytes())
    }

    /// Appends `amt` as an unsigned LEB128 `u32`.
    ///
    /// # Panics
    ///
    /// Panics if `amt` does not fit in a `u32`.
    pub fn usize(&mut self, amt: usize) {
        assert!(amt <= u32::max_value() as usize);
        self.u32(amt as u32)
    }

    /// Appends `amt` in unsigned LEB128 form.
    pub fn u32(&mut self, amt: u32) {
        leb128::write::unsigned(&mut self.dst, amt.into())
            .expect("writing to a Vec<u8> cannot fail");
    }

    /// Appends `val` in signed LEB128 form.
    pub fn i32(&mut self, val: i32) {
        leb128::write::signed(&mut self.dst, val.into())
            .expect("writing to a Vec<u8> cannot fail");
    }

    /// Appends `val` in signed LEB128 form.
    pub fn i64(&mut self, val: i64) {
        leb128::write::signed(&mut self.dst, val).expect("writing to a Vec<u8> cannot fail");
    }

    /// Appends the four bytes of `val`'s bit pattern, least significant first.
    pub fn f32(&mut self, val: f32) {
        self.raw(&val.to_bits().to_le_bytes());
    }

    /// Appends the eight bytes of `val`'s bit pattern, least significant first.
    pub fn f64(&mut self, val: f64) {
        self.raw(&val.to_bits().to_le_bytes());
    }

    /// Appends `raw` verbatim, with no length prefix.
    pub fn raw(&mut self, raw: &[u8]) {
        self.dst.extend_from_slice(raw);
    }

    /// Reserves `bytes` zeroed bytes of space, returning the position at which
    /// the reservation starts.
    pub fn reserve(&mut self, bytes: usize) -> usize {
        let start = self.dst.len();
        self.dst.resize(start + bytes, 0);
        start
    }

    /// Reserves space to write a uleb128 `u32`, returning the position at
    /// which it can be written.
    pub fn reserve_u32(&mut self) -> usize {
        self.reserve(MAX_U32_LENGTH)
    }

    /// Returns the current write position, i.e. the number of bytes in the
    /// destination buffer.
    pub fn pos(&self) -> usize {
        self.dst.len()
    }

    /// Overwrites the `MAX_U32_LENGTH` bytes starting at `pos` with `amt` as a
    /// padded unsigned LEB128 value: every byte but the last carries the
    /// continuation bit, so the encoding always has the same width.
    ///
    /// # Panics
    ///
    /// Panics, before modifying anything, if `pos + MAX_U32_LENGTH` is past
    /// the end of the buffer.
    // TODO: don't write this code here, use upstream once
    // gimli-rs/leb128#6 is implemented
    pub fn u32_at(&mut self, pos: usize, mut amt: u32) {
        let slot = &mut self.dst[pos..pos + MAX_U32_LENGTH];
        let last = MAX_U32_LENGTH - 1;
        for (i, byte) in slot.iter_mut().enumerate() {
            let continuation = if i == last { 0 } else { 0x80 };
            *byte = (amt as u8) & 0x7f | continuation;
            amt >>= 7;
        }
    }
}
30 changes: 30 additions & 0 deletions src/ir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
pub mod matcher;

use crate::dot::Dot;
use crate::encode::Encoder;
use crate::module::functions::FunctionId;
use crate::module::functions::{DisplayExpr, DotExpr};
use crate::module::globals::GlobalId;
Expand Down Expand Up @@ -348,6 +349,35 @@ pub enum Value {
V128(u128),
}

impl Value {
    /// Writes this value as a constant instruction: the opcode byte(s)
    /// followed by the immediate.
    ///
    /// Integers use the encoder's signed integer writers, floats use its
    /// float writers, and a `V128` is written as its sixteen bytes, least
    /// significant first.
    pub(crate) fn emit(&self, encoder: &mut Encoder) {
        const I32_CONST: u8 = 0x41;
        const I64_CONST: u8 = 0x42;
        const F32_CONST: u8 = 0x43;
        const F64_CONST: u8 = 0x44;
        const V128_CONST: [u8; 2] = [0xfd, 0x02];

        match *self {
            Value::I32(imm) => {
                encoder.byte(I32_CONST);
                encoder.i32(imm);
            }
            Value::I64(imm) => {
                encoder.byte(I64_CONST);
                encoder.i64(imm);
            }
            Value::F32(imm) => {
                encoder.byte(F32_CONST);
                encoder.f32(imm);
            }
            Value::F64(imm) => {
                encoder.byte(F64_CONST);
                encoder.f64(imm);
            }
            Value::V128(imm) => {
                encoder.raw(&V128_CONST);
                encoder.raw(&imm.to_le_bytes());
            }
        }
    }
}

impl fmt::Display for Value {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod arena_set;
pub mod const_value;
pub mod dot;
mod emit;
mod encode;
pub mod error;
pub mod ir;
pub mod module;
Expand Down
Loading

0 comments on commit ce985f8

Please sign in to comment.