diff --git a/Cargo.lock b/Cargo.lock
index 02721ed..8a805ff 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -126,6 +126,12 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
 
+[[package]]
+name = "cobs"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
+
 [[package]]
 name = "colorchoice"
 version = "1.0.0"
@@ -152,7 +158,9 @@ dependencies = [
  "clap",
  "glob",
  "log",
+ "postcard",
  "rand",
+ "serde",
  "simple_logger",
  "tablestream",
 ]
@@ -372,12 +380,40 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "postcard"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfa512cd0d087cc9f99ad30a1bf64795b67871edbead083ffc3a4dfafa59aa00"
+dependencies = [
+ "cobs",
+ "serde",
+]
+
 [[package]]
 name = "ppv-lite86"
 version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 
+[[package]]
+name = "proc-macro2"
+version = "1.0.63"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
+dependencies = [
+ "proc-macro2",
+]
+
 [[package]]
 name = "rand"
 version = "0.8.5"
@@ -442,6 +478,20 @@ name = "serde"
 version = "1.0.164"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.164"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
 
 [[package]]
 name = "signal-hook"
@@ -497,6 +547,17 @@ version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
 
+[[package]]
+name = "syn"
+version = "2.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "tablestream"
 version = "0.1.3"
@@ -537,6 +598,12 @@ dependencies = [
  "time-core",
 ]
 
+[[package]]
+name = "unicode-ident"
+version = "1.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
+
 [[package]]
 name = "unicode-truncate"
 version = "0.2.0"
diff --git a/Cargo.toml b/Cargo.toml
index 576d5b1..55712fc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,9 +10,18 @@ anyhow = "1.0.71"
 clap = "4.3.3"
 glob = "0.3.1"
 log = "0.4.19"
+postcard = { version = "1.0.4", features = ["use-std"], default-features = false }
+serde = { version = "1.0.164", features = ["derive"] }
 simple_logger = "4.1.0"
 tablestream = "0.1.3"
 
 [dev-dependencies]
 assert_approx_eq = "1.1.0"
 rand = "0.8.5"
+
+[build-dependencies]
+anyhow = "1.0.71"
+glob = "0.3.1"
+log = "0.4.19"
+postcard = { version = "1.0.4", features = ["use-std"], default-features = false }
+serde = { version = "1.0.164", features = ["derive"] }
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..b42bc37
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,31 @@
+// `src/corpus.rs` is compiled into the build script too, so the default corpus
+// can be parsed with the same code the binary uses and serialized up front.
+#[allow(dead_code)] // the build script calls `load_corpus` only
+mod corpus {
+    include!("src/corpus.rs");
+}
+
+/// Serializes the default corpus into `$OUT_DIR/default.pc`.
+fn main() {
+    // Once any `rerun-if-changed` is printed, cargo reruns this script only for
+    // the listed paths. `src/corpus.rs` defines the serialized types, so it has
+    // to be listed: a stale `default.pc` would not match what `main` decodes.
+    println!("cargo:rerun-if-changed=build.rs");
+    println!("cargo:rerun-if-changed=src/corpus.rs");
+    println!("cargo:rerun-if-changed=cpu_rec_corpus");
+
+    // load default corpus
+    let default = corpus::load_corpus("cpu_rec_corpus/*.corpus")
+        .expect("cannot load the default corpus from cpu_rec_corpus/*.corpus");
+
+    // serialize to bytes
+    let bytes = postcard::to_stdvec(&default).expect("cannot serialize the default corpus");
+
+    // output path to target build folder
+    let out_dir = std::env::var("OUT_DIR").expect("cargo sets OUT_DIR for build scripts");
+    let mut outfile = std::path::PathBuf::from(out_dir);
+    outfile.push("default.pc");
+
+    // write to file
+    std::fs::write(outfile, bytes).expect("cannot write default.pc into OUT_DIR");
+}
diff --git a/src/corpus.rs b/src/corpus.rs
index 0392786..938336d 100644
--- a/src/corpus.rs
+++ b/src/corpus.rs
@@ -16,12 +16,13 @@
 use anyhow::{Context, Error, Ok, Result};
 use glob::glob;
 use log::debug;
+use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::fmt::Debug;
 use std::str::FromStr;
 use std::string::String;
 
-#[derive(Debug)]
+#[derive(Debug, Deserialize, Serialize)]
 pub struct CorpusStats {
     pub arch: String,
     bigrams_freq: HashMap<(u8, u8), f32>,
diff --git a/src/main.rs b/src/main.rs
index c1ed285..d38c02e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -202,7 +202,7 @@ fn main() -> Result<()> {
         .propagate_version(true)
         .author("Raphaël Rigo ")
         .about("Identifies CPU architectures in binaries")
-        .arg(arg!(--corpus <corpus_dir>).default_value("cpu_rec_corpus"))
+        .arg(arg!(--corpus <corpus_dir>))
         .arg(arg!(-d - -debug))
         .arg(arg!(-v - -verbose))
         .arg(
@@ -223,17 +223,29 @@ fn main() -> Result<()> {
     };
     simple_logger::init_with_level(level)?;
 
-    let corpus_dir = args.get_one::<String>("corpus").unwrap().to_owned();
-    if !Path::new(&corpus_dir).is_dir() {
-        return Err(Error::msg(format!(
-            "{} is not a valid directory",
-            corpus_dir
-        )));
-    }
-    let corpus_files: String = args.get_one::<String>("corpus").unwrap().to_owned() + "/*.corpus";
-    println!("Loading corpus from {}", corpus_files);
-
-    let corpus_stats = load_corpus(&corpus_files)?;
+    // clap stores the `--corpus` value as a `String`; asking `get_one` for
+    // any other type panics as soon as the option is actually supplied.
+    let corpus_stats = match args.get_one::<String>("corpus") {
+        // No directory given: decode the default corpus that build.rs wrote to
+        // OUT_DIR. The target type is inferred from the `load_corpus` arm.
+        None => {
+            let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/default.pc"));
+            postcard::from_bytes(bytes)
+                .map_err(|e| Error::msg(format!("cannot decode embedded corpus: {e}")))?
+        }
+        // Directory given: pass the `*.corpus` glob for it to `load_corpus`.
+        Some(corpus_dir) => {
+            if !Path::new(corpus_dir).is_dir() {
+                return Err(Error::msg(format!(
+                    "{} is not a valid directory",
+                    corpus_dir
+                )));
+            }
+            let corpus_files = format!("{corpus_dir}/*.corpus");
+            println!("Loading corpus from {}", corpus_files);
+            load_corpus(&corpus_files)?
+        }
+    };
 
     info!("Corpus size: {}", corpus_stats.len());
 