diff --git a/README.md b/README.md index c094f25..b750c2c 100644 --- a/README.md +++ b/README.md @@ -94,14 +94,27 @@ fn custom_matcher(buf: &[u8]) -> bool { return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; } +fn custom_matcher_read(r: &mut dyn Read) -> Result { + let mut buffer = [0; 4]; + r.read_exact(&mut buffer[..])?; + Ok(custom_matcher(&buffer)) +} + let mut info = infer::Infer::new(); -info.add("custom/foo", "foo", custom_matcher); +info.add("custom/foo", "foo", custom_matcher, Some(custom_matcher_read)); -let buf = [0x10, 0x11, 0x12, 0x13]; -let kind = info.get(&buf).expect("file type is known"); +let buf = [0x10, 0x11, 0x12, 0x13, 0x14]; +let mut kind = info.get(&buf).expect("file type is known"); assert_eq!(kind.mime_type(), "custom/foo"); assert_eq!(kind.extension(), "foo"); + +let mut f = Cursor::new(buf); +kind = info.get_read(&mut f).unwrap().expect("file type is known"); + +assert_eq!(kind.mime_type(), "custom/foo"); +assert_eq!(kind.extension(), "foo"); + ``` ## Supported types diff --git a/src/lib.rs b/src/lib.rs index a78c5f6..706856d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,16 +49,31 @@ Here we actually need to use the `Infer` struct to be able to declare custom mat ```rust # #[cfg(feature = "alloc")] +# #[cfg(feature = "std")] # fn run() { +use std::io::{Result, Read}; + fn custom_matcher(buf: &[u8]) -> bool { return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; } +fn custom_matcher_read(r: &mut dyn Read) -> Result { + let mut buffer = [0; 4]; + r.read_exact(&mut buffer[..])?; + Ok(custom_matcher(&buffer)) +} + let mut info = infer::Infer::new(); -info.add("custom/foo", "foo", custom_matcher); +info.add("custom/foo", "foo", custom_matcher, Some(custom_matcher_read)); + +let buf = [0x10, 0x11, 0x12, 0x13, 0x14]; +let mut kind = info.get(&buf).unwrap(); + +assert_eq!(kind.mime_type(), "custom/foo"); +assert_eq!(kind.extension(), "foo"); -let buf = [0x10, 0x11, 0x12, 0x13]; -let kind = 
info.get(&buf).unwrap(); +let mut f = std::io::Cursor::new(buf); +kind = info.get_read(&mut f).unwrap().expect("file type is known"); assert_eq!(kind.mime_type(), "custom/foo"); assert_eq!(kind.extension(), "foo"); @@ -76,10 +91,13 @@ extern crate alloc; mod map; mod matchers; +mod matchtype; + +#[cfg(feature = "std")] +mod read; #[cfg(feature = "alloc")] use alloc::vec::Vec; -use core::fmt; #[cfg(feature = "std")] use std::fs::File; #[cfg(feature = "std")] @@ -88,102 +106,17 @@ use std::io::{self, Read}; use std::path::Path; pub use map::MatcherType; -use map::{WrapMatcher, MATCHER_MAP}; - -/// All the supported matchers categorized and exposed as functions -pub use matchers::*; - -/// Matcher function -pub type Matcher = fn(buf: &[u8]) -> bool; - -/// Generic information for a type -#[derive(Copy, Clone)] -pub struct Type { - matcher_type: MatcherType, - mime_type: &'static str, - extension: &'static str, - matcher: WrapMatcher, -} - -impl Type { - pub(crate) const fn new_static( - matcher_type: MatcherType, - mime_type: &'static str, - extension: &'static str, - matcher: WrapMatcher, - ) -> Self { - Self { - matcher_type, - mime_type, - extension, - matcher, - } - } - - /// Returns a new `Type` with matcher and extension. 
- pub fn new( - matcher_type: MatcherType, - mime_type: &'static str, - extension: &'static str, - matcher: Matcher, - ) -> Self { - Self::new_static(matcher_type, mime_type, extension, WrapMatcher(matcher)) - } - /// Returns the type of matcher - /// - /// # Examples - /// - /// ```rust - /// let info = infer::Infer::new(); - /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; - /// let kind = info.get(&buf).expect("file type is known"); - /// - /// assert_eq!(kind.matcher_type(), infer::MatcherType::Image); - /// ``` - pub const fn matcher_type(&self) -> MatcherType { - self.matcher_type - } - - /// Returns the mime type - pub const fn mime_type(&self) -> &'static str { - self.mime_type - } - - /// Returns the file extension - pub const fn extension(&self) -> &'static str { - self.extension - } - - /// Checks if buf matches this Type - fn matches(&self, buf: &[u8]) -> bool { - (self.matcher.0)(buf) - } -} - -impl fmt::Debug for Type { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Type") - .field("matcher_type", &self.matcher_type) - .field("mime_type", &self.mime_type) - .field("extension", &self.extension) - .finish() - } -} +#[cfg(feature = "std")] +use map::{WrapMatcher, WrapReadMatcher}; -impl fmt::Display for Type { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fmt::Display::fmt(self.mime_type, f) - } -} +use map::MATCHER_MAP; -impl PartialEq for Type { - fn eq(&self, other: &Self) -> bool { - self.matcher_type == other.matcher_type - && self.mime_type == other.mime_type - && self.extension == other.extension - } -} +#[cfg(feature = "std")] +pub use crate::read::*; +/// All the supported matchers categorized and exposed as functions +pub use matchers::*; +pub use matchtype::*; /// Infer allows to use a custom set of `Matcher`s for infering a MIME type. /// @@ -280,6 +213,28 @@ impl Infer { .any(|kind| kind.extension() == extension) } + /// Returns the type for the mime type if supported. 
+ /// + /// # Examples + /// + /// See [`get_type_by_mime`](./fn.get_type_by_mime.html). + pub fn get_type_by_mime(&self, mime_type: &str) -> Option<Type> { + self.iter_matchers() + .find(|kind| kind.mime_type() == mime_type) + .copied() + } + + /// Returns the type for the extension if supported. + /// + /// # Examples + /// + /// See [`get_type_by_extension`](./fn.get_type_by_extension.html). + pub fn get_type_by_extension(&self, extension: &str) -> Option<Type> { + self.iter_matchers() + .find(|kind| kind.extension() == extension) + .copied() + } + /// Returns whether a mime type is supported. /// /// # Examples @@ -368,13 +323,14 @@ impl Infer { /// /// ```rust /// # #[cfg(feature = "alloc")] + /// # #[cfg(feature = "std")] /// # fn run() { /// fn custom_matcher(buf: &[u8]) -> bool { /// return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; /// } /// /// let mut info = infer::Infer::new(); - /// info.add("custom/foo", "foo", custom_matcher); + /// info.add("custom/foo", "foo", custom_matcher, None); /// let buf = [0x10, 0x11, 0x12, 0x13]; /// assert!(info.is_custom(&buf)); /// # } @@ -391,25 +347,37 @@ impl Infer { /// # Examples /// /// ```rust + /// # #[cfg(feature = "alloc")] + /// # #[cfg(feature = "std")] + /// # fn run() { /// fn custom_matcher(buf: &[u8]) -> bool { /// return buf.len() >= 3 && buf[0] == 0x10 && buf[1] == 0x11 && buf[2] == 0x12; /// } /// /// let mut info = infer::Infer::new(); - /// info.add("custom/foo", "foo", custom_matcher); + /// info.add("custom/foo", "foo", custom_matcher, None); /// let buf = [0x10, 0x11, 0x12, 0x13]; /// let kind = info.get(&buf).expect("file type is known"); /// /// assert_eq!(kind.mime_type(), "custom/foo"); /// assert_eq!(kind.extension(), "foo"); + /// # } /// ``` #[cfg(feature = "alloc")] - pub fn add(&mut self, mime_type: &'static str, extension: &'static str, m: Matcher) { + #[cfg(feature = "std")] + pub fn add( + &mut self, + mime_type: &'static str, + extension: &'static str, + m: Matcher, + rm: Option<ReadMatcher>,
) { self.mmap.push(Type::new_static( MatcherType::Custom, mime_type, extension, WrapMatcher(m), + rm.map(WrapReadMatcher), )); } @@ -432,9 +400,8 @@ static INFER: Infer = Infer::new(); /// # Examples /// /// ```rust -/// let info = infer::Infer::new(); /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; -/// let kind = info.get(&buf).expect("file type is known"); +/// let kind = infer::get(&buf).expect("file type is known"); /// /// assert_eq!(kind.mime_type(), "image/jpeg"); /// assert_eq!(kind.extension(), "jpg"); @@ -606,10 +573,43 @@ pub fn is_video(buf: &[u8]) -> bool { INFER.is_video(buf) } +/// Returns the file type for the mime type if supported. +/// +/// # Examples +/// +/// ```rust +/// let kind = infer::get_type_by_mime("image/jpeg").expect("mime type is known"); +/// +/// assert_eq!(kind.mime_type(), "image/jpeg"); +/// assert_eq!(kind.extension(), "jpg"); +/// ``` +pub fn get_type_by_mime(mime_type: &str) -> Option { + INFER.get_type_by_mime(mime_type) +} + +/// Returns the type for the extension if supported. 
+/// +/// # Examples +/// +/// ```rust +/// let kind = infer::get_type_by_extension("jpg").expect("extension is known"); +/// +/// assert_eq!(kind.mime_type(), "image/jpeg"); +/// assert_eq!(kind.extension(), "jpg"); +/// ``` +pub fn get_type_by_extension(extension: &str) -> Option { + INFER.get_type_by_extension(extension) +} + #[cfg(test)] mod tests { #[cfg(feature = "alloc")] + #[cfg(feature = "std")] use super::Infer; + #[cfg(feature = "std")] + use std::fs::File; + #[cfg(feature = "std")] + use std::io::{self, Cursor, Read}; #[test] fn test_get_unknown() { @@ -633,6 +633,7 @@ mod tests { } #[cfg(feature = "alloc")] + #[cfg(feature = "std")] #[test] fn test_custom_matcher_ordering() { // overrides jpeg matcher @@ -645,18 +646,42 @@ mod tests { buf.len() > 3 && buf[0] == 0x89 && buf[1] == 0x50 && buf[2] == 0x4E && buf[3] == 0x47 } + fn bar_matcher_read(r: &mut dyn Read) -> io::Result { + let mut buffer = [0; 4]; + r.read_exact(&mut buffer[..])?; + Ok(bar_matcher(&buffer)) + } + let mut info = Infer::new(); - info.add("custom/foo", "foo", foo_matcher); - info.add("custom/bar", "bar", bar_matcher); + info.add("custom/foo", "foo", foo_matcher, None); + info.add("custom/bar", "bar", bar_matcher, Some(bar_matcher_read)); let buf_foo = &[0xFF, 0xD8, 0xFF]; let typ = info.get(buf_foo).expect("type is matched"); assert_eq!(typ.mime_type(), "custom/foo"); assert_eq!(typ.extension(), "foo"); - let buf_bar = &[0x89, 0x50, 0x4E, 0x47]; + let buf_bar = &[0x89, 0x50, 0x4E, 0x47, 0x12]; let typ = info.get(buf_bar).expect("type is matched"); assert_eq!(typ.mime_type(), "custom/bar"); assert_eq!(typ.extension(), "bar"); + + let mut f = Cursor::new(buf_bar); + let kind = info.get_read(&mut f).unwrap().expect("type is matched"); + + assert_eq!(kind.mime_type(), "custom/bar"); + assert_eq!(kind.extension(), "bar"); + } + + #[cfg(feature = "std")] + #[test] + fn test_is_wasm_read() { + let fr = File::open("testdata/sample.wasm"); + if fr.is_err() { + assert!(fr.is_err(), "{:?}", 
fr.unwrap_err()); + } + let mut f = fr.unwrap(); + let result = crate::app::is_wasm_read(&mut f).unwrap(); + assert!(result); } } diff --git a/src/map.rs b/src/map.rs index 7d03c1a..534e5e7 100644 --- a/src/map.rs +++ b/src/map.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use super::ReadMatcher; + use super::{matchers, Matcher, Type}; #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -20,8 +23,18 @@ pub enum MatcherType { #[derive(Copy, Clone)] pub struct WrapMatcher(pub Matcher); +#[cfg(feature = "std")] +#[repr(transparent)] +#[derive(Copy, Clone)] +pub struct WrapReadMatcher(pub ReadMatcher); + macro_rules! matcher_map { - ($(($mtype:expr, $mime_type:literal, $extension:literal, $matcher:expr)),*) => { + ($(($mtype:expr, $mime_type:literal, $extension:literal, $matcher:expr, $read_matcher:expr)),*) => { + #[cfg(feature = "std")] + pub const MATCHER_MAP: &[Type] = &[ + $(Type::new_static($mtype, $mime_type, $extension, WrapMatcher($matcher), $read_matcher),)* + ]; + #[cfg(not(feature = "std"))] pub const MATCHER_MAP: &[Type] = &[ $(Type::new_static($mtype, $mime_type, $extension, WrapMatcher($matcher)),)* ]; @@ -37,557 +50,654 @@ matcher_map!( MatcherType::App, "application/wasm", "wasm", - matchers::app::is_wasm + matchers::app::is_wasm, + Some(WrapReadMatcher(matchers::app::is_wasm_read)) ), ( MatcherType::App, "application/x-executable", "elf", - matchers::app::is_elf + matchers::app::is_elf, + Some(WrapReadMatcher(matchers::app::is_elf_read)) ), ( MatcherType::App, "application/vnd.microsoft.portable-executable", "exe", - matchers::app::is_exe + matchers::app::is_exe, + Some(WrapReadMatcher(matchers::app::is_exe_read)) ), ( MatcherType::App, "application/vnd.microsoft.portable-executable", "dll", - matchers::app::is_dll + matchers::app::is_dll, + Some(WrapReadMatcher(matchers::app::is_dll_read)) ), ( MatcherType::App, "application/java", "class", - matchers::app::is_java + matchers::app::is_java, + Some(WrapReadMatcher(matchers::app::is_java_read)) ), ( 
MatcherType::App, "application/x-llvm", "bc", - matchers::app::is_llvm + matchers::app::is_llvm, + Some(WrapReadMatcher(matchers::app::is_llvm_read)) ), ( MatcherType::App, "application/x-mach-binary", "mach", - matchers::app::is_mach + matchers::app::is_mach, + Some(WrapReadMatcher(matchers::app::is_mach_read)) ), ( MatcherType::App, "application/vnd.android.dex", "dex", - matchers::app::is_dex + matchers::app::is_dex, + Some(WrapReadMatcher(matchers::app::is_dex_read)) ), ( MatcherType::App, "application/vnd.android.dey", "dey", - matchers::app::is_dey + matchers::app::is_dey, + Some(WrapReadMatcher(matchers::app::is_dey_read)) ), ( MatcherType::App, "application/x-x509-ca-cert", "der", - matchers::app::is_der + matchers::app::is_der, + Some(WrapReadMatcher(matchers::app::is_der_read)) ), ( MatcherType::App, "application/x-executable", "obj", - matchers::app::is_coff + matchers::app::is_coff, + Some(WrapReadMatcher(matchers::app::is_coff_read)) ), ( MatcherType::App, "application/x-x509-ca-cert", "pem", - matchers::app::is_pem + matchers::app::is_pem, + Some(WrapReadMatcher(matchers::app::is_pem_read)) ), // Book ( MatcherType::Book, "application/epub+zip", "epub", - matchers::book::is_epub + matchers::book::is_epub, + Some(WrapReadMatcher(matchers::book::is_epub_read)) ), ( MatcherType::Book, "application/x-mobipocket-ebook", "mobi", - matchers::book::is_mobi + matchers::book::is_mobi, + Some(WrapReadMatcher(matchers::book::is_mobi_read)) ), // Image ( MatcherType::Image, "image/jpeg", "jpg", - matchers::image::is_jpeg + matchers::image::is_jpeg, + Some(WrapReadMatcher(matchers::image::is_jpeg_read)) ), ( MatcherType::Image, "image/jp2", "jp2", - matchers::image::is_jpeg2000 + matchers::image::is_jpeg2000, + Some(WrapReadMatcher(matchers::image::is_jpeg2000_read)) ), ( MatcherType::Image, "image/png", "png", - matchers::image::is_png + matchers::image::is_png, + Some(WrapReadMatcher(matchers::image::is_png_read)) ), ( MatcherType::Image, "image/gif", "gif", - 
matchers::image::is_gif + matchers::image::is_gif, + Some(WrapReadMatcher(matchers::image::is_gif_read)) ), ( MatcherType::Image, "image/webp", "webp", - matchers::image::is_webp + matchers::image::is_webp, + Some(WrapReadMatcher(matchers::image::is_webp_read)) ), ( MatcherType::Image, "image/x-canon-cr2", "cr2", - matchers::image::is_cr2 + matchers::image::is_cr2, + Some(WrapReadMatcher(matchers::image::is_cr2_read)) ), ( MatcherType::Image, "image/tiff", "tif", - matchers::image::is_tiff + matchers::image::is_tiff, + Some(WrapReadMatcher(matchers::image::is_tiff_read)) ), ( MatcherType::Image, "image/bmp", "bmp", - matchers::image::is_bmp + matchers::image::is_bmp, + Some(WrapReadMatcher(matchers::image::is_bmp_read)) ), ( MatcherType::Image, "image/vnd.ms-photo", "jxr", - matchers::image::is_jxr + matchers::image::is_jxr, + Some(WrapReadMatcher(matchers::image::is_jxr_read)) ), ( MatcherType::Image, "image/vnd.adobe.photoshop", "psd", - matchers::image::is_psd + matchers::image::is_psd, + Some(WrapReadMatcher(matchers::image::is_psd_read)) ), ( MatcherType::Image, "image/vnd.microsoft.icon", "ico", - matchers::image::is_ico + matchers::image::is_ico, + Some(WrapReadMatcher(matchers::image::is_ico_read)) ), ( MatcherType::Image, "image/heif", "heif", - matchers::image::is_heif + matchers::image::is_heif, + None ), ( MatcherType::Image, "image/avif", "avif", - matchers::image::is_avif + matchers::image::is_avif, + None ), ( MatcherType::Image, "image/jxl", "jxl", - matchers::image::is_jxl + matchers::image::is_jxl, + Some(WrapReadMatcher(matchers::image::is_jxl_read)) ), ( MatcherType::Image, "image/openraster", "ora", - matchers::image::is_ora + matchers::image::is_ora, + Some(WrapReadMatcher(matchers::image::is_ora_read)) ), // Video ( MatcherType::Video, "video/mp4", "mp4", - matchers::video::is_mp4 + matchers::video::is_mp4, + Some(WrapReadMatcher(matchers::video::is_mp4_read)) ), ( MatcherType::Video, "video/x-m4v", "m4v", - matchers::video::is_m4v + 
matchers::video::is_m4v, + Some(WrapReadMatcher(matchers::video::is_m4v_read)) ), ( MatcherType::Video, "video/x-matroska", "mkv", - matchers::video::is_mkv + matchers::video::is_mkv, + Some(WrapReadMatcher(matchers::video::is_mkv_read)) ), ( MatcherType::Video, "video/webm", "webm", - matchers::video::is_webm + matchers::video::is_webm, + Some(WrapReadMatcher(matchers::video::is_webm_read)) ), ( MatcherType::Video, "video/quicktime", "mov", - matchers::video::is_mov + matchers::video::is_mov, + Some(WrapReadMatcher(matchers::video::is_mov_read)) ), ( MatcherType::Video, "video/x-msvideo", "avi", - matchers::video::is_avi + matchers::video::is_avi, + Some(WrapReadMatcher(matchers::video::is_avi_read)) ), ( MatcherType::Video, "video/x-ms-wmv", "wmv", - matchers::video::is_wmv + matchers::video::is_wmv, + Some(WrapReadMatcher(matchers::video::is_wmv_read)) ), ( MatcherType::Video, "video/mpeg", "mpg", - matchers::video::is_mpeg + matchers::video::is_mpeg, + Some(WrapReadMatcher(matchers::video::is_mpeg_read)) ), ( MatcherType::Video, "video/x-flv", "flv", - matchers::video::is_flv + matchers::video::is_flv, + Some(WrapReadMatcher(matchers::video::is_flv_read)) ), // Audio ( MatcherType::Audio, "audio/midi", "midi", - matchers::audio::is_midi + matchers::audio::is_midi, + Some(WrapReadMatcher(matchers::audio::is_midi_read)) ), ( MatcherType::Audio, "audio/mpeg", "mp3", - matchers::audio::is_mp3 + matchers::audio::is_mp3, + Some(WrapReadMatcher(matchers::audio::is_mp3_read)) ), ( MatcherType::Audio, "audio/m4a", "m4a", - matchers::audio::is_m4a + matchers::audio::is_m4a, + Some(WrapReadMatcher(matchers::audio::is_m4a_read)) ), // has to come before ogg ( MatcherType::Audio, "audio/opus", "opus", - matchers::audio::is_ogg_opus + matchers::audio::is_ogg_opus, + Some(WrapReadMatcher(matchers::audio::is_ogg_opus_read)) ), ( MatcherType::Audio, "audio/ogg", "ogg", - matchers::audio::is_ogg + matchers::audio::is_ogg, + Some(WrapReadMatcher(matchers::audio::is_ogg_read)) ), 
( MatcherType::Audio, "audio/x-flac", "flac", - matchers::audio::is_flac + matchers::audio::is_flac, + Some(WrapReadMatcher(matchers::audio::is_flac_read)) ), ( MatcherType::Audio, "audio/x-wav", "wav", - matchers::audio::is_wav + matchers::audio::is_wav, + Some(WrapReadMatcher(matchers::audio::is_wav_read)) ), ( MatcherType::Audio, "audio/amr", "amr", - matchers::audio::is_amr + matchers::audio::is_amr, + Some(WrapReadMatcher(matchers::audio::is_amr_read)) ), ( MatcherType::Audio, "audio/aac", "aac", - matchers::audio::is_aac + matchers::audio::is_aac, + Some(WrapReadMatcher(matchers::audio::is_aac_read)) ), ( MatcherType::Audio, "audio/x-aiff", "aiff", - matchers::audio::is_aiff + matchers::audio::is_aiff, + Some(WrapReadMatcher(matchers::audio::is_aiff_read)) ), ( MatcherType::Audio, "audio/x-dsf", "dsf", - matchers::audio::is_dsf + matchers::audio::is_dsf, + Some(WrapReadMatcher(matchers::audio::is_dsf_read)) ), ( MatcherType::Audio, "audio/x-ape", "ape", - matchers::audio::is_ape + matchers::audio::is_ape, + Some(WrapReadMatcher(matchers::audio::is_ape_read)) ), // Font ( MatcherType::Font, "application/font-woff", "woff", - matchers::font::is_woff + matchers::font::is_woff, + Some(WrapReadMatcher(matchers::font::is_woff_read)) ), ( MatcherType::Font, "application/font-woff", "woff2", - matchers::font::is_woff2 + matchers::font::is_woff2, + Some(WrapReadMatcher(matchers::font::is_woff2_read)) ), ( MatcherType::Font, "application/font-sfnt", "ttf", - matchers::font::is_ttf + matchers::font::is_ttf, + Some(WrapReadMatcher(matchers::font::is_ttf_read)) ), ( MatcherType::Font, "application/font-sfnt", "otf", - matchers::font::is_otf + matchers::font::is_otf, + Some(WrapReadMatcher(matchers::font::is_otf_read)) ), // Document ( MatcherType::Doc, "application/msword", "doc", - matchers::doc::is_doc + matchers::doc::is_doc, + Some(WrapReadMatcher(matchers::doc::is_doc_read)) ), ( MatcherType::Doc, 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx", - matchers::doc::is_docx + matchers::doc::is_docx, + Some(WrapReadMatcher(matchers::doc::is_docx_read)) ), ( MatcherType::Doc, "application/vnd.ms-excel", "xls", - matchers::doc::is_xls + matchers::doc::is_xls, + Some(WrapReadMatcher(matchers::doc::is_xls_read)) ), ( MatcherType::Doc, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx", - matchers::doc::is_xlsx + matchers::doc::is_xlsx, + Some(WrapReadMatcher(matchers::doc::is_xlsx_read)) ), ( MatcherType::Doc, "application/vnd.ms-powerpoint", "ppt", - matchers::doc::is_ppt + matchers::doc::is_ppt, + Some(WrapReadMatcher(matchers::doc::is_ppt_read)) ), ( MatcherType::Doc, "application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx", - matchers::doc::is_pptx + matchers::doc::is_pptx, + Some(WrapReadMatcher(matchers::doc::is_pptx_read)) ), // OpenDocument ( MatcherType::Doc, "application/vnd.oasis.opendocument.text", "odt", - matchers::odf::is_odt + matchers::odf::is_odt, + Some(WrapReadMatcher(matchers::odf::is_odt_read)) ), ( MatcherType::Doc, "application/vnd.oasis.opendocument.spreadsheet", "ods", - matchers::odf::is_ods + matchers::odf::is_ods, + Some(WrapReadMatcher(matchers::odf::is_ods_read)) ), ( MatcherType::Doc, "application/vnd.oasis.opendocument.presentation", "odp", - matchers::odf::is_odp + matchers::odf::is_odp, + Some(WrapReadMatcher(matchers::odf::is_odp_read)) ), // Archive ( MatcherType::Archive, "application/epub+zip", "epub", - matchers::archive::is_epub + matchers::archive::is_epub, + Some(WrapReadMatcher(matchers::archive::is_epub_read)) ), ( MatcherType::Archive, "application/zip", "zip", - matchers::archive::is_zip + matchers::archive::is_zip, + Some(WrapReadMatcher(matchers::archive::is_zip_read)) ), ( MatcherType::Archive, "application/x-tar", "tar", - matchers::archive::is_tar + matchers::archive::is_tar, + Some(WrapReadMatcher(matchers::archive::is_tar_read)) 
), ( MatcherType::Archive, "application/vnd.rar", "rar", - matchers::archive::is_rar + matchers::archive::is_rar, + Some(WrapReadMatcher(matchers::archive::is_rar_read)) ), ( MatcherType::Archive, "application/gzip", "gz", - matchers::archive::is_gz + matchers::archive::is_gz, + Some(WrapReadMatcher(matchers::archive::is_gz_read)) ), ( MatcherType::Archive, "application/x-bzip2", "bz2", - matchers::archive::is_bz2 + matchers::archive::is_bz2, + Some(WrapReadMatcher(matchers::archive::is_bz2_read)) ), ( MatcherType::Archive, "application/x-7z-compressed", "7z", - matchers::archive::is_7z + matchers::archive::is_7z, + Some(WrapReadMatcher(matchers::archive::is_7z_read)) ), ( MatcherType::Archive, "application/x-xz", "xz", - matchers::archive::is_xz + matchers::archive::is_xz, + Some(WrapReadMatcher(matchers::archive::is_xz_read)) ), ( MatcherType::Archive, "application/pdf", "pdf", - matchers::archive::is_pdf + matchers::archive::is_pdf, + Some(WrapReadMatcher(matchers::archive::is_pdf_read)) ), ( MatcherType::Archive, "application/x-shockwave-flash", "swf", - matchers::archive::is_swf + matchers::archive::is_swf, + Some(WrapReadMatcher(matchers::archive::is_swf_read)) ), ( MatcherType::Archive, "application/rtf", "rtf", - matchers::archive::is_rtf + matchers::archive::is_rtf, + Some(WrapReadMatcher(matchers::archive::is_rtf_read)) ), ( MatcherType::Archive, "application/octet-stream", "eot", - matchers::archive::is_eot + matchers::archive::is_eot, + Some(WrapReadMatcher(matchers::archive::is_eot_read)) ), ( MatcherType::Archive, "application/postscript", "ps", - matchers::archive::is_ps + matchers::archive::is_ps, + Some(WrapReadMatcher(matchers::archive::is_ps_read)) ), ( MatcherType::Archive, "application/vnd.sqlite3", "sqlite", - matchers::archive::is_sqlite + matchers::archive::is_sqlite, + Some(WrapReadMatcher(matchers::archive::is_sqlite_read)) ), ( MatcherType::Archive, "application/x-nintendo-nes-rom", "nes", - matchers::archive::is_nes + 
matchers::archive::is_nes, + Some(WrapReadMatcher(matchers::archive::is_nes_read)) ), ( MatcherType::Archive, "application/x-google-chrome-extension", "crx", - matchers::archive::is_crx + matchers::archive::is_crx, + Some(WrapReadMatcher(matchers::archive::is_crx_read)) ), ( MatcherType::Archive, "application/vnd.ms-cab-compressed", "cab", - matchers::archive::is_cab + matchers::archive::is_cab, + Some(WrapReadMatcher(matchers::archive::is_cab_read)) ), ( MatcherType::Archive, "application/vnd.debian.binary-package", "deb", - matchers::archive::is_deb + matchers::archive::is_deb, + Some(WrapReadMatcher(matchers::archive::is_deb_read)) ), ( MatcherType::Archive, "application/x-unix-archive", "ar", - matchers::archive::is_ar + matchers::archive::is_ar, + Some(WrapReadMatcher(matchers::archive::is_ar_read)) ), ( MatcherType::Archive, "application/x-compress", "Z", - matchers::archive::is_z + matchers::archive::is_z, + Some(WrapReadMatcher(matchers::archive::is_z_read)) ), ( MatcherType::Archive, "application/x-lzip", "lz", - matchers::archive::is_lz + matchers::archive::is_lz, + Some(WrapReadMatcher(matchers::archive::is_lz_read)) ), ( MatcherType::Archive, "application/x-rpm", "rpm", - matchers::archive::is_rpm + matchers::archive::is_rpm, + Some(WrapReadMatcher(matchers::archive::is_rpm_read)) ), ( MatcherType::Archive, "application/dicom", "dcm", - matchers::archive::is_dcm + matchers::archive::is_dcm, + Some(WrapReadMatcher(matchers::archive::is_dcm_read)) ), ( MatcherType::Archive, "application/zstd", "zst", - matchers::archive::is_zst + matchers::archive::is_zst, + Some(WrapReadMatcher(matchers::archive::is_zst_read)) ), ( MatcherType::Archive, "application/x-ole-storage", "msi", - matchers::archive::is_msi + matchers::archive::is_msi, + Some(WrapReadMatcher(matchers::archive::is_msi_read)) ), ( MatcherType::Archive, "application/x-cpio", "cpio", - matchers::archive::is_cpio + matchers::archive::is_cpio, + Some(WrapReadMatcher(matchers::archive::is_cpio_read)) 
), // Text ( MatcherType::Text, "text/html", "html", - matchers::text::is_html + matchers::text::is_html, + Some(WrapReadMatcher(matchers::text::is_html_read)) + ), + ( + MatcherType::Text, + "text/xml", + "xml", + matchers::text::is_xml, + Some(WrapReadMatcher(matchers::text::is_xml_read)) ), - (MatcherType::Text, "text/xml", "xml", matchers::text::is_xml), ( MatcherType::Text, "text/x-shellscript", "sh", - matchers::text::is_shellscript + matchers::text::is_shellscript, + Some(WrapReadMatcher(matchers::text::is_shellscript_read)) ) ); diff --git a/src/matchers/app.rs b/src/matchers/app.rs index 2e77fa2..31e9b05 100644 --- a/src/matchers/app.rs +++ b/src/matchers/app.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is a wasm. /// /// # Examples @@ -95,7 +98,7 @@ pub fn is_dey(buf: &[u8]) -> bool { && is_dex(&buf[40..100]) } -/// Returns whether a buffer DER encoded X.509 certificate. +/// Returns whether a buffer is DER encoded X.509 certificate. pub fn is_der(buf: &[u8]) -> bool { // https://en.wikipedia.org/wiki/List_of_file_signatures // https://github.com/ReFirmLabs/binwalk/blob/master/src/binwalk/magic/crypto#L25-L37 @@ -126,7 +129,7 @@ pub fn is_coff(buf: &[u8]) -> bool { is_coff_x64(buf) || is_coff_i386(buf) || is_coff_ia64(buf) } -/// Returns whether a buffer is pem +/// Returns whether a buffer is PEM. pub fn is_pem(buf: &[u8]) -> bool { // https://en.wikipedia.org/wiki/List_of_file_signatures buf.len() > 11 @@ -142,3 +145,68 @@ pub fn is_pem(buf: &[u8]) -> bool { && buf[9] == b'N' && buf[10] == b' ' } + +super::build_fn_read_api!( + /// Returns whether data from a reader is a wasm. 
+ /// + /// # Examples + /// + /// ```rust + /// use std::fs; + /// use std::io::prelude::*; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let mut f = File::open("testdata/sample.wasm")?; + /// let wasm = infer::app::is_wasm_read(&mut f).unwrap(); + /// assert!(wasm); + /// Ok(()) + /// } + /// ``` + (is_wasm_read, is_wasm, 8), + /// Returns whether data from a reader is an EXE. + /// DLL and EXE have the same magic number, so returns true also for a DLL. + /// + /// # Examples + /// + /// ```rust + /// use std::fs; + /// use std::io::prelude::*; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let mut f = File::open("testdata/sample.exe")?; + /// let exe = infer::app::is_exe_read(&mut f).unwrap(); + /// assert!(exe); + /// Ok(()) + /// } + /// ``` + (is_exe_read, is_exe, 2), + /// Returns whether data from a reader is a DLL. + /// DLL and EXE have the same magic number, so returns true also for an EXE. + (is_dll_read, is_dll, 2), + /// Returns whether data from reader is an ELF. + (is_elf_read, is_elf, 53), + /// Returns whether data from reader is compiled Java bytecode. + (is_java_read, is_java, 8), + /// Returns whether data from reader is LLVM Bitcode. + (is_llvm_read, is_llvm, 2), + /// Returns whether data from reader is a Mach-O binary. + (is_mach_read, is_mach, 4), + /// Returns whether data from reader is a Dalvik Executable (DEX). + (is_dex_read, is_dex, 4), + /// Returns whether data from reader is a Dey Optimized Dalvik Executable (ODEX). + (is_dey_read, is_dey, 101), + /// Returns whether data from reader is a DER encoded X.509 certificate. + (is_der_read, is_der, 101), + /// Returns whether data from reader is a Common Object File Format for i386 architecture. + (is_coff_i386_read, is_coff_i386, 3), + /// Returns whether data from reader is a Common Object File Format for x64 architecture.
+ (is_coff_x64_read, is_coff_x64, 3), + /// Returns whether data from reader is a Common Object File Format for Itanium architecture. + (is_coff_ia64_read, is_coff_ia64, 3), + /// Returns whether data from reader is a Common Object File Format. + (is_coff_read, is_coff, 3), + /// Returns whether data from reader is PEM data. + (is_pem_read, is_pem, 12) +); diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index 3e6c53e..4d598fd 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is an ePub. pub fn is_epub(buf: &[u8]) -> bool { crate::book::is_epub(buf) @@ -91,7 +94,7 @@ pub fn is_nes(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x4E && buf[1] == 0x45 && buf[2] == 0x53 && buf[3] == 0x1A } -/// Returns whether a buffer is Google Chrome Extension +/// Returns whether a buffer is Google Chrome Extension. pub fn is_crx(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x43 && buf[1] == 0x72 && buf[2] == 0x32 && buf[3] == 0x34 } @@ -103,7 +106,7 @@ pub fn is_cab(buf: &[u8]) -> bool { || (buf[0] == 0x49 && buf[1] == 0x53 && buf[2] == 0x63 && buf[3] == 0x28)) } -/// Returns whether a buffer is a eot octet stream. +/// Returns whether a buffer is an eot octet stream. pub fn is_eot(buf: &[u8]) -> bool { buf.len() > 35 && buf[34] == 0x4C @@ -167,7 +170,7 @@ pub fn is_deb(buf: &[u8]) -> bool { && buf[20] == 0x79 } -/// Returns whether a buffer is a ar archive. +/// Returns whether a buffer is an ar archive. pub fn is_ar(buf: &[u8]) -> bool { buf.len() > 6 && buf[0] == 0x21 @@ -230,3 +233,73 @@ pub fn is_cpio(buf: &[u8]) -> bool { && buf[4] == 0x30 && buf[5] == 0x31) // newc format } + +super::build_fn_read_api!( + /// Returns whether data from reader is an ePub. + (is_epub_read, is_epub, 58), + /// Returns whether data from reader is a zip archive. + (is_zip_read, is_zip, 4), + /// Returns whether data from reader is a tar archive. 
+ (is_tar_read, is_tar, 262), + /// Returns whether data from reader is a RAR archive. + (is_rar_read, is_rar, 7), + /// Returns whether data from reader is a gzip archive. + (is_gz_read, is_gz, 3), + /// Returns whether data from reader is a bzip2 archive. + (is_bz2_read, is_bz2, 3), + /// Returns whether data from reader is a 7z archive. + (is_7z_read, is_7z, 6), + /// Returns whether data from reader is a PDF. + (is_pdf_read, is_pdf, 4), + /// Returns whether data from reader is a Shockwave Flash (SWF) file. + (is_swf_read, is_swf, 3), + /// Returns whether data from reader is an RTF. + (is_rtf_read, is_rtf, 5), + /// Returns whether data from reader is a Nintendo NES ROM. + (is_nes_read, is_nes, 4), + /// Returns whether data from reader is a Google Chrome Extension. + (is_crx_read, is_crx, 4), + /// Returns whether data from reader is a CAB. + (is_cab_read, is_cab, 4), + /// Returns whether data from reader is an eot octet stream. + (is_eot_read, is_eot, 36), + /// Returns whether data from reader is postscript. + (is_ps_read, is_ps, 2), + /// Returns whether data from reader is an xz archive. + (is_xz_read, is_xz, 6), + /// Returns whether data from reader is a SQLite database. + /// + /// # Examples + /// + /// ```rust + /// use std::fs; + /// use std::io::prelude::*; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let mut f = File::open("testdata/sample.db")?; + /// let sqlite = infer::archive::is_sqlite_read(&mut f).unwrap(); + /// assert!(sqlite); + /// Ok(()) + /// } + /// ``` + (is_sqlite_read, is_sqlite, 4), + /// Returns whether data from reader is a deb archive. + (is_deb_read, is_deb, 21), + /// Returns whether data from reader is an ar archive. + (is_ar_read, is_ar, 7), + /// Returns whether data from reader is a Z archive. + (is_z_read, is_z, 2), + /// Returns whether data from reader is a lzip archive. + (is_lz_read, is_lz, 4), + /// Returns whether data from reader is an RPM.
+ (is_rpm_read, is_rpm, 97), + /// Returns whether data from reader is a dcm archive. + (is_dcm_read, is_dcm, 132), + /// Returns whether data from reader is a Zstd archive. + (is_zst_read, is_zst, 4), + /// Returns whether data from reader is an MSI Windows Installer archive. + (is_msi_read, is_msi, 8), + /// Returns whether data from reader is a CPIO archive. + (is_cpio_read, is_cpio, 7) +); diff --git a/src/matchers/audio.rs b/src/matchers/audio.rs index 73cfed4..35aea33 100644 --- a/src/matchers/audio.rs +++ b/src/matchers/audio.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is MIDI data. pub fn is_midi(buf: &[u8]) -> bool { buf.len() > 3 && buf[0] == 0x4D && buf[1] == 0x54 && buf[2] == 0x68 && buf[3] == 0x64 @@ -104,3 +107,30 @@ pub fn is_ape(buf: &[u8]) -> bool { // ref: https://github.com/fernandotcl/monkeys-audio/blob/master/src/MACLib/APEHeader.h buf.len() > 4 && buf[0] == b'M' && buf[1] == b'A' && buf[2] == b'C' && buf[3] == b' ' } + +super::build_fn_read_api!( + /// Returns whether data from reader is MIDI data. + (is_midi_read, is_midi, 4), + /// Returns whether data from reader is MP3 data. + (is_mp3_read, is_mp3, 3), + /// Returns whether data from reader is M4A data. + (is_m4a_read, is_m4a, 11), + /// Returns whether data from reader is OGG data. + (is_ogg_read, is_ogg, 4), + /// Returns whether data from reader is OGG Opus data. + (is_ogg_opus_read, is_ogg_opus, 36), + /// Returns whether data from reader is FLAC data. + (is_flac_read, is_flac, 4), + /// Returns whether data from reader is WAV data. + (is_wav_read, is_wav, 12), + /// Returns whether data from reader is AMR data. + (is_amr_read, is_amr, 12), + /// Returns whether data from reader is AAC data. + (is_aac_read, is_aac, 2), + /// Returns whether data from reader is AIFF data. + (is_aiff_read, is_aiff, 12), + /// Returns whether data from reader is DSF data.
+ (is_dsf_read, is_dsf, 5), + /// Returns whether data from reader is APE (Monkey's Audio) data. + (is_ape_read, is_ape, 5) +); diff --git a/src/matchers/book.rs b/src/matchers/book.rs index 17d0f12..d9ed3eb 100644 --- a/src/matchers/book.rs +++ b/src/matchers/book.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is an ePub. pub fn is_epub(buf: &[u8]) -> bool { buf.len() > 57 @@ -49,3 +52,10 @@ pub fn is_mobi(buf: &[u8]) -> bool { && buf[66] == 0x42 && buf[67] == 0x49 } + +super::build_fn_read_api!( + /// Returns whether data from reader is an ePub. + (is_epub_read, is_epub, 58), + /// Returns whether data from reader is a mobi. + (is_mobi_read, is_mobi, 68) +); diff --git a/src/matchers/doc.rs b/src/matchers/doc.rs index 71c3a03..8449f52 100644 --- a/src/matchers/doc.rs +++ b/src/matchers/doc.rs @@ -1,5 +1,8 @@ use core::convert::TryInto; +#[cfg(feature = "std")] +use std::io::{self, Read}; + use super::compare_bytes; #[allow(clippy::upper_case_acronyms)] @@ -159,3 +162,18 @@ fn search(buf: &[u8], start: usize, range: usize) -> Option { .windows(signature.len()) .position(|window| window == signature) } + +super::build_fn_read_api!( + /// Returns whether data from reader is Microsoft Word Open XML Format Document (DOCX) data. + (is_docx_read, is_docx, 6000), + /// Returns whether data from reader is Microsoft Excel 97-2003 Worksheet (XLS) data. + (is_xls_read, is_xls, 6000), + /// Returns whether data from reader is Microsoft Excel Open XML Format Spreadsheet (XLSX) data. + (is_xlsx_read, is_xlsx, 6000), + /// Returns whether data from reader is Microsoft PowerPoint 97-2003 Presentation (PPT) data. + (is_ppt_read, is_ppt, 6000), + /// Returns whether data from reader is Microsoft PowerPoint Open XML Presentation (PPTX) data. + (is_pptx_read, is_pptx, 6000), + /// Returns whether data from reader is Microsoft Word Document (DOC) data. 
+ (is_doc_read, is_doc, 36) +); diff --git a/src/matchers/font.rs b/src/matchers/font.rs index aca5e1c..9f5ea30 100644 --- a/src/matchers/font.rs +++ b/src/matchers/font.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is WOFF font data. pub fn is_woff(buf: &[u8]) -> bool { buf.len() > 7 @@ -43,3 +46,14 @@ pub fn is_otf(buf: &[u8]) -> bool { && buf[3] == 0x4F && buf[4] == 0x00 } + +super::build_fn_read_api!( + /// Returns whether data from reader is WOFF font data. + (is_woff_read, is_woff, 8), + /// Returns whether data from reader is WOFF2 font data. + (is_woff2_read, is_woff2, 8), + /// Returns whether data from reader is TTF font data. + (is_ttf_read, is_ttf, 5), + /// Returns whether data from reader is OTF font data. + (is_otf_read, is_otf, 5) +); diff --git a/src/matchers/image.rs b/src/matchers/image.rs index 0ecf624..89d7c49 100644 --- a/src/matchers/image.rs +++ b/src/matchers/image.rs @@ -1,5 +1,8 @@ use core::convert::TryInto; +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is JPEG image data. pub fn is_jpeg(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0xFF && buf[1] == 0xD8 && buf[2] == 0xFF @@ -148,20 +151,7 @@ pub fn is_avif(buf: &[u8]) -> bool { false } -// IsISOBMFF checks whether the given buffer represents ISO Base Media File Format data -fn is_isobmff(buf: &[u8]) -> bool { - if buf.len() < 16 { - return false; - } - - if &buf[4..8] != b"ftyp" { - return false; - } - - let ftyp_length = u32::from_be_bytes(buf[0..4].try_into().unwrap()) as usize; - buf.len() >= ftyp_length -} - +/// Returns whether a buffer is OpenRaster (ora) image data. 
pub fn is_ora(buf: &[u8]) -> bool { buf.len() > 57 && buf[0] == 0x50 @@ -194,7 +184,21 @@ pub fn is_ora(buf: &[u8]) -> bool { && buf[53] == 0x72 } -// GetFtyp returns the major brand, minor version and compatible brands of the ISO-BMFF data +// is_isobmff checks whether the given buffer represents ISO Base Media File Format data. +fn is_isobmff(buf: &[u8]) -> bool { + if buf.len() < 16 { + return false; + } + + if &buf[4..8] != b"ftyp" { + return false; + } + + let ftyp_length = u32::from_be_bytes(buf[0..4].try_into().unwrap()) as usize; + buf.len() >= ftyp_length +} + +// get_ftyp returns the major brand, minor version and compatible brands of the ISO-BMFF data. fn get_ftyp(buf: &[u8]) -> Option<(&[u8], &[u8], impl Iterator)> { if buf.len() < 16 { return None; @@ -210,3 +214,51 @@ fn get_ftyp(buf: &[u8]) -> Option<(&[u8], &[u8], impl Iterator)> { Some((major, minor, compatible)) } + +super::build_fn_read_api!( + /// Returns whether data from reader is a JPEG. + /// + /// # Examples + /// + /// ```rust + /// use std::fs; + /// use std::io::prelude::*; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let mut f = File::open("testdata/sample.jpg")?; + /// let jpeg = infer::image::is_jpeg_read(&mut f).unwrap(); + /// assert!(jpeg); + /// Ok(()) + /// } + /// ``` + (is_jpeg_read, is_jpeg, 3), + /// Returns whether data from reader is jpg2 image data. + (is_jpeg2000_read, is_jpeg2000, 13), + /// Returns whether data from reader is PNG image data. + (is_png_read, is_png, 4), + /// Returns whether data from reader is GIF image data. + (is_gif_read, is_gif, 3), + /// Returns whether data from reader is WEBP image data. + (is_webp_read, is_webp, 12), + /// Returns whether data from reader is Canon CR2 image data. + (is_cr2_read, is_cr2, 11), + /// Returns whether data from reader is TIFF image data. + (is_tiff_read, is_tiff, 10), + /// Returns whether data from reader is BMP image data.
+ (is_bmp_read, is_bmp, 2), + /// Returns whether data from reader is jxr image data. + (is_jxr_read, is_jxr, 3), + /// Returns whether data from reader is Photoshop PSD image data. + (is_psd_read, is_psd, 4), + /// Returns whether data from reader is ICO icon image data. + (is_ico_read, is_ico, 4), + /// Returns whether data from reader is HEIF image data. + (is_heif_read, is_heif, 8192), + /// Returns whether data from reader is AVIF image data. + (is_avif_read, is_avif, 8192), + /// Returns whether data from reader is JPEG XL (JXL) image data. + (is_jxl_read, is_jxl, 13), + /// Returns whether data from reader is OpenRaster (ora) image data. + (is_ora_read, is_ora, 58) +); diff --git a/src/matchers/mod.rs b/src/matchers/mod.rs index 30ed410..2c09d7c 100644 --- a/src/matchers/mod.rs +++ b/src/matchers/mod.rs @@ -26,3 +26,25 @@ pub(crate) fn compare_bytes(slice: &[u8], sub_slice: &[u8], start_offset: usize) true } + +macro_rules! build_fn_read_api +{ + ( + $( + $(#[$outer:meta])* + ($name:tt, $impl_fn:ident, $sz:literal) + ),* + ) => { + $( + $(#[$outer])* + #[cfg(feature = "std")] + pub fn $name(r: &mut dyn Read) -> io::Result { + let mut buffer = Vec::with_capacity($sz); + r.take($sz).read_to_end(&mut buffer)?; + Ok($impl_fn(&buffer)) + } + ) * + }; +} + +pub(crate) use build_fn_read_api; diff --git a/src/matchers/odf.rs b/src/matchers/odf.rs index bc0a1de..544208b 100644 --- a/src/matchers/odf.rs +++ b/src/matchers/odf.rs @@ -1,5 +1,8 @@ use super::compare_bytes; +#[cfg(feature = "std")] +use std::io::{self, Read}; + #[derive(Debug, Eq, PartialEq)] enum DocType { Text, @@ -7,17 +10,17 @@ enum DocType { Presentation, } -/// Returns whether a buffer is OpenDocument Text +/// Returns whether a buffer is OpenDocument Text. pub fn is_odt(buf: &[u8]) -> bool { odf(buf) == Some(DocType::Text) } -/// Returns whether a buffer is OpenDocument Spreadsheet +/// Returns whether a buffer is OpenDocument Spreadsheet. 
pub fn is_ods(buf: &[u8]) -> bool { odf(buf) == Some(DocType::Spreadsheet) } -/// Returns whether a buffer is OpenDocument Presentation +/// Returns whether a buffer is OpenDocument Presentation. pub fn is_odp(buf: &[u8]) -> bool { odf(buf) == Some(DocType::Presentation) } @@ -46,3 +49,12 @@ fn odf(buf: &[u8]) -> Option { } None } + +super::build_fn_read_api!( + /// Returns whether data from reader is OpenDocument Text. + (is_odt_read, is_odt, 104), + /// Returns whether data from reader is OpenDocument Spreadsheet. + (is_ods_read, is_ods, 104), + /// Returns whether data from reader is OpenDocument Presentation. + (is_odp_read, is_odp, 104) +); diff --git a/src/matchers/text.rs b/src/matchers/text.rs index 60297f7..c80a250 100644 --- a/src/matchers/text.rs +++ b/src/matchers/text.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is html data. /// /// Conforms to [whatwg](https://mimesniff.spec.whatwg.org/) @@ -113,3 +116,12 @@ mod tests { assert!(!is_shellscript(b"#!")); } } + +super::build_fn_read_api!( + /// Returns whether data from reader is html data. + (is_html_read, is_html, 16), + /// Returns whether data from reader is xml data. + (is_xml_read, is_xml, 16), + /// Returns whether data from reader is a shell script. + (is_shellscript_read, is_shellscript, 3) +); diff --git a/src/matchers/video.rs b/src/matchers/video.rs index d4cb142..ea3078e 100644 --- a/src/matchers/video.rs +++ b/src/matchers/video.rs @@ -1,3 +1,6 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + /// Returns whether a buffer is M4V video data. pub fn is_m4v(buf: &[u8]) -> bool { buf.len() > 10 @@ -136,3 +139,24 @@ pub fn is_mp4(buf: &[u8]) -> bool { || (buf[8] == b'F' && buf[9] == b'4' && buf[10] == b'V' && buf[11] == b' ') || (buf[8] == b'F' && buf[9] == b'4' && buf[10] == b'P' && buf[11] == b' ')) } + +super::build_fn_read_api!( + /// Returns whether data from reader is M4V video data. 
+ (is_m4v_read, is_m4v, 11), + /// Returns whether data from reader is MKV video data. + (is_mkv_read, is_mkv, 39), + /// Returns whether data from reader is WEBM video data. + (is_webm_read, is_webm, 4), + /// Returns whether data from reader is Quicktime MOV video data. + (is_mov_read, is_mov, 16), + /// Returns whether data from reader is AVI video data. + (is_avi_read, is_avi, 11), + /// Returns whether data from reader is WMV video data. + (is_wmv_read, is_wmv, 11), + /// Returns whether data from reader is MPEG video data. + (is_mpeg_read, is_mpeg, 4), + /// Returns whether data from reader is FLV video data. + (is_flv_read, is_flv, 4), + /// Returns whether data from reader is MP4 video data. + (is_mp4_read, is_mp4, 12) +); diff --git a/src/matchtype.rs b/src/matchtype.rs new file mode 100644 index 0000000..186eea5 --- /dev/null +++ b/src/matchtype.rs @@ -0,0 +1,176 @@ +#[cfg(feature = "std")] +use std::io::{self, Read}; + +use core::fmt; + +use super::map::{MatcherType, WrapMatcher}; + +#[cfg(feature = "std")] +use super::map::WrapReadMatcher; + +/// Matcher function +pub type Matcher = fn(&[u8]) -> bool; + +#[cfg(feature = "std")] +pub type ReadMatcher = fn(&mut dyn Read) -> io::Result; + +/// Generic information for a type +#[cfg(feature = "std")] +#[derive(Copy, Clone)] +pub struct Type { + matcher_type: MatcherType, + mime_type: &'static str, + extension: &'static str, + matcher: WrapMatcher, + read_matcher: Option, + read_size: Option, +} + +/// Generic information for a type +#[cfg(not(feature = "std"))] +#[derive(Copy, Clone)] +pub struct Type { + matcher_type: MatcherType, + mime_type: &'static str, + extension: &'static str, + matcher: WrapMatcher, +} + +impl Type { + #[cfg(feature = "std")] + pub(crate) const fn new_static( + matcher_type: MatcherType, + mime_type: &'static str, + extension: &'static str, + matcher: WrapMatcher, + read_matcher: Option, + ) -> Self { + Self { + matcher_type, + mime_type, + extension, + matcher, + read_matcher, + 
read_size: None, + } + } + + #[cfg(not(feature = "std"))] + pub(crate) const fn new_static( + matcher_type: MatcherType, + mime_type: &'static str, + extension: &'static str, + matcher: WrapMatcher, + ) -> Self { + Self { + matcher_type, + mime_type, + extension, + matcher, + } + } + + /// Returns a new `Type` with matcher, extension and an optional read matcher. + #[cfg(feature = "std")] + pub fn new( + matcher_type: MatcherType, + mime_type: &'static str, + extension: &'static str, + matcher: Matcher, + read_matcher: Option, + ) -> Self { + Self::new_static( + matcher_type, + mime_type, + extension, + WrapMatcher(matcher), + read_matcher.map(WrapReadMatcher), + ) + } + + /// Returns a new `Type` with matcher and extension. + #[cfg(not(feature = "std"))] + pub fn new( + matcher_type: MatcherType, + mime_type: &'static str, + extension: &'static str, + matcher: Matcher, + ) -> Self { + Self::new_static(matcher_type, mime_type, extension, WrapMatcher(matcher)) + } + + /// Returns the type of matcher + /// + /// # Examples + /// + /// ```rust + /// let info = infer::Infer::new(); + /// let buf = [0xFF, 0xD8, 0xFF, 0xAA]; + /// let kind = info.get(&buf).expect("file type is known"); + /// + /// assert_eq!(kind.matcher_type(), infer::MatcherType::Image); + /// ``` + pub const fn matcher_type(&self) -> MatcherType { + self.matcher_type + } + + /// Returns the mime type + pub const fn mime_type(&self) -> &'static str { + self.mime_type + } + + /// Returns the file extension + pub const fn extension(&self) -> &'static str { + self.extension + } + + /// Checks if buf matches this Type + pub(crate) fn matches(&self, buf: &[u8]) -> bool { + (self.matcher.0)(buf) + } + + /// Checks if reader matches this Type + #[cfg(feature = "std")] + pub(crate) fn matches_read(&self, r: &mut impl Read) -> io::Result { + match self.read_matcher { + Some(m) => m.0(r), + None => Ok(false), + } + } + + /// Returns the read size, or 0 when none is set + #[cfg(feature = "std")] + pub fn read_size(&self) -> usize { +
self.read_size.unwrap_or(0) + } + + /// Returns whether the type supports matching by Read. + #[cfg(feature = "std")] + pub fn supports_read_match(&self) -> bool { + self.read_matcher.is_some() + } +} + +impl fmt::Debug for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Type") + .field("matcher_type", &self.matcher_type) + .field("mime_type", &self.mime_type) + .field("extension", &self.extension) + .finish() + } +} + +impl fmt::Display for Type { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.mime_type, f) + } +} + +impl PartialEq for Type { + fn eq(&self, other: &Self) -> bool { + self.matcher_type == other.matcher_type + && self.mime_type == other.mime_type + && self.extension == other.extension + } +} diff --git a/src/read.rs b/src/read.rs new file mode 100644 index 0000000..f58aa84 --- /dev/null +++ b/src/read.rs @@ -0,0 +1,481 @@ +#[cfg(feature = "std")] +use super::map::MatcherType; +#[cfg(feature = "std")] +use super::matchtype::*; +#[cfg(feature = "std")] +use super::Infer; +#[cfg(feature = "std")] +use super::INFER; +#[cfg(feature = "std")] +use std::io::{self, Read, Seek}; + +#[cfg(feature = "std")] +impl Infer { + /// Returns the file type of the data in the reader. 
+ /// + /// # Examples + /// + /// ```rust + /// use std::fs; + /// use std::io::prelude::*; + /// use std::fs::File; + /// + /// fn main() -> std::io::Result<()> { + /// let info = infer::Infer::new(); + /// let mut f = File::open("testdata/sample.jpg")?; + /// let kind = info.get_read(&mut f).unwrap().expect("file type is known"); + /// assert_eq!(kind.mime_type(), "image/jpeg"); + /// assert_eq!(kind.extension(), "jpg"); + /// Ok(()) + /// } + /// ``` + pub fn get_read(&self, r: &mut R) -> io::Result> + where + R: Read + Seek, + { + let mut res_value: Option = None; + + for kind in self.iter_matchers() { + let match_res = kind.matches_read(r)?; + if match_res { + res_value = Some(*kind); + break; + } + + r.rewind().ok(); + } + + Ok(res_value) + } + + /// Determines whether data from reader is of given extension. + /// + /// # Examples + /// + /// See [`is_read`](./fn.is_read.html). + pub fn is_read(&self, r: &mut R, extension: &str) -> io::Result + where + R: Read + Seek, + { + let mut res_value: bool = false; + + for kind in self.iter_matchers() { + if kind.extension() == extension { + let match_res = kind.matches_read(r)?; + if match_res { + res_value = true; + break; + } + + r.rewind().ok(); + } + } + + Ok(res_value) + } + + /// Determines whether data from reader is of given mime type. + /// + /// # Examples + /// + /// See [`is_mime_read`](./fn.is_mime_read.html). + pub fn is_mime_read(&self, r: &mut R, mime_type: &str) -> io::Result + where + R: Read + Seek, + { + let mut res_value: bool = false; + for kind in self.iter_matchers() { + if kind.mime_type() == mime_type { + let match_res = kind.matches_read(r)?; + if match_res { + res_value = true; + break; + } + + r.rewind().ok(); + } + } + + Ok(res_value) + } + + /// Determines whether data from reader is an application type. + /// + /// # Examples + /// + /// See [`is_app_read`](./fn.is_app_read.html).
+ pub fn is_app_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::App) + } + + /// Determines whether data from reader is an archive type. + /// + /// # Examples + /// + /// See [`is_archive_read`](./fn.is_archive_read.html). + pub fn is_archive_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Archive) + } + + /// Determines whether data from reader is an audio type. + /// + /// # Examples + /// + /// See [`is_audio_read`](./fn.is_audio_read.html). + pub fn is_audio_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Audio) + } + + /// Determines whether data from reader is a book type. + /// + /// # Examples + /// + /// See [`is_book_read`](./fn.is_book_read.html). + pub fn is_book_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Book) + } + + /// Determines whether data from reader is a document type. + /// + /// # Examples + /// + /// See [`is_document_read`](./fn.is_document_read.html). + pub fn is_document_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Doc) + } + + /// Determines whether data from reader is a font type. + /// + /// # Examples + /// + /// See [`is_font_read`](./fn.is_font_read.html). + pub fn is_font_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Font) + } + + /// Determines whether data from reader is an image type. + /// + /// # Examples + /// + /// See [`is_image_read`](./fn.is_image_read.html). + pub fn is_image_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Image) + } + + /// Determines whether data from reader is a video type. + /// + /// # Examples + /// + /// See [`is_video_read`](./fn.is_video_read.html). 
+ pub fn is_video_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Video) + } + + /// Determines whether data from reader is one of the custom types added. + pub fn is_custom_read(&self, r: &mut R) -> io::Result + where + R: Read + Seek, + { + self.is_type_read(r, MatcherType::Custom) + } + + fn is_type_read(&self, r: &mut R, matcher_type: MatcherType) -> io::Result + where + R: Read + Seek, + { + let mut res_value: bool = false; + + for kind in self.iter_matchers() { + if kind.matcher_type() == matcher_type && kind.supports_read_match() { + let match_res = kind.matches_read(r)?; + if match_res { + res_value = true; + break; + } + + r.rewind().ok(); + } + } + + Ok(res_value) + } +} + +/// Returns the file type of the data in the reader. +/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.jpg")?; +/// let kind = infer::get_read(&mut f).unwrap().expect("file type is known"); +/// assert_eq!(kind.mime_type(), "image/jpeg"); +/// assert_eq!(kind.extension(), "jpg"); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn get_read(r: &mut R) -> io::Result> +where + R: Read + Seek, +{ + INFER.get_read(r) +} + +/// Determines whether data from reader is of given extension. +/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.jpg")?; +/// let jpg = infer::is_read(&mut f, "jpg").unwrap(); +/// assert!(jpg); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_read(r: &mut R, extension: &str) -> io::Result +where + R: Read + Seek, +{ + INFER.is_read(r, extension) +} + +/// Determines whether data from reader is of given mime type.
+/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.jpg")?; +/// assert!(infer::is_mime_read(&mut f, "image/jpeg").unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_mime_read(r: &mut R, mime_type: &str) -> io::Result +where + R: Read + Seek, +{ + INFER.is_mime_read(r, mime_type) +} + +/// Determines whether data from reader is an application type. +/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.wasm")?; +/// assert!(infer::is_app_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_app_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_app_read(r) +} + +/// Determines whether data from reader is an archive type. +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.pdf")?; +/// assert!(infer::is_archive_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_archive_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_archive_read(r) +} + +/// Determines whether data from reader is an audio type. +/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.mp3")?; +/// assert!(infer::is_audio_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_audio_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_audio_read(r) +} + +/// Determines whether data from reader is a book type.
+/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.epub")?; +/// assert!(infer::is_book_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_book_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_book_read(r) +} + +/// Determines whether data from reader is a document type. +/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.docx")?; +/// assert!(infer::is_document_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_document_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_document_read(r) +} + +/// Determines whether data from reader is a font type. +/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.ttf")?; +/// assert!(infer::is_font_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_font_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_font_read(r) +} + +/// Determines whether data from reader is an image type. +/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.png")?; +/// assert!(infer::is_image_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_image_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_image_read(r) +} + +/// Determines whether data from reader is a video type. 
+/// +/// # Examples +/// +/// ```rust +/// use std::fs; +/// use std::io::prelude::*; +/// use std::fs::File; +/// +/// fn main() -> std::io::Result<()> { +/// let mut f = File::open("testdata/sample.mov")?; +/// assert!(infer::is_video_read(&mut f).unwrap()); +/// Ok(()) +/// } +/// ``` +#[cfg(feature = "std")] +pub fn is_video_read(r: &mut R) -> io::Result +where + R: Read + Seek, +{ + INFER.is_video_read(r) +} diff --git a/tests/common.rs b/tests/common.rs index 635bcfd..7446fdc 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -8,11 +8,21 @@ macro_rules! test_format { false } + #[cfg(feature = "std")] + fn matcher_read(_r: &mut dyn std::io::Read) -> std::io::Result { + Ok(false) + } + #[cfg(feature = "std")] #[test] fn get_from_path() { - let expected_kind = - Type::new(MatcherType::$exp_matchert, $exp_mimet, $exp_ext, matcher); + let expected_kind = Type::new( + MatcherType::$exp_matchert, + $exp_mimet, + $exp_ext, + matcher, + Some(matcher_read), + ); let kind = infer::get_from_path(concat!("testdata/", $file)) .expect("test file read") .expect("test file matches"); @@ -20,15 +30,54 @@ macro_rules! 
test_format { assert_eq!(expected_kind, kind); } + #[cfg(feature = "std")] + #[test] + fn get() { + let expected_kind = Type::new( + MatcherType::$exp_matchert, + $exp_mimet, + $exp_ext, + matcher, + None, + ); + + let buf = include_bytes!(concat!("../testdata/", $file)); + let kind = infer::get(buf).expect("test file matches"); + + assert_eq!(expected_kind, kind); + } + + #[cfg(not(feature = "std"))] #[test] fn get() { let expected_kind = Type::new(MatcherType::$exp_matchert, $exp_mimet, $exp_ext, matcher); + let buf = include_bytes!(concat!("../testdata/", $file)); let kind = infer::get(buf).expect("test file matches"); assert_eq!(expected_kind, kind); } + + #[cfg(feature = "std")] + #[test] + fn get_read() { + let expected_kind = Type::new( + MatcherType::$exp_matchert, + $exp_mimet, + $exp_ext, + matcher, + Some(matcher_read), + ); + + let mut f = std::fs::File::open(concat!("./testdata/", $file)).unwrap(); + let tp = infer::get_type_by_extension($exp_ext).unwrap(); + + if tp.supports_read_match() { + let kind = infer::get_read(&mut f).unwrap().expect("test file matches"); + assert_eq!(expected_kind, kind); + } + } } }; } diff --git a/tests/doc.rs b/tests/doc.rs index e62a4af..8a1c8ef 100644 --- a/tests/doc.rs +++ b/tests/doc.rs @@ -10,10 +10,19 @@ macro_rules! test_format_get_only { false } + fn matcher_read(_r: &mut dyn std::io::Read) -> std::io::Result { + Ok(false) + } + #[test] fn get() { - let expected_kind = - Type::new(MatcherType::$exp_matchert, $exp_mimet, $exp_ext, matcher); + let expected_kind = Type::new( + MatcherType::$exp_matchert, + $exp_mimet, + $exp_ext, + matcher, + Some(matcher_read), + ); let buf = include_bytes!(concat!("../testdata/", $file)); let kind = infer::get(buf).expect("test file matches");