Skip to content

Commit

Permalink
Merge pull request #51 from oscar-project/dev-lib
Browse files: browse the repository at this point in the history
feat(lib): make oscar-tools available as lib
  • Loading branch information
Uinelj authored Aug 31, 2023
2 parents 3a43b89 + 32b88be commit d6a73cb
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 6 deletions.
23 changes: 19 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
[package]
authors = ["Pedro J. Ortiz <[email protected]>", "Julien Abadji <[email protected]>"]
authors = [
"Pedro J. Ortiz <[email protected]>",
"Julien Abadji <[email protected]>",
]
edition = "2021"
name = "oscar-tools"
version = "0.3.0"
repository = "https://github.com/oscar-project/oscar-tools"
description = "Tools for processing OSCAR Corpora"
license = "Apache-2.0"

[[bin]]
name = "oscar-tools"
path = "src/main.rs"

[lib]
name = "oscar_tools"
path = "src/lib.rs"

[features]
zstd = ["dep:zstd"]

Expand All @@ -21,7 +32,7 @@ rayon = "1.5.1"
runiq-lib = "1.2.2"
serde_json = "1.0.78"
sha2 = "0.10.1"
zstd = {version="0.11.2", optional=true}
zstd = { version = "0.11.2", optional = true }
walkdir = "2.3.3"

[dependencies.clap]
Expand Down Expand Up @@ -49,5 +60,9 @@ ci = ["github"]
# The installers to generate for each app
installers = []
# Target platforms to build apps for (Rust target-triple syntax)
targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows-msvc", "aarch64-apple-darwin"]

targets = [
"x86_64-unknown-linux-gnu",
"x86_64-apple-darwin",
"x86_64-pc-windows-msvc",
"aarch64-apple-darwin",
]
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mod error;
mod ops;

pub use ops::Checksum;
1 change: 1 addition & 0 deletions src/ops/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::{
path::{Path, PathBuf},
};

use log::{debug, error, info, warn};
use rayon::{iter::ParallelIterator, prelude::ParallelBridge};
use sha2::{Digest, Sha384};

Expand Down
3 changes: 2 additions & 1 deletion src/ops/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::{
};

use flate2::{write::GzEncoder, Compression};
use log::{debug, error, info, warn};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use walkdir::WalkDir;

Expand Down Expand Up @@ -79,7 +80,7 @@ pub trait Compress {

Ok(())
}

/// Recursively compresses files in provided folder.
/// If `del_src` is set to `true`, removes the source files at `src` once they have been compressed.
/// `src` has to exist and be a folder.
Expand Down
2 changes: 1 addition & 1 deletion src/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mod filter_tags;
mod sampling;
mod split;

pub(crate) use checksum::Checksum;
pub use checksum::Checksum;
pub(crate) use compress::Compress;
pub(crate) use dedup::Dedup;
pub(crate) use extract_text::ExtractText;
Expand Down
1 change: 1 addition & 0 deletions src/ops/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
These operations split the corpus into smaller files of a defined max size.
!*/
use log::{debug, error, info, warn};
use std::{
borrow::Cow,
fs::File,
Expand Down

0 comments on commit d6a73cb

Please sign in to comment.