Skip to content

Commit

Permalink
feat: storing TSV lines as string to reduce storage size (#57)
Browse files: browse the repository at this point in the history.
  • Loading branch information
holtgrewe committed May 31, 2023
1 parent 986d452 commit f824941
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 21 deletions.
6 changes: 2 additions & 4 deletions src/tsv/cli/import/mod.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -79,23 +79,21 @@ pub fn process_tsv_line(
db: &rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>,
cf_data: &std::sync::Arc<rocksdb::BoundColumnFamily>,
) -> Result<(), anyhow::Error> {
let line = line;
let values = ctx.line_to_values(line)?;
let values = values.iter().collect::<Vec<_>>();
let var = ctx.values_to_var(&values)?;

if let Some(var) = var.as_ref() {
let key: Vec<u8> = var.clone().into();
let value = ctx.encode_values(&values)?;

tracing::trace!(
"putting for var = {:?}, key = {:?}, value = {:?}",
&var,
&key,
&value
&line.as_bytes()
);

db.put_cf(cf_data, key, value)?;
db.put_cf(cf_data, key, line.as_bytes())?;
} else {
tracing::trace!("skipping line: {:?}", &line);
}
Expand Down
10 changes: 6 additions & 4 deletions src/tsv/cli/query.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -154,7 +154,8 @@ fn query_for_variant(
let raw_value = db
.get_cf(&cf_data, key)?
.ok_or_else(|| anyhow::anyhow!("could not find variant in database"))?;
let values = ctx.decode_values(&raw_value)?;
let line = std::str::from_utf8(raw_value.as_slice())?;
let values = ctx.line_to_values(line)?;

Ok(values)
}
Expand Down Expand Up @@ -229,8 +230,8 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error>

// Iterate over all variants until we are behind stop.
while iter.valid() {
if let Some(value) = iter.value() {
tracing::trace!("iterator at {:?} => {:?}", &iter.key(), &value);
if let Some(line_raw) = iter.value() {
tracing::trace!("iterator at {:?} => {:?}", &iter.key(), &line_raw);
if let Some(stop) = stop.as_ref() {
let iter_key = iter.key().unwrap();
let iter_pos: keys::Pos = iter_key.into();
Expand All @@ -240,7 +241,8 @@ pub fn run(common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error>
}
}

let values = ctx.decode_values(value)?;
let line = std::str::from_utf8(line_raw)?;
let values = ctx.line_to_values(line)?;
print_values(&mut out_writer, args.out_format, &meta, values)?;
iter.next();
} else {
Expand Down
4 changes: 2 additions & 2 deletions tests/tsv/example/data.tsv.gz.db/000014.sst
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/tsv/example/data.tsv.gz.db/000016.sst
Git LFS file not shown
2 changes: 1 addition & 1 deletion tests/tsv/example/data.tsv.gz.db/IDENTITY
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/tsv/example/data.tsv.gz.db/LOG
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/tsv/example/data.tsv.gz.db/MANIFEST-000005
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/tsv/example/data.tsv.gz.db/OPTIONS-000009
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/tsv/example/data.tsv.gz.db/OPTIONS-000011
Git LFS file not shown

0 comments on commit f824941

Please sign in to comment.