Skip to content

Commit

Permalink
Arrow CSV reader and filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
GStechschulte committed Jun 24, 2024
1 parent 266dc0b commit 7a5ea7d
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 3 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ version = "0.1.0"
edition = "2021"

[dependencies]
arrow = "52.0.0"
arrow = "52.0.0"
arrow-schema = "52.0.0"
arrow-csv = "52.0.0"
5 changes: 5 additions & 0 deletions data/employee.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id,first_name,last_name,state,job_title,salary
1,Bill,Hopkins,CA,Manager,12000
2,Gregg,Langford,CO,Driver,10000
3,John,Travis,CO,"Manager, Software",11500
4,Von,Mill,,Defensive End,11500
51 changes: 49 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,59 @@
use std::fs::File;
use std::path::Path;
use std::sync::Arc;

use arrow::array::Int32Array;
use arrow_csv::ReaderBuilder;
// use arrow::csv::ReaderBuilder;
// use arrow::datatypes::{DataType, Field, Schema};
use arrow_schema::*;
use rust_query::arrow_types;

// Use our rust-query library
fn main() {

let a = Int32Array::from(vec![10]);
let b = arrow_types::Int8Type::from(vec![10]);

println!("{:?}", a);
println!("{:?}", b);
}

// Code for the query `SELECT * FROM employee WHERE state = 'CO'`
// against a CSV file containing the columns `id`, `first_name`,
// `last_name`, `state`, `job_title`, and `salary`
let schema = Schema::new(vec![
Field::new("id", DataType::Int8, false),
Field::new("first_name", DataType::Utf8, false),
Field::new("last_name", DataType::Utf8, false),
Field::new("state", DataType::Utf8, true),
Field::new("job_title", DataType::Utf8, false),
Field::new("salary", DataType::Int16, false),
]);

let path = Path::new("/Users/gabestechschulte/Documents/repos/rust-query/data/employee.csv");
let display = path.display();
let file = match File::open(&path) {
Err(why) => panic!("Could not read {}: {}", display, why),
Ok(file) => file,
};

let mut batch = ReaderBuilder::new(Arc::new(schema))
.with_header(true)
.build(file)
.unwrap();
let batch_reader = batch.next().unwrap().unwrap();

assert_eq!(batch_reader.num_rows(), 4);
assert_eq!(batch_reader.num_columns(), 6);

let col_by_idx = batch_reader.column(2);
let col_by_name = batch_reader.column_by_name("first_name");

println!("{:?}", col_by_name);

for idx in 0..batch_reader.num_columns() {
let arr = batch_reader.column(idx).as_ref();

// Apply filter using compute kernel
// let filtered = filter
}
}

0 comments on commit 7a5ea7d

Please sign in to comment.