Skip to content

Commit

Permalink
refactor: rename Partition to FileRange
Browse files Browse the repository at this point in the history
  • Loading branch information
evenyag committed Apr 30, 2024
1 parent 9680027 commit 94a1590
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 29 deletions.
2 changes: 1 addition & 1 deletion src/mito2/src/sst/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@

//! SST in parquet format.

pub(crate) mod file_range;
mod format;
pub(crate) mod helper;
pub(crate) mod metadata;
mod page_reader;
pub(crate) mod partition;
pub mod reader;
pub mod row_group;
mod row_selection;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! Structs and functions for reading partitions from a parquet file. A partition
//! Structs and functions for reading ranges from a parquet file. A file range
//! is usually a row group in a parquet file.

use std::ops::BitAnd;
Expand All @@ -30,21 +30,21 @@ use crate::row_converter::{McmpRowCodec, RowCodec};
use crate::sst::parquet::format::ReadFormat;
use crate::sst::parquet::reader::{RowGroupReader, RowGroupReaderBuilder, SimpleFilterContext};

/// A partition of a parquet SST. Now it is a row group.
/// We can read different partitions in parallel.
pub struct Partition {
/// A range of a parquet SST. Currently, a file range is a single row group.
/// We can read different file ranges in parallel.
pub struct FileRange {
/// Shared context.
context: PartitionContextRef,
context: FileRangeContextRef,
/// Index of the row group in the SST.
row_group_idx: usize,
/// Row selection for the row group. `None` means all rows.
row_selection: Option<RowSelection>,
}

impl Partition {
/// Creates a new partition.
impl FileRange {
/// Creates a new [FileRange].
pub(crate) fn new(
context: PartitionContextRef,
context: FileRangeContextRef,
row_group_idx: usize,
row_selection: Option<RowSelection>,
) -> Self {
Expand All @@ -55,7 +55,7 @@ impl Partition {
}
}

/// Returns a reader to read the partition.
/// Returns a reader to read the [FileRange].
#[allow(dead_code)]
pub(crate) async fn reader(&self) -> Result<RowGroupReader> {
let parquet_reader = self
Expand All @@ -68,8 +68,8 @@ impl Partition {
}
}

/// Context shared by partitions of the same parquet SST.
pub(crate) struct PartitionContext {
/// Context shared by ranges of the same parquet SST.
pub(crate) struct FileRangeContext {
/// Row group reader builder for the file.
reader_builder: RowGroupReaderBuilder,
/// Filters pushed down.
Expand All @@ -80,10 +80,10 @@ pub(crate) struct PartitionContext {
codec: McmpRowCodec,
}

pub(crate) type PartitionContextRef = Arc<PartitionContext>;
pub(crate) type FileRangeContextRef = Arc<FileRangeContext>;

impl PartitionContext {
/// Creates a new partition context.
impl FileRangeContext {
/// Creates a new [FileRangeContext].
pub(crate) fn new(
reader_builder: RowGroupReaderBuilder,
filters: Vec<SimpleFilterContext>,
Expand Down
28 changes: 14 additions & 14 deletions src/mito2/src/sst/parquet/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ use crate::read::{Batch, BatchReader};
use crate::row_converter::{McmpRowCodec, SortField};
use crate::sst::file::FileHandle;
use crate::sst::index::applier::SstIndexApplierRef;
use crate::sst::parquet::file_range::{FileRange, FileRangeContext, FileRangeContextRef};
use crate::sst::parquet::format::ReadFormat;
use crate::sst::parquet::metadata::MetadataLoader;
use crate::sst::parquet::partition::{Partition, PartitionContext, PartitionContextRef};
use crate::sst::parquet::row_group::InMemoryRowGroup;
use crate::sst::parquet::row_selection::row_selection_from_row_ranges;
use crate::sst::parquet::stats::RowGroupPruningStats;
Expand Down Expand Up @@ -154,21 +154,21 @@ impl ParquetReaderBuilder {
ParquetReader::new(context, row_groups).await
}

/// Builds [Partition]s to read and pushes them to `partitions`.
/// Builds [FileRange]s to read and pushes them to `file_ranges`.
#[allow(dead_code)]
pub async fn build_partitions(&self, partitions: &mut Vec<Partition>) -> Result<()> {
pub async fn build_file_ranges(&self, file_ranges: &mut Vec<FileRange>) -> Result<()> {
let (context, row_groups) = self.build_reader_input().await?;
for (row_group_idx, row_selection) in row_groups {
let partition = Partition::new(context.clone(), row_group_idx, row_selection);
partitions.push(partition);
let file_range = FileRange::new(context.clone(), row_group_idx, row_selection);
file_ranges.push(file_range);
}
Ok(())
}

/// Builds a [PartitionContext] and collects row groups to read.
/// Builds a [FileRangeContext] and collects row groups to read.
///
/// This needs to perform IO operations.
async fn build_reader_input(&self) -> Result<(PartitionContextRef, RowGroupMap)> {
async fn build_reader_input(&self) -> Result<(FileRangeContextRef, RowGroupMap)> {
let start = Instant::now();

let file_path = self.file_handle.file_path(&self.file_dir);
Expand Down Expand Up @@ -243,7 +243,7 @@ impl ParquetReaderBuilder {
.collect(),
);

let context = PartitionContext::new(reader_builder, filters, read_format, codec);
let context = FileRangeContext::new(reader_builder, filters, read_format, codec);
Ok((Arc::new(context), row_groups))
}

Expand Down Expand Up @@ -622,8 +622,8 @@ type RowGroupMap = BTreeMap<usize, Option<RowSelection>>;

/// Parquet batch reader to read our SST format.
pub struct ParquetReader {
/// Partition context.
context: PartitionContextRef,
/// File range context.
context: FileRangeContextRef,
/// Indices of row groups to read, along with their respective row selections.
row_groups: RowGroupMap,
/// Reader of current row group.
Expand Down Expand Up @@ -715,7 +715,7 @@ impl Drop for ParquetReader {
impl ParquetReader {
/// Creates a new reader.
async fn new(
context: PartitionContextRef,
context: FileRangeContextRef,
mut row_groups: BTreeMap<usize, Option<RowSelection>>,
) -> Result<Self> {
// No more items in current row group, reads next row group.
Expand Down Expand Up @@ -749,8 +749,8 @@ impl ParquetReader {

/// Reader to read a row group of a parquet file.
pub(crate) struct RowGroupReader {
/// Context of partitions.
context: PartitionContextRef,
/// Context for file ranges.
context: FileRangeContextRef,
/// Inner parquet reader.
reader: ParquetRecordBatchReader,
/// Buffered batches to return.
Expand All @@ -761,7 +761,7 @@ pub(crate) struct RowGroupReader {

impl RowGroupReader {
/// Creates a new reader.
pub(crate) fn new(context: PartitionContextRef, reader: ParquetRecordBatchReader) -> Self {
pub(crate) fn new(context: FileRangeContextRef, reader: ParquetRecordBatchReader) -> Self {
Self {
context,
reader,
Expand Down

0 comments on commit 94a1590

Please sign in to comment.