-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add paths module to influxdb3_write #24579
Changes from 1 commit
b631ee2
4e0b907
c973beb
c0d2a6b
0c2b9e7
6eeaf9b
d1fbbe8
bb9c479
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
use crate::SegmentId; | ||
use std::convert::AsRef; | ||
use std::ops::Deref; | ||
use std::path::Path; | ||
use std::path::PathBuf; | ||
|
||
/// File extension for catalog files | ||
/// (applied by `CatalogFilePath::new` via `set_extension`, so no leading dot) | ||
const CATALOG_FILE_EXTENSION: &str = "json"; | ||
|
||
/// File extension for parquet files | ||
/// (applied by `ParquetFilePath::new` via `set_extension`, so no leading dot) | ||
const PARQUET_FILE_EXTENSION: &str = "parquet"; | ||
|
||
/// File extension for segment files | ||
/// (applied by `SegmentFilePath::new` via `set_extension`, so no leading dot) | ||
const SEGMENT_FILE_EXTENSION: &str = "wal"; | ||
|
||
/// Newtype wrapping the `PathBuf` that locates a catalog file. | ||
/// | ||
/// The inner field is private; values are built with `CatalogFilePath::new`. | ||
#[derive(Debug, Clone, PartialEq, Eq)] | ||
pub struct CatalogFilePath(PathBuf); | ||
|
||
impl CatalogFilePath { | ||
/// Builds the path `<prefix>/catalogs/<sequence_number>.json`. | ||
/// | ||
/// `sequence_number` is rendered in decimal, left-padded with zeros to at | ||
/// least 10 digits (e.g. `0` becomes `0000000000.json`). | ||
/// | ||
/// NOTE(review): `:010` is a minimum width, not a maximum. A `u64` above | ||
/// 9_999_999_999 is written with all of its digits, so such names are longer | ||
/// than 10 characters. | ||
pub fn new(prefix: impl Into<PathBuf>, sequence_number: u64) -> Self { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sequence_number should be a u32. No need for a u64 here and limiting the size ensures that 10 digit padding on the string conversion won't break for any value that is valid. |
||
let mut path = prefix.into(); | ||
path.push("catalogs"); | ||
// File stem: zero-padded sequence number; the extension is added below. | ||
path.push(format!("{sequence_number:010}")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't going to work as a naming convention for the Catalog or SegmentInfo file names. We need file names such that when ordered lexicographically, the highest numbered ones will be returned first. Changing the file to be named For wal files we don't need this naming convention, but we could follow it to keep things consistent. It would also become handy if we ever want to copy the wal segments into object storage for access by other systems. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I opted out of this for wal files since they don't really need to be human readable, but for the other types I did the |
||
path.set_extension(CATALOG_FILE_EXTENSION); | ||
Self(path) | ||
} | ||
} | ||
|
||
/// Dereferences to the wrapped path as a `Path`, so `Path` methods can be | ||
/// called directly on a `CatalogFilePath`. | ||
impl Deref for CatalogFilePath { | ||
type Target = Path; | ||
|
||
/// Returns a borrow of the inner `PathBuf`, viewed as a `Path`. | ||
fn deref(&self) -> &Self::Target { | ||
&self.0 | ||
} | ||
} | ||
|
||
/// Allows a `CatalogFilePath` to be passed to APIs that accept `impl AsRef<Path>`. | ||
impl AsRef<Path> for CatalogFilePath { | ||
/// Returns a borrow of the inner path. | ||
fn as_ref(&self) -> &Path { | ||
&self.0 | ||
} | ||
} | ||
|
||
/// Newtype wrapping the `PathBuf` that locates a parquet file. | ||
/// | ||
/// The inner field is private; values are built with `ParquetFilePath::new`. | ||
#[derive(Debug, Clone, PartialEq, Eq)] | ||
pub struct ParquetFilePath(PathBuf); | ||
|
||
impl ParquetFilePath { | ||
/// Builds the path | ||
/// `<prefix>/dbs/<db_name>/<table_name>/<year>-<MM>-<DD>/<file_number>.parquet`. | ||
/// | ||
/// `month` and `day` are zero-padded to two digits, `year` is written | ||
/// without padding, and `file_number` is zero-padded to at least 10 digits. | ||
/// | ||
/// No validation is done here: the date parts are formatted as given, and | ||
/// `db_name` / `table_name` are pushed onto the path unchanged. | ||
/// NOTE(review): `PathBuf::push` replaces the whole path when handed an | ||
/// absolute component, so callers are presumably expected to pass plain | ||
/// names. Confirm against callers. | ||
pub fn new( | ||
prefix: impl Into<PathBuf>, | ||
db_name: &str, | ||
table_name: &str, | ||
year: u16, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be better to have this be a |
||
month: u8, | ||
day: u8, | ||
file_number: usize, | ||
) -> Self { | ||
let mut path = prefix.into(); | ||
path.push("dbs"); | ||
path.push(db_name); | ||
path.push(table_name); | ||
// Date directory, e.g. `2038-01-19`. | ||
path.push(format!("{year}-{month:02}-{day:02}")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These days it or time are both maintained now, but I think to keep it consistent I'll use chrono |
||
// File stem: zero-padded file number; the extension is added below. | ||
path.push(format!("{file_number:010}")); | ||
path.set_extension(PARQUET_FILE_EXTENSION); | ||
Self(path) | ||
} | ||
} | ||
|
||
/// Dereferences to the wrapped path as a `Path`, so `Path` methods can be | ||
/// called directly on a `ParquetFilePath`. | ||
impl Deref for ParquetFilePath { | ||
type Target = Path; | ||
|
||
/// Returns a borrow of the inner `PathBuf`, viewed as a `Path`. | ||
fn deref(&self) -> &Self::Target { | ||
&self.0 | ||
} | ||
} | ||
|
||
/// Allows a `ParquetFilePath` to be passed to APIs that accept `impl AsRef<Path>`. | ||
impl AsRef<Path> for ParquetFilePath { | ||
/// Returns a borrow of the inner path. | ||
fn as_ref(&self) -> &Path { | ||
&self.0 | ||
} | ||
} | ||
|
||
/// Newtype wrapping the `PathBuf` that locates a segment (`.wal`) file. | ||
/// | ||
/// The inner field is private; values are built with `SegmentFilePath::new`. | ||
#[derive(Debug, Clone, PartialEq, Eq)] | ||
pub struct SegmentFilePath(PathBuf); | ||
|
||
impl SegmentFilePath { | ||
/// Builds the path `<prefix>/segments/<segment_id>.wal`. | ||
/// | ||
/// The file stem is the inner value of `segment_id` (`segment_id.0`), | ||
/// zero-padded to at least 10 digits. | ||
pub fn new(prefix: impl Into<PathBuf>, segment_id: SegmentId) -> Self { | ||
let mut path = prefix.into(); | ||
path.push("segments"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The wal files don't need to have this directory since the only thing in the wal directory will be segment files. |
||
// File stem: zero-padded segment id; the extension is added below. | ||
path.push(format!("{:010}", segment_id.0)); | ||
path.set_extension(SEGMENT_FILE_EXTENSION); | ||
Self(path) | ||
} | ||
} | ||
|
||
/// Dereferences to the wrapped path as a `Path`, so `Path` methods can be | ||
/// called directly on a `SegmentFilePath`. | ||
impl Deref for SegmentFilePath { | ||
type Target = Path; | ||
|
||
/// Returns a borrow of the inner `PathBuf`, viewed as a `Path`. | ||
fn deref(&self) -> &Self::Target { | ||
&self.0 | ||
} | ||
} | ||
|
||
/// Allows a `SegmentFilePath` to be passed to APIs that accept `impl AsRef<Path>`. | ||
impl AsRef<Path> for SegmentFilePath { | ||
/// Returns a borrow of the inner path. | ||
fn as_ref(&self) -> &Path { | ||
&self.0 | ||
} | ||
} | ||
|
||
// Sequence number 0 under `prefix/dir` must produce | ||
// `prefix/dir/catalogs/0000000000.json`. | ||
#[test] | ||
fn catalog_file_path_new() { | ||
assert_eq!( | ||
*CatalogFilePath::new("prefix/dir", 0), | ||
PathBuf::from("prefix/dir/catalogs/0000000000.json").as_ref() | ||
); | ||
} | ||
// Database `my_db`, table `my_table`, date 2038-01-19 and file number 0 must | ||
// produce `prefix/dir/dbs/my_db/my_table/2038-01-19/0000000000.parquet`. | ||
#[test] | ||
fn parquet_file_path_new() { | ||
assert_eq!( | ||
*ParquetFilePath::new("prefix/dir", "my_db", "my_table", 2038, 1, 19, 0), | ||
PathBuf::from("prefix/dir/dbs/my_db/my_table/2038-01-19/0000000000.parquet").as_ref() | ||
); | ||
} | ||
// Segment id 0 under `prefix/dir` must produce | ||
// `prefix/dir/segments/0000000000.wal`. | ||
#[test] | ||
fn segment_file_path_new() { | ||
assert_eq!( | ||
*SegmentFilePath::new("prefix/dir", SegmentId::new(0)), | ||
PathBuf::from("prefix/dir/segments/0000000000.wal").as_ref() | ||
); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this should be `SEGMENT_WAL_FILE_EXTENSION` to differentiate. Then we'd have `SEGMENT_INFO_FILE_EXTENSION`, which would be `json`.