-
Notifications
You must be signed in to change notification settings - Fork 208
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
nydusify: introduce chunkdict generate subcommand #1401
Open
newthifans
wants to merge
12
commits into
dragonflyoss:master
Choose a base branch
from
newthifans:yuanzhao08
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+2,638
−159
Open
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
fdcb676
Add algorithm "Dbscan cluster" and "exponential_smoothing" and introd…
newthifans db7d395
nydus-image: Store chunk and blob metadata
cslinwang b153bf6
Add function that nydusify invoke "nydus-image chunkdict generate" an…
newthifans d104c12
Merge invoked subcomand in nydusify and add smoke test
newthifans 7cd3631
nydus-image: Store chunk and blob metadata
cslinwang 7de4835
Merge invoked subcomand in nydusify and add smoke test
newthifans 135779b
Modify database fields synchronously
newthifans eb21188
Add push chunkdict to registry and add smoke test(support v5 and v6)
newthifans eae2b55
Revise based on comments
cslinwang 18c7763
Fixed bugs in chunkdict and added blobinfo support to chunkdict.
cslinwang 00ce305
Merge branch 'master' into yuanzhao08
cslinwang 6093309
nydus-image: merge main branch and remove unnecessary output.
cslinwang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,295 @@ | ||
// Copyright (C) 2023 Nydus Developers. All rights reserved. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
//! Generate Chunkdict RAFS bootstrap. | ||
//! ------------------------------------------------------------------------------------------------- | ||
//! Bug 1: Inconsistent Chunk Size Leading to Blob Size Less Than 4K(v6_block_size) | ||
//! Description: The size of chunks is not consistent, which results in the possibility that a blob, | ||
//! composed of a group of these chunks, may be less than 4K(v6_block_size) in size. | ||
//! This inconsistency leads to a failure in passing the size check. | ||
//! ------------------------------------------------------------------------------------------------- | ||
//! Bug 2: Incorrect Chunk Number Calculation Due to Premature Check Logic | ||
//! Description: The current logic for calculating the chunk number is based on the formula size/chunk size. | ||
//! However, this approach is flawed as it precedes the actual check which accounts for chunk statistics. | ||
//! Consequently, this leads to inaccurate counting of chunk numbers. | ||
|
||
use super::core::node::{ChunkSource, NodeInfo}; | ||
use super::{BlobManager, Bootstrap, BootstrapManager, BuildContext, BuildOutput, Tree}; | ||
use crate::core::node::Node; | ||
use crate::NodeChunk; | ||
use anyhow::Result; | ||
use nydus_rafs::metadata::chunk::ChunkWrapper; | ||
use nydus_rafs::metadata::inode::InodeWrapper; | ||
use nydus_rafs::metadata::layout::RafsXAttrs; | ||
use nydus_storage::meta::BlobChunkInfoV1Ondisk; | ||
use nydus_utils::compress::Algorithm; | ||
use nydus_utils::digest::RafsDigest; | ||
use std::ffi::OsString; | ||
use std::mem::size_of; | ||
use std::path::PathBuf; | ||
use std::sync::Arc; | ||
|
||
/// Description of a single chunk used as input for chunkdict generation.
///
/// The size, offset, digest and blob id fields are copied verbatim into the
/// chunk records of the generated bootstrap.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ChunkdictChunkInfo {
    /// Image reference associated with the chunk (not read by the generator itself;
    /// presumably the image the chunk was collected from — confirm with the caller).
    pub image_reference: String,
    /// Version string associated with the chunk (not read by the generator itself).
    pub version: String,
    /// Id of the blob the chunk belongs to.
    pub chunk_blob_id: String,
    /// Chunk digest in string form; parsed with `RafsDigest::from_string`.
    pub chunk_digest: String,
    /// Compressed size of the chunk.
    pub chunk_compressed_size: u32,
    /// Uncompressed size of the chunk.
    pub chunk_uncompressed_size: u32,
    /// Offset of the compressed chunk.
    pub chunk_compressed_offset: u64,
    /// Offset of the uncompressed chunk.
    pub chunk_uncompressed_offset: u64,
}
|
||
/// Description of a blob referenced by chunkdict chunks.
///
/// The compressor and chunk-info (CI) metadata fields are applied to the blob
/// context of every chunk that references `blob_id`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ChunkdictBlobInfo {
    /// Id of the blob; matched against `ChunkdictChunkInfo::chunk_blob_id`.
    pub blob_id: String,
    /// Compressed size of the blob (not read by the generator itself).
    pub blob_compressed_size: u64,
    /// Uncompressed size of the blob (not read by the generator itself).
    pub blob_uncompressed_size: u64,
    /// Name of the blob compressor: one of `"None"`, `"Lz4Block"`, `"GZip"`, `"Zstd"`.
    /// Any other value is treated as `"None"` by the generator.
    pub blob_compressor: String,
    /// Compressed size of the blob's chunk-info metadata.
    pub blob_meta_ci_compressed_size: u64,
    /// Uncompressed size of the blob's chunk-info metadata.
    pub blob_meta_ci_uncompressed_size: u64,
    /// Offset of the blob's compressed chunk-info metadata.
    pub blob_meta_ci_offset: u64,
}
|
||
/// Generator of chunkdict RAFS bootstraps.
///
/// The type carries no state; it only groups the associated functions that
/// build the bootstrap.
pub struct Generator {}
|
||
impl Generator { | ||
// Generate chunkdict RAFS bootstrap. | ||
pub fn generate( | ||
ctx: &mut BuildContext, | ||
bootstrap_mgr: &mut BootstrapManager, | ||
blob_mgr: &mut BlobManager, | ||
chunkdict_chunks_origin: Vec<ChunkdictChunkInfo>, | ||
chunkdict_blobs: Vec<ChunkdictBlobInfo>, | ||
) -> Result<BuildOutput> { | ||
// Validate and remove chunks whose belonged blob sizes are smaller than a block. | ||
let mut chunkdict_chunks = chunkdict_chunks_origin.to_vec(); | ||
Self::validate_and_remove_chunks(ctx, &mut chunkdict_chunks); | ||
// build root tree | ||
let mut tree = Self::build_root_tree(ctx)?; | ||
|
||
// build child tree | ||
let child = Self::build_child_tree(ctx, blob_mgr, &chunkdict_chunks, &chunkdict_blobs)?; | ||
let result = vec![child]; | ||
tree.children = result; | ||
|
||
Self::validate_tree(&tree)?; | ||
|
||
// build bootstrap | ||
let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; | ||
let mut bootstrap = Bootstrap::new(tree)?; | ||
bootstrap.build(ctx, &mut bootstrap_ctx)?; | ||
|
||
let blob_table = blob_mgr.to_blob_table(ctx)?; | ||
let storage = &mut bootstrap_mgr.bootstrap_storage; | ||
bootstrap.dump(ctx, storage, &mut bootstrap_ctx, &blob_table)?; | ||
|
||
BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage) | ||
} | ||
|
||
/// validate tree | ||
fn validate_tree(tree: &Tree) -> Result<()> { | ||
let pre = &mut |t: &Tree| -> Result<()> { | ||
let node = t.lock_node(); | ||
debug!("chunkdict tree: "); | ||
debug!("inode: {}", node); | ||
for chunk in &node.chunks { | ||
debug!("\t chunk: {}", chunk); | ||
} | ||
Ok(()) | ||
}; | ||
tree.walk_dfs_pre(pre)?; | ||
debug!("chunkdict tree is valid."); | ||
Ok(()) | ||
} | ||
|
||
/// check blob uncompressed size is bigger than block | ||
fn validate_and_remove_chunks(ctx: &mut BuildContext, chunkdict: &mut Vec<ChunkdictChunkInfo>) { | ||
let mut chunk_sizes = std::collections::HashMap::new(); | ||
|
||
// Accumulate the uncompressed size for each chunk_blob_id | ||
for chunk in chunkdict.iter() { | ||
*chunk_sizes.entry(chunk.chunk_blob_id.clone()).or_insert(0) += | ||
chunk.chunk_uncompressed_size as u64; | ||
} | ||
// Find all chunk_blob_ids with a total uncompressed size > v6_block_size | ||
let small_chunks: Vec<String> = chunk_sizes | ||
.into_iter() | ||
.filter(|&(_, size)| size < ctx.v6_block_size()) | ||
.inspect(|(id, _)| { | ||
eprintln!( | ||
"Warning: Blob with id '{}' is smaller than {} bytes.", | ||
id, | ||
ctx.v6_block_size() | ||
) | ||
}) | ||
.map(|(id, _)| id) | ||
.collect(); | ||
|
||
// Retain only chunks with chunk_blob_id that has a total uncompressed size > v6_block_size | ||
chunkdict.retain(|chunk| !small_chunks.contains(&chunk.chunk_blob_id)); | ||
} | ||
|
||
/// Build root tree | ||
pub fn build_root_tree(ctx: &mut BuildContext) -> Result<Tree> { | ||
// inode | ||
let mut inode = InodeWrapper::new(ctx.fs_version); | ||
inode.set_ino(1); | ||
inode.set_uid(1000); | ||
inode.set_gid(1000); | ||
inode.set_projid(0); | ||
inode.set_mode(0o660 | libc::S_IFDIR as u32); | ||
inode.set_nlink(3); | ||
inode.set_name_size("/".len()); | ||
inode.set_rdev(0); | ||
inode.set_blocks(256); | ||
let node_info = NodeInfo { | ||
explicit_uidgid: true, | ||
src_dev: 0, | ||
src_ino: 0, | ||
rdev: 0, | ||
source: PathBuf::from("/"), | ||
path: PathBuf::from("/"), | ||
target: PathBuf::from("/"), | ||
target_vec: vec![OsString::from("/")], | ||
symlink: None, | ||
xattrs: RafsXAttrs::default(), | ||
v6_force_extended_inode: true, | ||
}; | ||
let root_node = Node::new(inode, node_info, 0); | ||
let tree = Tree::new(root_node); | ||
Ok(tree) | ||
} | ||
|
||
/// Build child tree | ||
fn build_child_tree( | ||
ctx: &mut BuildContext, | ||
blob_mgr: &mut BlobManager, | ||
chunkdict_chunks: &[ChunkdictChunkInfo], | ||
chunkdict_blobs: &[ChunkdictBlobInfo], | ||
) -> Result<Tree> { | ||
// node | ||
let mut inode = InodeWrapper::new(ctx.fs_version); | ||
inode.set_ino(2); | ||
inode.set_uid(0); | ||
inode.set_gid(0); | ||
inode.set_projid(0); | ||
inode.set_mode(0o660 | libc::S_IFREG as u32); | ||
inode.set_nlink(1); | ||
inode.set_name_size("chunkdict".len()); | ||
inode.set_rdev(0); | ||
inode.set_blocks(256); | ||
let node_info = NodeInfo { | ||
explicit_uidgid: true, | ||
src_dev: 0, | ||
src_ino: 1, | ||
rdev: 0, | ||
source: PathBuf::from("/"), | ||
path: PathBuf::from("/chunkdict"), | ||
target: PathBuf::from("/chunkdict"), | ||
target_vec: vec![OsString::from("/"), OsString::from("/chunkdict")], | ||
symlink: None, | ||
xattrs: RafsXAttrs::new(), | ||
v6_force_extended_inode: true, | ||
}; | ||
let mut node = Node::new(inode, node_info, 0); | ||
|
||
// insert chunks | ||
Self::insert_chunks(ctx, blob_mgr, &mut node, chunkdict_chunks, chunkdict_blobs)?; | ||
|
||
let node_size: u64 = node | ||
.chunks | ||
.iter() | ||
.map(|chunk| chunk.inner.uncompressed_size() as u64) | ||
.sum(); | ||
node.inode.set_size(node_size); | ||
|
||
// update child count | ||
node.inode.set_child_count(node.chunks.len() as u32); | ||
|
||
let child = Tree::new(node); | ||
child | ||
.lock_node() | ||
.v5_set_dir_size(ctx.fs_version, &child.children); | ||
Ok(child) | ||
} | ||
|
||
/// Insert chunks | ||
fn insert_chunks( | ||
ctx: &mut BuildContext, | ||
blob_mgr: &mut BlobManager, | ||
node: &mut Node, | ||
chunkdict_chunks: &[ChunkdictChunkInfo], | ||
chunkdict_blobs: &[ChunkdictBlobInfo], | ||
) -> Result<()> { | ||
for (index, chunk_info) in chunkdict_chunks.iter().enumerate() { | ||
let chunk_size: u32 = chunk_info.chunk_compressed_size; | ||
let file_offset = index as u64 * chunk_size as u64; | ||
let mut chunk = ChunkWrapper::new(ctx.fs_version); | ||
|
||
// update blob context | ||
let (blob_index, blob_ctx) = | ||
blob_mgr.get_or_cerate_blob_for_chunkdict(ctx, &chunk_info.chunk_blob_id)?; | ||
if blob_ctx.blob_id.is_empty() { | ||
blob_ctx.blob_id = chunk_info.chunk_blob_id.clone(); | ||
} | ||
|
||
// blob_ctx. | ||
let chunk_uncompressed_size = chunk_info.chunk_uncompressed_size; | ||
let pre_d_offset = blob_ctx.current_uncompressed_offset; | ||
blob_ctx.uncompressed_blob_size = pre_d_offset + chunk_uncompressed_size as u64; | ||
blob_ctx.current_uncompressed_offset += chunk_uncompressed_size as u64; | ||
|
||
blob_ctx.blob_meta_header.set_ci_uncompressed_size( | ||
blob_ctx.blob_meta_header.ci_uncompressed_size() | ||
+ size_of::<BlobChunkInfoV1Ondisk>() as u64, | ||
); | ||
blob_ctx.blob_meta_header.set_ci_compressed_size( | ||
blob_ctx.blob_meta_header.ci_uncompressed_size() | ||
+ size_of::<BlobChunkInfoV1Ondisk>() as u64, | ||
); | ||
let chunkdict_blob_info = chunkdict_blobs | ||
.iter() | ||
.find(|blob| blob.blob_id == chunk_info.chunk_blob_id) | ||
.unwrap(); | ||
blob_ctx.blob_compressor = match chunkdict_blob_info.blob_compressor.as_str() { | ||
"None" => Algorithm::None, | ||
"Lz4Block" => Algorithm::Lz4Block, | ||
"GZip" => Algorithm::GZip, | ||
"Zstd" => Algorithm::Zstd, | ||
_ => Algorithm::None, | ||
}; | ||
blob_ctx | ||
.blob_meta_header | ||
.set_ci_uncompressed_size(chunkdict_blob_info.blob_meta_ci_uncompressed_size); | ||
blob_ctx | ||
.blob_meta_header | ||
.set_ci_compressed_size(chunkdict_blob_info.blob_meta_ci_compressed_size); | ||
blob_ctx | ||
.blob_meta_header | ||
.set_ci_compressed_offset(chunkdict_blob_info.blob_meta_ci_offset); | ||
blob_ctx.blob_meta_header.set_ci_compressor(Algorithm::Zstd); | ||
|
||
// update chunk | ||
let chunk_index = blob_ctx.alloc_chunk_index()?; | ||
chunk.set_blob_index(blob_index); | ||
chunk.set_index(chunk_index); | ||
chunk.set_file_offset(file_offset); | ||
chunk.set_compressed_size(chunk_info.chunk_compressed_size); | ||
chunk.set_compressed_offset(chunk_info.chunk_compressed_offset); | ||
chunk.set_uncompressed_size(chunk_info.chunk_uncompressed_size); | ||
chunk.set_uncompressed_offset(chunk_info.chunk_uncompressed_offset); | ||
chunk.set_id(RafsDigest::from_string(&chunk_info.chunk_digest)); | ||
|
||
debug!("chunk id: {}", chunk.id()); | ||
|
||
node.chunks.push(NodeChunk { | ||
source: ChunkSource::Build, | ||
inner: Arc::new(chunk.clone()), | ||
}); | ||
} | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A chunk must belong to a blob, when will the
blob_id
be empty?