Skip to content

Commit

Permalink
chore: skip inner zip (AppFlowy-IO#332)
Browse files Browse the repository at this point in the history
* chore: skip inner zip

* chore: add test

* chore: fix test
  • Loading branch information
appflowy authored Nov 1, 2024
1 parent c1b107e commit fa89b05
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 14 deletions.
26 changes: 23 additions & 3 deletions collab-importer/src/zip_tool/sync_zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::fs::{File, OpenOptions};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::{fs, io};
use tracing::warn;
use tracing::{trace, warn};
use zip::read::ZipArchive;

pub struct UnzipFile {
Expand All @@ -29,6 +29,7 @@ pub fn sync_unzip(
let mut root_dir = None;
let mut parts = vec![];

// Determine the root directory if the first entry is a directory
if let Ok(entry) = archive.by_index(0) {
let filename = entry.name().to_string();
if root_dir.is_none() && entry.is_dir() {
Expand All @@ -53,6 +54,12 @@ pub fn sync_unzip(
.map_err(|e| ImporterError::Internal(anyhow!("Failed to read entry: {:?}", e)))?;

let filename = entry.name().to_string();
// Skip nested zip files: any file entry ending in ".zip" other than the first archive entry
if entry.is_file() && filename.ends_with(".zip") && i != 0 {
trace!("Skipping zip file: {:?}", filename);
continue;
}

let output_path = out_dir.join(&filename);
if entry.is_dir() {
fs::create_dir_all(&output_path)
Expand All @@ -70,7 +77,7 @@ pub fn sync_unzip(
.open(&output_path)
.map_err(|e| {
ImporterError::Internal(anyhow!(
"Failed to create or open file with path: {:?}, error:{:?}",
"Failed to create or open file with path: {:?}, error: {:?}",
output_path,
e
))
Expand All @@ -81,6 +88,7 @@ pub fn sync_unzip(
ImporterError::Internal(anyhow!("Failed to read entry content: {:?}", e))
})?;

// Check if it's a multipart zip file
if buffer.len() >= 4 {
let four_bytes: [u8; 4] = buffer[..4].try_into().unwrap();
if is_multi_part_zip_signature(&four_bytes) {
Expand All @@ -101,6 +109,7 @@ pub fn sync_unzip(
}
}
}
drop(archive);

// Process multipart zip files
if !parts.is_empty() {
Expand Down Expand Up @@ -143,6 +152,11 @@ fn unzip_single_file(
.by_index(i)
.map_err(|e| ImporterError::Internal(anyhow!("Failed to read entry: {:?}", e)))?;

let entry_name = entry.name();
if entry_name == ".DS_Store" || entry_name.starts_with("__MACOSX") {
continue;
}

let file_name = entry.name().to_string();
if root_dir.is_none() && entry.is_dir() {
root_dir = Some(
Expand Down Expand Up @@ -176,7 +190,13 @@ fn unzip_single_file(
.write(true)
.create_new(true)
.open(&path)
.map_err(|e| ImporterError::Internal(anyhow!("Failed to create part file: {:?}", e)))?;
.map_err(|e| {
ImporterError::Internal(anyhow!(
"Failed to create part file: {:?}, path:{:?}",
e,
path
))
})?;

io::copy(&mut entry, &mut outfile)
.map_err(|e| ImporterError::Internal(anyhow!("Failed to write file: {:?}", e)))?;
Expand Down
Binary file not shown.
53 changes: 48 additions & 5 deletions collab-importer/tests/notion_test/import_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use collab_entity::CollabType;
use collab_folder::hierarchy_builder::ParentChildViews;
use collab_folder::{default_folder_data, Folder, View};
use collab_importer::error::ImporterError;
use collab_importer::imported_collab::{import_notion_zip_file, ImportType};
use collab_importer::imported_collab::{import_notion_zip_file, ImportType, ImportedCollabInfo};
use collab_importer::notion::page::NotionPage;
use collab_importer::notion::{is_csv_contained_cached, CSVContentCache, NotionImporter};
use collab_importer::util::{parse_csv, CSVRow};
Expand All @@ -28,9 +28,10 @@ use std::collections::{HashMap, HashSet};
use std::env::temp_dir;
use std::path::PathBuf;
use std::sync::Arc;

// #[tokio::test]
// async fn import_part_zip_test2() {
// let (_cleaner, file_path) = sync_unzip_asset("abc").await.unwrap();
// async fn import_test() {
// let (_cleaner, file_path) = sync_unzip_asset("appflowy_io_full").await.unwrap();
// let importer = NotionImporter::new(
// 1,
// &file_path,
Expand All @@ -41,10 +42,52 @@ use std::sync::Arc;
// let info = importer.import().await.unwrap();
// let nested_view = info.build_nested_views().await;
// println!("{}", nested_view);
// // let collabs = info.into_collab_stream().await.collect::<Vec<_>>().await;
// // for collab in collabs {}
// }

/// Imports the `project&task_contain_zip_attachment` asset and checks the
/// resulting collabs: their order, names, number of imported collabs, and the
/// number of resource files attached to each one.
#[tokio::test]
async fn import_zip_file_contains_zip_as_attachments() {
    let asset_name = "project&task_contain_zip_attachment";
    // `_cleaner` is kept alive until the end of the test.
    let (_cleaner, unzipped_path) = sync_unzip_asset(asset_name).await.unwrap();

    let notion_importer = NotionImporter::new(
        1,
        &unzipped_path,
        uuid::Uuid::new_v4(),
        "http://test.appflowy.cloud".to_string(),
    )
    .unwrap();
    let imported = notion_importer.import().await.unwrap();

    // Print the view hierarchy to make failures easier to diagnose.
    let view_tree = imported.build_nested_views().await;
    println!("{}", view_tree);

    let collab_stream = imported.into_collab_stream().await;
    let collabs = collab_stream.collect::<Vec<ImportedCollabInfo>>().await;

    // The length check comes first so the indexing below cannot go out of bounds.
    assert_eq!(collabs.len(), 4);

    // Entry 0: the root page named after the archive.
    let root = &collabs[0];
    assert_eq!(root.name, "project&task_contain_zip_attachment");
    assert_eq!(root.imported_collabs.len(), 1);
    assert_eq!(root.resources[0].files.len(), 0);

    // Entry 1: the "Projects & Tasks" page.
    let projects_and_tasks = &collabs[1];
    assert_eq!(projects_and_tasks.name, "Projects & Tasks");
    assert_eq!(projects_and_tasks.imported_collabs.len(), 1);
    assert_eq!(projects_and_tasks.resources[0].files.len(), 0);

    // Entry 2: "Projects" is the only entry expected to carry resource files.
    let projects = &collabs[2];
    assert_eq!(projects.name, "Projects");
    assert_eq!(projects.imported_collabs.len(), 9);
    assert_eq!(projects.resources[0].files.len(), 2);
    assert_eq!(projects.file_size(), 1143952);

    // Entry 3: "Tasks".
    let tasks = &collabs[3];
    assert_eq!(tasks.name, "Tasks");
    assert_eq!(tasks.imported_collabs.len(), 18);
    assert_eq!(tasks.resources[0].files.len(), 0);
}

#[tokio::test]
async fn import_csv_without_subpage_folder_test() {
let (_cleaner, file_path_1) = async_unzip_asset("project&task_no_subpages").await.unwrap();
Expand Down
23 changes: 17 additions & 6 deletions collab-importer/tests/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ impl Drop for Cleaner {

pub async fn sync_unzip_asset(file_name: &str) -> std::io::Result<(Cleaner, PathBuf)> {
let zip_file_path = PathBuf::from(format!("./tests/asset/{}.zip", file_name));
if !zip_file_path.exists() {
panic!("File not found: {:?}", zip_file_path);
}
let file_name = zip_file_path
.file_stem()
.unwrap()
Expand All @@ -39,21 +42,29 @@ pub async fn sync_unzip_asset(file_name: &str) -> std::io::Result<(Cleaner, Path
.to_string();

let output_folder_path = temp_dir().join(uuid::Uuid::new_v4().to_string());
// let output_folder_path = std::env::current_dir()
// .unwrap()
// .join(uuid::Uuid::new_v4().to_string());
tokio::fs::create_dir_all(&output_folder_path).await?;

let unzip_file_path = sync_unzip(zip_file_path, output_folder_path, Some(file_name))
.unwrap()
.unzip_dir;
let start = std::time::Instant::now();
let unzip_file_path = tokio::task::spawn_blocking(move || {
sync_unzip(zip_file_path, output_folder_path.clone(), Some(file_name))
.unwrap()
.unzip_dir
})
.await
.unwrap();

println!("sync_unzip_asset took: {:?}", start.elapsed());

Ok((Cleaner::new(unzip_file_path.clone()), unzip_file_path))
}

pub async fn async_unzip_asset(file_name: &str) -> std::io::Result<(Cleaner, PathBuf)> {
setup_log();
let zip_file_path = PathBuf::from(format!("./tests/asset/{}.zip", file_name));
let output_folder_path = temp_dir().join(uuid::Uuid::new_v4().to_string());
// let output_folder_path = std::env::current_dir()
// .unwrap()
// .join(uuid::Uuid::new_v4().to_string());
tokio::fs::create_dir_all(&output_folder_path).await?;

let file_name = zip_file_path
Expand Down

0 comments on commit fa89b05

Please sign in to comment.