Skip to content

Commit

Permalink
Fix lint
Browse files Browse the repository at this point in the history
  • Loading branch information
antoineKorbit committed Jan 25, 2025
1 parent 41a2818 commit fc26188
Showing 1 changed file with 15 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -569,33 +569,42 @@ def test_read_file_content(tmp_dir_type: Type[Union[Path, str]]) -> None:
checksum = hashlib.md5(content).hexdigest()
assert checksum == files_checksum[file]


@pytest.mark.parametrize("tmp_dir_type", [Path, str])
@pytest.mark.skipif(PDFReader is None, reason="llama-index-readers-file not installed")
def test_exclude_empty(tmp_dir_type: Type[Union[Path, str]]) -> None:
    """Test if exclude_empty flag excludes empty files.

    Creates two non-empty and two empty text files in a temporary directory,
    then loads the directory twice: once with ``exclude_empty=True`` (only the
    non-empty files are expected) and once with ``exclude_empty=False`` (all
    four files are expected).

    Args:
        tmp_dir_type: Callable (``Path`` or ``str``) used to convert the
            temporary directory name before handing it to the reader, so
            both input types are exercised.
    """
    non_empty_files = {"test1.txt": "test1", "test2.txt": "test2"}
    empty_files = {"empty1.txt", "empty2.txt"}

    with TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)

        # Create non-empty files
        for name, text in non_empty_files.items():
            (root / name).write_text(text)

        # Create empty files
        for name in empty_files:
            (root / name).touch()

        # Hand the reader the directory in the parametrized type.
        input_dir = tmp_dir_type(tmp_dir)

        # Test with exclude_empty=True
        reader_exclude = SimpleDirectoryReader(input_dir, exclude_empty=True)
        documents_exclude = reader_exclude.load_data()

        assert len(documents_exclude) == 2
        # Compare as sets: a list never compares equal to a set in Python,
        # and document order is not what this test is checking.
        assert {doc.metadata["file_name"] for doc in documents_exclude} == set(
            non_empty_files
        )

        # Test with exclude_empty=False (default behavior)
        reader_include = SimpleDirectoryReader(input_dir, exclude_empty=False)
        documents_include = reader_include.load_data()

        assert len(documents_include) == 4
        assert {doc.metadata["file_name"] for doc in documents_include} == (
            set(non_empty_files) | empty_files
        )

0 comments on commit fc26188

Please sign in to comment.