Skip to content

Commit

Permalink
fix: HTM file type upload issue on the Explore page under Admin (#1172)
Browse files: browse the repository at this point in the history
Co-authored-by: Himanshi Agrawal <[email protected]>
Co-authored-by: Roopan P M <[email protected]>
  • Loading branch information
3 people authored Jul 24, 2024
1 parent 604b14e commit fd8619a
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -63,6 +63,7 @@ def get_available_document_types(self) -> list[str]:
"pdf",
"url",
"html",
"htm",
"md",
"jpeg",
"jpg",
Expand Down
11 changes: 11 additions & 0 deletions code/backend/batch/utilities/helpers/config/default.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,6 +74,17 @@
"strategy": "web"
}
},
{
"document_type": "htm",
"chunking": {
"strategy": "layout",
"size": 500,
"overlap": 100
},
"loading": {
"strategy": "web"
}
},
{
"document_type": "docx",
"chunking": {
Expand Down
9 changes: 7 additions & 2 deletions code/tests/utilities/helpers/test_config_helper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -211,6 +211,11 @@ def test_default_config_when_use_advanced_image_processing(env_helper_mock):
"chunking": expected_chunking,
"loading": {"strategy": "web"},
},
{
"document_type": "htm",
"chunking": expected_chunking,
"loading": {"strategy": "web"},
},
{
"document_type": "docx",
"chunking": expected_chunking,
Expand Down
Expand Up @@ -409,7 +414,7 @@ def test_get_available_document_types(config: Config):

# then
assert sorted(document_types) == sorted(
- ["txt", "pdf", "url", "html", "md", "jpeg", "jpg", "png", "docx"]
+ ["txt", "pdf", "url", "html", "htm", "md", "jpeg", "jpg", "png", "docx"]
)


Expand All
@@ -424,7 +429,7 @@ def test_get_available_document_types_when_advanced_image_processing_enabled(

# then
assert sorted(document_types) == sorted(
- ["txt", "pdf", "url", "html", "md", "jpeg", "jpg", "png", "docx", "tiff", "bmp"]
+ ["txt", "pdf", "url", "html", "htm", "md", "jpeg", "jpg", "png", "docx", "tiff", "bmp"]
)


Expand Down

0 comments on commit fd8619a

Please sign in to comment.