diff --git a/CHANGELOG.md b/CHANGELOG.md index 2322390..dba31f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ can be found in [changelog.d folder](https://github.com/papermerge/papermerge-cl +## 0.8.0 - 2024-02-22 + +### Added + +- `--skip-ocr` flag. Works only with Papermerge REST API >= 3.1 + ## 0.7.1 - 2024-02-20 ### Fixed diff --git a/README.md b/README.md index 84f7517..8e89653 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,14 @@ successful import - add `--delete` flag: PLEASE BE CAREFUL WITH `--delete` FLAG AS IT WILL IRREVERSIBLE DELETE THE LOCAL COPY OF THE UPLOADED DOCUMENT! +Choose to skip OCR of imported documents with `--skip-ocr` flag: + + $ papermerge-cli import --skip-ocr /path/to/folder/ + +Skip OCR flag can be used with folders (will apply to all docs in the folder) +or with individual documents. +`--skip-ocr` flag will work only with Papermerge REST API >= v3.1 + ### search Search for node (document or folder) by text or by tags: @@ -169,3 +177,13 @@ or in case of uuid is a folder: You can also specify the format/type of the downloaded archive (e.g. 
in case node is either a folder): $ papermerge-cli download --uuid -f /path/to/file-system/folder.targz -t targz + + +## Version Compatibility + + +| CLI Version | REST API version | Remarks| +|-------------|------------------|--------| +| 0.7.0 | 3.0.x || +| 0.7.1 | 3.0.x || +| 0.8.0 | 3.1.x | Skip OCR feature introduced| diff --git a/papermerge_cli/lib/importer.py b/papermerge_cli/lib/importer.py index 06e3257..61e99a8 100644 --- a/papermerge_cli/lib/importer.py +++ b/papermerge_cli/lib/importer.py @@ -14,7 +14,8 @@ def upload_file_or_folder( token: str, file_or_folder: Path, parent_id=None, - delete: bool = False + delete: bool = False, + skip_ocr: bool = False, ) -> None: user: User = get_me(host=host, token=token) @@ -27,6 +28,7 @@ def upload_file_or_folder( host=host, token=token, file_path=file_or_folder, + skip_ocr=skip_ocr, parent_id=parent_id ) if delete: @@ -40,6 +42,7 @@ def upload_file_or_folder( host=host, token=token, file_path=Path(entry.path), + skip_ocr=skip_ocr, parent_id=parent_id ) diff --git a/papermerge_cli/main.py b/papermerge_cli/main.py index 84e69bb..d5355bd 100644 --- a/papermerge_cli/main.py +++ b/papermerge_cli/main.py @@ -92,11 +92,19 @@ help='Delete local(s) file after successful upload.' ) ] +SkipOCR = Annotated[ + bool, + typer.Option( + is_flag=True, + help='Skip OCR i.e. do not trigger OCR operation on upload.' + ' Works only with REST API >= 3.1' + ) +] TargetNodeID = Annotated[ uuid.UUID, typer.Option( - help="UUID of the target/destination folder. " - "Default value is user's Inbox folder's UUID." 
+ is_flag=True, + help="Trigger OCR" ) ] OrderBy = Annotated[ @@ -138,6 +146,7 @@ def import_command( ctx: typer.Context, file_or_folder: FileOrFolderPath, delete: DeleteAfterImport = False, + skip_ocr: SkipOCR = False, target_id: TargetNodeID | None = None ): """Import recursively folders and documents from local filesystem @@ -150,6 +159,7 @@ def import_command( host=ctx.obj['HOST'], token=ctx.obj['TOKEN'], file_or_folder=Path(file_or_folder), + skip_ocr=skip_ocr, parent_id=target_id, delete=delete ) diff --git a/papermerge_cli/rest/documents.py b/papermerge_cli/rest/documents.py index 862cc1f..c109c69 100644 --- a/papermerge_cli/rest/documents.py +++ b/papermerge_cli/rest/documents.py @@ -9,14 +9,16 @@ def upload( host: str, token: str, file_path: Path, - parent_id: UUID + parent_id: UUID, + skip_ocr: bool = False, ) -> Document: api_client = ApiClient[Document](token=token, host=host) doc_to_create = CreateDocument( title=file_path.name, file_name=file_path.name, - parent_id=parent_id + parent_id=parent_id, + ocr=not skip_ocr ) response_doc: Document = api_client.post( diff --git a/papermerge_cli/schema/documents.py b/papermerge_cli/schema/documents.py index edb4d24..ccaf583 100644 --- a/papermerge_cli/schema/documents.py +++ b/papermerge_cli/schema/documents.py @@ -13,6 +13,9 @@ class CreateDocument(BaseModel): parent_id: UUID | None lang: str | None = None file_name: str | None = None + # if true then OCR the document + # if false then skip OCR part + ocr: bool = True class Page(BaseModel): diff --git a/pyproject.toml b/pyproject.toml index 031d619..610bfc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "papermerge-cli" -version = "0.7.1" +version = "0.8.0" description = "Command line utility for your Papermerge DMS instance" authors = ["Eugen Ciur "] license = "Apache 2.0"