diff --git a/README.md b/README.md index f5f9f63..9980b12 100644 --- a/README.md +++ b/README.md @@ -24,15 +24,40 @@ on your platform. use mishagp\OCRmyPDF\OCRmyPDF; //Return file path of outputted, OCRed PDF -echo (new OCRmyPDF('document.pdf'))->run(); +echo OCRmyPDF::make('document.pdf')->run(); //Return file contents of outputted, OCRed PDF -echo (new OCRmyPDF('scannedImage.png'))->setOutputPDFPath(null)->run(); +echo OCRmyPDF::make('scannedImage.png')->setOutputPDFPath(null)->run(); ``` ## API -_This section is a work-in-progress._ +### setParam + +Define invocation parameters for `ocrmypdf`. See `ocrmypdf --help` for a list of available parameters. + +> [!IMPORTANT] +> Parameters configured via `setParam` will override any other parameters or configurations set otherwise. + +```php +use mishagp\OCRmyPDF\OCRmyPDF; + +//Passing a single parameter with a value +OCRmyPDF::make('document_zh-CN.pdf') + ->setParam('-l', 'chi_sim') + ->run(); + +//Passing a single parameter without a value +OCRmyPDF::make('document_withBackground.pdf') + ->setParam('--remove-background') + ->run(); + +//Passing multiple parameters +OCRmyPDF::make('document_withoutAttribution.pdf') + ->setParam('--title', 'Lorem Ipsum') + ->setParam('--keywords', 'Lorem,Ipsum,dolor,sit,amet') + ->run(); +``` ### setInputData @@ -51,7 +76,7 @@ imagepng($img, null, 0); $size = ob_get_length(); $data = ob_get_clean(); -echo (new OCRmyPDF()) +OCRmyPDF::make() ->setInputData($data, $size) ->run(); ``` @@ -62,7 +87,7 @@ Specify a writable path where `ocrmypdf` should generate output PDF. ```php use mishagp\OCRmyPDF\OCRmyPDF; -echo (new OCRmyPDF('document.pdf')) +OCRmyPDF::make('document.pdf') ->setOutputPDFPath('/outputDir/ocr_document.pdf') ->run(); ``` @@ -73,7 +98,7 @@ Define a custom location of the `ocrmypdf` executable, if by any reason it is no ```php use mishagp\OCRmyPDF\OCRmyPDF; -echo (new OCRmyPDF('document.pdf')) +OCRmyPDF::make('document.pdf') ->setExecutable('/path/to/ocrmypdf') ->run(); ``` diff --git a/src/Command.php b/src/Command.php index 13fe48b..03c87b1 100644 --- a/src/Command.php +++ b/src/Command.php @@ -14,7 +14,8 @@ public function __construct( public ?string $inputFilePath = null, public ?string $outputPDFPath = null, public ?string $tempDir = null, - public ?int $threadLimit = null + public ?int $threadLimit = null, + public array $parameters = [] ) { } @@ -65,7 +66,25 @@ public function __toString(): string $cmd = []; $cmd[] = self::escape($this->executable); + if ($this->threadLimit) $cmd[] = "--jobs=$this->threadLimit"; + + foreach ($this->parameters as $key => $value) { + if ($value !== true) { + $paramKeyValue = $key; + $paramKeyValue .= "='"; + if (is_array($value)) { + $paramKeyValue .= join(',', $value); + } else { + $paramKeyValue .= $value; + } + $paramKeyValue .= "'"; + $cmd[] = $paramKeyValue; + } else { + $cmd[] = $key; + } + } + $cmd[] = $this->useFileAsInput ? self::escape((string)$this->inputFilePath) : "-"; $cmd[] = $this->useFileAsOutput ? self::escape($this->getOutputPDFPath()) : "-"; diff --git a/src/OCRmyPDF.php b/src/OCRmyPDF.php index 4544434..27ffa32 100644 --- a/src/OCRmyPDF.php +++ b/src/OCRmyPDF.php @@ -19,6 +19,11 @@ public function __construct(string $inputFile = null, Command $command = null) $this->setInputFile("$inputFile"); } + static function make(string $inputFile = null, Command $command = null): self + { + return new OCRmyPDF($inputFile, $command); + } + /** * @param string $filePath * @return bool @@ -105,7 +110,7 @@ public function run(): string ); } - $process = new Process("$this->command"); + $process = new Process((string)$this->command); if (!$this->command->useFileAsInput) { $process->write( @@ -156,6 +161,7 @@ public function setInputData(string $inputData, int $inputDataSize): OCRmyPDF /** * @return void + * @throws NoWritePermissionsException */ private function cleanTempFiles(): void { @@ -194,4 +200,17 @@ public function setOutputPDFPath(string|null $outputPDFPath): self } return $this; } + + /** + * @param string|string[]|null $value + */ + public function setParam(string $param, null|string|array $value = null): self + { + if (!str_starts_with($param, '-') && !str_starts_with($param, '--')) { + throw new InvalidArgumentException("Parameter $param must start with a - or --"); + } + + $this->command->parameters[$param] = $value ?? true; + return $this; + } } \ No newline at end of file diff --git a/tests/E2E/OCRmyPDFParsesParametersTest.php b/tests/E2E/OCRmyPDFParsesParametersTest.php new file mode 100644 index 0000000..52bbf3c --- /dev/null +++ b/tests/E2E/OCRmyPDFParsesParametersTest.php @@ -0,0 +1,55 @@ +setOutputPDFPath($outputPath) + ->setParam('--title', "Lorem Ipsum"); + + $outputPath = $instance->run(); + $this->assertFileExists($outputPath); + $this->assertFileIsReadable($outputPath); + $this->assertFileIsWritable($outputPath); + echo "Output: $outputPath"; + } + + /** + * @throws OCRmyPDFException + * @throws UnsuccessfulCommandException + */ + public function testProcess_en_US_doc1_SetInvalidParam(): void + { + $this->expectException(UnsuccessfulCommandException::class); + + $inputFile = __DIR__ . DIRECTORY_SEPARATOR . "examples" . DIRECTORY_SEPARATOR . "en_US_doc1.pdf"; + $outputPath = sys_get_temp_dir() . + DIRECTORY_SEPARATOR . + basename((string)tempnam(sys_get_temp_dir(), 'ocr_')) . + ".pdf"; + + $instance = OCRmyPDF::make($inputFile) + ->setOutputPDFPath($outputPath) + ->setParam('--this-is-not-a-valid-param'); + + $instance->run(); + } +}