Skip to content

Commit

Permalink
feat: use internal venv for dependency management (#9)
Browse files Browse the repository at this point in the history
* feat: use internal venv for dependency management

* fix: setup script

* chore: readme & scripts

* chore: service provider

* Fix styling

* chore: progress

* fix: process->start()

* fix: formatting

* chore: reduce verbosity

* fix: --no-progress-bar not available

* chore(ci): remove post-install command

* chore: change post-update to setup-python

* feat: more output

* chore: docs

* fix: install command

* fix: remove unnecessary adds

* chore: fix tests

* fix: ci

* chore(ci): change to default error format

* chore(ci): ignore error in ci

---------

Co-authored-by: kauffinger <[email protected]>
  • Loading branch information
kauffinger and kauffinger authored Dec 18, 2024
1 parent 79979af commit d5e2867
Show file tree
Hide file tree
Showing 20 changed files with 480 additions and 42 deletions.
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,7 @@
/UPGRADING.md export-ignore
/phpstan.neon.dist export-ignore
/phpstan-baseline.neon export-ignore

# Ensure these files are included
/setup-python-env.sh export-keep
/python/requirements.txt export-keep
2 changes: 1 addition & 1 deletion .github/workflows/phpstan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ jobs:
uses: ramsey/composer-install@v3

- name: Run PHPStan
run: ./vendor/bin/phpstan --error-format=github
run: ./vendor/bin/phpstan
3 changes: 3 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,8 @@ jobs:
# for windows releases. Defaults to "static"
linking-type: static

- name: venv setup
run: ./setup-python-env.sh

- name: Execute tests
run: vendor/bin/pest --ci
62 changes: 40 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,62 @@ Laravel bindings for markitdown.

## Installation

You can install the package via composer:
1. Install the package via composer:

```bash
composer require innobrain/markitdown
```

### Install Markitdown
2. Publish the configuration file:

Install the markitdown package from pip.
```bash
php artisan vendor:publish --tag="markitdown-config"
```

3. Run the installation command:

```bash
pip install markitdown
php artisan markitdown:install
```

You will need to have `markitdown` available as a binary in your command line.
This will:
- Set up a Python virtual environment with the required dependencies
- Add the setup script to your project's composer.json
- Ensure the environment is kept up to date with future composer updates

⚡ The recommended way to do this is to use `pipx`:
### Alternative Installation Methods

On macOS:
If you prefer not to use the built-in virtual environment, you can disable it in your `.env` file:

```bash
MARKITDOWN_USE_VENV_PACKAGE=false
```

Then install markitdown manually using one of these methods:

#### Using pip directly:

```bash
pip install markitdown
```

#### Using pipx (Recommended for manual installation):

On macOS:
```bash
brew install pipx
pipx ensurepath
sudo pipx ensurepath --global # optional to allow pipx actions with --global argument``
sudo pipx ensurepath --global # optional to allow pipx actions with --global argument
```

Or see how to install on [other platforms](https://github.com/pypa/pipx).
After installling `pipx`, you can install `markitdown` with:
After installing `pipx`, you can install `markitdown` with:

```bash
pipx install markitdown
```

Now, set the path to the `markitdown` executable in your `.env` file. You can retrieve the path with:
When not using the built-in virtual environment, you'll need to set the path to the `markitdown` executable in your `.env` file. You can retrieve the path with:
```bash
which markitdown
```
Expand All @@ -53,23 +76,13 @@ MARKITDOWN_EXECUTABLE=/path/to/markitdown
Also, when running the script anywhere but the console, you need to set the PATH, as php-fpm does not have
access to the PATH variable. You can do this by adding the following to your `.env` file:

```bash
echo $PATH
```

```bash
MARKITDOWN_SYSTEM_PATH=<your path>
```

### Publishing things

You can publish the config file with:
## Configuration

```bash
php artisan vendor:publish --tag="markitdown-config"
```

This is the contents of the published config file:
The package's configuration will be published to `config/markitdown.php`. Here are the available options:

```php
return [
Expand All @@ -84,6 +97,11 @@ return [
*/
'executable' => env('MARKITDOWN_EXECUTABLE', 'markitdown'),

/*
* When true, markitdown is run from the package's built-in Python virtual environment and the 'executable' setting above is ignored.
*/
'use_venv_package' => env('MARKITDOWN_USE_VENV_PACKAGE', true),

/*
* This is needed when you want to run markitdown in php-fpm. One dependency
* of markitdown requires PATH to be set. If you are running in a console,
Expand Down
11 changes: 11 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@
},
"scripts": {
"post-autoload-dump": "@composer run prepare",
"setup-python-env": [
"./setup-python-env.sh"
],
"prepare": "@php vendor/bin/testbench package:discover --ansi",
"analyse": "vendor/bin/phpstan analyse",
"test": "vendor/bin/pest",
Expand All @@ -69,6 +72,14 @@
"aliases": {
"Markitdown": "Innobrain\\Markitdown\\Facades\\Markitdown"
}
},
"hooks": {
"post-install-cmd": [
"./setup-python-env.sh"
],
"post-update-cmd": [
"./setup-python-env.sh"
]
}
},
"minimum-stability": "dev",
Expand Down
8 changes: 7 additions & 1 deletion config/markitdown.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,16 @@

/*
* Use this to set the path to the markitdown executable. If not set,
* the binary will be searched in the PATH.
* the binary will be searched in the PATH. Will be ignored
* if use_venv_package is set to true.
*/
'executable' => env('MARKITDOWN_EXECUTABLE', 'markitdown'),

/*
* When true, markitdown is run from the package's built-in Python virtual environment and the 'executable' setting above is ignored.
*/
'use_venv_package' => env('MARKITDOWN_USE_VENV_PACKAGE', true),

/*
* This is needed when you want to run markitdown in php-fpm. One dependency
* of markitdown requires PATH to be set. If you are running in a console,
Expand Down
2 changes: 2 additions & 0 deletions phpstan.neon.dist
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ parameters:
tmpDir: build/phpstan
checkOctaneCompatibility: true
checkModelProperties: true
ignoreErrors:
- '#Call to an undefined static method Innobrain\\Markitdown\\Facades\\Markitdown::convert\(\)#'
1 change: 1 addition & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
markitdown==0.0.1a2
63 changes: 63 additions & 0 deletions setup-python-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash

set -e # Exit on error

# Print the message given as $1 to stdout, prefixed with the current local
# time in [YYYY-MM-DD HH:MM:SS] form.
log() {
    printf '[%s] %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$1"
}

# Abort the script with exit status 1 when the command named by $1 cannot be
# resolved by the shell. The error is written to stderr so that callers which
# only capture the error stream still see why setup stopped.
check_command() {
    if ! command -v "$1" > /dev/null 2>&1; then
        log "Error: $1 is required but not installed." >&2
        exit 1
    fi
}

# Check required commands. Only python3 is needed on the host: pip is always
# invoked through the virtual environment's own interpreter further down, so a
# system-wide pip3 is not a prerequisite.
check_command python3

# Determine the directory of this script
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Path to the python directory within your package
PYTHON_DIR="$DIR/python"

# Create python directory if it doesn't exist
if [ ! -d "$PYTHON_DIR" ]; then
    log "Creating Python directory..."
    mkdir -p "$PYTHON_DIR"
fi

# Navigate to the python directory
cd "$PYTHON_DIR" || {
    log "Error: Failed to change to directory $PYTHON_DIR" >&2
    exit 1
}

# Create the virtual environment unless a usable interpreter is already there.
# Testing for the interpreter (not just the directory) lets a half-created
# venv from an earlier failed run be repaired instead of being reused.
if [ ! -x "venv/bin/python" ]; then
    log "Creating Python virtual environment..."
    python3 -m venv venv || {
        log "Error: Failed to create virtual environment" >&2
        exit 1
    }
fi

# Install Python dependencies
log "Installing Python dependencies..."
if [ -f "requirements.txt" ]; then
    # Capture both stdout and stderr so pip's output is only shown on failure
    if ! output=$(venv/bin/python -m pip install -r requirements.txt -q 2>&1); then
        log "Error: Failed to install dependencies" >&2
        log "Pip output: $output" >&2
        exit 1
    fi
else
    # Without a requirements file nothing can be installed; this is fatal.
    log "Error: requirements.txt not found in $PYTHON_DIR" >&2
    exit 1
fi

log "Python environment setup complete."
139 changes: 139 additions & 0 deletions src/Commands/InstallCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
<?php

declare(strict_types=1);

namespace Innobrain\Markitdown\Commands;

use Illuminate\Console\Command;
use Illuminate\Support\Facades\Process;

/**
 * Artisan command that wires the package's Python setup script into the host
 * project's composer.json and then runs that script once.
 */
class InstallCommand extends Command
{
    /** Setup script path, relative to the host project root, as written into composer.json. */
    private const COMPOSER_SCRIPT = './vendor/innobrain/markitdown/setup-python-env.sh';

    /** Composer event the setup script is appended to. */
    private const COMPOSER_EVENT = 'post-autoload-dump';

    public $signature = 'markitdown:install';

    public $description = 'Install Markitdown Python dependencies and set up the virtual environment';

    /**
     * Register the setup script in composer.json, then execute it.
     *
     * @return int self::SUCCESS when both steps succeed, self::FAILURE otherwise
     */
    public function handle(): int
    {
        $this->components->info('Installing Markitdown...');

        if (! $this->registerComposerScript()) {
            return self::FAILURE;
        }

        if (! $this->runSetupScript()) {
            return self::FAILURE;
        }

        $this->components->info('Markitdown installed successfully!');

        return self::SUCCESS;
    }

    /**
     * Append the setup script to the project's post-autoload-dump scripts.
     *
     * composer.json is only rewritten when the script is not registered yet.
     *
     * @return bool false when composer.json cannot be read, parsed or written
     */
    private function registerComposerScript(): bool
    {
        // Get the path to the user's composer.json
        $composerPath = $this->getLaravel()->basePath('composer.json');

        if (! file_exists($composerPath)) {
            $this->components->error('composer.json not found in project root.');

            return false;
        }

        $composerJson = file_get_contents($composerPath);

        if ($composerJson === false) {
            $this->components->error('Failed to read composer.json.');

            return false;
        }

        // Decode into objects rather than associative arrays: an empty JSON
        // object ("{}") would otherwise come back as an empty PHP array and be
        // written out again as "[]", changing the user's composer.json.
        $composer = json_decode($composerJson);

        if (! $composer instanceof \stdClass) {
            $this->components->error('Invalid composer.json format.');

            return false;
        }

        // A missing, null or empty-list "scripts" entry starts from an empty object.
        $scripts = $composer->scripts ?? new \stdClass;

        if ($scripts === []) {
            $scripts = new \stdClass;
        }

        if (! $scripts instanceof \stdClass) {
            $this->components->error('Invalid composer.json format.');

            return false;
        }

        // The event may hold a single command string or a list of commands.
        $registered = $scripts->{self::COMPOSER_EVENT} ?? [];

        if (! is_array($registered)) {
            $registered = [$registered];
        }

        if (in_array(self::COMPOSER_SCRIPT, $registered, true)) {
            return true;
        }

        $registered[] = self::COMPOSER_SCRIPT;
        $scripts->{self::COMPOSER_EVENT} = $registered;
        $composer->scripts = $scripts;

        // Keep slashes and non-ASCII characters as the user wrote them.
        $encodedJson = json_encode(
            $composer,
            JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE
        );

        if ($encodedJson === false) {
            $this->components->error('Failed to encode composer.json.');

            return false;
        }

        // JSON_PRETTY_PRINT indents with four spaces and expands every array.
        // Collapse the "keywords" list of a default Laravel composer.json back
        // onto one line so the rewrite produces a smaller diff.
        $formattedJson = str_replace(
            "    \"keywords\": [\n        \"laravel\",\n        \"framework\"\n    ],",
            '    "keywords": ["laravel", "framework"],',
            $encodedJson.PHP_EOL
        );

        $bytesWritten = file_put_contents($composerPath, $formattedJson);

        if ($bytesWritten === false || $bytesWritten === 0) {
            $this->components->error('Failed to write to composer.json.');

            return false;
        }

        $this->components->info('Added Markitdown setup script to composer.json');

        return true;
    }

    /**
     * Locate the bundled setup script, make sure it is executable and run it.
     *
     * @return bool false when the script is missing, cannot be made executable or exits non-zero
     */
    private function runSetupScript(): bool
    {
        $scriptPath = realpath(__DIR__.'/../../setup-python-env.sh');

        if ($scriptPath === false || ! file_exists($scriptPath)) {
            $this->components->error('Setup script not found.');

            return false;
        }

        $this->components->info('Setting up Python virtual environment...');

        // Only touch the file mode when needed; chmod fails for files owned
        // by another user even if they are already executable.
        if (! is_executable($scriptPath) && ! chmod($scriptPath, 0755)) {
            $this->components->error('Failed to make setup script executable.');

            return false;
        }

        // Array form bypasses the shell, so install paths containing spaces
        // or shell metacharacters are passed through untouched.
        $processResult = Process::path(dirname($scriptPath))
            ->tty(false)
            ->timeout(300)
            ->run([$scriptPath]);

        if (! $processResult->successful()) {
            // The script may report its errors on stdout; fall back to it
            // when the error stream is empty so the message is never blank.
            $details = trim($processResult->errorOutput());

            if ($details === '') {
                $details = trim($processResult->output());
            }

            $this->components->error('Failed to set up Python virtual environment: '.$details);

            return false;
        }

        return true;
    }
}
Loading

0 comments on commit d5e2867

Please sign in to comment.