-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathappveyor.yml
456 lines (354 loc) · 21.3 KB
/
appveyor.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# build version string; "{build}" is AppVeyor's placeholder for the running build number
# NOTE(review): the scripts below read $env:APPVEYOR_BUILD_VERSION to name artifacts, presumably this value - confirm
version: 2.7.1.{build}
# build worker image; the build scripts below use the matching "Visual Studio 17 2022" CMake generator
image: Visual Studio 2022
# NOTE 2024-03-13: Tesseract 3.x and 4.x seem to be only compatible with leptonica .so version <= 5.x (<= v1.82)
# https://github.com/DanBloomberg/leptonica/blob/32af1df77f4f6311e581f2e263fc51506a5b6340/README.html#L1230
# https://github.com/DanBloomberg/leptonica/commit/e8b670d9357c78a6a93fa23e27bb25675c695fa3
# https://github.com/tesseract-ocr/tesseract/commit/f36c0d019be59cae3b96da0d89d870dbe83e9714
#
# TODO 2024-03-03: During Tesseract CMake config we get "Leptonica was build without TIFF support! Disabling TIFF support..."
# even though loading TIFFs in tesserocr via Leptonica seems to work fine.
# maybe check changes to TIFF_TEST https://github.com/tesseract-ocr/tesseract/commit/54b9fe4de9f0aa3af15f52a6f27ffaf758b43769
# Environment variables shared by every job, followed by the job matrix.
# Each Tesseract major version gets three chained jobs:
#   Build Dependencies -> Build Wheels -> Build Conda Packages
# Commit IDs and the version list are quoted so they always stay strings:
# abbreviated SHAs can be all digits or look like exponent notation (e.g. 7e803e7),
# which some YAML parsers would otherwise retype as numbers.
environment:
  # parsed by the build/test scripts with `-split ", "`, so keep the separator exactly ", "
  PYTHON_VERSIONS: '36, 37, 38, 39, 310, 311, 312, 313'
  matrix:
  # ------ Tesseract 3.x ----------------------------------------
  - job_name: Build Dependencies - Tesseract 3.x
    job_group: Build Dependencies
    LEPTONICA_COMMIT_ID: 'f413826'  # Leptonica 1.82.0
    TESSERACT_COMMIT_ID: '7e5f0d6'  # Tesseract 3.05.02
  - job_name: Build Wheels - Tesseract 3.x
    job_group: Build Wheels
    job_depends_on: Build Dependencies - Tesseract 3.x
  - job_name: Build Conda Packages - Tesseract 3.x
    job_group: Build Conda Packages
    job_depends_on: Build Wheels - Tesseract 3.x
  # ------ Tesseract 4.x ----------------------------------------
  - job_name: Build Dependencies - Tesseract 4.x
    job_group: Build Dependencies
    LEPTONICA_COMMIT_ID: 'f413826'  # Leptonica 1.82.0
    TESSERACT_COMMIT_ID: 'f38e7a7'  # Tesseract 4.1.3
  - job_name: Build Wheels - Tesseract 4.x
    job_group: Build Wheels
    job_depends_on: Build Dependencies - Tesseract 4.x
  - job_name: Build Conda Packages - Tesseract 4.x
    job_group: Build Conda Packages
    job_depends_on: Build Wheels - Tesseract 4.x
  # ------ Tesseract 5.x ----------------------------------------
  - job_name: Build Dependencies - Tesseract 5.x
    job_group: Build Dependencies
    LEPTONICA_COMMIT_ID: '7e803e7'  # Leptonica 1.84.1
    TESSERACT_COMMIT_ID: 'b5f279e'  # Tesseract 5.4.1
  - job_name: Build Wheels - Tesseract 5.x
    job_group: Build Wheels
    job_depends_on: Build Dependencies - Tesseract 5.x
  - job_name: Build Conda Packages - Tesseract 5.x
    job_group: Build Conda Packages
    job_depends_on: Build Wheels - Tesseract 5.x
# commits that only touch the readme do not trigger a build
skip_commits:
  files:
  - README.md

# as soon as one job fails, the remaining jobs are cancelled and the whole build is marked failed
matrix:
  fast_finish: true
for:
# ------ Job group: Build Dependencies ----------------------------------------
# Builds Leptonica and Tesseract (x64 and Win32) from the commits pinned in the job's
# environment, then collects libs, binaries and headers per bitness into one zip artifact.
# Native commands do not stop a PowerShell script on failure, so the exit code is checked
# after every checkout and build step; otherwise a broken or wrong-version build would
# still be packaged and handed to the downstream jobs.
- matrix:
    only:
    - job_group: Build Dependencies
  build_script:
  - pwsh: |
      # ------ Build Leptonica ----------------------------------------
      $leptonica_install_64 = Join-Path -Path $PWD -ChildPath "leptonica_install_64"
      $leptonica_install_32 = Join-Path -Path $PWD -ChildPath "leptonica_install_32"
      # install and initialize sw (Software Network)
      Invoke-WebRequest -Uri "https://software-network.org/client/sw-master-windows_x86_64-client.zip" -OutFile sw_client.zip
      Expand-Archive -LiteralPath sw_client.zip
      $env:Path = "$(Join-Path -Path $PWD -ChildPath "sw_client");" + $env:Path
      sw setup
      # clone Leptonica and checkout correct version
      git clone https://github.com/DanBloomberg/leptonica.git
      cd leptonica
      git checkout $env:LEPTONICA_COMMIT_ID
      # abort instead of silently building whatever revision happens to be checked out
      if ($LastExitCode -ne 0) { exit $LastExitCode }
      $leptonica_build_64 = Join-Path -Path $PWD -ChildPath "build_64"
      $leptonica_build_32 = Join-Path -Path $PWD -ChildPath "build_32"
      cmake -G "Visual Studio 17 2022" -A x64 -S . -B "build_64" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="$leptonica_install_64"
      cmake -G "Visual Studio 17 2022" -A Win32 -S . -B "build_32" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX="$leptonica_install_32"
      # a failed configure also surfaces here because the build step then has nothing to build
      cmake --build build_64 --config Release --target install
      if ($LastExitCode -ne 0) { exit $LastExitCode }
      cmake --build build_32 --config Release --target install
      if ($LastExitCode -ne 0) { exit $LastExitCode }
      # image libraries used for building leptonica will be added as additional linker dependencies to consuming projects
      # without their full file path. this results in a LNK1181 "cannot open input file" even though these libs are not
      # needed anymore during the tesseract build, therefore we remove them from the respective targets cmake file.
      foreach ($leptonica_install_dir in @("$leptonica_install_64", "$leptonica_install_32")) {
          $targetsCmakePath = "$leptonica_install_dir\lib\cmake\leptonica\LeptonicaTargets.cmake"
          $fileContent = Get-Content $targetsCmakePath -Raw
          $patchedContent = $fileContent -replace 'set_target_properties\(leptonica PROPERTIES[\s\S]*?\)\r?\n', ''
          Set-Content -Path $targetsCmakePath -Value $patchedContent
      }
      cd ..
      # ------ Build Tesseract ----------------------------------------
      # prepare install directories
      $tesseract_install_64 = Join-Path -Path $PWD -ChildPath "tesseract_install_64"
      $tesseract_install_32 = Join-Path -Path $PWD -ChildPath "tesseract_install_32"
      # New-Item $tesseract_install_64 -ItemType Directory
      # New-Item $tesseract_install_32 -ItemType Directory
      # clone and checkout Tesseract
      git clone https://github.com/tesseract-ocr/tesseract.git
      cd tesseract
      git checkout $env:TESSERACT_COMMIT_ID
      if ($LastExitCode -ne 0) { exit $LastExitCode }
      # store Tesseract version in env variable for setup.py
      if (Test-Path ".\VERSION" -PathType Leaf) {
          $env:TESSERACT_VERSION = Get-Content -Path .\VERSION -TotalCount 1
      } elseif (Test-Path ".\api\baseapi.h" -PathType Leaf) {
          # older version -> get version from ".\api\baseapi.h"
          $content = Get-Content -Path ".\api\baseapi.h" -Raw
          # $Matches is only refreshed by a successful -match, so read it only in that case
          if ($content -match 'TESSERACT_VERSION_STR "(.*)"') {
              $env:TESSERACT_VERSION = $Matches[1]
          }
      }
      # apply patches if any (not needed anymore since switch to CMake)
      $patch_directory = "..\res\patches\tesseract\$env:TESSERACT_COMMIT_ID"
      if (Test-Path -Path $patch_directory) {
          Get-ChildItem $patch_directory |
          Foreach-Object {
              "applying $_"
              git apply $_.FullName
          }
      } else {
          "no patches found for Tesseract commit $env:TESSERACT_COMMIT_ID"
      }
      $tesseract_build_64 = Join-Path -Path $PWD -ChildPath "build_64"
      $tesseract_build_32 = Join-Path -Path $PWD -ChildPath "build_32"
      # build Tesseract x64 and x86
      cmake -G "Visual Studio 17 2022" -A x64 -S . -B "build_64" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$tesseract_install_64" -DBUILD_TRAINING_TOOLS=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DOPENMP_BUILD=OFF -DLeptonica_DIR="$leptonica_install_64\lib\cmake\leptonica"
      cmake -G "Visual Studio 17 2022" -A Win32 -S . -B "build_32" -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$tesseract_install_32" -DBUILD_TRAINING_TOOLS=OFF -DSW_BUILD=OFF -DBUILD_SHARED_LIBS=ON -DOPENMP_BUILD=OFF -DLeptonica_DIR="$leptonica_install_32\lib\cmake\leptonica"
      cmake --build build_64 --config Release --target install
      if ($LastExitCode -ne 0) { exit $LastExitCode }
      cmake --build build_32 --config Release --target install
      if ($LastExitCode -ne 0) { exit $LastExitCode }
      cd ..
      # ------ Package Artifacts ----------------------------------------
      $artifact_dir = "artifacts"
      ForEach ($platform_x64 in @($true, $false)) {
          $bitness = if ($platform_x64) { "64" } else { "32" }
          # set paths based on platform
          $tesseract_build = if ($platform_x64) { $tesseract_build_64 } else { $tesseract_build_32 }
          $tesseract_install = if ($platform_x64) { $tesseract_install_64 } else { $tesseract_install_32 }
          $leptonica_build = if ($platform_x64) { $leptonica_build_64 } else { $leptonica_build_32 }
          $leptonica_install = if ($platform_x64) { $leptonica_install_64 } else { $leptonica_install_32 }
          $dest_bitness_dir = "$artifact_dir\${bitness}bit"
          # nested hashtable for paths: component -> artifact type -> source directory
          $paths = @{
              "tesseract" = @{
                  "lib" = "$tesseract_build\Release";
                  "bin" = "$tesseract_build\bin\Release";
                  "include" = "$tesseract_install\include"
              };
              "leptonica" = @{
                  "lib" = "$leptonica_build\src\Release";
                  "bin" = "$leptonica_build\bin\Release";
                  "include" = "$leptonica_install\include"
              }
          }
          # both components are merged into the same lib/bin/include directory per bitness
          foreach ($component in $paths.Keys) {
              foreach ($type in $paths[$component].Keys) {
                  $target_dir = "$dest_bitness_dir\$type"
                  $source_path = $paths[$component][$type]
                  New-Item -Path $target_dir -ItemType Directory -Force
                  Copy-Item -Path "$source_path\*" -Destination $target_dir -Recurse -Force
              }
          }
      }
      # zip the organized binaries and header files
      $zip_path = "tesseract_leptonica_binaries_" + $env:APPVEYOR_BUILD_VERSION + ".zip"
      Compress-Archive -Path "$artifact_dir\*" -DestinationPath $zip_path
  artifacts:
  - path: tesseract_leptonica_binaries_*.zip
    name: archive
# ------ Job group: Build Wheels ----------------------------------------
# Downloads the dependency archive of the matching 'Build Dependencies' job, builds a
# tesserocr wheel for every {64bit, 32bit} x {Python version} combination, runs the test
# suite against each installed wheel and publishes the wheels plus a versions report.
- matrix:
    only:
    - job_group: Build Wheels
  build_script:
  - pwsh: |
      # ------ Download Dependencies ----------------------------------------
      # call AppVeyor api to get the id of the 'Build Dependencies' job corresponding to this job's Tesseract version
      $request = Invoke-WebRequest -Uri "https://ci.appveyor.com/api/projects/simonflueckiger/tesserocr-windows-build/build/$env:APPVEYOR_BUILD_VERSION" -Method:Get -ContentType "application/xml"
      # last word of the job name (e.g. "5.x"); escaped because it is spliced into a regex below
      $job_suffix = [regex]::Escape(($env:APPVEYOR_JOB_NAME -split " ")[-1])
      $job = ([xml]$request.Content).ProjectBuildResults.Build.Jobs.BuildJobModel | Where-Object Name -match "Build Dependencies.*$job_suffix"
      # piece together the dependencies zip artifact url from 'Build Dependencies' job
      $url = "https://ci.appveyor.com/api/buildjobs/" + $job.JobId + "/artifacts/tesseract_leptonica_binaries_" + $env:APPVEYOR_BUILD_VERSION + ".zip"
      # download and unpack the archive
      Invoke-WebRequest $url -OutFile dependencies.zip
      Expand-Archive -Path dependencies.zip
      $dep_bin_64 = Join-Path -Path $PWD -ChildPath "dependencies\64bit\bin"
      $dep_bin_32 = Join-Path -Path $PWD -ChildPath "dependencies\32bit\bin"
      $dep_lib_64 = Join-Path -Path $PWD -ChildPath "dependencies\64bit\lib"
      $dep_lib_32 = Join-Path -Path $PWD -ChildPath "dependencies\32bit\lib"
      $dep_inc_64 = Join-Path -Path $PWD -ChildPath "dependencies\64bit\include"
      $dep_inc_32 = Join-Path -Path $PWD -ChildPath "dependencies\32bit\include"
      # ask the freshly built 64bit tesseract.exe for its version number
      $env:TESSERACT_VERSION = & "$dep_bin_64\tesseract.exe" --version | Select-String -Pattern "tesseract (\d+\.\d+\.\d+)" | ForEach-Object { $_.Matches.Groups[1].Value } | Select-Object -First 1
      # ------ Prepare tesserocr ----------------------------------------
      # pull tesserocr submodule
      git submodule update --init --recursive
      # copy files into newly pulled submodule
      Copy-Item -Path "res\image_dataset" -Destination "tesserocr\tests\image_dataset" -Recurse
      # apply tesserocr patches
      cd tesserocr
      $patch_directory = "..\res\patches\tesserocr"
      $patch_files = @(
          # addresses this issue for tesseract 5.x https://github.com/sirfz/tesserocr/issues/295
          # also tests whether leptonica can correctly use all image libraries
          "test_api.patch",
          # adds functionality to copy dlls to the build directory
          "setup.patch"
      )
      Foreach ($patch_file in $patch_files)
      {
          $full_path = Join-Path -Path $patch_directory -ChildPath $patch_file
          "applying ""$full_path"""
          git apply "$full_path"
      }
      # ------ Build tesserocr ----------------------------------------
      # (still inside the tesserocr directory from the patch step above)
      # dumpbin.exe required for finding dependencies in find_libraries_and_dependencies.py
      $env:Path = "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin;" + $env:Path
      # for each {64bit and 32bit} x {Python Versions}
      ForEach ($platform_x64 in @($true, $false)) {
          ForEach ($py_ver in $env:PYTHON_VERSIONS -split ", ") {
              # "310" -> "3.10"
              $py_ver_dot = $py_ver[0] + "." + $py_ver.Substring(1)
              # set platform/python-version specific variables
              # BUILD_PLATFORM and TESSERACT_VERSION are used in setup.py
              if ($platform_x64) {
                  $env:BUILD_PLATFORM = "x64"
                  $python_path = "C:\Python$py_ver-x64"
                  $conda_path = "C:\Miniconda3-x64"
                  $dep_bin = "$dep_bin_64"
                  $env:INCLUDE = "$dep_inc_64"
                  $env:LIBPATH = "$dep_lib_64"
              } else {
                  $env:BUILD_PLATFORM = "x86"
                  $python_path = "C:\Python$py_ver"
                  $conda_path = "C:\Miniconda3"
                  $dep_bin = "$dep_bin_32"
                  $env:INCLUDE = "$dep_inc_32"
                  $env:LIBPATH = "$dep_lib_32"
              }
              # collect dll path of tesseract and all other dlls it points to inside $dep_bin
              $env:DLLPATHS = python ..\res\find_libraries_and_dependencies.py --libraries tesseract --search-paths "$dep_bin"
              # tesseract.exe required for determining tesseract version in setup.py
              $env:Path = "$dep_bin;" + $env:Path
              # use miniconda if python version not found on agent
              $conda_active = $false
              if (Test-Path $python_path) {
                  $env:Path = "$python_path;" + $env:Path
              } else {
                  (& "$conda_path\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
                  conda create -y --name py$py_ver python=$py_ver_dot
                  conda activate py$py_ver
                  $conda_active = $true
              }
              "Building for Python $py_ver-$env:BUILD_PLATFORM"
              # upgrade pip
              python -m pip install --upgrade pip
              # install requirements
              python -m pip install --no-warn-script-location --no-input --only-binary :all: -r requirements-dev.txt
              # build wheel
              python setup.py bdist_wheel
              # fail right here with the build error instead of later in the test stage with a missing wheel
              if ($LastExitCode -ne 0) { exit $LastExitCode }
              # leave the conda environment again, but only if one was activated for this iteration
              if ($conda_active) { conda deactivate }
          }
      }
  test_script:
  - pwsh: |
      # test suite needs TESSDATA_PREFIX to point to a valid tessdata location
      $env:TESSDATA_PREFIX = "$PWD\res\tessdata"
      # for each {64bit and 32bit} x {Python Versions}
      ForEach ($platform_x64 in @($true, $false)) {
          ForEach ($py_ver in $env:PYTHON_VERSIONS -split ", ") {
              $py_ver_dot = $py_ver[0] + "." + $py_ver.Substring(1)
              # set platform/python-version specific variables
              if ($platform_x64) {
                  $python_path = "C:\Python$py_ver-x64"
                  $conda_path = "C:\Miniconda3-x64"
                  $wheel_path = Get-ChildItem -Path tesserocr\dist\*cp$py_ver*amd64*.whl
              } else {
                  $python_path = "C:\Python$py_ver"
                  $conda_path = "C:\Miniconda3"
                  $wheel_path = Get-ChildItem -Path tesserocr\dist\*cp$py_ver*win32*.whl
              }
              # use miniconda if python version not found on agent
              # (the environment itself was created during the build script)
              $conda_active = $false
              if (Test-Path $python_path) {
                  $env:Path = "$python_path;" + $env:Path
              } else {
                  (& "$conda_path\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression
                  conda activate py$py_ver
                  $conda_active = $true
              }
              # install previously built wheels
              python -m pip install $wheel_path
              # without the wheel installed the test run below would be meaningless
              if ($LastExitCode -ne 0) { exit $LastExitCode }
              # append python version and tesseract version string to file
              python -c "import platform; print(f'------ Python {platform.python_version()} ({platform.architecture()[0]}) ------')" >> versions.txt
              python -c "import tesserocr; print(tesserocr.tesseract_version()); print()" >> versions.txt
              # https://stackoverflow.com/a/73984985/1786159
              cd tesserocr\tests
              # execute test suite (verbose output)
              python -m unittest -v test_api.py
              # throw immediately if not all test passed
              if ($LastExitCode -ne 0) { exit $LastExitCode }
              # uninstall pillow and execute test suite again for tests which require pillow to not be present
              python -m pip uninstall --yes Pillow
              # execute test suite again (verbose output)
              python -m unittest -v test_api.py
              # throw immediately if not all test passed
              if ($LastExitCode -ne 0) { exit $LastExitCode }
              # leave the conda environment again, but only if one was activated for this iteration
              if ($conda_active) { conda deactivate }
              cd ..\..
          }
      }
      # create archive for convenience
      $zip_name = "wheels_" + $env:APPVEYOR_BUILD_VERSION + ".zip"
      Compress-Archive -Path tesserocr\dist\*.whl, versions.txt -DestinationPath tesserocr\dist\$zip_name
  artifacts:
  - path: tesserocr\dist\*.whl
    name: wheel
  - path: tesserocr\dist\wheels_*.zip
    name: archive
  - path: versions.txt
    name: versions
# ------ Job group: Build Conda Packages ----------------------------------------
# Downloads the wheels archive of the matching 'Build Wheels' job and converts every wheel
# into a conda package, sorted into win-64 / win-32 output folders.
- matrix:
    only:
    - job_group: Build Conda Packages
  build_script:
  - pwsh: |
      # call AppVeyor api to get the id of the 'Build Wheels' job corresponding to this job's Tesseract version
      $request = Invoke-WebRequest -Uri "https://ci.appveyor.com/api/projects/simonflueckiger/tesserocr-windows-build/build/$env:APPVEYOR_BUILD_VERSION" -Method:Get -ContentType "application/xml"
      # last word of the job name (e.g. "5.x"); escaped because it is spliced into a regex below
      $job_suffix = [regex]::Escape(($env:APPVEYOR_JOB_NAME -split " ")[-1])
      $job = ([xml]$request.Content).ProjectBuildResults.Build.Jobs.BuildJobModel | Where-Object Name -match "Build Wheels.*$job_suffix"
      # piece together the wheels zip artifact url from 'Build Wheels' job
      $url = "https://ci.appveyor.com/api/buildjobs/" + $job.JobId + "/artifacts/tesserocr%2Fdist%2Fwheels_" + $env:APPVEYOR_BUILD_VERSION + ".zip"
      # download and unpack the archive
      Invoke-WebRequest $url -OutFile wheels.zip
      Expand-Archive -Path wheels.zip
      # create list of hashtables from downloaded wheel files: @{wheel; py_ver; py_ver_str; platform_x64}
      $wheels = Get-ChildItem wheels -Filter *.whl
      $packages = @()
      ForEach ($wheel in $wheels) {
          # $Matches is only refreshed by a successful -match; reading it after a failed one
          # would silently reuse the captures of the previous wheel
          if ($wheel.Name -match 'tesserocr-\d+\.\d+\.\d+-cp(\d+).*(?:win(32)|amd(64)).*\.whl') {
              # group 1: python tag digits ("310" -> "3.10"); group 3 is only set for amd64 wheels
              $packages += @{ wheel = $wheel; py_ver = $Matches[1].Insert(1,'.'); py_ver_str = $Matches[1]; platform_x64 = ($Matches[3] -eq "64") }
          } else {
              Write-Warning "skipping wheel with unexpected file name: $($wheel.Name)"
          }
      }
      # get Tesseract version from file
      $env:TESSERACT_VERSION = Get-Content -Path wheels\versions.txt | Select-String -Pattern "tesseract (\d+\.\d+\.\d+)" | ForEach-Object { $_.Matches.Groups[1].Value } | Select-Object -First 1
      # create package output directories
      $conda_package_folder_64 = "conda_packages\win-64"
      $conda_package_folder_32 = "conda_packages\win-32"
      New-Item $conda_package_folder_64 -ItemType Directory
      New-Item $conda_package_folder_32 -ItemType Directory
      # convert wheels to conda packages
      ForEach ($package in $packages) {
          $package_name = $package.wheel.Name
          $conda_package_folder = if ($package.platform_x64) { $conda_package_folder_64 } else { $conda_package_folder_32 }
          python res\wheel_to_conda_converter.py "$PWD\wheels\$package_name" $env:TESSERACT_VERSION $conda_package_folder
          # a failed conversion must fail the job, otherwise an incomplete package set gets published
          if ($LastExitCode -ne 0) { exit $LastExitCode }
      }
      # create archive for convenience
      # call "python upload_to_anaconda.py <path\to\conda_packages>" to upload all packages to anaconda
      Compress-Archive -Path conda_packages/* -DestinationPath conda_packages.zip
  artifacts:
  - path: conda_packages\win-*\*.tar.bz2
    name: conda_package
  - path: conda_packages.zip
    name: archive