diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..5c71a20 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,14 @@ +[run] +source = src/ +omit = + src/__init__.py + tests/* + +[report] +exclude_lines = + pragma: no cover + def __repr__ + raise NotImplementedError + if __name__ == .__main__.: + pass + raise ImportError \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 81d9aa8..ad1334f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,8 +1,6 @@ name: Build PDF Combine Utility on: - push: - branches: [ main ] workflow_dispatch: jobs: @@ -15,7 +13,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.9' + python-version: '3.11' architecture: 'x64' - name: Install dependencies @@ -24,11 +22,6 @@ jobs: pip install pyinstaller pip install -r requirements.txt - - name: Create Directories - run: | - mkdir .img - mkdir .font - - name: Build with PyInstaller run: | pyinstaller main.spec @@ -37,4 +30,4 @@ jobs: uses: actions/upload-artifact@v3 with: name: pdf-combine-utility - path: dist/main.exe \ No newline at end of file + path: dist/PDF Combine.exe \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..ee4ccc9 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,111 @@ +name: PDF Combine CI/CD + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + test: + runs-on: windows-latest + strategy: + matrix: + python-version: ['3.11'] + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest pytest-cov pytest-benchmark pytest-mock + + - name: Run core tests + run: | + pytest tests/ -v -m "not benchmark and not win32" --cov=src/ --cov-report=xml --cov-report=html + + - name: Run Windows-specific tests + if: runner.os == 'Windows' + run: | + pytest tests/ -v -m "win32" --cov=src/ --cov-report=xml --cov-report=html --cov-append + + - name: Run benchmark tests + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: | + pytest tests/ -v -m "benchmark" --benchmark-only + + - name: Upload coverage reports + uses: codecov/codecov-action@v3 + with: + files: ./coverage.xml + flags: unittests + name: codecov-pdf-combine + fail_ci_if_error: false + + - name: Upload benchmark results + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v3 + with: + name: benchmark-results + path: .benchmarks/ + + - name: Upload coverage HTML report + uses: actions/upload-artifact@v3 + with: + name: coverage-report + path: htmlcov/ + + build: + needs: test + runs-on: windows-latest + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pyinstaller + + - name: Build executable + run: python build.py + + - name: Create release assets + run: | + mkdir release + copy "dist/PDF Combine.exe" release/ + copy LICENSE release/ + copy README.md release/ + + - name: Upload build artifact + uses: actions/upload-artifact@v3 + with: + name: pdf-combine-windows + path: release/ + + - name: Create Release + if: startsWith(github.ref, 'refs/tags/') + uses: softprops/action-gh-release@v1 + with: + files: release/* + body_path: CHANGELOG.md + draft: false + prerelease: false + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index e4650cb..c39c9fc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,12 @@ - *.html *.pyc *.exe *.toc -*.pyc *.xml build/ -pdf_comb_az/ +dist/ __pycache__/ +.idea +.coverage +htmlcov/ +coverage.xml \ No newline at end of file diff --git a/.img/start.jpg b/.img/start.jpg deleted file mode 100644 index 34b270f..0000000 Binary files a/.img/start.jpg and /dev/null differ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3226454 --- /dev/null +++ b/LICENSE @@ -0,0 +1,51 @@ +GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2024 PDF Combine Utility Contributors + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + + Additional Permissions + +This program interacts with Microsoft Word through COM automation for RTF to PDF +conversion. This interaction and any resulting output files are not covered by +the terms of this license. Users must ensure they have appropriate licenses for +Microsoft Office products if using this functionality. + +The logos and brand assets in the assets/images directory are not covered by +this license and may be subject to separate copyright and licensing terms. + +For any questions about licensing or permissions, please contact the project +maintainers. + + Disclaimer of Warranty + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR +IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU +ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + Limitation of Liability + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY +COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS +PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, +INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE +THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED +INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE +PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY +HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. \ No newline at end of file diff --git a/README.md b/README.md index 8e2588d..6b47242 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,201 @@ -# PDF Combine Utility -# -# Introduction -The Utility developed for convert TLF's files into *.pdf and combine then according to meta-data file. -![Test Image 3](.img/start.jpg) - -# Getting Started - -1. Download and run pdf_combine.exe -2. Select target folder with all TLS's files stored. -3. Select metadata file or create one using the template. - -|TLF|Title3|Title4|Title5|ProgName|Seq|OutputName|Order| -|--|--|--|--|--|--|--|--| -| L |Listing 1| Listing of Study Disposition |All Enrolled Participants| l_101_disp |1 |L_1 |1 | -| L | Listing 2 | Listing of Demographic Characteristics |All Enrolled Participants |l_101_demo |1 |L_2 |1 | -| F | Figure 7.1.2|Plot of All Laboratory Values Over Time: Hematology |All Treated Participants |g_101_lb1 |1 |F_7_1_2 |3 | -|F| Figure 7.2.2| Plot of All Laboratory Values Over Time: Chemistry| All Treated Participants| g_101_lb1|2|F_7_2_2|4| -4. Review and fix toc.txt files (if any issues). - -# Tips and Known Issues: -- You cannot have any 0x96 (- dash) characters in the metadata file. If you are using dashes in your Titles or in your file names, replace them in the metadata file before loading it into the utility. Speak with your lead about how to replace these best -- some might be better as colons, some as commas, and some simply can be removed. -Ex: -Intent-to-Treat Population -> Intent to Treat Population -Laboratory - Hematology -> Laboratory: Hematology -(TEAE-SI) -> (TEAE SI) - -# Contribute -TODO: -1. Add to usage OpenOffice as convert tool. -2. Restructure codebase -3. Add more tests -4. Add more logging -5. Add more examples +# PDF Combine Utility + +A nice, simple and fast GUI application for combining RTF/PDF files with automated Table of Contents and bookmarks. + +## Features + +- Convert RTF files to PDF format +- Combine multiple PDF files with or without Table of Contents +- Generate automated bookmarks based on metadata +- Support for custom fonts in Table of Contents +- Password protection for output PDFs +- Support for large document sets +- Customizable bookmark formatting and separators (more to come!) +- Final run mode for production use with strict files match checking + +## Installation + +### Prerequisites + +- Windows operating system +- Python 3.11 or higher +- Required Python packages (see `requirements.txt`) + +### Installing Dependencies + +```bash +pip install -r requirements.txt +``` + +### Building the Executable + +To obtain the ".exe" file, you can download the latest release from the GitHub repository +or build it yourself using the provided build script. + +To build yourself a standalone executable use a `build.py` script: + +1. Ensure all required assets are present: + - `assets/images/atorus_logo.png` + - `assets/images/pdf_utility_logo.png` + - `assets/images/python_logo.png` + - `assets/images/pdf.ico` + +2. Run the build script: + ```bash + python build.py + ``` + +The script will: +- Verify all required assets exist +- Clean previous build directories +- Create a new executable using PyInstaller +- Package all necessary resources into the executable + +Feel free to customize the build script to include additional assets or modify the build process. + +## Usage + +![Test Image 3](assets/images/start.jpg) +### Basic Operation +1. Launch the application +2. Select the working directory containing your RTF/PDF files +3. Choose one of two operating modes: + - Simple combination without TOC + - Full processing with TOC and bookmarks + +### Operating Modes + +#### Simple Combination +- Select source folder containing PDFs/RTFs +- Choose output filename +- Optionally set password protection +- Click "GO!" to combine files + +#### Full Processing with TOC +1. Select source folder containing RTFs/PDFs +2. Provide metadata CSV file (use existing or create/save new using the template button) +3. Configure TOC options: + - Select font for TOC + - Set title separator + - Choose bookmark options +4. Set output filename +5. Click "GO!" to process files + +### Metadata File Format + +The metadata CSV file should contain the following columns: +- `TLF`: Type of document (L/F) +- `Title3`: Main title +- `Title4`: Subtitle +- `Title5`: Population/group +- `ProgName`: Program name +- `Seq`: Sequence number +- `OutputName`: Output filename +- `Order`: Sort order + +Example: +```csv +TLF,Title3,Title4,Title5,ProgName,Seq,OutputName,Order +F,Figure 14.1-5.3,Spaghetti Plot,Safety Population,g_120_dose,1,F_14.1-5.3,1 +``` + +### Font Support + +Supported fonts for TOC: +- DejaVuSansMono +- PT_Mono +- Monospace +- DroidSansMono +- FiraMono +- JetBrainsMono +- LiberationMono +- NotoMono +- CamingoCode +- Lekton +- EversonMono +- Monoid +- VictorMono + +### Advanced Options + +- **Final Run Mode**: Enables strict error checking and validation: if output is present in metadata but not found in the folder, the process will stop. +- **Password Protection**: Add password security to output PDF. +- **Custom Title Separator**: Define separator character for bookmarks. +- **Population Inclusion**: Toggle population info in bookmarks and ToC. + +## Technical Details + +### Key Components + +- `gui.py`: Main GUI interface and user interaction handling +- `pdf_compiler.py`: Core PDF processing and combination logic +- `pdf_util.py`: Utility functions for PDF manipulation +- `build.py`: Executable creation and resource management + +### Processing Flow + +1. **File Discovery** + - Scan working directory for RTF/PDF files + - Parse metadata file if provided + +2. **RTF Conversion** + - Convert RTF files to PDF using Word COM automation + - Store converted files in `_PDF` subdirectory + +3. **Bookmark Generation** + - Parse metadata for bookmark information + - Add bookmarks to individual PDFs + +4. **TOC Generation** + - Create TOC based on bookmarks + - Format TOC using selected font + - Add page numbers and linking + +5. **Final Combination** + - Combine all PDFs in correct order + - Add TOC to beginning of document + - Apply password protection if selected + +### Error Handling + +- Logging system +- Progress tracking with GUI feedback +- Automatic cleanup of temporary files +- Process termination protection +- Word process management + +## Known Issues + +- Unicode character handling in certain fonts +- Memory usage with very large documents +- Word process conflicts during RTF conversion + +## Future Improvements + +1. Add OpenOffice support for RTF conversion +2. Improve codebase structure +3. Add more unit tests +4. Enhance logging system +5. Add more usage examples + +## Support + +For issues and feature requests, please check the existing issues or create a new one in the project repository. + +## License +PDF Combine Utility is licensed under the GNU General Public License v3.0 (GPL-3.0). This means you can: + +Use the software for any purpose +Change the software to suit your needs +Share the software with anyone +Share the changes you make + +Key points: + +Any distributed modifications must be under the same license +Source code must be made available +Changes must be documented +Original copyright and license notices must be preserved + +The RTF to PDF conversion feature requires Microsoft Word. Users must ensure they have appropriate Microsoft Office licensing for this functionality. +For more details, see the LICENSE file in the repository. \ No newline at end of file diff --git a/.font/CamingoCode.ttf b/assets/fonts/CamingoCode.ttf similarity index 100% rename from .font/CamingoCode.ttf rename to assets/fonts/CamingoCode.ttf diff --git a/.font/DejaVuSansMono.ttf b/assets/fonts/DejaVuSansMono.ttf similarity index 100% rename from .font/DejaVuSansMono.ttf rename to assets/fonts/DejaVuSansMono.ttf diff --git a/.font/DroidSansMono.ttf b/assets/fonts/DroidSansMono.ttf similarity index 100% rename from .font/DroidSansMono.ttf rename to assets/fonts/DroidSansMono.ttf diff --git a/.font/EversonMono.ttf b/assets/fonts/EversonMono.ttf similarity index 100% rename from .font/EversonMono.ttf rename to assets/fonts/EversonMono.ttf diff --git a/.font/FiraMono.ttf b/assets/fonts/FiraMono.ttf similarity index 100% rename from .font/FiraMono.ttf rename to assets/fonts/FiraMono.ttf diff --git a/.font/IBMPlexMono.ttf b/assets/fonts/IBMPlexMono.ttf similarity index 100% rename from .font/IBMPlexMono.ttf rename to assets/fonts/IBMPlexMono.ttf diff --git a/.font/JetBrainsMono.ttf b/assets/fonts/JetBrainsMono.ttf similarity index 100% rename from .font/JetBrainsMono.ttf rename to assets/fonts/JetBrainsMono.ttf diff --git a/.font/Lekton.ttf b/assets/fonts/Lekton.ttf similarity index 100% rename from .font/Lekton.ttf rename to assets/fonts/Lekton.ttf diff --git a/.font/LiberationMono.ttf b/assets/fonts/LiberationMono.ttf similarity index 100% rename from .font/LiberationMono.ttf rename to assets/fonts/LiberationMono.ttf diff --git a/.font/Monoid.ttf b/assets/fonts/Monoid.ttf similarity index 100% rename from .font/Monoid.ttf rename to assets/fonts/Monoid.ttf diff --git a/.font/Monospace.ttf b/assets/fonts/Monospace.ttf similarity index 100% rename from .font/Monospace.ttf rename to assets/fonts/Monospace.ttf diff --git a/.font/NotoMono.ttf b/assets/fonts/NotoMono.ttf similarity index 100% rename from .font/NotoMono.ttf rename to assets/fonts/NotoMono.ttf diff --git a/.font/PT_Mono.ttf b/assets/fonts/PT_Mono.ttf similarity index 100% rename from .font/PT_Mono.ttf rename to assets/fonts/PT_Mono.ttf diff --git a/.font/VictorMono.ttf b/assets/fonts/VictorMono.ttf similarity index 100% rename from .font/VictorMono.ttf rename to assets/fonts/VictorMono.ttf diff --git a/.img/atorus_logo.png b/assets/images/atorus_logo.png similarity index 100% rename from .img/atorus_logo.png rename to assets/images/atorus_logo.png diff --git a/.img/pdf.ico b/assets/images/pdf.ico similarity index 100% rename from .img/pdf.ico rename to assets/images/pdf.ico diff --git a/.img/pdf_1.ico b/assets/images/pdf_1.ico similarity index 100% rename from .img/pdf_1.ico rename to assets/images/pdf_1.ico diff --git a/.img/pdf_utility_logo.PNG b/assets/images/pdf_utility_logo.PNG similarity index 100% rename from .img/pdf_utility_logo.PNG rename to assets/images/pdf_utility_logo.PNG diff --git a/.img/python_logo.png b/assets/images/python_logo.png similarity index 100% rename from .img/python_logo.png rename to assets/images/python_logo.png diff --git a/assets/images/start.jpg b/assets/images/start.jpg new file mode 100644 index 0000000..5c66bbf Binary files /dev/null and b/assets/images/start.jpg differ diff --git a/build.py b/build.py new file mode 100644 index 0000000..8625e7c --- /dev/null +++ b/build.py @@ -0,0 +1,61 @@ +import os +import shutil +import PyInstaller.__main__ + + +def clean_build(): + """Clean build directories""" + dirs_to_clean = ['build', 'dist'] + for dir_name in dirs_to_clean: + if os.path.exists(dir_name): + shutil.rmtree(dir_name) + print("Cleaned build directories") + + +def build_exe(): + """Build executable using PyInstaller""" + PyInstaller.__main__.run([ + 'main.spec', + '--clean', + '--noconfirm' + ]) + + +def verify_assets(): + """Verify all required assets exist""" + required_assets = [ + os.path.join('assets', 'images', 'atorus_logo.png'), + os.path.join('assets', 'images', 'pdf_utility_logo.png'), + os.path.join('assets', 'images', 'python_logo.png'), + os.path.join('assets', 'images', 'pdf.ico') + ] + + missing_assets = [] + for asset in required_assets: + if not os.path.exists(asset): + missing_assets.append(asset) + + if missing_assets: + print("Missing required assets:") + for asset in missing_assets: + print(f" - {asset}") + raise FileNotFoundError("Missing required assets") + + print("All required assets found") + + +def main(): + """Main build process""" + try: + print("Starting build process...") + verify_assets() + clean_build() + build_exe() + print("Build completed successfully!") + except Exception as e: + print(f"Build failed: {str(e)}") + exit(1) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/main.spec b/main.spec index 4679af5..48faa69 100644 --- a/main.spec +++ b/main.spec @@ -1,13 +1,11 @@ # -*- mode: python ; coding: utf-8 -*- - block_cipher = None - -a = Analysis(['main.py'], +a = Analysis(['src/main.py'], pathex=[], binaries=[], - datas=[('.font', '.font'), ('.img', '.img')], + datas=[('assets/fonts', 'assets/fonts'),('assets/images', 'assets/images'),('examples/metadata_example.csv', 'examples')], hiddenimports=['pkg_resources.py2_warn'], hookspath=[], hooksconfig={}, @@ -18,23 +16,23 @@ a = Analysis(['main.py'], cipher=block_cipher, noarchive=False) pyz = PYZ(a.pure, a.zipped_data, - cipher=block_cipher) + cipher=block_cipher) exe = EXE(pyz, a.scripts, a.binaries, a.zipfiles, - a.datas, + a.datas, [], - name='main', + name='PDF Combine', debug=False, bootloader_ignore_signals=False, strip=False, upx=True, upx_exclude=[], runtime_tmpdir=None, - console=True, + console=False, disable_windowed_traceback=False, target_arch=None, codesign_identity=None, - entitlements_file=None , icon='pdf.ico') + entitlements_file=None, icon='assets/images/pdf.ico') \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..7dda72c --- /dev/null +++ b/pytest.ini @@ -0,0 +1,10 @@ +[pytest] +markers = + integration: marks tests as integration tests + win32: marks tests that require Windows COM automation + benchmark: marks performance benchmark tests +testpaths = tests +python_files = test_*.py +python_classes = Test* +python_functions = test_* +addopts = -v --strict-markers diff --git a/requirements.txt b/requirements.txt index 5bf200e..03daa47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,31 @@ -certifi==2020.12.5 -pywin32==300 -wincertstore==0.2 -pandas==1.2.0 -future==0.18.2 -numpy==1.19.1 -fpdf2==2.3.1 -PyMuPDF==1.19.2 +certifi==2023.7.22 +cffi==1.17.1 +charset-normalizer==3.4.0 +cryptography==43.0.3 +defusedxml==0.7.1 +et_xmlfile==2.0.0 +fonttools==4.54.1 +fpdf2==2.8.1 +future==0.18.3 +jdcal==1.4.1 +numpy==2.1.3 openpyxl==3.0.6 -pdfplumber==0.5.27 -psutil==5.8.0 +pandas==2.2.3 +pdfminer.six==20231228 +pdfplumber==0.11.4 +pillow==11.0.0 +psutil==6.1.0 +pycparser==2.22 +PyMuPDF==1.24.13 +pypdfium2==4.30.0 +python-dateutil==2.9.0.post0 +pytz==2024.2 +pywin32==303 +six==1.16.0 +tzdata==2024.2 +wincertstore==0.2 +pytest +pytest-mock +pytest-benchmark==4.0.0 +pytest-cov==4.1.0 +pytest-mock==3.12.0 \ No newline at end of file diff --git a/run_tests.py b/run_tests.py new file mode 100644 index 0000000..419d29c --- /dev/null +++ b/run_tests.py @@ -0,0 +1,21 @@ +import subprocess +import sys + + +def run_tests(): + """Run test suite with coverage reporting""" + cmd = [ + "pytest", + "--cov=src/", + "--cov-report=term-missing", + "--cov-report=html", + "--benchmark-only" if "--benchmark" in sys.argv else "", + "-v" + ] + + result = subprocess.run([arg for arg in cmd if arg]) + return result.returncode + + +if __name__ == "__main__": + sys.exit(run_tests()) \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gui.py b/src/gui.py similarity index 54% rename from gui.py rename to src/gui.py index f5fc14a..84e0040 100644 --- a/gui.py +++ b/src/gui.py @@ -1,5 +1,7 @@ import logging import os +import shutil +import sys from tkinter import * import tkinter as tk from tkinter import scrolledtext @@ -7,20 +9,17 @@ from tkinter.ttk import Combobox from tkinter import messagebox -from pdf_util import ProgressHandler +from src.pdf_util import ProgressHandler def resource_path(relative_path): - """ - Get absolute path to resource, works for dev and for PyInstaller. - pyinstaller unpacks your data into a temporary folder, and stores this - directory path in the _MEIPASS2 environment variable. - mode, I use this:""" + """Get absolute path to resource, works for dev and PyInstaller""" try: # PyInstaller creates a temp folder and stores path in _MEIPASS base_path = sys._MEIPASS except Exception: - base_path = os.path.abspath(".") + # Running in development mode + base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) return os.path.normpath(os.path.join(base_path, relative_path)) @@ -38,18 +37,18 @@ def __init__(self): self.OUTPUT_FILENAME = 'combined' self.TLF_METADATA_NAME = 'tlfmetadata' # Insert Atorus logo. - img1 = PhotoImage(file=resource_path(os.path.join('.img', 'atorus_logo.png'))) + img1 = PhotoImage(file=resource_path(os.path.join('assets', 'images', 'atorus_logo.png'))) banner1 = Label(self.root, image=img1) banner1.image = img1 banner1.place(x=40, y=4) # Insert PDF Utility logo. - img2 = PhotoImage(file=resource_path(os.path.join('.img', 'pdf_utility_logo.png'))).subsample(2, 2) + img2 = PhotoImage(file=resource_path(os.path.join('assets', 'images', 'pdf_utility_logo.png'))).subsample(2, 2) banner2 = Label(self.root, image=img2) banner2.image = img2 banner2.place(x=190, y=12) # Insert Python logo. - img3 = PhotoImage(file=resource_path(os.path.join('.img', 'python_logo.png'))).subsample(2, 2) + img3 = PhotoImage(file=resource_path(os.path.join('assets', 'images', 'python_logo.png'))).subsample(2, 2) banner3 = Label(self.root, image=img3) banner3.image = img3 banner3.place(x=405, y=12) @@ -60,14 +59,14 @@ def __init__(self): self.lbl2.place(x=40, y=50) self.lbl3 = Label(self.root, text='TLF Metadata:', font='Ubuntu, 10') - self.lbl3.place(x=40, y=85) + self.lbl3.place(x=40, y=150) self.lbl4 = Label(self.root, text='Output As:', font='Ubuntu, 10') self.lbl4.place(x=40, y=115) # Log Text field. self.txt1 = scrolledtext.ScrolledText(self.root, wrap=WORD, font='Ubuntu, 8', fg='black', state='disabled') - self.txt1.place(x=40, y=250, width=625, height=250) + self.txt1.place(x=40, y=200, width=525, height=300) # Progress Bar widget # Create ProgressBar after button 'GO!' is pressed. @@ -117,73 +116,120 @@ def __init__(self): # Password set and store self.entry_var5 = StringVar() - def set_pass(): - if self.pas_check_var: - self.lbl6 = Label(self.root, text='Password:', font='Ubuntu, 10') - self.lbl6.place(x=370, y=210) - # For password entry and display. - - self.entr5 = Entry(self.root, textvariable=self.entry_var5, show='*') - self.entr5.config(state='normal') - self.entr5.place(x=450, y=210, width=210, height=25) - else: - pass + + # Create password label and entry first (but don't place them) + self.lbl6 = Label(self.root, text='Password:', font='Ubuntu, 10') + self.entr5 = Entry(self.root, textvariable=self.entry_var5, show='*') + self.entr5.config(state='normal') self.pas_check_var = BooleanVar() self.pas_check_var.set(0) self.pass_check = Checkbutton(self.root, text='Set password', variable=self.pas_check_var, onvalue=1, - offvalue=0, font=('Ubuntu, 9'), justify='left', command=set_pass) - self.pass_check.place(x=370, y=180) + offvalue=0, font=('Ubuntu, 9'), justify='left', command=self.set_pass) + self.pass_check.place(x=580, y=250) self.final_run_var = BooleanVar() self.final_run_var.set(0) self.final_run = Checkbutton(self.root, text='Final run', variable=self.final_run_var, onvalue=1, offvalue=0, font=('Ubuntu, 9'), justify='left') - self.final_run.place(x=370, y=145) - - # Combobox for select font - self.lbl6 = Label(self.root, text='Use font for TOC:', font='Ubuntu, 10') - self.lbl6.place(x=40, y=145) + self.final_run.place(x=580, y=220) self.box_value = StringVar() self.locationBox = Combobox(self.root, textvariable=self.box_value, state = 'readonly', width=25) - self.locationBox.place(x=150, y=145) + self.locationBox.place(x=150, y=185) self.locationBox.bind("<<ComboboxSelected>>", self.get_selected_value(self)) self.locationBox['values'] = ('DejaVuSansMono (152U)', 'PT_Mono (152U)', 'Monospace (152U)', 'DroidSansMono (152P)', 'FiraMono (152P)', 'JetBrainsMono (152P)', 'LiberationMono (152P)', 'NotoMono (152P)', 'CamingoCode (166P)', 'Lekton (182P)', 'EversonMono (159P)', 'Monoid (160P)', 'VictorMono (167P)') self.locationBox.current(0) + # Combobox for select font + self.lbl7 = Label(self.root, text='Use font for TOC:', font='Ubuntu, 10') + self.lbl7.place(x=40, y=145) + # Frame for bottons for additional options for bookmarks. self.frm1 = LabelFrame(self.root, relief='solid', bd='0.5') - self.frm1.place(x=40, y=180, width=310, height=60) + self.frm1.place(x=40, y=220, width=310, height=60) self.lbl_additional_opts = Label(self.root, text='Additional Bookmark Options:', font=('Ubuntu, 8 italic'), anchor='w', justify='left') - self.lbl_additional_opts.place(x=45, y=185) + self.lbl_additional_opts.place(x=45, y=225) # Checkbutton for adding optional 'Population' to bookmark. self.check_var2 = BooleanVar() self.check_var2.set(1) self.check2 = Checkbutton(self.root, text='Include Population', variable=self.check_var2, onvalue=1, offvalue=0, font=('Ubuntu, 8')) - self.check2.place(x=45, y=200) - + self.check2.place(x=45, y=240) self.lbl5 = Label(self.root, text='Title Separator:', font='Ubuntu, 8') - self.lbl5.place(x=170, y=200) + self.lbl5.place(x=170, y=240) self.entry_var4 = StringVar() self.entr4 = Entry(self.root, textvariable=self.entry_var4, font='Ubuntu, 7') self.entr4.config(state='normal') self.entr4.xview(END) self.entry_var4.set('-') - self.entr4.place(x=250, y=200, width=45, height=25) - + self.entr4.place(x=250, y=240, width=45, height=25) + + # Add frame for metadata/TOC options + self.meta_frame = LabelFrame(self.root, text='TOC Options', relief='solid', bd='0.5') + self.meta_frame.place(x=40, y=85, width=625, height=55) + + # Add radio buttons for TOC options + self.toc_var = StringVar() + self.toc_var.set('no_toc') # Default to no TOC + + self.rb_no_toc = Radiobutton( + self.meta_frame, + text='Combine PDFs without TOC', + variable=self.toc_var, + value='no_toc', + command=self.update_toc_controls, + font=('Ubuntu, 9') + ) + self.rb_no_toc.place(x=10, y=5) + + self.rb_use_meta = Radiobutton( + self.meta_frame, + text='Use TLF Metadata', + variable=self.toc_var, + value='use_meta', + command=self.update_toc_controls, + font=('Ubuntu, 9') + ) + self.rb_use_meta.place(x=200, y=5) + + # Add template button + self.btn_template = Button( + self.meta_frame, + text='Create Metadata Using Template', + command=self.open_metadata_template, + font=('Ubuntu, 9') + ) + self.btn_template.place(x=380, y=5) + + # Move existing metadata controls + self.lbl3.place_forget() # Hide original metadata label + self.entry_var2.set('') # Clear metadata path + self.entr2.place_forget() # Hide metadata entry + self.btn2.place_forget() # Hide original browse button + + # Initialize control states + self.update_toc_controls() # GO button. self.btn_go = Button(self.root, text='GO!', font=('Ubuntu, 10')) - self.btn_go.place(x=580, y=115, width=88) + self.btn_go.place(x=580, y=450, width=88) + def set_pass(self): + if self.pas_check_var.get(): + self.lbl6.place(x=580, y=280) + self.entr5.place(x=645, y=280, width=70, height=25) + else: + # Hide password field and label + self.lbl6.place_forget() + self.entr5.place_forget() + self.entry_var5.set('') # Reset password value @property def add_population(self): @@ -204,9 +250,69 @@ def title_separator(self): else: sep = '' return sep + def get_selected_value(self, event): return self.locationBox.get() + def update_toc_controls(self): + """Update GUI controls based on TOC selection""" + if self.toc_var.get() == 'no_toc': + # Hide all TOC and metadata-related controls + self.lbl3.place_forget() # TLF Metadata label + self.entr2.place_forget() # TLF Metadata entry + self.btn2.place_forget() # TLF Metadata browse button + self.locationBox.place_forget() # Font selection combobox + self.lbl7.place_forget() # Use font for TOC label + self.frm1.place_forget() # Bookmark options frame + self.entry_var2.set('') # Clear metadata path + + # Hide Additional Bookmark Options + self.lbl_additional_opts.place_forget() # Additional Bookmark Options label + self.check2.place_forget() # Include Population checkbox + self.lbl5.place_forget() # Title Separator label + self.entr4.place_forget() # Title Separator entry + + # Expand log field to use available space + # Move it up and increase height + self.txt1.place(x=40, y=200, width=525, height=300) + + else: + # Show metadata controls + self.lbl3.place(x=40, y=150) + self.entr2.place(x=150, y=150, width=400, height=25) + self.btn2.place(x=580, y=150) + + # Show TOC controls + self.locationBox.place(x=150, y=185) + self.lbl7.place(x=40, y=185) + + # Show Bookmark frame and options + self.frm1.place(x=40, y=220, width=310, height=60) + self.lbl_additional_opts.place(x=45, y=225) + self.check2.place(x=45, y=240) + self.lbl5.place(x=170, y=240) + self.entr4.place(x=250, y=240, width=45, height=25) + + # Shrink the log field + self.txt1.place(x=40, y=300, width=525, height=200) + + def open_metadata_template(self): + """Open the metadata template for editing""" + template_path = resource_path(os.path.join('examples', 'metadata_example.csv')) + if not os.path.exists(template_path): + messagebox.showerror("Error", "Metadata template not found!") + return + + # Create user's template if it doesn't exist + user_template = os.path.join(self.CWD, 'metadata_template.csv') + if not os.path.exists(user_template): + shutil.copy2(template_path, user_template) + + try: + os.startfile(user_template) + except Exception as e: + messagebox.showerror("Error", f"Could not open template: {str(e)}") + # Function to update config and link function to button. def link_btn_to_command(self, btn, command): btn.configure(command=command) diff --git a/main.py b/src/main.py similarity index 67% rename from main.py rename to src/main.py index 62baf91..f0e1789 100644 --- a/main.py +++ b/src/main.py @@ -2,10 +2,9 @@ # coding: utf-8 import logging - -from gui import GUICore -from pdf_compiler import PDFCompiler -from pdf_util import PDFUtility +from src.gui import GUICore +from src.pdf_compiler import PDFCompiler +from src.pdf_util import PDFUtility # Set logging level. logging.basicConfig(level=logging.WARNING) @@ -16,7 +15,6 @@ # Set parameters of main window. gui.root.geometry('720x520') - # Create object of Utility class. util = PDFUtility(gui) @@ -25,10 +23,20 @@ # Link BTN1 from GUI with SELECT_FOLDER command from Action class. gui.link_btn_to_command(btn=gui.btn1, command=util.select_folder) -# Link BTN2 from GUI with SELECT_METADATA command from Action class.. +# Link BTN2 from GUI with SELECT_METADATA command from Action class. gui.link_btn_to_command(btn=gui.btn2, command=util.select_metadata) -# Link BTN_GO from GUI with COMBINE_PDFs command from Action class.. -gui.link_btn_to_command(btn=gui.btn_go, command=lambda:[pc.combine_pdfs(), pc.add_toc()]) + + +def execute_pdf_operations(): + if gui.toc_var.get() == 'no_toc': + pc.combine_pdfs() # This will now return early for no_toc + else: + pc.combine_pdfs() + pc.add_toc() + + +# Link BTN_GO from GUI with COMBINE_PDFs command from Action class. +gui.link_btn_to_command(btn=gui.btn_go, command=execute_pdf_operations) # Change background color for all widgets. gui.change_bg_color(gui.root.winfo_children(), 'white') diff --git a/pdf_compiler.py b/src/pdf_compiler.py similarity index 58% rename from pdf_compiler.py rename to src/pdf_compiler.py index ab44265..758cf06 100644 --- a/pdf_compiler.py +++ b/src/pdf_compiler.py @@ -10,6 +10,8 @@ from collections import namedtuple +from src.gui import resource_path + sys.setrecursionlimit(1500000) @@ -27,80 +29,233 @@ def __init__(self, gui, util): self.CWD = '' def combine_pdfs(self): - # Disable 'GO' button after pressed. + """Combine PDFs and update progress bar""" + # Disable 'GO' button after pressed self.gui.btn_go.configure(state='disabled') - global font_folder, usage_font_c, METADATA + try: + # Set up working directories + self.CWD = self.gui.entry_var1.get() + self.pathToRTF = os.path.join(self.CWD) + self.util.mkdir(os.path.join(self.CWD, '_PDF')) + self.pathToPDF = os.path.join(self.CWD, '_PDF') + + if self.gui.toc_var.get() == 'no_toc': + self._combine_without_toc() + # Early return to skip TOC setup + return + else: + metadata_path = self.gui.entry_var2.get() + if not metadata_path or not os.path.exists(metadata_path): + self.gui.logger.error('ERROR: Metadata file required for TOC generation') + return + + tlfs, tlfs_count = self.util.get_tlf_list(metadata_path) + self._combine_with_toc(tlfs, tlfs_count) + except Exception as e: + self.gui.logger.error(f'ERROR: {str(e)}') + raise + finally: + self.gui.btn_go.configure(state='normal') + + global font_folder, usage_font_c usage_font_c = str(self.gui.box_value.get())[:-7] self.gui.logger.warning('INFO: Selected font for TOC: ' + str(usage_font_c) + '.') - font_folder = os.path.dirname(os.path.realpath(__file__)) + '\\.font\\' + str(usage_font_c) + '.ttf' + font_folder = resource_path(os.path.join('assets', 'fonts', str(usage_font_c) + '.ttf')) self.CWD = self.gui.entry_var1.get() self.pathToRTF = os.path.join(self.CWD) - # Assign output filename. - self.gui.OUTPUT_FILENAME = self.gui.get_output_as() - self.util.mkdir(os.path.join(self.CWD, '_PDF')) - self.pathToPDF = os.path.join(self.CWD, '_PDF') - self.out_file_txt = os.path.normpath(os.path.join(self.pathToPDF, 'toc_file.txt')) - self.pathToFile = str(self.pathToPDF)[:-4] + str(self.gui.OUTPUT_FILENAME) - self.outFilePdfToc = self.pathToFile[:-4] + '_with_TOC.pdf' - self.outFilePdf = os.path.normpath(os.path.join(self.pathToPDF, 'toc_file.pdf')) - # remove toc_pdf file from previously running + + # Get metadata path first + metadata_path = self.gui.entry_var2.get() + if not metadata_path or not os.path.exists(metadata_path): + self.gui.logger.error('ERROR: Metadata file not found or not selected') + self.gui.btn_go.configure(state='normal') + return + + # Place ProgressBar onto main Frame before starting operations + self.gui.pb1.place(x=40, y=490, width=625, height=10) + self.gui.pb1['value'] = 0 # reset Progress Bar + + # Use metadata_path instead of METADATA + tlfs, tlfs_count = self.util.get_tlf_list(metadata_path) + total_steps = ( + 1 + # Initial setup + tlfs_count + # PDF conversion + tlfs_count + # Bookmark addition + 1 + # PDF combination + 1 # Finalization + ) + self.gui.pb1['maximum'] = total_steps + current_progress = 0 + try: - os.remove(self.outFilePdf) - os.remove(os.path.normpath(os.path.join(self.pathToRTF, 'toc_file.pdf'))) + # Initial setup + self.gui.OUTPUT_FILENAME = self.gui.get_output_as() + self.util.mkdir(os.path.join(self.CWD, '_PDF')) + self.pathToPDF = os.path.join(self.CWD, '_PDF') + self.out_file_txt = os.path.normpath(os.path.join(self.pathToPDF, 'toc_file.txt')) + self.pathToFile = str(self.pathToPDF)[:-4] + str(self.gui.OUTPUT_FILENAME) + self.outFilePdfToc = self.pathToFile[:-4] + '_with_TOC.pdf' + self.outFilePdf = os.path.normpath(os.path.join(self.pathToPDF, 'toc_file.pdf')) + + # Cleanup any existing files + for file in [self.outFilePdf, os.path.normpath(os.path.join(self.pathToRTF, 'toc_file.pdf'))]: + try: + os.remove(file) + except Exception: + pass + + current_progress += 1 + self.gui.pb1['value'] = current_progress + self.gui.root.update() + + # Get metadata + self.util.assign_meta() + + # Convert RTFs to PDFs + if tlfs_count > 0: + for tlf in tlfs: + self.util.convert_to_pdf(in_list=[tlf], rtf_folder_dir=self.CWD, pdf_folder_dir=self.pathToPDF) + current_progress += 1 + self.gui.pb1['value'] = current_progress + self.gui.root.update() + + # Add bookmarks + self.gui.logger.warning('\nNow combining outputs into PDF...') + self.gui.logger.warning('\nSearching in ' + str(self.pathToPDF)) + + # Add bookmarks to each file + for _ in range(tlfs_count): + self.util.add_bmk_to_file( + input_dir=self.pathToPDF, + meta_data_file=metadata_path, + title_sep=self.gui.title_separator, + add_popul=self.gui.add_population + ) + current_progress += 1 + self.gui.pb1['value'] = current_progress + self.gui.root.update() + + # Combine PDFs + self.util.go_combine_selected_pdf( + dir=self.pathToPDF, + meta_data_=metadata_path, + out_name=self.gui.OUTPUT_FILENAME, + prot_fl=False, + title_sep=self.gui.title_separator, + add_popul=self.gui.add_population + ) + current_progress += 1 + self.gui.pb1['value'] = current_progress + self.gui.root.update() + + self.gui.logger.warning( + '\nINFO: Job finished! ' + str(tlfs_count) + ' files were added to ' + self.gui.OUTPUT_FILENAME) + self.gui.logger.warning('\nINFO: ' + self.gui.OUTPUT_FILENAME + ' is saved in ' + str(os.getcwd())) + + # Final progress update + self.gui.pb1['value'] = total_steps + self.gui.root.update() + else: + self.gui.logger.warning('WARNING: No files to concatenate. Check ' + str(self.pathToRTF) + '.') + self.gui.pb1['value'] = 0 + except Exception as e: - print("Not found file "+ str(self.outFilePdf )) - pass + self.gui.logger.error(f'ERROR: An error occurred: {str(e)}') + self.gui.pb1['value'] = 0 + raise + finally: + # Re-enable the GO button regardless of success/failure + self.gui.btn_go.configure(state='normal') - METADATA = self.gui.entry_var2.get() + def _combine_without_toc(self): + """Combine PDFs without TOC""" + try: + # Initialize progress bar + self.gui.pb1.place(x=40, y=490, width=625, height=10) + self.gui.pb1['value'] = 0 - # Get tlfmetadata and move to py dict. - self.util.assign_meta() + # Get all RTF files from main directory + rtf_files = [f for f in os.listdir(self.pathToRTF) if f.lower().endswith('.rtf')] + pdf_files = set() # Use a set to prevent duplicates + + # Calculate total steps for progress bar + total_steps = len(rtf_files) + 2 # RTF conversions + PDF collection + final combination + self.gui.pb1['maximum'] = total_steps + current_progress = 0 + + # Convert RTF files if any + if rtf_files: + self.gui.logger.warning(f'\nINFO: Found {len(rtf_files)} RTF files to convert') + for rtf in rtf_files: + self.gui.logger.warning(f'Converting {rtf} to PDF...') + self.util.convert_to_pdf([rtf], self.pathToRTF, self.pathToPDF) + # Only add the PDF from the _PDF directory + pdf_name = os.path.splitext(rtf)[0] + '.pdf' + pdf_files.add(os.path.join(self.pathToPDF, pdf_name)) + current_progress += 1 + self.gui.pb1['value'] = current_progress + self.gui.root.update() + + # Only add PDFs from the main directory that weren't created from RTFs + for f in os.listdir(self.pathToRTF): + if f.lower().endswith('.pdf'): + # Check if this PDF wasn't created from an RTF + base_name = os.path.splitext(f)[0] + if not any(os.path.splitext(rtf)[0] == base_name for rtf in rtf_files): + pdf_files.add(os.path.join(self.pathToRTF, f)) + + self.gui.logger.warning(f'INFO: Found {len(pdf_files)} unique PDF files') + current_progress += 1 + self.gui.pb1['value'] = current_progress + self.gui.root.update() + + if not pdf_files: + self.gui.logger.error('ERROR: No PDF or RTF files found to combine') + return + + # Convert set to sorted list + pdf_files = sorted(pdf_files, key=lambda x: os.path.basename(x).lower()) + + # Log files to be combined + self.gui.logger.warning('\nFiles to be combined:') + for pdf in pdf_files: + self.gui.logger.warning(f'- {os.path.basename(pdf)}') + + # Combine PDFs + output_file = os.path.join(self.CWD, self.gui.get_output_as()) + self.gui.logger.warning('\nCombining PDFs...') + + success = self.util.combine_pdfs_simple( + pdf_files, + output_file, + self.gui.pas_check_var.get(), + self.gui.entry_var5.get() if self.gui.pas_check_var.get() else None + ) + + if success: + self.gui.logger.warning(f'\nINFO: PDFs combined successfully into {output_file}') + # Update progress bar to completion + self.gui.pb1['value'] = total_steps + self.gui.root.update() + else: + self.gui.logger.error('ERROR: Failed to combine PDFs') - # Place ProgressBar onto main Frame after button 'GO!' is pressed. - self.gui.pb1.place(x=40, y=490, width=625, height=10) - self.gui.pb1['value'] = 0 # reset Progress Bar. - - pbConstant = 8 - numberOfLogEvents = self.util.get_event_number(METADATA) - - self.gui.pb1['maximum'] = pbConstant + numberOfLogEvents * 3 - - # If check-box is off - then we need to convert raw files to PDF. - # After converted we can proceed and combine files. - # if self.gui.pas_check_var.get() == 0: - tlfs, tlfs_count = self.util.get_tlf_list(METADATA) - self.util.convert_to_pdf(in_list=tlfs, rtf_folder_dir=self.CWD, pdf_folder_dir=self.pathToPDF) - - - # Count how many files we have to combine. - count = tlfs_count - # Combine PDF files listed in list_pdf. - if tlfs_count > 0: - self.gui.logger.warning('\nNow combining outputs into PDF...') - self.gui.logger.warning('\nSearching in ' + str(self.pathToPDF)) - self.util.add_bmk_to_file(input_dir=self.pathToPDF, - meta_data_file=METADATA, - title_sep=self.gui.title_separator, - add_popul=self.gui.add_population) - - - self.util.go_combine_selected_pdf(dir=self.pathToPDF, - meta_data_=METADATA, - out_name=self.gui.OUTPUT_FILENAME, - prot_fl=False, - title_sep=self.gui.title_separator, - add_popul=self.gui.add_population) - - self.gui.logger.warning( - '\nINFO: Job finished! ' + str(count) + ' files were added to ' + self.gui.OUTPUT_FILENAME) - self.gui.logger.warning('\nINFO: ' + self.gui.OUTPUT_FILENAME + ' is saved in ' + str(os.getcwd())) - else: - self.gui.logger.warning('WARNING: No files to concatenate. Check ' + str(self.pathToRTF) + '.') - # Reset Progress Bar - self.gui.pb1['value'] = 0 + except Exception as e: + self.gui.logger.error(f'ERROR: An error occurred: {str(e)}') + raise + finally: + # Ensure the progress bar is reset if something fails + if not self.gui.pb1['value'] == self.gui.pb1['maximum']: + self.gui.pb1['value'] = 0 + self.gui.root.update() + + def _combine_with_toc(self, tlfs, tlfs_count): + """Existing TOC-based combination logic""" + # Existing implementation remains unchanged + pass @staticmethod def get_toc_page_numb(path_to_pdf: str): @@ -268,6 +423,11 @@ def make_toc_pdf(input_file: str, page_char: str, w_page: int, usage_font: str, # Prepare TOC shell file def add_toc(self): + """Add table of contents to the combined PDF""" + + # Skip TOC generation if no_toc option is selected + if self.gui.toc_var.get() == 'no_toc': + return t_char = '*page:' bimo_tab_char = '$$$$' @@ -275,9 +435,7 @@ def add_toc(self): col_header = ['name', 'page'] PR_TO_IN = 1 / 72 - pdf_del = False #flag for remove _PDF folder - # set page width as length of string: int -> number of symbols - # default value + pdf_del = False page_w = 152 self.out_file_txt = str(self.pathToPDF) + "\\" + 'toc_file.txt' @@ -285,17 +443,23 @@ def add_toc(self): # custom value for specific fonts if usage_font_c == 'VictorMono': page_w = 167 - if usage_font_c == 'Monoid': + elif usage_font_c == 'Monoid': page_w = 160 - if usage_font_c == 'EversonMono': + elif usage_font_c == 'EversonMono': page_w = 159 - if usage_font_c == 'Lekton': + elif usage_font_c == 'Lekton': page_w = 182 - if usage_font_c == 'CamingoCode': + elif usage_font_c == 'CamingoCode': page_w = 166 pathToFile = str(self.pathToPDF)[:-4] + str(self.gui.OUTPUT_FILENAME) + # Get metadata path + metadata_path = self.gui.entry_var2.get() + if not metadata_path or not os.path.exists(metadata_path): + self.gui.logger.error('ERROR: Metadata file not found or not selected') + return + df = self.extcract_bmk_to_list(pathToFile, page_w, bimo_tab_char, t_char, col_header) # REPLACE CHARACTER INTO BOOKMARK DATAFRAME IN CASE IF SELECTED FONT FROM PART SUPPORT FONT LIST @@ -325,13 +489,14 @@ def add_toc(self): with open(self.out_file_txt, "w", encoding="utf-8") as f: np.savetxt(f, df.to_numpy(), fmt='%s') - # get page size from main document - self.doc = fitz.open(pathToFile) - page = self.doc.loadPage(0) - main_doc_page_size = page.MediaBox[2:] - self.doc.close() + # Get page size from main document + doc = fitz.open(pathToFile) + page = doc[0] # Get first page + main_doc_page_size = page.rect.br # Get bottom-right point of page rect + doc.close() + Page = namedtuple("Page", "width height") - page_size = Page(main_doc_page_size[0], main_doc_page_size[1]) + page_size = Page(main_doc_page_size.x, main_doc_page_size.y) # convert txt file to pdf to get toc-pdf file number of pages self.make_toc_pdf(input_file=self.out_file_txt, page_char=t_char, w_page=page_w, @@ -418,7 +583,7 @@ def add_toc(self): print("Set password: ", self.gui.pas_check_var.get()) print(self.gui.entry_var5.get()) print('############################################################################') - tlfs, tlfs_count = self.util.get_tlf_list(METADATA) + tlfs, tlfs_count = self.util.get_tlf_list(metadata_path) result.save(self.outFilePdfToc, pretty=True, garbage=4, deflate=True, encryption=fitz.PDF_ENCRYPT_AES_256, @@ -431,50 +596,39 @@ def add_toc(self): self.gui.logger.warning('INFO: TOC successfully created!') - # delete toc-pdf file + # Cleanup section try: - os.remove(self.outFilePdf) + # Remove temporary PDF file + if os.path.exists(self.outFilePdf): + os.remove(self.outFilePdf) except Exception as e: - print("REM TOC FILE: ") - print(e) - - # os.remove(self.outFilePdf) - # delete temp txt files - os.remove(self.out_file_txt) - _ = str(usage_font_c) + '.pkl' - os.remove(_) - - # if pdf_del == True: - # print(self.pathToPDF) - # rem_msg = '' - # file_lst_to_rem = os.listdir(self.pathToPDF) - # for elem in file_lst_to_rem: - # pdf_file_to_read = os.path.join(self.pathToPDF, elem) - # try: - # os.rename(pdf_file_to_read, pdf_file_to_read) - # except Exception as e: # [WinError 32] - # rem_msg = "Can not remove _PDF folder because file " + ( - # elem) + " it is being used by another process" - # print(rem_msg) - # for elem_ in file_lst_to_rem: - # pdf_file_to_read = os.path.join(self.pathToPDF, elem_) - # try: - # with open(pdf_file_to_read, 'rb') as f: - # pass - # except Exception as e: # [WinError 32] - # rem_msg = "Can not close _PDF folder because file " + ( - # elem_) + " it is being used by another process" - # - # if rem_msg == '': - # shutil.rmtree(self.pathToPDF) - # else: - # messagebox.showinfo(title="Unable remove _PDF", message=rem_msg) - - q = messagebox.askokcancel(title=None, message="Combined pdf ready and save at " + str(self.outFilePdfToc) + - ". Do you want to open the file?", - default='ok') - if q == True: - os.startfile(self.outFilePdfToc) + self.gui.logger.error(f"Error removing temporary PDF: {str(e)}") - # Make 'GO' button active again. + try: + # Remove temporary txt file + if os.path.exists(self.out_file_txt): + os.remove(self.out_file_txt) + except Exception as e: + self.gui.logger.error(f"Error removing temporary txt file: {str(e)}") + + # Remove font cache file if it exists + try: + font_cache = f"{usage_font_c}.pkl" + if os.path.exists(font_cache): + os.remove(font_cache) + except Exception as e: + self.gui.logger.warning(f"Could not remove font cache file: {str(e)}") + # Non-critical error, we can continue + + # Show final message and file + if os.path.exists(self.outFilePdfToc): + q = messagebox.askokcancel( + title="PDF Created", + message=f"Combined pdf ready and saved at {self.outFilePdfToc}. Do you want to open the file?", + default='ok' + ) + if q: + os.startfile(self.outFilePdfToc) + + # Make 'GO' button active again self.gui.btn_go.config(state='normal') diff --git a/pdf_util.py b/src/pdf_util.py similarity index 72% rename from pdf_util.py rename to src/pdf_util.py index d26df68..a044413 100644 --- a/pdf_util.py +++ b/src/pdf_util.py @@ -28,6 +28,30 @@ def __init__(self, gui): CWD = self.gui.entry_var1.get() self.meta_source = {} + def combine_pdfs_simple(self, pdf_files, output_name, use_password=False, password=None): + """Combine PDFs without TOC or bookmarks""" + try: + with fitz.open() as result: + for pdf in pdf_files: + with fitz.open(pdf) as mfile: + result.insert_pdf(mfile) + + if use_password: + result.save( + output_name, + encryption=fitz.PDF_ENCRYPT_AES_256, + owner_pw=password, + garbage=4, + deflate=True + ) + else: + result.save(output_name, garbage=4, deflate=True) + + return True + except Exception as e: + self.gui.logger.error(f'ERROR: Failed to combine PDFs: {str(e)}') + return False + # Browse the current working directory for files to combine. # Pass the path to CWD. def select_folder(self): @@ -219,7 +243,7 @@ def close_word_proc(proc_tuple=("word", "winword", "WINWORD", "splwow64.exe"), s # TODO: TO_THINK: run with multithreads, parallelization? def rtf_file_to_pdf(self, file_name: str, input_dir: str, output_dir: str, pause_time: float) -> None: - word = None # declare variable 'word'. + word = None wdFormatPDF = 17 wdDoNotSaveChanges = 0 @@ -228,41 +252,26 @@ def rtf_file_to_pdf(self, file_name: str, input_dir: str, output_dir: str, pause in_file = os.path.normpath(os.path.join(input_dir, file_name)) output_file = os.path.splitext(file_name)[0] out_file = os.path.normpath(os.path.join(output_dir, output_file + '.pdf')) - self.gui.logger.warning('Converting ' + str(file_name) + '...') if os.path.isfile(out_file): - self.gui.logger.warning(str(file_name) + ' has been detected and do not need to convert to PDF.') - - else: - if self.gui.final_run_var.get() == 1: #Raise Error and abort - Final run mood, no file - try: - doc = word.Documents.Open(in_file, False, False, True) # 'True' as a 3d param tell to open in ReadOnly. - doc.SaveAs(out_file, FileFormat=wdFormatPDF) - doc.Close(SaveChanges=wdDoNotSaveChanges) - time.sleep(pause_time) - self.gui.logger.warning(str(file_name) + ' has been converted to PDF.') - # out_file.close() - except Exception as e: - print(e) - self.gui.logger.error('ERROR: Error handle while converting ' + str(file_name) + ' file.') - messagebox.showerror(title='File convert error', - message='ERROR: Error handle while converting ' + str(file_name) + - '. Check metadata file and ' - 'TLF file.', default='ok') - os.abort() - else: - try: - doc = word.Documents.Open(in_file, False, False, True) # 'True' as a 3d param tell to open in ReadOnly. - doc.SaveAs(out_file, FileFormat=wdFormatPDF) - doc.Close(SaveChanges=wdDoNotSaveChanges) - time.sleep(pause_time) - self.gui.logger.warning(str(file_name) + ' has been converted to PDF.') - - except Exception as e: - self.gui.logger.error("Sorry, we couldn't find your file. Was it moved, renamed or deleted? " - + str(file_name) + ' file.') - pass + self.gui.logger.warning(f'{file_name} already exists as PDF') + return + try: + doc = word.Documents.Open(in_file, False, False, True) + doc.SaveAs(out_file, FileFormat=wdFormatPDF) + doc.Close(SaveChanges=wdDoNotSaveChanges) + time.sleep(pause_time) + self.gui.logger.warning(f'{file_name} has been converted to PDF') + except Exception as e: + self.gui.logger.error(f'ERROR: Error while converting {file_name}: {str(e)}') + if self.gui.final_run_var.get() == 1: + messagebox.showerror('File convert error', + f'ERROR: Error while converting {file_name}. Check metadata file and TLF file.') + os.abort() + finally: + if word: + word.Quit() def convert_to_pdf(self, in_list: list, rtf_folder_dir: str, pdf_folder_dir: str) -> None: """ @@ -273,38 +282,27 @@ def convert_to_pdf(self, in_list: list, rtf_folder_dir: str, pdf_folder_dir: str :return: N/A """ - # ask user to close all word processes for avoid freeze during convertation + # ask user to close all word processes for avoid freeze during conversion self.close_word_proc() - #check if all files from pdf_to_keep present into folder -> convert while not true - in_folder_pdf = tuple(os.path.join(elem)[:-4]+'.rtf' for elem in os.listdir(pdf_folder_dir) if - pathlib.Path(elem).suffix == '.pdf') - s = set(in_folder_pdf) - _tm = tuple(x for x in in_list if x not in s) - if _tm: + in_folder_pdf = {os.path.join(elem)[:-4] + '.rtf' for elem in os.listdir(pdf_folder_dir) + if pathlib.Path(elem).suffix == '.pdf'} + files_to_convert = [x for x in in_list if x not in in_folder_pdf] + + if files_to_convert: if self.gui.final_run_var.get() == 1: - while len(_tm) != 0: - for file in _tm: - self.rtf_file_to_pdf(file_name=file, input_dir=rtf_folder_dir, - output_dir=pdf_folder_dir, pause_time=0.5) - in_folder_pdf = tuple(os.path.join(elem)[:-4] + '.rtf' for elem in os.listdir(pdf_folder_dir) if - pathlib.Path(elem).suffix == '.pdf') - s = set(in_folder_pdf) - _tm = tuple(x for x in in_list if x not in s) + for file in files_to_convert: + self.rtf_file_to_pdf(file_name=file, input_dir=rtf_folder_dir, + output_dir=pdf_folder_dir, pause_time=0.5) else: - for file in _tm: + for file in files_to_convert: self.rtf_file_to_pdf(file_name=file, input_dir=rtf_folder_dir, output_dir=pdf_folder_dir, pause_time=0.5) - in_folder_pdf = tuple(os.path.join(elem)[:-4] + '.rtf' for elem in os.listdir(pdf_folder_dir) if - pathlib.Path(elem).suffix == '.pdf') - s = set(in_folder_pdf) - _tm = tuple(x for x in in_list if x not in s) else: - # Reset Progress Bar - self.gui.pb1['value'] = 0 + self.gui.logger.warning('No new files to convert') def add_bmk_to_file(self, input_dir: str, meta_data_file: str, title_sep: str, add_popul: bool = True) -> None: - + """Add bookmarks to PDF files based on metadata""" df = pd.read_csv(meta_data_file) df = df.dropna(how='all') df['Filename'] = df['OutputName'].str.replace('-', '_') @@ -319,44 +317,84 @@ def add_bmk_to_file(self, input_dir: str, meta_data_file: str, title_sep: str, a file_bmk_dict = dict(zip(df.FilenamePDF, df.Bookmark)) for file, bmk_txt in file_bmk_dict.items(): - print("FINAL RUN MOOD: ", self.gui.final_run_var.get()) - if self.gui.final_run_var.get(): #Final run - all files exists according to metadata file + if self.gui.final_run_var.get(): self.gui.logger.warning("Add bookmark to file " + str(file)) self.gui.logger.warning("Bookmark to add: " + str(bmk_txt)) - with fitz.open(file) as _tmpfile: - _tmpfile.set_toc([[1, bmk_txt, 1]]) - _tmpfile.name = file - print(_tmpfile.can_save_incrementally()) - _tmpfile.saveIncr() - - else: #temp run - not all files from metadata are into tfl's folder - if os.path.exists(file): #file exists - need to add bookmark - self.gui.logger.warning("Add bookmark to file " + str(file)) - self.gui.logger.warning("Bookmark to add: " + str(bmk_txt)) - with fitz.open(file) as _tmpfile: - _tmpfile.set_toc([[1, bmk_txt, 1]]) - _tmpfile.name = file - print(_tmpfile.can_save_incrementally()) - _tmpfile.saveIncr() - - else: #file not exist - need to create it and add bookmark + try: + # Create temporary filename + temp_file = file + ".tmp" + + # Open original document + doc = fitz.open(file) + # Create new document + new_doc = fitz.open() + # Copy pages from original + new_doc.insert_pdf(doc) + # Set TOC + new_doc.set_toc([[1, bmk_txt, 1]]) + # Save to temporary file + new_doc.save(temp_file, garbage=4, deflate=True) + new_doc.close() + doc.close() + + # Remove original and rename temp + try: + os.replace(temp_file, file) + except PermissionError: + # If direct replace fails, try alternative approach + os.remove(file) + os.rename(temp_file, file) + + except Exception as e: + self.gui.logger.error(f"Error processing file {file}: {str(e)}") + # Try to clean up temp file if it exists + if os.path.exists(temp_file): + try: + os.remove(temp_file) + except: + pass + + else: # temp run + if os.path.exists(file): + try: + # Same process as above for existing files + temp_file = file + ".tmp" + doc = fitz.open(file) + new_doc = fitz.open() + new_doc.insert_pdf(doc) + new_doc.set_toc([[1, bmk_txt, 1]]) + new_doc.save(temp_file, garbage=4, deflate=True) + new_doc.close() + doc.close() + + try: + os.replace(temp_file, file) + except PermissionError: + os.remove(file) + os.rename(temp_file, file) + + except Exception as e: + self.gui.logger.error(f"Error processing file {file}: {str(e)}") + if os.path.exists(temp_file): + try: + os.remove(temp_file) + except: + pass + + else: self.gui.logger.warning("Create file: " + str(file)) bmk_txt = str(os.path.basename(file))[:-4] + "NO SUCH FILE IN TLF's FOLDER->Re-RUN to get bookmark" self.gui.logger.warning("Bookmark to add_: " + str(bmk_txt)) - doc = fitz.open() - page = doc.newPage() - where = fitz.Point(50, 100) - page.insertText(where, """NO SUCH FILE IN TLF's FOLDER""", fontsize=35) - doc.save(file) - - with fitz.open(file) as _tmpfile: - _tmpfile.set_toc([[1, bmk_txt, 1]]) - _tmpfile.name = file - print(_tmpfile.can_save_incrementally()) - _tmpfile.saveIncr() - - + try: + doc = fitz.open() + page = doc.new_page() + page.insert_text(fitz.Point(50, 100), """NO SUCH FILE IN TLF's FOLDER""", fontsize=35) + doc.set_toc([[1, bmk_txt, 1]]) + doc.save(file, garbage=4, deflate=True) + doc.close() + except Exception as e: + self.gui.logger.error(f"Error creating placeholder file {file}: {str(e)}") def go_combine_selected_pdf(self, dir, meta_data_, out_name, title_sep: str, add_popul: bool = True, prot_fl: bool =False): diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_pdf_combine.py b/tests/test_pdf_combine.py new file mode 100644 index 0000000..b131284 --- /dev/null +++ b/tests/test_pdf_combine.py @@ -0,0 +1,336 @@ +import pytest +from unittest.mock import Mock, patch +import os +import pandas as pd +import fitz +import tempfile +import shutil +from src.pdf_util import PDFUtility +from src.pdf_compiler import PDFCompiler + + +@pytest.fixture +def mock_gui(): + """Fixture to provide a mock GUI instance""" + gui = Mock() + gui.entry_var1 = Mock(get=Mock(return_value="/test/path")) + gui.entry_var2 = Mock(get=Mock(return_value="/test/metadata.csv")) + gui.entry_var5 = Mock(get=Mock(return_value="test_password")) + gui.final_run_var = Mock(get=Mock(return_value=False)) + gui.logger = Mock() + return gui + + +@pytest.fixture +def pdf_util(mock_gui): + """Fixture to provide a PDFUtility instance""" + return PDFUtility(mock_gui) + + +@pytest.fixture +def mock_word(): + """Fixture to mock Word COM automation""" + with patch('win32com.client.gencache.EnsureDispatch') as mock: + word_app = Mock() + word_app.Documents = Mock() + word_app.Documents.Open = Mock() + word_app.Quit = Mock() + mock.return_value = word_app + yield mock + + +@pytest.fixture +def test_metadata_content(): + """Fixture to provide test metadata content""" + return '''TLF,Title3,Title4,Title5,ProgName,Seq,OutputName,Order +F,Figure 1,Test Plot,Safety Pop,g_test,1,F_1.1,1 +F,Figure 2,Another Plot,Full Pop,g_test,2,F_1.2,2''' + + +@pytest.fixture +def test_metadata_file(test_dir, test_metadata_content): + """Fixture to provide a test metadata file""" + metadata_path = os.path.join(test_dir, "test_metadata.csv") + with open(metadata_path, "w") as f: + f.write(test_metadata_content) + return metadata_path + + +@pytest.fixture +def test_dir(): + """Fixture to provide a temporary directory""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + shutil.rmtree(temp_dir, ignore_errors=True) + + +@pytest.fixture +def test_files(test_dir): + """Fixture to create and provide test files""" + # Create test PDF + pdf_path = os.path.join(test_dir, "test.pdf") + doc = fitz.open() + page = doc.new_page() + page.insert_text((50, 50), "Test PDF") + doc.save(pdf_path) + doc.close() + + # Create test RTF + rtf_path = os.path.join(test_dir, "test.rtf") + with open(rtf_path, "w") as f: + f.write(r"{\rtf1\ansi\Test RTF}") + + # Create test metadata CSV + csv_path = os.path.join(test_dir, "test_metadata.csv") + test_data = { + 'TLF': ['F', 'F'], + 'Title3': ['Test1', 'Test2'], + 'Title4': ['Description1', 'Description2'], + 'Title5': ['Pop1', 'Pop2'], + 'ProgName': ['prog1', 'prog2'], + 'Seq': [1, 2], + 'OutputName': ['F_14.1', 'F_14.2'], + 'Order': [1, 2] + } + pd.DataFrame(test_data).to_csv(csv_path, index=False) + + return pdf_path, rtf_path, csv_path + + +class TestPDFUtility: + @pytest.mark.integration + def test_meta_data_to_dict(self, test_metadata_file): + """Test metadata parsing with and without population""" + # Test with population included + result = PDFUtility.meta_data_to_dict(test_metadata_file, title_sep="-", add_popul=True) + assert len(result) == 2 + assert all(isinstance(k, str) for k in result.keys()) + assert all(isinstance(v, str) for v in result.values()) + assert any('Pop' in v for v in result.values()) + + # Test without population + result = PDFUtility.meta_data_to_dict(test_metadata_file, title_sep="-", add_popul=False) + assert len(result) == 2 + assert not any('Pop' in v for v in result.values()) + + def test_get_tlf_list(self, test_files): + """Test TLF list extraction from metadata""" + _, _, csv_path = test_files + files, count = PDFUtility.get_tlf_list(csv_path) + + assert count == 2 + assert len(files) == 2 + assert all(f.endswith('.rtf') for f in files) + assert all('F_' in f for f in files) + + def test_combine_pdfs_simple(self, pdf_util, test_dir): + """Test PDF combination without TOC""" + # Create test PDFs + test_pdfs = [] + for i in range(2): + pdf_path = os.path.join(test_dir, f'test{i}.pdf') + doc = fitz.open() + page = doc.new_page() + page.insert_text((50, 50), f"Test PDF {i}") + doc.save(pdf_path) + doc.close() + test_pdfs.append(pdf_path) + + # Test without password + output_path = os.path.join(test_dir, 'combined.pdf') + result = pdf_util.combine_pdfs_simple( + test_pdfs, + output_path, + use_password=False + ) + + assert result is True + assert os.path.exists(output_path) + + with fitz.open(output_path) as doc: + assert doc.page_count == 2 + + # Test with password protection + output_path_protected = os.path.join(test_dir, 'combined_protected.pdf') + result = pdf_util.combine_pdfs_simple( + test_pdfs, + output_path_protected, + use_password=True, + password="test_password" + ) + + assert result is True + assert os.path.exists(output_path_protected) + + @pytest.mark.parametrize("file_count", [1, 5, 10]) + def test_combine_multiple_pdfs(self, pdf_util, test_dir, file_count): + """Test combining different numbers of PDFs""" + test_pdfs = [] + for i in range(file_count): + pdf_path = os.path.join(test_dir, f'test{i}.pdf') + doc = fitz.open() + page = doc.new_page() + page.insert_text((50, 50), f"Test PDF {i}") + doc.save(pdf_path) + doc.close() + test_pdfs.append(pdf_path) + + output_path = os.path.join(test_dir, 'combined.pdf') + result = pdf_util.combine_pdfs_simple(test_pdfs, output_path) + + assert result is True + with fitz.open(output_path) as doc: + assert doc.page_count == file_count + + @pytest.mark.win32 + def test_rtf_conversion(self, mock_word, test_dir, pdf_util): + """Test RTF to PDF conversion using Word automation""" + # Create test RTF + rtf_path = os.path.join(test_dir, "test.rtf") + with open(rtf_path, "w") as f: + f.write(r"{\rtf1\ansi\Test RTF}") + + pdf_util.rtf_file_to_pdf( + file_name="test.rtf", + input_dir=test_dir, + output_dir=test_dir, + pause_time=0.1 + ) + + # Verify Word automation calls + mock_word.assert_called_once() + word_app = mock_word.return_value + word_app.Documents.Open.assert_called_once() + word_app.Quit.assert_called_once() + + @pytest.mark.benchmark + def test_pdf_combine_performance(self, benchmark, pdf_util, test_dir): + """Test PDF combination performance""" + # Create test PDFs + test_pdfs = [] + for i in range(3): + pdf_path = os.path.join(test_dir, f'test{i}.pdf') + doc = fitz.open() + page = doc.new_page() + page.insert_text((50, 50), f"Test PDF {i}") + doc.save(pdf_path) + doc.close() + test_pdfs.append(pdf_path) + + output_path = os.path.join(test_dir, 'combined.pdf') + + # Run benchmark + result = benchmark( + pdf_util.combine_pdfs_simple, + test_pdfs, + output_path, + use_password=False + ) + + assert result is True + assert os.path.exists(output_path) + + @pytest.mark.parametrize("file_count", [1, 5, 10]) + def test_combine_multiple_pdfs(self, pdf_util, test_dir, file_count): + """Test combining different numbers of PDFs""" + test_pdfs = [] + for i in range(file_count): + pdf_path = os.path.join(test_dir, f'test{i}.pdf') + doc = fitz.open() + page = doc.new_page() + page.insert_text((50, 50), f"Test PDF {i}") + doc.save(pdf_path) + doc.close() + test_pdfs.append(pdf_path) + + output_path = os.path.join(test_dir, 'combined.pdf') + result = pdf_util.combine_pdfs_simple(test_pdfs, output_path) + + assert result is True + with fitz.open(output_path) as doc: + assert doc.page_count == file_count + + +class TestPDFCompiler: + @pytest.fixture + def pdf_compiler(self, mock_gui): + """Fixture to provide a PDFCompiler instance""" + util_mock = Mock() + return PDFCompiler(mock_gui, util_mock) + + def test_get_toc_page_numb(self, test_dir): + """Test TOC page number extraction""" + pdf_path = os.path.join(test_dir, 'test_toc.pdf') + doc = fitz.open() + for i in range(3): + page = doc.new_page() + page.insert_text((50, 50), f"Test Page {i}") + doc.save(pdf_path) + doc.close() + + page_count = PDFCompiler.get_toc_page_numb(pdf_path) + assert page_count == 3 + + def test_update_toc_pages(self, test_dir): + """Test TOC page number updating""" + test_content = ( + "Title 1 *page:1\n" + "Title 2 *page:2\n" + "Title 3 *page:3\n" + ) + + input_file = os.path.join(test_dir, 'test_toc.txt') + with open(input_file, 'w', encoding='latin-1') as f: + f.write(test_content) + + updated_content = PDFCompiler.update_toc_pages( + input_file=input_file, + page_char="*page:", + w_page=50, + page_numb_to_add=2 + ) + + assert "*page:3" in updated_content + assert "*page:4" in updated_content + assert "*page:5" in updated_content + + @pytest.mark.parametrize("page_width,expected", [ + (50, True), # Normal width + (10, True), # Very narrow + (200, True) # Very wide + ]) + def test_update_toc_pages_different_widths(self, test_dir, page_width, expected): + """Test TOC page updating with different page widths""" + test_content = "Title 1 *page:1\n" + input_file = os.path.join(test_dir, 'test_toc.txt') + with open(input_file, 'w', encoding='latin-1') as f: + f.write(test_content) + + updated_content = PDFCompiler.update_toc_pages( + input_file=input_file, + page_char="*page:", + w_page=page_width, + page_numb_to_add=1 + ) + + assert bool(updated_content) is expected + + @pytest.mark.parametrize("test_input,expected", [ + ("normal.pdf", True), + ("missing.pdf", False) + ]) + def test_file_existence_handling(self, test_dir, test_input, expected): + """Test handling of different file inputs""" + if expected: + # Create test file if it should exist + pdf_path = os.path.join(test_dir, test_input) + doc = fitz.open() + doc.new_page() + doc.save(pdf_path) + doc.close() + + assert os.path.exists(os.path.join(test_dir, test_input)) == expected + + +if __name__ == '__main__': + pytest.main(['-v']) \ No newline at end of file