From ebcfa969dde8ed039741df24d1f6ab52785deabe Mon Sep 17 00:00:00 2001 From: SWHL Date: Wed, 20 Dec 2023 09:36:06 +0800 Subject: [PATCH] Init commit --- .github/workflows/auto_push_pypi.yml | 74 +++++++++++++ .gitignore | 159 +++++++++++++++++++++++++++ .pre-commit-config.yaml | 19 ++++ README.md | 44 ++++++++ demo.py | 15 +++ latex_to_image/__init__.py | 4 + latex_to_image/crop_img.py | 78 +++++++++++++ latex_to_image/main.py | 22 ++++ latex_to_image/render_latex.py | 98 +++++++++++++++++ requirements.txt | 2 + res2.png | Bin 0 -> 743 bytes t.py | 148 +++++++++++++++++++++++++ tests/test_latex2img.py | 23 ++++ tests/test_render_two.py | 26 +++++ tests/test_template.py | 5 + 15 files changed, 717 insertions(+) create mode 100644 .github/workflows/auto_push_pypi.yml create mode 100755 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 README.md create mode 100644 demo.py create mode 100644 latex_to_image/__init__.py create mode 100644 latex_to_image/crop_img.py create mode 100644 latex_to_image/main.py create mode 100644 latex_to_image/render_latex.py create mode 100644 requirements.txt create mode 100644 res2.png create mode 100644 t.py create mode 100644 tests/test_latex2img.py create mode 100644 tests/test_render_two.py create mode 100644 tests/test_template.py diff --git a/.github/workflows/auto_push_pypi.yml b/.github/workflows/auto_push_pypi.yml new file mode 100644 index 0000000..9ac1b98 --- /dev/null +++ b/.github/workflows/auto_push_pypi.yml @@ -0,0 +1,74 @@ +name: Push latex to image to pypi + +on: + push: + # branches: [ main ] + # paths: + # - 'lineless_table_rec/**' + # tags: + # - v* + +jobs: + UnitTesting: + runs-on: ubuntu-latest + steps: + - name: Pull latest code + uses: actions/checkout@v3 + + - name: Set up Python 3.7 + uses: actions/setup-python@v4 + with: + python-version: '3.7' + architecture: 'x64' + + - name: Display Python version + run: python -c "import sys; print(sys.version)" + + - name: Unit testings + run: | + 
pip install -r requirements.txt + pip install pytest + + wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip + unzip lineless_table_rec_models.zip + mv lineless_table_rec_models/*.onnx lineless_table_rec/models/ + + pytest tests/test_lore.py + + GenerateWHL_PushPyPi: + needs: UnitTesting + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python 3.7 + uses: actions/setup-python@v4 + with: + python-version: '3.7' + architecture: 'x64' + + - name: Run setup.py + run: | + pip install -r requirements.txt + python -m pip install --upgrade pip + pip install wheel get_pypi_latest_version + + wget https://github.com/RapidAI/TableStructureRec/releases/download/v0.0.0/lineless_table_rec_models.zip + unzip lineless_table_rec_models.zip + mv lineless_table_rec_models/*.onnx lineless_table_rec/models/ + + python setup_lineless.py bdist_wheel ${{ github.event.head_commit.message }} + + # - name: Publish distribution 📦 to Test PyPI + # uses: pypa/gh-action-pypi-publish@v1.5.0 + # with: + # password: ${{ secrets.TEST_PYPI_API_TOKEN }} + # repository_url: https://test.pypi.org/legacy/ + # packages_dir: dist/ + + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@v1.5.0 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + packages_dir: dist/ diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..4c21ff5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,159 @@ +# Created by .ignore support plugin (hsz.mobi) +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +.pytest_cache + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before 
PyInstaller builds the exe, so as to inject date/other infos into it. +# *.manifest +# *.spec +*.res + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +#idea +.vs +.vscode +.idea +/images +/models + +#models +*.onnx + +*.ttf +*.ttc + +long1.jpg + +*.bin +*.mapping +*.xml + +*.pdiparams +*.pdiparams.info +*.pdmodel + +.DS_Store +*.npy \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..5c227d6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +repos: +- repo: https://gitee.com/SWHL/autoflake + rev: v2.1.1 + hooks: + - id: autoflake + args: + [ + "--recursive", + "--in-place", + "--remove-all-unused-imports", + "--remove-unused-variable", + "--ignore-init-module-imports", + ] + files: \.py$ +- repo: https://gitee.com/SWHL/black + rev: 23.1.0 + hooks: + - id: black + files: \.py$ \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1357436 --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +
+
+

LaTeX To Image

+
+ + + SemVer2.0 + + GitHub + +
class CropByProject:
    """Crop an image down to its content using projection profiles.

    The image is binarised with an inverted threshold (pixels at or below
    ``threshold`` become foreground), dilated once so that neighbouring
    strokes merge, and then projected onto each axis to find the first and
    last row/column that contain any foreground.
    """

    def __init__(self, threshold=128):
        # Grey level passed to cv2.threshold; pixels <= threshold are
        # treated as content because THRESH_BINARY_INV is used below.
        self.threshold = threshold

    def __call__(self, origin_img, margin=(0, 0, 0, 0)):
        """Return ``origin_img`` cropped to its content.

        Args:
            origin_img (ndarray): Image to crop, as accepted by
                ``cv2.threshold``.
            margin (tuple): Extra pixels to keep around the content, in the
                order (left, top, right, bottom). Clamped to the image
                bounds.

        Returns:
            ndarray: The cropped view of ``origin_img``. When the image has
            no foreground at all it is returned unchanged instead of
            raising ``IndexError``.
        """
        # Binarise: content -> 255, background -> 0.
        _, binary = cv2.threshold(
            origin_img, self.threshold, 255, cv2.THRESH_BINARY_INV
        )

        # Grow the strokes so that nearby glyphs merge into blocks.
        closed = cv2.dilate(binary, None, iterations=1)

        # A blank image has nothing to crop to; keep it as-is.
        if not np.any(closed == 255):
            return origin_img

        # Horizontal extent (projection onto the x axis).
        x0, x1 = self.get_project_loc(closed, direction="width")

        # Vertical extent (projection onto the y axis).
        y0, y1 = self.get_project_loc(closed, direction="height")

        h, w = binary.shape[:2]
        x0 = max(x0 - margin[0], 0)
        y0 = max(y0 - margin[1], 0)
        x1 = min(x1 + margin[2], w)
        y1 = min(y1 + margin[3], h)

        # NOTE(review): x1/y1 are inclusive indices but the slice end is
        # exclusive, so the last dilated row/column is dropped. Left as-is
        # to keep output sizes stable -- confirm whether this is intended.
        return origin_img[y0:y1, x0:x1]

    @staticmethod
    def get_project_loc(img, direction):
        """Find the first and last index that contain foreground pixels.

        Args:
            img (ndarray): Binarised image whose foreground pixels are 255.
            direction (str): ``"width"`` to locate columns, ``"height"`` to
                locate rows.

        Raises:
            ValueError: If ``direction`` is neither ``"width"`` nor
                ``"height"``.

        Returns:
            tuple: ``(first, last)`` indices, both inclusive. If the image
            has no foreground the whole extent ``(0, size - 1)`` is
            returned.
        """
        if direction == "width":
            axis = 0
        elif direction == "height":
            axis = 1
        else:
            raise ValueError(f"direction {direction} is not supported!")

        # Count foreground pixels per column (axis=0) or per row (axis=1).
        loc_sum = np.sum(img == 255, axis=axis)

        # Indices along the first dimension come back in ascending order,
        # so the first and last entries are the extremes.
        hits = np.nonzero(loc_sum > 0)[0]
        if hits.size == 0:
            return 0, max(loc_sum.shape[0] - 1, 0)
        return int(hits[0]), int(hits[-1])
class RenderLaTeX:
    """Render a LaTeX math expression to an image array.

    The expression is wrapped in a minimal document, compiled with the
    ``xelatex`` executable, and the resulting PDF is rasterised with the
    ImageMagick ``convert`` executable. Both tools must be on ``PATH``.
    """

    def __init__(self, dpi=200):
        # Density handed to ``convert`` when rasterising the PDF.
        self.dpi = dpi
        # Document template; the single ``%s`` receives the math source.
        self.BASE = r"""\documentclass[12pt]{article}\usepackage{fontspec,unicode-math}\thispagestyle{empty}\setmathfont{Latin Modern Math}\begin{document}$%s$\end{document}"""

    def __call__(self, math: str):
        """Render ``math`` and return the image, or ``None`` on failure.

        Any exception raised while compiling or converting is printed with
        its traceback and swallowed; temporary files are always removed.
        """
        work_dir, tex_file = self.generate_tmp(math)
        try:
            pdf_file = self.render_by_xelatex(work_dir, tex_file)
            return self.convert_pdf_to_png(pdf_file)
        except Exception:
            traceback.print_exc()
            return None
        finally:
            self.clear_files(tex_file)

    def generate_tmp(self, math):
        """Write the LaTeX document for ``math`` to a fresh temp file.

        Returns:
            tuple: ``(work_dir, tex_file)`` -- the temp directory and the
            path of the created ``.tex`` file.
        """
        workdir = tempfile.gettempdir()
        fd, tex_file = tempfile.mkstemp(
            suffix=".tex", prefix="eq", dir=workdir, text=True
        )
        # xelatex reads its input as UTF-8, so do not rely on the locale
        # encoding when the formula contains non-ASCII characters.
        with os.fdopen(fd, "w+", encoding="utf-8") as f:
            f.write(self.BASE % (math,))
        return workdir, tex_file

    def render_by_xelatex(self, work_dir, in_file) -> Path:
        """Compile ``in_file`` with xelatex and return the PDF path.

        Raises:
            LatexError: If the xelatex output does not report exactly one
                page written to the expected PDF.
        """
        # Passed as an argument list so paths with spaces or backslashes
        # survive untouched.
        cmd = [
            "xelatex",
            "-interaction",
            "errorstopmode",
            "-file-line-error",
            "-output-directory",
            str(work_dir),
            str(in_file),
        ]
        sout, _ = self.run_cmd(cmd)

        pdf_file: Path = Path(in_file).with_suffix(".pdf")
        if self._single_page_written(sout, pdf_file):
            return pdf_file
        raise LatexError("xelatex meets error.")

    @classmethod
    def _single_page_written(cls, sout, pdf_file) -> bool:
        """Tell whether ``sout`` reports one page written to ``pdf_file``."""
        # TeX hard-wraps its terminal output, which can split the path at
        # any character; drop the line breaks before matching.
        flat = sout.replace("\n", "")
        # The path is literal text, not a pattern, so escape it.
        expression = re.escape(str(pdf_file)) + r" \((\d+) page"
        return cls.is_success(text=flat, expression=expression)

    def convert_pdf_to_png(self, pdf_file):
        """Rasterise ``pdf_file`` next to itself and load it as an array.

        Raises:
            LatexError: If ``convert`` exits with a non-zero status.
        """
        png_file: Path = Path(pdf_file).with_suffix(".png")
        cmd = [
            "convert",
            "-background",
            "white",
            "-flatten",
            "-density",
            str(self.dpi),
            "-colorspace",
            "gray",
            str(pdf_file),
            "-quality",
            "90",
            str(png_file),
        ]
        _, return_code = self.run_cmd(cmd)
        if return_code != 0:
            raise LatexError(f"PDF to png error\n{' '.join(cmd)}\n{pdf_file}")
        # Close the file before the caller deletes it; an open handle
        # blocks the unlink on some platforms.
        with Image.open(png_file) as png:
            img = np.array(png)
        return img

    @staticmethod
    def run_cmd(shell_cmd: Union[str, list]):
        """Run a command and return ``(stdout, returncode)``.

        Args:
            shell_cmd: Either a command line string (split with
                ``shlex.split``) or an already-split argument list.
        """
        if isinstance(shell_cmd, str):
            args = shlex.split(shell_cmd)
        else:
            args = [str(part) for part in shell_cmd]

        with subprocess.Popen(
            args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        ) as p:
            # communicate() closes stdin, so an interactive prompt from
            # the child sees end-of-file instead of blocking.
            sout, _ = p.communicate()
        return sout, p.returncode

    @staticmethod
    def is_success(text, expression=None):
        """Return True when the first match of ``expression`` equals 1.

        ``expression`` must contain one capture group holding the page
        count. No match, a non-numeric capture or an invalid pattern all
        yield ``False``.
        """
        try:
            results = re.findall(expression, text)
            if not results:
                return False
            return int(results[0]) == 1
        except Exception:
            traceback.print_exc()
            return False

    @staticmethod
    def clear_files(in_file: Union[str, Path]) -> None:
        """Delete every file in ``in_file``'s directory sharing its stem."""
        target = Path(in_file)
        for file_path in target.parent.glob(f"{target.stem}*"):
            try:
                file_path.unlink()
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass


class LatexError(Exception):
    """Raised when compiling or converting a LaTeX document fails."""