Skip to content

Commit

Permalink
refactor and add 35 postscript base fonts (fix #1)
Browse files Browse the repository at this point in the history
  • Loading branch information
ashutoshvarma committed Aug 4, 2020
1 parent 570f745 commit 8d02a11
Show file tree
Hide file tree
Showing 40 changed files with 781 additions and 43 deletions.
51 changes: 8 additions & 43 deletions pyxpdf_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,11 @@
from distutils.sysconfig import get_python_lib
from pathlib import Path

__version__ = "1.0.1"


ROOT = Path(__file__).parent
POPPLER_DATA_DIR = Path(ROOT, "poppler_data")


def _get_root_files(path):
    """Return the entries directly inside ``path`` that are files.

    Only the top level of ``path`` is inspected; the result is a list of
    ``Path`` objects in the order the directory listing yields them.
    """
    found = []
    for child in Path(path).iterdir():
        if child.is_file():
            found.append(child)
    return found


def _get_root_dirs(path):
    """Return the entries directly inside ``path`` that are directories.

    Only the top level of ``path`` is inspected; the result is a list of
    ``Path`` objects in the order the directory listing yields them.
    """
    found = []
    for child in Path(path).iterdir():
        if child.is_dir():
            found.append(child)
    return found
from .encodings import _get_encodings_block
from .fonts import _get_fonts_block, get_fonts
from .utils import POPPLER_DATA_DIR


def _process_poppler_data(entry):
    """Return the xpdfrc lines for one poppler-data category.

    The list starts with a ``# <entry>`` comment. For ``nameToUnicode``,
    ``cidToUnicode`` and ``unicodeMap`` one directive is added per file found
    directly under ``POPPLER_DATA_DIR/<entry>``; for ``cMap`` one ``cMapDir``
    directive is added per sub-directory. Any other ``entry`` contributes no
    directives. The list always ends with ``os.linesep``.
    """
    directives = ["# {0}".format(entry)]

    if entry == "cMap":
        # cMap is the only category listed by sub-directory rather than file.
        directives.extend(
            'cMapDir {0} "{1}"'.format(subdir.name, subdir.absolute())
            for subdir in _get_root_dirs(Path(POPPLER_DATA_DIR, entry))
        )
    elif entry == "nameToUnicode":
        directives.extend(
            'nameToUnicode "{0}"'.format(mapping.absolute())
            for mapping in _get_root_files(Path(POPPLER_DATA_DIR, entry))
        )
    elif entry == "cidToUnicode" or entry == "unicodeMap":
        # Both categories share the "<keyword> <file name> <path>" layout.
        if entry == "cidToUnicode":
            pattern = 'cidToUnicode {0} "{1}"'
        else:
            pattern = 'unicodeMap {0} "{1}"'
        directives.extend(
            pattern.format(mapping.name, mapping.absolute())
            for mapping in _get_root_files(Path(POPPLER_DATA_DIR, entry))
        )

    directives.append(os.linesep)
    return directives
__version__ = "1.0.1"


def _xpdfrc_header():
Expand All @@ -55,11 +23,8 @@ def _xpdfrc_header():
# Build the text of an xpdfrc file and return it as a single string: the lines
# from _xpdfrc_header() followed by the lines appended below, joined with
# os.linesep.
# NOTE(review): this span appears to interleave the pre- and post-refactor
# lines of a diff (the per-entry loop plus the two *_block() calls). Read
# literally, the encoding directives would be added twice -- confirm which
# lines are current before relying on this text.
def generate_xpdfrc():
xpdfrc = _xpdfrc_header()

for entry in ["nameToUnicode", "cidToUnicode", "unicodeMap", "cMap"]:
xpdfrc += _process_poppler_data(entry)

# add trailing newline
xpdfrc.append("")
xpdfrc += _get_encodings_block()
xpdfrc += _get_fonts_block()

# Each list element becomes one line of the returned text.
return os.linesep.join(xpdfrc)

Expand All @@ -86,5 +51,5 @@ def get_xpdfrc(force_rewrite=True):
return str(xpdfrc_path.absolute())


if __name__ == "__main__":
print(get_xpdfrc())
# Names exported by ``from pyxpdf_data import *``. The entries must be strings:
# the import machinery looks each one up by name on the module, and a
# non-string entry makes the star-import fail with a TypeError.
__all__ = ["get_fonts", "get_xpdfrc", "get_poppler_dir", "generate_xpdfrc"]

38 changes: 38 additions & 0 deletions pyxpdf_data/encodings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
from pathlib import Path

from .utils import POPPLER_DATA_DIR, _get_root_dirs, _get_root_files


def _process_poppler_data(entry):
    """Return the xpdfrc lines for one poppler-data category.

    The list starts with a ``# <entry>`` comment. For ``nameToUnicode``,
    ``cidToUnicode`` and ``unicodeMap`` one directive is added per file found
    directly under ``POPPLER_DATA_DIR/<entry>``; for ``cMap`` one ``cMapDir``
    directive is added per sub-directory. Any other ``entry`` contributes no
    directives. The list always ends with an empty string, which becomes a
    blank line once the lines are joined.
    """
    section = ["# {0}".format(entry)]

    if entry == "cMap":
        # cMap is the only category listed by sub-directory rather than file.
        section.extend(
            'cMapDir {0} "{1}"'.format(folder.name, folder.absolute())
            for folder in _get_root_dirs(Path(POPPLER_DATA_DIR, entry))
        )
    elif entry == "nameToUnicode":
        section.extend(
            'nameToUnicode "{0}"'.format(item.absolute())
            for item in _get_root_files(Path(POPPLER_DATA_DIR, entry))
        )
    elif entry == "cidToUnicode" or entry == "unicodeMap":
        # Both categories share the "<keyword> <file name> <path>" layout.
        if entry == "cidToUnicode":
            layout = 'cidToUnicode {0} "{1}"'
        else:
            layout = 'unicodeMap {0} "{1}"'
        section.extend(
            layout.format(item.name, item.absolute())
            for item in _get_root_files(Path(POPPLER_DATA_DIR, entry))
        )

    section.append("")
    return section


def _get_encodings_block():
    """Return the xpdfrc lines for all bundled encoding data.

    The block is an upper-cased header comment, then the output of
    ``_process_poppler_data`` for ``nameToUnicode``, ``cidToUnicode``,
    ``unicodeMap`` and ``cMap`` (in that order), then a final empty string.
    """
    header = "# Extra Encodings".upper()
    categories = ("nameToUnicode", "cidToUnicode", "unicodeMap", "cMap")
    body = [
        line
        for category in categories
        for line in _process_poppler_data(category)
    ]
    return [header] + body + [""]
61 changes: 61 additions & 0 deletions pyxpdf_data/fonts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from pathlib import Path

from .utils import FONT_DATA_DIR

# fmt: off
# Maps each PostScript base font name (key) to the stem of the ``.ttf`` file
# that stands in for it (value); ``get_fonts`` appends the ``.ttf`` suffix.
# Insertion order is the order in which the fonts are listed in the xpdfrc.
fonts_mapping = {
    # AvantGarde
    "AvantGarde-Book":              "URWGothic-Book",
    "AvantGarde-BookOblique":       "URWGothic-BookOblique",
    "AvantGarde-Demi":              "URWGothic-Demi",
    "AvantGarde-DemiOblique":       "URWGothic-DemiOblique",
    # Bookman
    "Bookman-Demi":                 "URWBookman-Demi",
    "Bookman-DemiItalic":           "URWBookman-DemiItalic",
    "Bookman-Light":                "URWBookman-Light",
    "Bookman-LightItalic":          "URWBookman-LightItalic",
    # Courier
    "Courier":                      "NimbusMonoPS-Regular",
    "Courier-Bold":                 "NimbusMonoPS-Bold",
    "Courier-BoldOblique":          "NimbusMonoPS-BoldItalic",
    "Courier-Oblique":              "NimbusMonoPS-Italic",
    # Helvetica and Helvetica-Narrow
    "Helvetica":                    "NimbusSans-Regular",
    "Helvetica-Bold":               "NimbusSans-Bold",
    "Helvetica-BoldOblique":        "NimbusSans-BoldItalic",
    "Helvetica-Narrow":             "NimbusSansNarrow-Regular",
    "Helvetica-Narrow-Bold":        "NimbusSansNarrow-Bold",
    "Helvetica-Narrow-BoldOblique": "NimbusSansNarrow-BdOblique",
    "Helvetica-Narrow-Oblique":     "NimbusSansNarrow-Oblique",
    "Helvetica-Oblique":            "NimbusSans-Italic",
    # NewCenturySchlbk
    "NewCenturySchlbk-Bold":        "C059-Bold",
    "NewCenturySchlbk-BoldItalic":  "C059-BdIta",
    "NewCenturySchlbk-Italic":      "C059-Italic",
    "NewCenturySchlbk-Roman":       "C059-Roman",
    # Palatino
    "Palatino-Bold":                "P052-Bold",
    "Palatino-BoldItalic":          "P052-BoldItalic",
    "Palatino-Italic":              "P052-Italic",
    "Palatino-Roman":               "P052-Roman",
    # Symbol
    "Symbol":                       "StandardSymbolsPS",
    # Times
    "Times-Bold":                   "NimbusRoman-Bold",
    "Times-BoldItalic":             "NimbusRoman-BoldItalic",
    "Times-Italic":                 "NimbusRoman-Italic",
    "Times-Roman":                  "NimbusRoman-Regular",
    # ZapfChancery / ZapfDingbats
    "ZapfChancery-MediumItalic":    "Z003-MediumItalic",
    "ZapfDingbats":                 "D050000L",
}


# fmt: on
def get_fonts():
    """Return a dict mapping each base font name to its font file path.

    Keys are the keys of ``fonts_mapping``; each value is a ``Path`` built
    from ``FONT_DATA_DIR`` and the mapped file stem with a ``.ttf`` suffix.
    The dict preserves the order of ``fonts_mapping``.
    """
    font_paths = {}
    for ps_name, stem in fonts_mapping.items():
        font_paths[ps_name] = Path(FONT_DATA_DIR, "{0}.ttf".format(stem))
    return font_paths


def _get_fonts_block():
    """Return the xpdfrc lines that register the bundled base fonts.

    The block starts with an upper-cased header comment, contains one
    ``fontFile <PostScript name> "<absolute path>"`` directive per entry of
    ``get_fonts()``, and ends with an empty string so that a blank line
    follows the block once the lines are joined.

    The path is wrapped in double quotes, as the encoding directives do for
    their paths, so that an install location containing spaces is still read
    as a single token.
    """
    xpdfrc_fonts = ["# 35 PostScript Level 2 base fonts".upper()]
    xpdfrc_fonts.extend(
        'fontFile {0} "{1}"'.format(font_name, font_file.absolute())
        for font_name, font_file in get_fonts().items()
    )
    # Trailing empty entry yields a blank separator line after joining.
    xpdfrc_fonts.append("")
    return xpdfrc_fonts
Binary file added pyxpdf_data/fonts/C059-BdIta.ttf
Binary file not shown.
Binary file added pyxpdf_data/fonts/C059-Bold.ttf
Binary file not shown.
Binary file added pyxpdf_data/fonts/C059-Italic.ttf
Binary file not shown.
Binary file added pyxpdf_data/fonts/C059-Roman.ttf
Binary file not shown.
Loading

0 comments on commit 8d02a11

Please sign in to comment.