Skip to content

Commit

Permalink
Add playwright async support, fix Create async backend for playwright #…
Browse files Browse the repository at this point in the history
  • Loading branch information
PaleNeutron committed Sep 13, 2024
1 parent 527edaf commit 51691f4
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 9 deletions.
2 changes: 1 addition & 1 deletion dataframe_image/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from ._convert import convert
from ._pandas_accessor import export
from ._pandas_accessor import export, export_async
from ._version import __version__
85 changes: 79 additions & 6 deletions dataframe_image/_pandas_accessor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import inspect
import io
from pathlib import Path
from typing import Literal
Expand All @@ -7,6 +8,7 @@
from PIL import Image

from dataframe_image.converter.browser import (
AsyncPlayWrightConverter,
ChromeConverter,
Html2ImageConverter,
PlayWrightConverter,
Expand Down Expand Up @@ -50,11 +52,11 @@ def export(
"selenium": SeleniumConverter,
"html2image": Html2ImageConverter,
"playwright": PlayWrightConverter,
"async_playwright": AsyncPlayWrightConverter,
}


def export(
obj: pd.DataFrame,
def prepare_converter(
filename,
fontsize=14,
max_rows=None,
Expand All @@ -66,8 +68,6 @@ def export(
dpi=None,
use_mathjax=False,
):
is_styler = isinstance(obj, Styler)
df = obj.data if is_styler else obj
if table_conversion in BROWSER_CONVERTER_DICT:
converter = BROWSER_CONVERTER_DICT[table_conversion](
max_rows=max_rows,
Expand Down Expand Up @@ -98,6 +98,17 @@ def export(
format=extension,
).run

return converter


def generate_html(
obj: pd.DataFrame,
filename,
max_rows=None,
max_cols=None,
):
is_styler = isinstance(obj, Styler)
df = obj.data if is_styler else obj
if df.shape[0] > MAX_ROWS and max_rows is None:
error_msg = (
f"Your DataFrame has more than {MAX_ROWS} rows and will produce a huge "
Expand Down Expand Up @@ -141,11 +152,13 @@ def export(
html = styler2html(obj)
else:
html = obj.to_html(max_rows=max_rows, max_cols=max_cols, notebook=True)
return html


def save_image(img_str, filename):
# swap back to original value
pre_limit = Image.MAX_IMAGE_PIXELS
Image.MAX_IMAGE_PIXELS = None
img_str = converter(html)
# swap back to original value
Image.MAX_IMAGE_PIXELS = pre_limit

try:
Expand All @@ -156,6 +169,66 @@ def export(
filename.write(img_str)
else:
raise ex

def export(
    obj: pd.DataFrame,
    filename,
    fontsize=14,
    max_rows=None,
    max_cols=None,
    table_conversion: Literal[
        "chrome", "matplotlib", "html2image", "playwright", "selenium"
    ] = "chrome",
    chrome_path=None,
    dpi=None,
    use_mathjax=False,
):
    """Render ``obj`` to an image synchronously and save it to ``filename``.

    The work is delegated to three module-level helpers, called in this
    order: ``prepare_converter`` builds the HTML-to-image callable,
    ``generate_html`` turns ``obj`` into HTML, and ``save_image`` writes the
    converted bytes out.

    Args:
        obj: DataFrame (or pandas ``Styler``) to render.
        filename: Output target, handed to all three helpers.
        fontsize, chrome_path, dpi, use_mathjax: Forwarded unchanged to
            ``prepare_converter``.
        max_rows, max_cols: Forwarded to both ``prepare_converter`` and
            ``generate_html``.
        table_conversion: Name of the conversion backend; forwarded to
            ``prepare_converter``.
    """
    # Positional order mirrors the prepare_converter parameter list.
    converter_args = (
        filename,
        fontsize,
        max_rows,
        max_cols,
        table_conversion,
        chrome_path,
        dpi,
        use_mathjax,
    )
    render = prepare_converter(*converter_args)
    markup = generate_html(obj, filename, max_rows, max_cols)
    save_image(render(markup), filename)


async def export_async(
    obj: pd.DataFrame,
    filename,
    fontsize=14,
    max_rows=None,
    max_cols=None,
    table_conversion: Literal[
        "chrome",
        "matplotlib",
        "html2image",
        "playwright_async",
        "async_playwright",
        "selenium",
    ] = "chrome",
    chrome_path=None,
    dpi=None,
    use_mathjax=False,
):
    """Render ``obj`` to an image from async code and save it to ``filename``.

    Works like the synchronous ``export`` but awaits the converter when it is
    a coroutine function, so the async Playwright backend can be used inside
    a running event loop. Synchronous converters are still called directly
    (and therefore block the loop while they run).

    Args:
        obj: DataFrame (or pandas ``Styler``) to render.
        filename: Output target, handed to ``prepare_converter``,
            ``generate_html`` and ``save_image``.
        fontsize, chrome_path, dpi, use_mathjax: Forwarded unchanged to
            ``prepare_converter``.
        max_rows, max_cols: Forwarded to both ``prepare_converter`` and
            ``generate_html``.
        table_conversion: Name of the conversion backend. Both
            ``"playwright_async"`` and ``"async_playwright"`` select the
            async Playwright converter.
    """
    # The converter registry keys the async Playwright backend as
    # "async_playwright", while this function advertises "playwright_async".
    # Without this mapping the advertised name is not found in the registry,
    # so the async backend is never selected. Only remap when the advertised
    # name is not itself a registered key.
    if (
        table_conversion == "playwright_async"
        and table_conversion not in BROWSER_CONVERTER_DICT
    ):
        table_conversion = "async_playwright"

    converter = prepare_converter(
        filename,
        fontsize,
        max_rows,
        max_cols,
        table_conversion,
        chrome_path,
        dpi,
        use_mathjax,
    )
    html = generate_html(obj, filename, max_rows, max_cols)
    # Async backends expose a coroutine ``run``; everything else is a plain
    # callable that returns the image bytes directly.
    if inspect.iscoroutinefunction(converter):
        img_str = await converter(html)
    else:
        img_str = converter(html)
    # TODO: use async file writing
    save_image(img_str, filename)


setattr(Styler, "export_png", export)
Expand Down
2 changes: 1 addition & 1 deletion dataframe_image/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.4.dev10+gcd96327.d20240717"
__version__ = "0.2.5.dev1+g527edaf.d20240913"
2 changes: 1 addition & 1 deletion dataframe_image/converter/browser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .chrome_converter import ChromeConverter
from .html2image_converter import Html2ImageConverter
from .playwright_converter import PlayWrightConverter
from .playwright_converter import AsyncPlayWrightConverter, PlayWrightConverter
from .selenium_converter import SeleniumConverter
54 changes: 54 additions & 0 deletions dataframe_image/converter/browser/playwright_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,57 @@ def screenshot(self, html):
screenshot_bytes = page.screenshot(full_page=True)
im = Image.open(BytesIO(screenshot_bytes))
return im


class AsyncPlayWrightConverter(BrowserConverter):
    """Browser converter that takes its screenshot via Playwright's async API."""

    async def run(self, html: str) -> bytes:
        """Convert ``html`` to image bytes.

        Takes a full-page screenshot, then passes it through ``self.crop``
        and ``self.finalize_image``.
        """
        im = await self.screenshot(html)
        temp_img = self.crop(im)
        image_bytes = self.finalize_image(temp_img)
        return image_bytes

    async def screenshot(self, html):
        """Load ``html`` in a Chromium-based browser and return a PIL image.

        Raises:
            ImportError: If the ``playwright`` package is not installed.
            playwright.async_api.Error: If no Chromium-based browser channel
                could be launched.
        """
        try:
            from playwright.async_api import Error, async_playwright
        except ImportError as ex:
            raise ImportError(
                "Playwright is not installed. Install it with 'pip install playwright' "
                "and make sure you have a chromium browser installed."
            ) from ex
        async with async_playwright() as p:
            # Try each channel in turn and keep the first one that launches;
            # None lets Playwright pick its default.
            channels = ["chromium", "chrome", "msedge", None]
            for c in channels:
                try:
                    browser = await p.chromium.launch(
                        channel=c, args=["--disable-web-security"]
                    )
                    break
                except Error:
                    pass
            else:
                # Loop finished without ``break``: nothing could be launched.
                raise Error(
                    "Could not find any chromium based browser. Make sure you have a "
                    "chromium browser installed. Or install it by "
                    "`playwright install chromium`."
                )

            context = await browser.new_context(
                device_scale_factor=self.device_scale_factor, bypass_csp=True
            )
            page = await context.new_page()
            await page.set_content(self.get_css() + html)
            if self.use_mathjax:
                mj = page.locator("mjx-container math")
                try:
                    # In the async API this is a coroutine: it must be
                    # awaited, otherwise the wait never happens and the
                    # timeout Error below can never be caught.
                    await mj.wait_for(timeout=10000)
                except Error:
                    logger.warning(
                        "MathJax did not render in time. Formula in dataframe may not "
                        "be rendered correctly."
                    )
                # Short settle delay after MathJax; also a coroutine in the
                # async API, so it has to be awaited to take effect.
                await page.wait_for_timeout(200)
            screenshot_bytes = await page.screenshot(full_page=True)
            im = Image.open(BytesIO(screenshot_bytes))
        return im
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
asyncio_mode = auto
asyncio_default_fixture_loop_scope = function
12 changes: 12 additions & 0 deletions tests/test_df_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ def test_styled2(document_name, converter):
df_styled = df.style.set_table_styles([col_headers]).set_caption("This is a caption")
dfi.export(df_styled, f"tests/test_output/{document_name}.png", table_conversion=converter)

@pytest.mark.asyncio
async def test_styled2_async(document_name):
    """Export a styled, captioned DataFrame through ``dfi.export_async``.

    Runs the export twice: once requesting the async Playwright backend and
    once requesting matplotlib.
    """
    header_style = {
        "selector": ".col_heading, thead",
        "props": "color: white; background-color: #1d5632; font-size: 11px"
    }

    frame = pd.DataFrame(np.random.rand(6, 4))
    styled = frame.style.set_table_styles([header_style])
    styled = styled.set_caption("This is a caption")

    # NOTE(review): the converter registry in _pandas_accessor lists the async
    # backend under "async_playwright" -- confirm that "playwright_async"
    # really resolves to the async Playwright converter.
    await dfi.export_async(
        styled,
        f"tests/test_output/{document_name}_playwright_async.png",
        table_conversion="playwright_async",
    )
    await dfi.export_async(
        styled,
        f"tests/test_output/{document_name}_matplotlib_async.png",
        table_conversion="matplotlib",
    )



@pytest.mark.parametrize("dpi", test_dpi_values)
Expand Down

0 comments on commit 51691f4

Please sign in to comment.