Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synthetic image generator #751

Merged
merged 23 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import base64
from enum import Enum, auto
from io import BytesIO
from pathlib import Path

Check notice

Code scanning / CodeQL

Unused import Note

Import of 'Path' is not used.
from typing import List, Optional, Tuple, cast

Check notice

Code scanning / CodeQL

Unused import Note

Import of 'List' is not used.

import numpy as np
from genai_perf.exceptions import GenAIPerfException

Check notice

Code scanning / CodeQL

Unused import Note

Import of 'GenAIPerfException' is not used.
from PIL import Image


class ImageFormat(Enum):
    """Image encodings the synthetic image pipeline can produce.

    The member *name* (not its numeric value) is what the base64 encoder in
    this module passes to the image writer and lower-cases into the MIME
    type, so names must stay valid image format identifiers.
    """

    JPEG = auto()
    PNG = auto()


class RandomFormatBase64Encoder:
    """Callable that serializes an image into a base64 ``data:`` URI.

    Despite the class name, the output format is not picked at random: it is
    the ``image_format`` given at construction time (PNG by default).
    """

    # Modes the JPEG writer can store directly. Any other mode (e.g. "RGBA"
    # or "P", common for PNG sources) makes ``save(..., format="JPEG")``
    # raise OSError, so such images are converted to RGB first.
    _JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

    def __init__(self, image_format: ImageFormat = ImageFormat.PNG):
        """Remember the format every encoded image will be written in."""
        self.image_format = image_format

    def __call__(self, image):
        """Encode ``image`` and return it as a data URI string.

        Args:
            image: an object exposing ``mode``, ``convert`` and ``save`` the
                way a PIL image does.

        Returns:
            ``"data:image/<format>;base64,<payload>"`` where ``<format>`` is
            the lower-cased member name of ``self.image_format``.
        """
        format_name = self.image_format.name

        # JPEG has no alpha/palette support; drop to RGB instead of crashing.
        if (
            self.image_format is ImageFormat.JPEG
            and image.mode not in self._JPEG_WRITABLE_MODES
        ):
            image = image.convert("RGB")

        buffered = BytesIO()
        image.save(buffered, format=format_name)
        data = base64.b64encode(buffered.getvalue()).decode("utf-8")
        prefix = f"data:image/{format_name.lower()};base64"
        return f"{prefix},{data}"


def images_from_file_generator(image_path: Path):
    """Yield the image stored at ``image_path`` over and over.

    This is a generator function, so nothing below runs until the first
    ``next()`` call; that is also when a missing file is reported.

    Raises:
        GenAIPerfException: if ``image_path`` does not exist.
    """
    file_is_missing = not image_path.exists()
    if file_is_missing:
        raise GenAIPerfException(f"File not found: {image_path}")

    # Opened a single time; every iteration hands out this same object.
    source_image = Image.open(image_path)
    while True:
        yield source_image
mwawrzos marked this conversation as resolved.
Show resolved Hide resolved


def white_images_generator():
    """Yield one shared 100x100 white RGB image indefinitely."""
    side = 100
    blank_canvas = Image.new("RGB", (side, side), color="white")
    # The image is built once and the same object is yielded each time.
    while True:
        yield blank_canvas
mwawrzos marked this conversation as resolved.
Show resolved Hide resolved


def build_synthetic_image_generator(
    mean_size: Tuple[int, int],
    dimensions_stddev: Tuple[int, int],
    image_path: Optional[Path] = None,
    image_format: ImageFormat = ImageFormat.PNG,
):
    """Assemble the full pipeline: source images -> random resize -> base64.

    Args:
        mean_size: mean of the sampled image size, forwarded to
            ``SyntheticImageGenerator``.
        dimensions_stddev: standard deviation of the sampled image size,
            forwarded to ``SyntheticImageGenerator``.
        image_path: file to use as the source image; when ``None`` a plain
            white image is used instead.
        image_format: encoding applied to every produced image.

    Returns:
        A generator yielding one encoded string per resized image.
    """
    if image_path is not None:
        # cast() is a runtime no-op; it only narrows Optional[Path] for
        # static type checkers.
        source_images = images_from_file_generator(cast(Path, image_path))
    else:
        source_images = white_images_generator()

    resized_images = SyntheticImageGenerator(
        mean_size=mean_size,
        dimensions_stddev=dimensions_stddev,
        image_iterator=source_images,
    )
    encode = RandomFormatBase64Encoder(image_format)
    return (encode(resized) for resized in resized_images)


class SyntheticImageGenerator:
    """Iterator that resizes source images to randomly sampled dimensions.

    Every ``next()`` takes one image from ``image_iterator`` and resizes it
    to a size drawn from normal distributions described by ``mean_size`` and
    ``dimensions_stddev``.
    """

    def __init__(
        self,
        mean_size,
        dimensions_stddev,
        image_iterator,
    ):
        """Store the size distribution and the iterator supplying images.

        Args:
            mean_size: pair with the mean of each sampled dimension.
            dimensions_stddev: pair with the standard deviation of each
                sampled dimension.
            image_iterator: iterator whose items expose ``resize(size)``.
        """
        self.image_iterator = image_iterator
        self.mean_size = mean_size
        self.dimensions_stddev = dimensions_stddev

    def __iter__(self):
        return self

    def _sample_random_positive_pair(
        self, mean: Tuple[int, int], stddev: Tuple[int, int]
    ) -> Tuple[int, int]:
        """Draw a size from N(mean, stddev), retrying until both are >= 1.

        Samples are truncated to integers before the positivity check, so a
        draw such as 0.7 counts as 0 and is rejected.

        Raises:
            ValueError: if some dimension has zero spread and a mean that
                truncates to a non-positive value; such a dimension can
                never become positive and sampling would never finish.
        """
        mean_values = np.asarray(mean, dtype=float)
        stddev_values = np.asarray(stddev, dtype=float)
        never_positive = (stddev_values == 0) & (np.trunc(mean_values) <= 0)
        if np.any(never_positive):
            raise ValueError(
                "cannot sample a positive size: "
                f"mean={tuple(mean)}, stddev={tuple(stddev)}"
            )

        while True:
            # Use the arguments, not the instance attributes, so the helper
            # honours whatever distribution the caller asked for.
            first, second = np.random.normal(mean, stddev)
            size = (int(first), int(second))
            if size[0] > 0 and size[1] > 0:
                return size

    def random_resize(self, image):
        """Return ``image`` resized to a freshly sampled positive size."""
        new_size = self._sample_random_positive_pair(
            self.mean_size, self.dimensions_stddev
        )
        return image.resize(new_size)

    def __next__(self):
        # StopIteration from the underlying iterator propagates unchanged.
        image = next(self.image_iterator)
        return self.random_resize(image)
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import base64
from io import BytesIO
from pathlib import Path

Check notice

Code scanning / CodeQL

Unused import Note test

Import of 'Path' is not used.
from unittest.mock import patch

Check notice

Code scanning / CodeQL

Unused import Note test

Import of 'patch' is not used.

import numpy as np
import pytest
from genai_perf.exceptions import GenAIPerfException

Check notice

Code scanning / CodeQL

Unused import Note test

Import of 'GenAIPerfException' is not used.
from genai_perf.llm_inputs.synthetic_image_generator import (
ImageFormat,
RandomFormatBase64Encoder,
SyntheticImageGenerator,
images_from_file_generator,
white_images_generator,
)
from PIL import Image


@pytest.mark.parametrize("image_size", [(100, 100), (200, 200)])
def test_different_image_size(image_size):
    """With zero spread, each produced image has exactly the mean size."""
    white_images = white_images_generator()
    sut = SyntheticImageGenerator(
        mean_size=image_size,
        dimensions_stddev=[0, 0],
        image_iterator=white_images,
    )

    produced = next(sut)

    assert isinstance(
        produced, Image.Image
    ), "generator produces unexpected type of data"
    assert produced.size == image_size, "image not resized to the target size"


def test_negative_size_is_not_selected():
    """A negative mean must still end up producing a usable image size."""
    generator_kwargs = {
        "mean_size": (-1, -1),
        "dimensions_stddev": [10, 10],
        "image_iterator": white_images_generator(),
    }
    sut = SyntheticImageGenerator(**generator_kwargs)

    # No explicit assertion: PIL.Image.resize raises when it is called with
    # negative values, so simply completing this call is the check.
    next(sut)


@patch("pathlib.Path.exists", return_value=False)
def test_images_from_file_raises_when_file_not_found(mock_exists):
    """A missing source file is reported when the generator is first advanced."""
    missing_path = Path("dummy-image.png")
    sut = images_from_file_generator(missing_path)

    # Creating the generator runs nothing; the error surfaces on next().
    with pytest.raises(GenAIPerfException):
        next(sut)


# Shared fixture image: used as the patched return value of PIL.Image.open
# and as the expected value in the file-generator test's assertions.
DUMMY_IMAGE = Image.new("RGB", (100, 100), color="blue")


@patch("pathlib.Path.exists", return_value=True)
@patch("PIL.Image.open", return_value=DUMMY_IMAGE)
def test_images_from_file_generates_multiple_times(mock_file, mock_exists):
    """The file is checked and opened once, then the same image is reused."""
    # DUMMY_IMAGE stays a named object (rather than being built inline in the
    # patch) because the assertions below compare against it.
    dummy_path = Path("dummy-image.png")
    sut = images_from_file_generator(dummy_path)

    first = next(sut)
    mock_exists.assert_called_once()
    mock_file.assert_called_once_with(dummy_path)
    assert first == DUMMY_IMAGE, "unexpected image produced"

    second = next(sut)
    assert second == DUMMY_IMAGE, "unexpected image produced"


def test_white_images_generator():
    """The default source yields PIL images whose pixels are all white."""
    sut = white_images_generator()

    produced = next(sut)
    assert isinstance(
        produced, Image.Image
    ), "generator produces unexpected type of data"

    # A single white pixel broadcasts against the full (H, W, 3) pixel array.
    white_pixel = np.array([[[255, 255, 255]]])
    pixels = np.array(produced)
    assert (pixels == white_pixel).all(), "not all pixels are white"


@pytest.mark.parametrize("image_format", [ImageFormat.PNG, ImageFormat.JPEG])
def test_base64_encoding_with_different_formats(image_format):
    """The encoder emits a data URI whose payload decodes to the chosen format."""
    source_image = Image.new("RGB", (100, 100))
    sut = RandomFormatBase64Encoder(image_format=image_format)

    encoded = sut(source_image)

    expected_prefix = f"data:image/{image_format.name.lower()};base64,"
    assert encoded.startswith(expected_prefix), "unexpected prefix"
    payload = encoded[len(expected_prefix) :]

    # The payload must be valid base64 ...
    raw_bytes = base64.b64decode(payload)
    # ... and those bytes must be readable as an image.
    decoded_image = Image.open(BytesIO(raw_bytes))

    assert decoded_image.format == image_format.name
Loading