Skip to content

Commit

Permalink
fix: parametrize basic test, use rng for random datasets for independe…
Browse files Browse the repository at this point in the history
…nt results, merge previous deleted gpu test to complex test
  • Loading branch information
yue.jiao committed Jan 6, 2025
1 parent 822e614 commit c6bf0bd
Showing 1 changed file with 199 additions and 126 deletions.
325 changes: 199 additions & 126 deletions sklearnex/manifold/tests/test_tsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,136 +48,209 @@ def test_sklearnex_tsne_import(dataframe, queue):
assert tsne.n_components == 2, "TSNE 'n_components' attribute is incorrect."


# Label of the parametrized case that additionally asserts run-to-run determinism.
_REPRODUCIBILITY_CASE = "Test reproducibility"


@pytest.mark.parametrize(
    "description,X_generator,n_components,perplexity,expected_shape,should_raise",
    [
        (
            "Test basic functionality",
            lambda rng: np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]),
            2,
            2.0,
            (4, 2),
            False,
        ),
        (
            "Test with random data",
            lambda rng: rng.random((100, 10)),
            2,
            30.0,
            (100, 2),
            False,
        ),
        (
            _REPRODUCIBILITY_CASE,
            lambda rng: rng.random((50, 10)),
            2,
            5.0,
            (50, 2),
            False,
        ),
        (
            "Test large data",
            lambda rng: rng.random((1000, 50)),
            2,
            50.0,
            (1000, 2),
            False,
        ),
        (
            "Test valid minimal data",
            lambda rng: np.array([[0, 0], [1, 1], [2, 2]]),
            2,
            2.0,
            (3, 2),
            False,
        ),
        (
            "Edge case: constant data",
            lambda rng: np.ones((10, 10)),
            2,
            5.0,
            (10, 2),
            False,
        ),
        (
            "Edge case: empty data",
            lambda rng: np.empty((0, 10)),
            2,
            5.0,
            None,
            True,
        ),
        (
            "Edge case: data with NaN or infinite values",
            lambda rng: np.array([[0, 0], [1, np.nan], [2, np.inf]]),
            2,
            5.0,
            None,
            True,
        ),
        (
            "Edge Case: Sparse-Like High-Dimensional Data",
            lambda rng: rng.random((50, 500)) * (rng.random((50, 500)) > 0.99),
            2,
            30.0,
            (50, 2),
            False,
        ),
        (
            "Edge Case: Extremely Low Perplexity",
            lambda rng: rng.random((10, 5)),
            2,
            0.5,
            (10, 2),
            False,
        ),
    ],
)
@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_tsne_functionality_and_edge_cases(
    description,
    X_generator,
    n_components,
    perplexity,
    expected_shape,
    should_raise,
    dataframe,
    queue,
    dtype,
):
    """
    TSNE test covering multiple functionality and edge cases using parameterization.

    Parameters
    ----------
    description : str
        Human-readable case label, used in assertion messages.
    X_generator : callable
        Called with a seeded ``numpy.random.Generator``; returns the input array.
    n_components, perplexity
        Forwarded to the ``TSNE`` constructor.
    expected_shape : tuple or None
        Expected embedding shape; unused when ``should_raise`` is true.
    should_raise : bool
        When true, ``fit_transform`` is expected to raise ``ValueError``.
    dataframe, queue
        Target container and SYCL queue passed to ``_convert_to_dataframe``.
    dtype
        Floating-point type the generated data is cast to.
    """
    # A freshly seeded generator per invocation gives every case the same data
    # regardless of which other cases ran before it.
    rng = np.random.default_rng(seed=42)
    X = X_generator(rng)
    # Only non-empty arrays are cast; the empty case keeps the dtype it was built with.
    if X.size > 0:
        X = X.astype(dtype)
    X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)

    if should_raise:
        with pytest.raises(ValueError):
            TSNE(n_components=n_components, perplexity=perplexity).fit_transform(X_df)
        return

    tsne = TSNE(n_components=n_components, perplexity=perplexity, random_state=42)
    embedding = tsne.fit_transform(X_df)
    assert (
        embedding.shape == expected_shape
    ), f"{description}: Incorrect embedding shape."

    if description == _REPRODUCIBILITY_CASE:
        # A second estimator with the same seed must reproduce the first embedding;
        # a shape check alone would not detect non-deterministic output.
        repeated = TSNE(
            n_components=n_components, perplexity=perplexity, random_state=42
        ).fit_transform(X_df)
        assert_allclose(
            _as_numpy(embedding),
            _as_numpy(repeated),
            rtol=1e-5,
            err_msg=f"{description}: Embeddings differ between identically seeded runs.",
        )

@pytest.mark.parametrize(
    "description,X,n_components,perplexity,expected_shape,device_filter",
    [
        (
            "Specific complex dataset (CPU/GPU)",
            np.array(
                [
                    [0, 0, 0, 0],
                    [1, 1, 1, 1],
                    [-1e-9, 1e-9, -1e-9, 1e-9],
                    [-1e9, 1e9, -1e9, 1e9],
                    [1e-3, 1e3, -1e3, -1e-3],
                    [0, 1e9, -1e-9, 1],
                    [1, -1, 1, -1],
                    [42, 42, 42, 42],
                    [0, 0, 1, -1],
                    [-1e5, 0, 1e5, -1],
                    [2e9, 2e-9, -2e9, -2e-9],
                    [3, -3, 3e3, -3e-3],
                    [5e-5, 5e5, -5e-5, -5e5],
                    [1, 0, -1e8, 1e8],
                    [9e-7, -9e7, 9e-7, -9e7],
                    [4e-4, 4e4, -4e-4, -4e4],
                    [6e-6, -6e6, 6e6, -6e-6],
                    [8, -8, 8e8, -8e-8],
                ]
            ),
            2,
            5.0,
            (18, 2),
            "cpu,gpu",
        ),
        (
            "GPU validation dataset",
            np.array(
                [
                    [0, 0, 0, 0],
                    [1, 1, 1, 1],
                    [-1e9, 1e9, -1e9, 1e9],
                    [1e-3, 1e3, -1e3, -1e-3],
                    [1, -1, 1, -1],
                    [0, 1e9, -1e-9, 1],
                    [-7e11, 7e11, -7e-11, 7e-11],
                    [4e-4, 4e4, -4e-4, -4e4],
                    [6e-6, -6e6, 6e6, -6e-6],
                    [0, 0, 0, 0],
                    [1, 1, 1, 1],
                ]
            ),
            2,
            3.0,
            (11, 2),
            "gpu",
        ),
    ],
)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_tsne_complex_and_gpu_validation(
    description, X, n_components, perplexity, expected_shape, device_filter, dtype
):
    """
    TSNE test covering specific complex datasets and GPU validation using parameterization.

    Parameters
    ----------
    description : str
        Human-readable case label, used in assertion and skip messages.
    X : numpy.ndarray
        Fixed dataset mixing very large, very small and duplicated values.
    n_components, perplexity
        Forwarded to the ``TSNE`` constructor.
    expected_shape : tuple
        Expected shape of the embedding.
    device_filter : str
        Passed as ``device_filter_`` to ``get_dataframes_and_queues``; the value
        ``"gpu"`` also enables the finite / non-zero checks on the embedding.
    dtype
        Floating-point type the dataset is cast to before fitting.
    """
    targets = list(get_dataframes_and_queues(device_filter_=device_filter))
    if not targets:
        # Without this the loop below would run zero times and the test would
        # pass without exercising TSNE at all.
        pytest.skip(
            f"{description}: no dataframe/queue available for '{device_filter}'."
        )

    # Cast once; the parametrized array itself is left untouched.
    X_typed = X.astype(dtype)

    for param in targets:
        dataframe, queue = param.values
        X_df = _convert_to_dataframe(X_typed, sycl_queue=queue, target_df=dataframe)

        # Exceptions and assertion failures propagate unchanged so pytest reports
        # the original traceback instead of a flattened failure message.
        tsne = TSNE(n_components=n_components, perplexity=perplexity, random_state=42)
        embedding = tsne.fit_transform(X_df)

        assert (
            embedding.shape == expected_shape
        ), f"{description}: Incorrect embedding shape."

        if device_filter == "gpu":
            # Convert to NumPy first: the embedding may live in a non-NumPy container.
            embedding_np = _as_numpy(embedding)
            assert np.all(
                np.isfinite(embedding_np)
            ), f"{description}: Embedding contains NaN or infinite values."
            assert np.any(
                embedding_np != 0
            ), f"{description}: Embedding contains only zeros."

0 comments on commit c6bf0bd

Please sign in to comment.