Skip to content

Commit

Permalink
Add docker tests for fsspec
Browse files Browse the repository at this point in the history
  • Loading branch information
ssyssy committed Mar 15, 2024
1 parent 444f0e4 commit 12430af
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 2 deletions.
38 changes: 36 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
import time
from urllib.parse import urlparse

import fsspec
import pytest
import requests

from alluxiofs import AlluxioClient
from alluxiofs import AlluxioFileSystem

LOGGER = logging.getLogger("alluxio_test")
TEST_ROOT = os.getenv("TEST_ROOT", "file:///opt/alluxio/ufs/")
Expand Down Expand Up @@ -119,7 +121,7 @@ def stop_alluxio_dockers(with_etcd=False):
stop_docker(ETCD_CONTAINER)


@pytest.fixture(scope="module")
@pytest.fixture(scope="session")
def docker_alluxio():
if "ALLUXIO_URL" in os.environ:
# assume a server is already set up
Expand All @@ -130,7 +132,7 @@ def docker_alluxio():
stop_alluxio_dockers()


@pytest.fixture(scope="module")
@pytest.fixture(scope="session")
def docker_alluxio_with_etcd():
if "ALLUXIO_URL" in os.environ:
# assume a server is already set up
Expand Down Expand Up @@ -160,3 +162,35 @@ def etcd_alluxio_client(docker_alluxio_with_etcd):
host = parsed_url.hostname
etcd_alluxio_client = AlluxioClient(etcd_hosts=host)
yield etcd_alluxio_client


@pytest.fixture
def alluxio_file_system(docker_alluxio):
    """Yield an fsspec ``alluxio`` filesystem addressed by worker host.

    ``docker_alluxio`` is parsed as a URL; only its hostname is used, and it
    is handed to the filesystem as ``worker_hosts``.
    """
    LOGGER.debug(f"get AlluxioFileSystem connect to {docker_alluxio}")
    worker_host = urlparse(docker_alluxio).hostname
    # The fixture is function-scoped, so the protocol is registered once per
    # requesting test; clobber=True permits re-registering the same name.
    fsspec.register_implementation("alluxio", AlluxioFileSystem, clobber=True)
    options = {
        "worker_hosts": worker_host,
        "target_protocol": "file",
        "preload_path": ALLUXIO_FILE_PATH,
    }
    yield fsspec.filesystem("alluxio", **options)


@pytest.fixture
def etcd_alluxio_file_system(docker_alluxio_with_etcd):
    """Yield an fsspec ``alluxio`` filesystem addressed through etcd.

    ``docker_alluxio_with_etcd`` is parsed as a URL; only its hostname is
    used, and it is handed to the filesystem as ``etcd_hosts``.
    """
    LOGGER.debug(
        f"get etcd AlluxioFileSystem connect to {docker_alluxio_with_etcd}"
    )
    etcd_host = urlparse(docker_alluxio_with_etcd).hostname
    # The fixture is function-scoped, so the protocol is registered once per
    # requesting test; clobber=True permits re-registering the same name.
    fsspec.register_implementation("alluxio", AlluxioFileSystem, clobber=True)
    options = {
        "etcd_hosts": etcd_host,
        "target_protocol": "file",
        "preload_path": ALLUXIO_FILE_PATH,
    }
    yield fsspec.filesystem("alluxio", **options)
9 changes: 9 additions & 0 deletions tests/fs/test_docker_alluxio_fsspec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from tests.conftest import TEST_ROOT


def test_simple_fsspec(alluxio_file_system):
    """Smoke test: listing ``TEST_ROOT`` must complete without raising."""
    root = TEST_ROOT
    # The listing itself is discarded; only the absence of an error matters.
    alluxio_file_system.ls(root)


def test_simple_etcd_fsspec(etcd_alluxio_file_system):
    """Smoke test: listing ``TEST_ROOT`` via the etcd-backed fixture must not raise."""
    root = TEST_ROOT
    # The listing itself is discarded; only the absence of an error matters.
    etcd_alluxio_file_system.ls(root)
87 changes: 87 additions & 0 deletions tests/fs/test_docker_fsspec_cat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import os
import random

from alluxiofs import AlluxioFileSystem
from tests.conftest import ALLUXIO_FILE_PATH
from tests.conftest import LOCAL_FILE_PATH

import logging

# How many randomly chosen (offset, length) ranges the cat_file test checks.
NUM_TESTS = 10

# Logger named after this test module.
LOGGER = logging.getLogger(__name__)


def validate_read_range(
    alluxio_file_system: AlluxioFileSystem,
    alluxio_file_path,
    local_file_path,
    offset,
    length,
):
    """Check that a byte range read through Alluxio equals the local file's bytes.

    The Alluxio side is fetched with
    ``cat_file(alluxio_file_path, offset, offset + length)``; the local side
    by seeking to ``offset`` and reading ``length`` bytes.

    Args:
        alluxio_file_system: Filesystem object whose ``cat_file`` is called.
        alluxio_file_path: Path passed to ``cat_file``.
        local_file_path: Path of the local reference file, opened in binary mode.
        offset: Start position of the range in both files.
        length: Number of bytes requested. A negative value makes the local
            ``read`` return everything up to end of file.

    Raises:
        AssertionError: If the two byte strings differ. The message names
            both paths, the offset, the length and both payloads.
    """
    alluxio_data = alluxio_file_system.cat_file(
        alluxio_file_path, offset, offset + length
    )

    with open(local_file_path, "rb") as local_file:
        local_file.seek(offset)
        local_data = local_file.read(length)

    # The message expression is evaluated only when the comparison fails, so
    # the potentially large payloads are formatted only on a mismatch.
    assert alluxio_data == local_data, (
        "Data mismatch between Alluxio and local file\n"
        f"Alluxio file path: {alluxio_file_path}\n"
        f"Local file path: {local_file_path}\n"
        f"Offset: {offset}\n"
        f"Length: {length}\n"
        f"Alluxio data: {alluxio_data}\n"
        f"Local data: {local_data}"
    )


def test_alluxio_fsspec_cat_file(alluxio_file_system: AlluxioFileSystem):
    """Compare ``cat_file`` range reads against the local source file.

    Runs ``NUM_TESTS`` randomly chosen ranges, each capped at 13 KiB, then a
    fixed set of ranges at or past the end of the file.
    """
    total_bytes = os.path.getsize(LOCAL_FILE_PATH)

    # The path is listed before any read; the listing result is not used.
    alluxio_file_system.ls(ALLUXIO_FILE_PATH)

    def check(start, size):
        validate_read_range(
            alluxio_file_system,
            ALLUXIO_FILE_PATH,
            LOCAL_FILE_PATH,
            start,
            size,
        )

    # Random in-bounds ranges.
    read_cap = 13 * 1024
    for _ in range(NUM_TESTS):
        start = random.randint(0, total_bytes - 1)
        remaining = total_bytes - start
        size = min(random.randint(1, remaining), read_cap)
        check(start, size)

    LOGGER.debug(
        f"Data matches between Alluxio file and local source file for {NUM_TESTS} times"
    )

    # Boundary ranges: negative length, length running past EOF, and an
    # offset sitting exactly at EOF.
    boundary_cases = (
        (total_bytes - 1, -1),
        (total_bytes - 1, total_bytes + 1),
        (total_bytes, 100),
    )
    for start, size in boundary_cases:
        check(start, size)
    LOGGER.debug("Passed corner test cases")


def test_etcd_alluxio_fsspec_cat_file(
    etcd_alluxio_file_system: AlluxioFileSystem,
):
    """Run the same ``cat_file`` range checks against the etcd-backed filesystem."""
    # Reuses the worker-host test body, passing in the etcd-backed fixture.
    fs_under_test = etcd_alluxio_file_system
    test_alluxio_fsspec_cat_file(fs_under_test)

0 comments on commit 12430af

Please sign in to comment.