diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index ffe8b388f93df..a686215b93dad 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -58,12 +58,5 @@ py -0p @REM Validate wheel contents %PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1 -@rem Download IANA Timezone Database for ORC C++ -curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B -mkdir %USERPROFILE%\Downloads\test\tzdata -arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata || exit /B -set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo -dir %TZDIR% - @REM Execute unittest %PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1 diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 63518beebae95..53b54bb494da6 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -55,6 +55,22 @@ set_timezone_db_path(tzdata_set_path) +# GH-45295: For ORC, try to populate TZDIR env var from tzdata package resource +# path. +# +# Note this is a different kind of database than what we allow to be set by +# `PYARROW_TZDATA_PATH` and passed to set_timezone_db_path. +if sys.platform == 'win32': + if os.environ.get('TZDIR', None) is None: + from importlib import resources + try: + os.environ['TZDIR'] = os.path.join(resources.files('tzdata'), 'zoneinfo') + except ModuleNotFoundError: + print( + 'Package "tzdata" not found. Not setting TZDIR environment variable.' 
+ ) + + def pytest_addoption(parser): # Create options to selectively enable test groups def bool_env(name, default=None): diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index 89780da10f7cf..5878d1f902627 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -230,6 +230,20 @@ def _break_traceback_cycle_from_frame(frame): refs = frame = this_frame = None +def _download_urllib(url, out_path): + from urllib.request import urlopen + with urlopen(url) as response: + with open(out_path, 'wb') as f: + f.write(response.read()) + + +def _download_requests(url, out_path): + import requests + with requests.get(url) as response: + with open(out_path, 'wb') as f: + f.write(response.content) + + def download_tzdata_on_windows(): r""" Download and extract latest IANA timezone database into the @@ -240,19 +254,23 @@ def download_tzdata_on_windows(): import tarfile + tzdata_url = "https://data.iana.org/time-zones/tzdata-latest.tar.gz" tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") - tzdata_compressed = os.path.join(tzdata_path, "tzdata.tar.gz") + tzdata_compressed_path = os.path.join(tzdata_path, "tzdata.tar.gz") + windows_zones_url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml" # noqa + windows_zones_path = os.path.join(tzdata_path, "windowsZones.xml") os.makedirs(tzdata_path, exist_ok=True) - from urllib.request import urlopen - with urlopen('https://data.iana.org/time-zones/tzdata-latest.tar.gz') as response: - with open(tzdata_compressed, 'wb') as f: - f.write(response.read()) - - assert os.path.exists(tzdata_compressed) + # Try to download the files with requests and then fall back to urllib. 
This + # works around possible issues in certain older environments (GH-45295) + try: + _download_requests(tzdata_url, tzdata_compressed_path) + _download_requests(windows_zones_url, windows_zones_path) + except ImportError: + _download_urllib(tzdata_url, tzdata_compressed_path) + _download_urllib(windows_zones_url, windows_zones_path) - tarfile.open(tzdata_compressed).extractall(tzdata_path) + assert os.path.exists(tzdata_compressed_path) + assert os.path.exists(windows_zones_path) - with urlopen('https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml') as response_zones: # noqa - with open(os.path.join(tzdata_path, "windowsZones.xml"), 'wb') as f: - f.write(response_zones.read()) + tarfile.open(tzdata_compressed_path).extractall(tzdata_path) diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt index c051efb0aaab1..ad81586d37efa 100644 --- a/python/requirements-wheel-test.txt +++ b/python/requirements-wheel-test.txt @@ -4,6 +4,7 @@ hypothesis pytest pytz pyuwsgi; sys.platform != 'win32' and python_version < '3.13' +requests; sys_platform == 'win32' tzdata; sys_platform == 'win32' # We generally test with the oldest numpy version that supports a given Python