Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-45295: [Python][CI] Make download_tzdata_on_windows more robust and use tzdata package for tzinfo database on Windows for ORC #45425

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
7 changes: 0 additions & 7 deletions ci/scripts/python_wheel_windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,5 @@ py -0p
@REM Validate wheel contents
%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1

@rem Download IANA Timezone Database for ORC C++
curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B
mkdir %USERPROFILE%\Downloads\test\tzdata
arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata || exit /B
set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo
dir %TZDIR%
amoeba marked this conversation as resolved.
Show resolved Hide resolved

@REM Execute unittest
%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1
10 changes: 10 additions & 0 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@
if tzdata_set_path:
set_timezone_db_path(tzdata_set_path)

# GH-45295: Try to populate TZDIR env var from tzdata package resource path
amoeba marked this conversation as resolved.
Show resolved Hide resolved
# Only fill in TZDIR when the caller has not already exported one.
if 'TZDIR' not in os.environ:
    from importlib import resources
    try:
        # resources.files() returns a Traversable, which is not guaranteed to
        # be os.PathLike; build the sub-path through the Traversable API and
        # convert it explicitly instead of passing it to os.path.join().
        # This lookup is the only call expected to raise ModuleNotFoundError,
        # so it is the only thing kept inside the try body.
        zoneinfo_dir = resources.files('tzdata').joinpath('zoneinfo')
    except ModuleNotFoundError:
        print(
            'Package "tzdata" not found. Not setting TZDIR environment variable.'
        )
    else:
        os.environ['TZDIR'] = str(zoneinfo_dir)


def pytest_addoption(parser):
# Create options to selectively enable test groups
Expand Down
40 changes: 29 additions & 11 deletions python/pyarrow/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,20 @@ def _break_traceback_cycle_from_frame(frame):
refs = frame = this_frame = None


def download_urllib(url, out_path, timeout=60):
    """
    Download ``url`` with the standard library and save it to ``out_path``.

    Parameters
    ----------
    url : str
        Location of the resource to fetch.
    out_path : str
        File the response body is written to (opened in binary mode and
        overwritten if it already exists).
    timeout : float, default 60
        Seconds to wait on blocking network operations before giving up,
        so that a stalled connection cannot hang the caller forever.

    Raises
    ------
    urllib.error.URLError
        If the resource cannot be retrieved (``HTTPError`` for HTTP error
        statuses is a subclass of it).
    """
    import shutil
    from urllib.request import urlopen

    with urlopen(url, timeout=timeout) as response:
        with open(out_path, 'wb') as f:
            # Stream in chunks rather than buffering the whole payload.
            shutil.copyfileobj(response, f)


def download_requests(url, out_path, timeout=60):
    """
    Download ``url`` with the ``requests`` package and save it to ``out_path``.

    Parameters
    ----------
    url : str
        Location of the resource to fetch.
    out_path : str
        File the response body is written to (opened in binary mode and
        overwritten if it already exists).
    timeout : float, default 60
        Seconds ``requests`` waits on the connection before giving up.
        Without it ``requests.get`` may block indefinitely.

    Raises
    ------
    ImportError
        If ``requests`` is not installed.
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    import requests

    with requests.get(url, timeout=timeout) as response:
        # Fail loudly on an HTTP error instead of saving the error page as if
        # it were the requested file; checked before the output file is
        # created so that no bogus file is left behind.
        response.raise_for_status()
        with open(out_path, 'wb') as f:
            f.write(response.content)


def download_tzdata_on_windows():
r"""
Download and extract latest IANA timezone database into the
Expand All @@ -240,19 +254,23 @@ def download_tzdata_on_windows():

import tarfile

tzdata_url = "https://data.iana.org/time-zones/tzdata-latest.tar.gz"
tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
tzdata_compressed = os.path.join(tzdata_path, "tzdata.tar.gz")
tzdata_compressed_path = os.path.join(tzdata_path, "tzdata.tar.gz")
windows_zones_url = "https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml" # noqa
windows_zones_path = os.path.join(tzdata_path, "windowsZones.xml")
os.makedirs(tzdata_path, exist_ok=True)

from urllib.request import urlopen
with urlopen('https://data.iana.org/time-zones/tzdata-latest.tar.gz') as response:
with open(tzdata_compressed, 'wb') as f:
f.write(response.read())

assert os.path.exists(tzdata_compressed)
# Try to download the files with requests and then fall back to urllib. This
# works around possible issues in certain older environments (GH-45295)
try:
download_requests(tzdata_url, tzdata_compressed_path)
download_requests(windows_zones_url, windows_zones_path)
except ImportError:
download_urllib(tzdata_url, tzdata_compressed_path)
download_urllib(windows_zones_url, windows_zones_path)

tarfile.open(tzdata_compressed).extractall(tzdata_path)
assert os.path.exists(tzdata_compressed_path)
assert os.path.exists(windows_zones_path)

with urlopen('https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml') as response_zones: # noqa
with open(os.path.join(tzdata_path, "windowsZones.xml"), 'wb') as f:
f.write(response_zones.read())
tarfile.open(tzdata_compressed_path).extractall(tzdata_path)
1 change: 1 addition & 0 deletions python/requirements-wheel-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ hypothesis
pytest
pytz
pyuwsgi; sys.platform != 'win32' and python_version < '3.13'
requests; sys_platform == 'win32'
tzdata; sys_platform == 'win32'

# We generally test with the oldest numpy version that supports a given Python
Expand Down
Loading