Skip to content

Commit

Permalink
ARROW-10068: [C++] Add bundled external project for aws-sdk-cpp
Browse files Browse the repository at this point in the history
Closes apache#8304 from kszucs/awsforneal

Lead-authored-by: Neal Richardson <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Co-authored-by: Krisztián Szűcs <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
  • Loading branch information
3 people committed Oct 5, 2020
1 parent 72a0e96 commit 105873e
Show file tree
Hide file tree
Showing 33 changed files with 402 additions and 104 deletions.
29 changes: 19 additions & 10 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,23 +186,24 @@ jobs:
strategy:
fail-fast: false
env:
ARROW_BUILD_TESTS: ON
ARROW_DATASET: ON
ARROW_FLIGHT: ON
ARROW_GANDIVA: ON
ARROW_HDFS: ON
ARROW_HOME: /usr/local
ARROW_JEMALLOC: ON
# TODO(kszucs): link error in the tests
ARROW_DATASET: ON
ARROW_ORC: OFF
ARROW_FLIGHT: ON
ARROW_HDFS: ON
ARROW_PLASMA: ON
ARROW_GANDIVA: ON
ARROW_PARQUET: ON
ARROW_WITH_ZLIB: ON
ARROW_WITH_LZ4: ON
ARROW_PLASMA: ON
ARROW_S3: ON
ARROW_WITH_BROTLI: ON
ARROW_WITH_BZ2: ON
ARROW_WITH_ZSTD: ON
ARROW_WITH_LZ4: ON
ARROW_WITH_SNAPPY: ON
ARROW_WITH_BROTLI: ON
ARROW_BUILD_TESTS: ON
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
steps:
- name: Checkout Arrow
uses: actions/checkout@v2
Expand Down Expand Up @@ -371,6 +372,14 @@ jobs:
run: |
export CMAKE_BUILD_PARALLEL_LEVEL=$NUMBER_OF_PROCESSORS
ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build"
- name: Download MinIO
shell: msys2 {0}
run: |
mkdir -p /usr/local/bin
wget \
--output-document /usr/local/bin/minio.exe \
https://dl.min.io/server/minio/release/windows-amd64/minio.exe
chmod +x /usr/local/bin/minio.exe
- name: Test
shell: msys2 {0}
run: |
Expand Down
5 changes: 4 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,16 @@ jobs:
ARCH: s390x
ARROW_CI_MODULES: "CPP"
DOCKER_IMAGE_ID: ubuntu-cpp
# Can't use CMAKE_UNITY_BUILD=ON because of compiler crash
# Can't use CMAKE_UNITY_BUILD=ON because of compiler crash.
# Can't enable ARROW_S3 because compiler is killed while compiling
# aws-sdk-cpp.
DOCKER_RUN_ARGS: >-
"
-e ARROW_BUILD_STATIC=OFF
-e ARROW_FLIGHT=ON
-e ARROW_ORC=OFF
-e ARROW_PARQUET=OFF
-e ARROW_S3=OFF
-e PARQUET_BUILD_EXAMPLES=OFF
-e PARQUET_BUILD_EXECUTABLES=OFF
-e Protobuf_SOURCE=BUNDLED
Expand Down
10 changes: 9 additions & 1 deletion ci/docker/debian-10-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

ARG arch=amd64
FROM ${arch}/debian:10
ARG arch

ENV DEBIAN_FRONTEND noninteractive

Expand All @@ -26,7 +27,7 @@ RUN \

ARG llvm
RUN apt-get update -y -q && \
apt-get install -y -q --no-install-recommends \
apt-get install -y -q --no-install-recommends \
apt-transport-https \
ca-certificates \
gnupg \
Expand All @@ -49,6 +50,7 @@ RUN apt-get update -y -q && \
libbrotli-dev \
libbz2-dev \
libc-ares-dev \
libcurl4-openssl-dev \
libgflags-dev \
libgmock-dev \
libgoogle-glog-dev \
Expand All @@ -71,6 +73,10 @@ RUN apt-get update -y -q && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY ci/scripts/install_minio.sh \
/arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local

ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_DATASET=ON \
Expand All @@ -80,13 +86,15 @@ ENV ARROW_BUILD_TESTS=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_PLASMA=ON \
ARROW_S3=ON \
ARROW_USE_CCACHE=ON \
ARROW_WITH_BROTLI=ON \
ARROW_WITH_BZ2=ON \
ARROW_WITH_LZ4=ON \
ARROW_WITH_SNAPPY=ON \
ARROW_WITH_ZLIB=ON \
ARROW_WITH_ZSTD=ON \
AWSSDK_SOURCE=BUNDLED \
cares_SOURCE=BUNDLED \
CC=gcc \
CXX=g++ \
Expand Down
11 changes: 10 additions & 1 deletion ci/docker/fedora-32-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@

ARG arch
FROM ${arch}/fedora:32
ARG arch

# install dependencies
RUN dnf update -y && \
dnf install -y \
dnf install -y \
autoconf \
boost-devel \
brotli-devel \
Expand All @@ -29,6 +30,7 @@ RUN dnf update -y && \
ccache \
clang-devel \
cmake \
curl-devel \
flatbuffers-devel \
java-1.8.0-openjdk-devel \
java-1.8.0-openjdk-headless \
Expand All @@ -54,9 +56,14 @@ RUN dnf update -y && \
snappy-devel \
thrift-devel \
utf8proc-devel \
wget \
which \
zlib-devel

COPY ci/scripts/install_minio.sh \
/arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local

# * gRPC 1.26 in Fedora 32 may have a problem. arrow-flight-test is stuck.
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
Expand All @@ -67,13 +74,15 @@ ENV ARROW_BUILD_TESTS=ON \
ARROW_HOME=/usr/local \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
ARROW_USE_CCACHE=ON \
ARROW_WITH_BROTLI=ON \
ARROW_WITH_BZ2=ON \
ARROW_WITH_LZ4=ON \
ARROW_WITH_SNAPPY=ON \
ARROW_WITH_ZLIB=ON \
ARROW_WITH_ZSTD=ON \
AWSSDK_SOURCE=BUNDLED \
CC=gcc \
CXX=g++ \
gRPC_SOURCE=BUNDLED \
Expand Down
6 changes: 6 additions & 0 deletions ci/docker/linux-apt-r.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

ARG base
FROM ${base}
ARG arch

# Build R
# [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04
Expand Down Expand Up @@ -70,6 +71,10 @@ COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
COPY r/DESCRIPTION /arrow/r/
RUN /arrow/ci/scripts/r_deps.sh /arrow

COPY ci/scripts/install_minio.sh \
/arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local

# Set up Python 3 and its dependencies
RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
ln -s /usr/bin/pip3 /usr/local/bin/pip
Expand All @@ -89,6 +94,7 @@ ENV \
ARROW_PARQUET=ON \
ARROW_PLASMA=OFF \
ARROW_PYTHON=ON \
ARROW_S3=ON \
ARROW_USE_CCACHE=ON \
ARROW_USE_GLOG=OFF \
LC_ALL=en_US.UTF-8
4 changes: 4 additions & 0 deletions ci/docker/linux-r.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@ FROM ${base}
ARG r_bin=R
ENV R_BIN=${r_bin}

ARG r_dev=FALSE
ENV ARROW_R_DEV=${r_dev}

# Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile)
ENV PATH "${RPREFIX}/bin:${PATH}"

# Patch up some of the docker images
COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/
COPY ci/etc/rprofile /arrow/ci/etc/
COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/r_docker_configure.sh

COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
Expand Down
3 changes: 3 additions & 0 deletions ci/docker/ubuntu-18.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ RUN apt-get update -y -q && \
libboost-system-dev \
libbrotli-dev \
libbz2-dev \
libcurl4-openssl-dev \
libgflags-dev \
libgoogle-glog-dev \
liblz4-dev \
Expand All @@ -96,6 +97,7 @@ RUN apt-get update -y -q && \
# - libgtest-dev only provides sources
# - libprotobuf-dev only provides sources
# - thrift is too old
# - s3 tests would require boost-asio that is included since Boost 1.66.0
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_DATASET=ON \
Expand All @@ -117,6 +119,7 @@ ENV ARROW_BUILD_TESTS=ON \
ARROW_WITH_SNAPPY=ON \
ARROW_WITH_ZLIB=ON \
ARROW_WITH_ZSTD=ON \
AWSSDK_SOURCE=BUNDLED \
GTest_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PARQUET_BUILD_EXECUTABLES=ON \
Expand Down
11 changes: 10 additions & 1 deletion ci/docker/ubuntu-20.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

ARG base=amd64/ubuntu:20.04
FROM ${base}
ARG arch

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

Expand Down Expand Up @@ -57,6 +58,7 @@ RUN apt-get update -y -q && \
libbrotli-dev \
libbz2-dev \
libgflags-dev \
libcurl4-openssl-dev \
libgoogle-glog-dev \
liblz4-dev \
libprotobuf-dev \
Expand All @@ -72,10 +74,15 @@ RUN apt-get update -y -q && \
pkg-config \
protobuf-compiler \
rapidjson-dev \
tzdata && \
tzdata \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*

COPY ci/scripts/install_minio.sh \
/arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local

# Prioritize system packages and local installation
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
Expand All @@ -85,6 +92,7 @@ RUN apt-get update -y -q && \
# - libprotobuf-dev only provides sources
ENV ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
ARROW_S3=ON \
ARROW_DATASET=ON \
ARROW_FLIGHT=OFF \
ARROW_GANDIVA=ON \
Expand All @@ -104,6 +112,7 @@ ENV ARROW_BUILD_TESTS=ON \
ARROW_WITH_SNAPPY=ON \
ARROW_WITH_ZLIB=ON \
ARROW_WITH_ZSTD=ON \
AWSSDK_SOURCE=BUNDLED \
GTest_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PARQUET_BUILD_EXAMPLES=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \
-DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
-DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
-DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
-DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
-Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
-DBOOST_SOURCE=${BOOST_SOURCE:-} \
-DBrotli_SOURCE=${Brotli_SOURCE:-} \
Expand Down
12 changes: 0 additions & 12 deletions ci/scripts/cpp_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,6 @@ case "$(uname)" in
exclude_tests="${exclude_tests}|gandiva-literal-test"
exclude_tests="${exclude_tests}|gandiva-null-validity-test"
fi
# TODO: Enable this when we can use aws-sdk-cpp as a shared
# library. The current aws-sdk-cpp MSYS2 package provides only a
# static library. If we use aws-sdk-cpp as a static library, we
# can't use aws-sdk-cpp directly in
# cpp/src/arrow/filesystem/s3fs_test.cc, because aws-sdk-cpp uses
# static variables to keep process-wide objects. If we use aws-sdk-cpp
# as a static library, we have two copies of aws-sdk-cpp (in
# libarrow.dll and
# arrow-s3fs-test.exe). arrow::fs::EnsureS3Initialized() only
# initializes aws-sdk-cpp in libarrow.dll. It doesn't initialize
# aws-sdk-cpp in arrow-s3fs-test.exe.
exclude_tests="${exclude_tests}|arrow-s3fs-test"
ctest_options+=(--exclude-regex "${exclude_tests}")
;;
*)
Expand Down
14 changes: 8 additions & 6 deletions ci/scripts/install_minio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
set -e

declare -A archs
archs=([amd64]=amd64)
archs=([amd64]=amd64
[arm64v8]=arm64
[s390x]=s390x)

declare -A platforms
platforms=([macos]=darwin
[linux]=linux)
platforms=([linux]=linux
[macos]=darwin)

arch=${archs[$1]}
platform=${platforms[$2]}
Expand All @@ -34,16 +36,16 @@ prefix=$4
if [ "$#" -ne 4 ]; then
echo "Usage: $0 <architecture> <platform> <version> <prefix>"
exit 1
elif [[ -z ${archs[$1]} ]]; then
elif [[ -z ${arch} ]]; then
echo "Unexpected architecture: ${1}"
exit 1
elif [[ -z ${platforms[$2]} ]]; then
elif [[ -z ${platform} ]]; then
echo "Unexpected platform: ${2}"
exit 1
elif [[ ${version} != "latest" ]]; then
echo "Cannot fetch specific versions of minio, only latest is supported."
exit 1
fi

wget -nv -P ${prefix}/bin https://dl.min.io/server/minio/release/linux-${arch}/minio
wget -nv -P ${prefix}/bin https://dl.min.io/server/minio/release/${platform}-${arch}/minio
chmod +x ${prefix}/bin/minio
2 changes: 1 addition & 1 deletion ci/scripts/r_deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ source_dir=${1}/r
pushd ${source_dir}

# Install R package dependencies
${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck'))"
${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))"
${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"

popd
19 changes: 19 additions & 0 deletions ci/scripts/r_docker_configure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,25 @@ if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then
rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak
fi

# Install curl and openssl development packages for S3 support
if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
if [ "`which dnf`" ]; then
dnf install -y libcurl-devel openssl-devel
elif [ "`which yum`" ]; then
yum install -y libcurl-devel openssl-devel
elif [ "`which zypper`" ]; then
zypper install -y libcurl-devel libopenssl-devel
else
apt-get update
apt-get install -y libcurl4-openssl-dev libssl-dev
fi

# The Dockerfile should have put this file here
if [ -f "/arrow/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then
/arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local
fi
fi

# Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786
Rscript -e 'x <- file.path(R.home("doc"), "html"); if (!file.exists(x)) {dir.create(x, recursive=TRUE); file.copy(system.file("html/R.css", package="stats"), x)}'

Expand Down
Loading

0 comments on commit 105873e

Please sign in to comment.