Skip to content

Enable AWSSDK on Linux by statically linking OpenSSL and cURL #421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 39 additions & 28 deletions .github/workflows/_build_test_upload.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
wheel_build_test:
needs: get_release_type
runs-on: ${{ matrix.os }}
# container: ${{ startsWith( matrix.os, 'ubuntu' ) && 'quay.io/pypa/manylinux2014_x86_64' || null }}
container: ${{ startsWith( matrix.os, 'ubuntu' ) && 'pytorch/manylinux-cpu' || null }}
strategy:
fail-fast: false
matrix:
Expand All @@ -69,7 +69,7 @@ jobs:
- "3.10"
steps:
- name: Setup Python ${{ matrix.python-version }}
# if: ${{ ! startsWith( matrix.os, 'ubuntu' ) }}
if: ${{ ! startsWith( matrix.os, 'ubuntu' ) }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
Expand All @@ -88,61 +88,72 @@ jobs:
arch: x64
- name: Install Build Dependency
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# yum -y install ninja-build
# yum -y install openssl-devel openssl-static curl-devel zlib-devel
# else
pip install cmake ninja
echo "/home/runner/.local/bin" >> $GITHUB_PATH
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
yum -y install ninja-build zlib-static
# Docker path is /__w by default
export WORKSPACE="/__w"
# Install static OpenSSL/libcrypto library
./packaging/manylinux/install_openssl_curl.sh
else
pip install cmake ninja
echo "/home/runner/.local/bin" >> $GITHUB_PATH
fi
- name: Install PyTorch and Build TorchData Wheel
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
PYTORCH_VERSION: ${{ inputs.pytorch_version }}
BUILD_S3: ${{ startsWith( matrix.os, 'ubuntu' ) && 'OFF' || 'ON' }}
BUILD_S3: 1
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# # See: https://github.com/actions/checkout/issues/760
# git config --global --add safe.directory /__w/data/data
# source packaging/manylinux_wheel_helper.sh
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
# See: https://github.com/actions/checkout/issues/760
git config --global --add safe.directory "$WORKSPACE/data/data"
# AWSSDK uses $CMAKE_PREFIX_PATH to find openssl
export OPENSSL_ROOT_DIR="/__w/ssl"
export CURL_ROOT_DIR="/__w/curl"
export CMAKE_PREFIX_PATH="$OPENSSL_ROOT_DIR:$CURL_ROOT_DIR:$CMAKE_PREFIX_PATH"
export STATIC_DEPS=TRUE
fi
packaging/build_wheel.sh
- name: Validate TorchData Wheel
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# source packaging/manylinux_wheel_helper.sh
# pip3 install auditwheel
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
pip3 install auditwheel
fi
pip3 install pkginfo
for pkg in dist/torchdata*.whl; do
echo "PkgInfo of $pkg:"
pkginfo $pkg
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# auditwheel show $pkg
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
auditwheel show $pkg
fi
done
- name: Install TorchData Wheel
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# source packaging/manylinux_wheel_helper.sh
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
fi
pip3 install dist/torchdata*.whl
- name: Run DataPipes Tests with pytest
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# source packaging/manylinux_wheel_helper.sh
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
fi
pip3 install -r test/requirements.txt
pytest --no-header -v test --ignore=test/test_period.py --ignore=test/test_text_examples.py --ignore=test/test_audio_examples.py
- name: Upload Wheels to Github
Expand Down
9 changes: 3 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ jobs:
- 3.7
- 3.8
- 3.9
with-s3:
- 1
- 0
steps:
- name: Setup additional system libraries
if: startsWith( matrix.os, 'ubuntu' )
Expand All @@ -44,10 +41,10 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Setup msbuild on Windows
if: matrix.with-s3 == 1 && matrix.os == 'windows-latest'
if: matrix.os == 'windows-latest'
uses: microsoft/[email protected]
- name: Set up Visual Studio shell
if: matrix.with-s3 == 1 && matrix.os == 'windows-latest'
if: matrix.os == 'windows-latest'
uses: egor-tensin/vs-shell@v2
with:
arch: x64
Expand All @@ -65,7 +62,7 @@ jobs:
run: |
python setup.py install
env:
BUILD_S3: ${{ matrix.with-s3 }}
BUILD_S3: 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any situation where we do not enable BUILD_S3?

It looks like only when the setup is Windows with conda, why is that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For binary releases, it's only because enabling AWS for conda on Windows produces too many errors that I don't know how to solve.

For CI testing, you can treat it as wheels, we could enable testing with awssdk.

Generally, the reason I think it makes sense to always enable AWSSDK in our testing CI is that >95% of the tests are the same with or without AWSSDK enabled.

- name: Install test requirements
run: pip3 install -r test/requirements.txt
- name: Run DataPipes tests with pytest
Expand Down
13 changes: 13 additions & 0 deletions packaging/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,16 @@ PYTHON_VERSION=3.9 PYTORCH_VERSION=1.11.0 packaging/build_wheel.sh
```bash
PYTHON_VERSION=3.9 packaging/build_wheel.sh
```
## [`AWSSDK`](https://github.com/aws/aws-sdk-cpp)

The following table shows which `torchdata` binaries ship with a pre-compiled `AWSSDK` extension on each operating system.

| `torchdata` | `Wheel` | `Conda` |
| ------------------ | ------------------ | ------------------ |
| Linux | :heavy_check_mark: | :heavy_check_mark: |
| Windows | :heavy_check_mark: | :x: |
| MacOS | :heavy_check_mark: | :heavy_check_mark: |

### Manylinux

`AWSSDK` requires OpenSSL and cURL. In order to provide `manylinux2014_x86_64` wheels with `AWSSDK` enabled, `torchdata` distributions are bundled with OpenSSL (1.1.1o) and cURL (7.83.1). If either of them is out of date, please open an issue to request an upgrade.
58 changes: 58 additions & 0 deletions packaging/manylinux/install_openssl_curl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Base URL of the OpenSSL source archive; the tarball name is appended to it
# when downloading.
OPENSSL_URL="https://www.openssl.org/source/"
# Release to build. Also the name of the directory the tarball unpacks into.
OPENSSL_NAME="openssl-1.1.1o"
# Expected SHA-256 of ${OPENSSL_NAME}.tar.gz, checked right after the download.
OPENSSL_SHA256="9384a2b0570dd80358841464677115df785edb941c71211f75076d72fe6b438f"
# Options handed to OpenSSL's ./config. The variable is expanded unquoted on
# purpose so that every word becomes a separate argument.
OPENSSL_BUILD_FLAGS="no-ssl2 no-zlib no-shared no-comp no-dynamic-engine enable-ec_nistp_64_gcc_128"

# Base URL of cURL's GitHub release downloads. The release tag in the download
# path is derived from CURL_NAME by replacing every "." with "_".
CURL_URL="https://github.com/curl/curl/releases/download"
# Release to build. Also the name of the directory the tarball unpacks into.
# NOTE(review): unlike OpenSSL, no checksum is pinned for the cURL tarball.
CURL_NAME="curl-7.83.1"
# Options handed to cURL's ./configure (expanded unquoted, see above).
CURL_BUILD_FLAGS="--disable-shared"

# Verify that a file matches an expected SHA-256 digest.
#
# Arguments:
#   $1 - path of the file to verify
#   $2 - expected SHA-256 digest as a lowercase hex string
#
# Prints sha256sum's "<file>: OK" line on success. On a mismatch (or if the
# file cannot be read) an error is written to stderr and the script exits with
# status 1, so an unverified tarball is never unpacked and built.
function check_sha256sum {
    local fname="$1"
    local sha256="$2"

    # Feed the checksum line to sha256sum on stdin instead of going through a
    # temporary "<file>.sha256". The old version finished with `rm`, whose
    # successful exit status hid a failed verification from the caller.
    # The two spaces between digest and file name are the standard text-mode
    # separator of the checksum file format.
    if ! printf '%s  %s\n' "${sha256}" "${fname}" | sha256sum -c -; then
        echo "ERROR: SHA-256 verification failed for ${fname}" >&2
        exit 1
    fi
}

# Abort on the first failing command. Without this, a failed download,
# checksum check, ./config or make was ignored and the script went on to
# "install" whatever was left behind.
set -e

# Every download location and install prefix below is derived from WORKSPACE.
# Refuse to run without it rather than silently installing into /ssl and /curl.
: "${WORKSPACE:?WORKSPACE must be set to the directory that will contain ssl/ and curl/}"

# Remove the distribution's OpenSSL/cURL development packages, presumably so
# that later builds cannot pick them up instead of the libraries built here.
# Best effort, as before: a failure of yum must not stop the installation.
yum erase -y openssl-devel curl-devel || true

pushd "${WORKSPACE}"

# OpenSSL
curl -fsSL -o "${OPENSSL_NAME}.tar.gz" "${OPENSSL_URL}/${OPENSSL_NAME}.tar.gz"
check_sha256sum "${OPENSSL_NAME}.tar.gz" "${OPENSSL_SHA256}"
tar zxf "${OPENSSL_NAME}.tar.gz"

pushd "${OPENSSL_NAME}"

# OPENSSL_BUILD_FLAGS is intentionally unquoted: it holds several
# space-separated options that must be passed as separate arguments.
./config $OPENSSL_BUILD_FLAGS --prefix="${WORKSPACE}/ssl" --openssldir="${WORKSPACE}/ssl"
make -j4 > /dev/null
# avoid installing the docs
# https://github.com/openssl/openssl/issues/6685#issuecomment-403838728
make install_sw > /dev/null

popd
rm -rf "${OPENSSL_NAME}" "${OPENSSL_NAME}.tar.gz"

# cURL
# The GitHub release tag uses underscores instead of dots (curl-7_83_1).
# NOTE(review): the cURL tarball is not checksum-verified; pin a SHA-256 and
# call check_sha256sum here once the expected digest has been confirmed.
curl -fsSL -o "${CURL_NAME}.tar.gz" "${CURL_URL}/${CURL_NAME//./_}/${CURL_NAME}.tar.gz"
tar zxf "${CURL_NAME}.tar.gz"

pushd "${CURL_NAME}"

# Link cURL against the OpenSSL that was just installed under ${WORKSPACE}/ssl.
# CURL_BUILD_FLAGS is intentionally unquoted (see OPENSSL_BUILD_FLAGS above).
./configure ${CURL_BUILD_FLAGS} --with-openssl="${WORKSPACE}/ssl" --prefix="${WORKSPACE}/curl"
make -j4 > /dev/null
make install > /dev/null

popd
rm -rf "${CURL_NAME}" "${CURL_NAME}.tar.gz"

popd
5 changes: 2 additions & 3 deletions packaging/torchdata/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ test:
# The following packages are not on the default conda channel
# - iopath
# - rarfile
# TODO: Re-enable it after #386 is landed
# commands:
# - pytest --no-header -v test --ignore=test/test_period.py --ignore=test/test_text_examples.py --ignore=test/test_audio_examples.py
commands:
- pytest --no-header -v test --ignore=test/test_period.py --ignore=test/test_text_examples.py --ignore=test/test_audio_examples.py

about:
home: https://github.com/pytorch/data
Expand Down
7 changes: 1 addition & 6 deletions third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,8 @@ else()
endif()
endforeach()

if(WIN32)
list(APPEND AWSSDK_LIBRARIES "Userenv;version;ws2_32;Bcrypt;Wininet;winhttp;Crypt32;Secur32;NCrypt;Shlwapi")
elseif(APPLE)
list(APPEND AWSSDK_LIBRARIES "pthread;curl")
else()
if(UNIX AND NOT APPLE)
list(APPEND AWSSDK_LIBRARIES "${aws_cpp_sdk_INSTALL}/lib/libs2n.a")
list(APPEND AWSSDK_LIBRARIES "pthread;crypto;ssl;z;curl")
endif()

include(ExternalProject)
Expand Down
24 changes: 22 additions & 2 deletions torchdata/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,32 @@ if(BUILD_S3)
# see https://github.com/actions/setup-python/issues/121#issuecomment-1014500503
set(Python_FIND_FRAMEWORK "LAST")

if (WIN32)
if(WIN32)
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Interpreter Development)
set(ADDITIONAL_ITEMS Python3::Python)
else()
find_package(Python3 COMPONENTS Interpreter Development)
endif()

# AWSSDK Dependencies
#
# Collects into AWSSDK_DEP_LIBRARIES the platform libraries that have to be
# linked in addition to ${AWSSDK_LIBRARIES}.
if(WIN32)
  set(AWSSDK_DEP_LIBRARIES "Userenv;version;ws2_32;Bcrypt;Wininet;winhttp;Crypt32;Secur32;NCrypt;Shlwapi")
elseif(APPLE)
  set(AWSSDK_DEP_LIBRARIES pthread curl)
elseif(UNIX)
  set(AWSSDK_DEP_LIBRARIES pthread)
  if(STATIC_DEPS)
    # Must be set before FindOpenSSL runs so that it searches for the static
    # libssl/libcrypto archives.
    set(OPENSSL_USE_STATIC_LIBS TRUE)
  endif()
  # Go through find_package() rather than include(FindXXX): find modules are
  # meant to be loaded that way, and REQUIRED makes a missing library stop the
  # configure step with a clear message instead of leaving an empty or
  # -NOTFOUND entry in the link line that only fails at link time.
  find_package(ZLIB REQUIRED)
  list(APPEND AWSSDK_DEP_LIBRARIES ${ZLIB_LIBRARIES})
  find_package(CURL REQUIRED)
  list(APPEND AWSSDK_DEP_LIBRARIES ${CURL_LIBRARIES})
  find_package(OpenSSL REQUIRED)
  list(APPEND AWSSDK_DEP_LIBRARIES ${OPENSSL_SSL_LIBRARIES} ${OPENSSL_CRYPTO_LIBRARIES})
endif()
message(STATUS "AWSSDK DEPENDENCIES AWSSDK_DEP_LIBRARIES: ${AWSSDK_DEP_LIBRARIES}")

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(
Expand All @@ -44,9 +63,10 @@ if(BUILD_S3)
target_link_libraries(
_torchdata
PRIVATE
${AWSSDK_LIBRARIES}
${Python_LIBRARIES}
${ADDITIONAL_ITEMS}
${AWSSDK_LIBRARIES}
${AWSSDK_DEP_LIBRARIES}
)

set_target_properties(_torchdata PROPERTIES PREFIX "")
Expand Down