Skip to content

Commit 8a7fd64

Browse files
rraminendnikolaev-amd
authored andcommitted
CONSOLIDATED COMMITS: CentOS stream9 PyTorch support
====================================================

[SOW MS3] CentOS stream9 PyTorch image support (#1090)

* changes to build CentOS stream 9 images
* Added scripts for centos and centos stream images
* Added an extra line
* Add ninja installation
* Optimized code
* Fixes
* Add comment
* Optimized code
* Added AMDGPU mapping for ROCm 5.2 and invalid-url for rocm_baseurl

Co-authored-by: Jithun Nair <[email protected]>

Updated to latest conda for CentOS stream 9

[CS9] Updates to CentOS stream 9 build (#1326)

- Add missing common_utils.sh
- Update the install vision part
- Move to amdgpu rhel 9.3 builds
- Update to pick python from conda path
- Add a missing package
- Add ROCM_PATH and magma
- Updated repo radeon path

(cherry picked from commit 51ce1cc)

[rocm6.4_internal_testing] Update missing changes for CentOS9 (#1813)

To fix https://ontrack-internal.amd.com/browse/SWDEV-505385 and https://ontrack-internal.amd.com/browse/SWDEV-507301

(cherry picked from commit 956c145)

delete .ci/docker/common/install_db.sh
1 parent 1f8eb33 commit 8a7fd64

File tree

6 files changed

+211
-29
lines changed

6 files changed

+211
-29
lines changed

.ci/docker/build.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,12 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
460460
fi
461461
fi
462462

463+
if [[ "$image" == *centos9* ]]; then
464+
DOCKERFILE_NAME="Dockerfile.centos.stream"
465+
else
466+
DOCKERFILE_NAME="Dockerfile"
467+
fi
468+
463469
no_cache_flag=""
464470
progress_flag=""
465471
# Do not use cache and progress=plain when in CI
@@ -509,7 +515,7 @@ docker build \
509515
--build-arg "ACL=${ACL:-}" \
510516
--build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
511517
--build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
512-
-f $(dirname ${DOCKERFILE})/Dockerfile \
518+
-f $(dirname ${DOCKERFILE})/${DOCKERFILE_NAME} \
513519
-t "$tmp_tag" \
514520
"$@" \
515521
.
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
ARG CENTOS_VERSION
2+
3+
FROM quay.io/centos/centos:stream${CENTOS_VERSION}
4+
5+
6+
# Set AMD gpu targets to build for
7+
ARG PYTORCH_ROCM_ARCH
8+
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
9+
10+
# Install required packages to build Caffe2
11+
12+
# Install common dependencies (so that this step can be cached separately)
13+
ARG EC2
14+
COPY ./common/install_base.sh install_base.sh
15+
RUN bash ./install_base.sh && rm install_base.sh
16+
17+
# Install the English glibc langpack (glibc-langpack-en)
18+
RUN yum install -y glibc-langpack-en
19+
20+
# Update CentOS git version
21+
RUN yum -y remove git
22+
RUN yum -y remove git-*
23+
RUN yum install -y git
24+
25+
# Install RPM development tools (rpmdevtools)
26+
RUN dnf install -y rpmdevtools
27+
ENV BASH_ENV "/etc/profile"
28+
29+
# Install ninja
30+
RUN dnf --enablerepo=crb install -y ninja-build
31+
32+
# (optional) Install non-default glibc version
33+
ARG GLIBC_VERSION
34+
COPY ./common/install_glibc.sh install_glibc.sh
35+
RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
36+
RUN rm install_glibc.sh
37+
38+
# Install user
39+
COPY ./common/install_user.sh install_user.sh
40+
RUN bash ./install_user.sh && rm install_user.sh
41+
42+
# Install conda and other packages (e.g., numpy, pytest)
43+
ARG ANACONDA_PYTHON_VERSION
44+
ARG CONDA_CMAKE
45+
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
46+
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
47+
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
48+
COPY ./common/install_conda.sh install_conda.sh
49+
COPY ./common/common_utils.sh common_utils.sh
50+
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
51+
52+
# (optional) Install protobuf for ONNX
53+
ARG PROTOBUF
54+
COPY ./common/install_protobuf.sh install_protobuf.sh
55+
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
56+
RUN rm install_protobuf.sh
57+
ENV INSTALLED_PROTOBUF ${PROTOBUF}
58+
59+
# (optional) Install database packages like LMDB and LevelDB
60+
ARG DB
61+
COPY ./common/install_db.sh install_db.sh
62+
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
63+
RUN rm install_db.sh
64+
ENV INSTALLED_DB ${DB}
65+
66+
# (optional) Install vision packages like OpenCV and ffmpeg
67+
ARG VISION
68+
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
69+
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
70+
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
71+
ENV INSTALLED_VISION ${VISION}
72+
73+
# Install rocm
74+
ARG ROCM_VERSION
75+
COPY ./common/install_rocm.sh install_rocm.sh
76+
RUN bash ./install_rocm.sh
77+
RUN rm install_rocm.sh
78+
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
79+
RUN bash ./install_rocm_magma.sh
80+
RUN rm install_rocm_magma.sh
81+
82+
ENV ROCM_PATH /opt/rocm
83+
ENV PATH /opt/rocm/bin:$PATH
84+
ENV PATH /opt/rocm/hcc/bin:$PATH
85+
ENV PATH /opt/rocm/hip/bin:$PATH
86+
ENV PATH /opt/rocm/opencl/bin:$PATH
87+
ENV PATH /opt/rocm/llvm/bin:$PATH
88+
ENV MAGMA_HOME /opt/rocm/magma
89+
ENV LANG en_US.utf8
90+
ENV LC_ALL en_US.utf8
91+
92+
# (optional) Install non-default CMake version
93+
ARG CMAKE_VERSION
94+
COPY ./common/install_cmake.sh install_cmake.sh
95+
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
96+
RUN rm install_cmake.sh
97+
98+
# (optional) Install non-default Ninja version
99+
ARG NINJA_VERSION
100+
COPY ./common/install_ninja.sh install_ninja.sh
101+
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
102+
RUN rm install_ninja.sh
103+
104+
ARG TRITON
105+
# Install triton, this needs to be done before sccache because the latter will
106+
# try to reach out to S3, which docker build runners don't have access
107+
ENV CMAKE_C_COMPILER cc
108+
ENV CMAKE_CXX_COMPILER c++
109+
COPY ./common/install_triton.sh install_triton.sh
110+
COPY ./common/common_utils.sh common_utils.sh
111+
COPY ci_commit_pins/triton.txt triton.txt
112+
COPY triton_version.txt triton_version.txt
113+
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
114+
# Clean up the Triton build inputs. The commit pin is copied in as triton.txt
# (not triton-rocm.txt), so remove that name; rm on a missing file fails the build.
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
115+
116+
# Install ccache/sccache (do this last, so we get priority in PATH)
117+
COPY ./common/install_cache.sh install_cache.sh
118+
ENV PATH /opt/cache/bin:$PATH
119+
RUN bash ./install_cache.sh && rm install_cache.sh
120+
121+
# Include BUILD_ENVIRONMENT environment variable in image
122+
ARG BUILD_ENVIRONMENT
123+
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
124+
125+
USER jenkins
126+
CMD ["bash"]

.ci/docker/common/install_base.sh

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,21 @@ install_ubuntu() {
9595
install_centos() {
9696
# Need EPEL for many packages we depend on.
9797
# See http://fedoraproject.org/wiki/EPEL
98-
yum --enablerepo=extras install -y epel-release
98+
# CentOS 9 has no extras repo; epel-release is installed from the default repos instead
99+
if [[ $OS_VERSION == 9 ]]; then
100+
yum install -y epel-release
101+
ALLOW_ERASE="--allowerasing"
102+
else
103+
yum --enablerepo=extras install -y epel-release
104+
ALLOW_ERASE=""
105+
fi
99106

100107
ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
101108
numpy_deps="gcc-gfortran"
102109
# Note: protobuf-c-{compiler,devel} on CentOS are too old to be used
103110
# for Caffe2. That said, we still install them to make sure the build
104111
# system opts to build/use protoc and libprotobuf from third-party.
105-
yum install -y \
112+
yum install -y $ALLOW_ERASE \
106113
$ccache_deps \
107114
$numpy_deps \
108115
autoconf \
@@ -119,24 +126,34 @@ install_centos() {
119126
glibc-headers \
120127
glog-devel \
121128
libstdc++-devel \
122-
libsndfile-devel \
123129
make \
124-
opencv-devel \
125130
sudo \
126131
wget \
127132
vim \
128133
unzip \
129134
gdb
130135

136+
if [[ $OS_VERSION == 9 ]]
137+
then
138+
dnf --enablerepo=crb -y install libsndfile-devel
139+
yum install -y procps
140+
else
141+
yum install -y \
142+
opencv-devel \
143+
libsndfile-devel
144+
fi
145+
131146
# Cleanup
132147
yum clean all
133148
rm -rf /var/cache/yum
134149
rm -rf /var/lib/yum/yumdb
135150
rm -rf /var/lib/yum/history
136151
}
137152

138-
# Install base packages depending on the base OS
139153
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
154+
OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
155+
156+
# Install base packages depending on the base OS
140157
case "$ID" in
141158
ubuntu)
142159
install_ubuntu

.ci/docker/common/install_conda.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,13 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
4545

4646
# Prevent conda from updating to 4.14.0, which causes docker build failures
4747
# See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
48-
# Uncomment the below when resolved to track the latest conda update
49-
# as_jenkins conda update -y -n base conda
48+
# The update stays disabled for other images until that is resolved,
49+
# but CentOS stream 9 builds require it, so it runs only for them.
50+
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
51+
OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
52+
if [[ $ID == centos && $OS_VERSION == 9 ]]; then
53+
as_jenkins conda update -y -n base conda
54+
fi
5055

5156
if [[ $(uname -m) == "aarch64" ]]; then
5257
export SYSROOT_DEP="sysroot_linux-aarch64=2.17"

.ci/docker/common/install_rocm.sh

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -84,36 +84,56 @@ install_centos() {
8484
yum update -y
8585
yum install -y kmod
8686
yum install -y wget
87-
yum install -y openblas-devel
87+
88+
if [[ $OS_VERSION == 9 ]]; then
89+
dnf install -y openblas-serial
90+
dnf install -y dkms kernel-headers kernel-devel
91+
else
92+
yum install -y openblas-devel
93+
yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`
94+
fi
8895

8996
yum install -y epel-release
90-
yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`
9197

92-
# Add amdgpu repository
93-
local amdgpu_baseurl
98+
if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
99+
# Add amdgpu repository
100+
local amdgpu_baseurl
101+
if [[ $OS_VERSION == 9 ]]; then
102+
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.1/main/x86_64"
103+
else
104+
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
105+
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
106+
else
107+
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
108+
fi
109+
fi
110+
echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
111+
echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
112+
echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
113+
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
114+
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
115+
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
116+
fi
117+
94118
if [[ $OS_VERSION == 9 ]]; then
95-
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.0/main/x86_64"
119+
local rocm_baseurl="invalid-url"
96120
else
97-
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
121+
local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}/main"
98122
fi
99-
echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
100-
echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
101-
echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
102-
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
103-
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
104-
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
105-
106-
local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
107123
echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
108124
echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
109125
echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
110126
echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
111127
echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
112128
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo
113129

114-
yum update -y
115-
116-
yum install -y \
130+
if [[ $OS_VERSION == 9 ]]; then
131+
yum update -y --nogpgcheck
132+
dnf --enablerepo=crb install -y perl-File-BaseDir python3-wheel
133+
yum install -y --nogpgcheck rocm-ml-sdk rocm-developer-tools
134+
else
135+
yum update -y
136+
yum install -y \
117137
rocm-dev \
118138
rocm-utils \
119139
rocm-libs \
@@ -144,6 +164,8 @@ install_centos() {
144164
rm -rf /var/lib/yum/history
145165
}
146166

167+
OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
168+
147169
# Install Python packages depending on the base OS
148170
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
149171
case "$ID" in

.ci/docker/common/install_vision.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@ install_ubuntu() {
1515
install_centos() {
1616
# Need EPEL for many packages we depend on.
1717
# See http://fedoraproject.org/wiki/EPEL
18-
yum --enablerepo=extras install -y epel-release
19-
20-
yum install -y \
21-
opencv-devel
18+
if [[ $OS_VERSION == 9 ]]; then
19+
yum install -y epel-release
20+
else
21+
yum --enablerepo=extras install -y epel-release
22+
yum install -y \
23+
opencv-devel \
24+
ffmpeg-devel
25+
fi
2226

2327
# Cleanup
2428
yum clean all
@@ -27,6 +31,8 @@ install_centos() {
2731
rm -rf /var/lib/yum/history
2832
}
2933

34+
OS_VERSION=$(grep -oP '(?<=^VERSION_ID=).+' /etc/os-release | tr -d '"')
35+
3036
# Install base packages depending on the base OS
3137
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
3238
case "$ID" in

0 commit comments

Comments
 (0)