1
0
mirror of https://github.com/ROCm/jax.git synced 2025-04-19 13:26:06 +00:00

[ROCm]: Dockerfile updates

This commit is contained in:
Rahul Batra 2023-12-15 21:13:20 +00:00
parent b512b576ae
commit b7a7f0bd80
5 changed files with 173 additions and 97 deletions

@ -1,107 +1,37 @@
################################################################################
FROM rocm/dev-ubuntu-20.04:5.4-complete as rt_build
MAINTAINER Rahul Batra<rahbatra@amd.com>
ARG BASE_DOCKER=ubuntu:20.04
FROM $BASE_DOCKER as rt_build
################################################################################
ARG ROCM_DEB_REPO=http://repo.radeon.com/rocm/apt/5.6/
ARG ROCM_BUILD_NAME=ubuntu
ARG ROCM_BUILD_NUM=main
ARG ROCM_PATH=/opt/rocm-5.6.0
ARG DEBIAN_FRONTEND=noninteractive
ARG PYTHON_VERSION=3.9.0
ENV HOME /root/
ENV ROCM_PATH=$ROCM_PATH
# Add target file to help determine which device(s) to build for
ARG GPU_DEVICE_TARGETS="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
RUN apt-get --allow-unauthenticated update && apt install -y wget software-properties-common
RUN apt-get clean all
RUN wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -;
RUN bin/bash -c 'if [[ $ROCM_DEB_REPO == http://repo.radeon.com/rocm/* ]] ; then \
echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list; \
else \
echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list ; \
fi'
RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \
software-properties-common \
clang-6.0 \
clang-format-6.0 \
curl \
g++-multilib \
git \
vim \
libnuma-dev \
virtualenv \
python3-pip \
pciutils \
python-is-python3 \
libffi-dev \
libssl-dev \
build-essential \
zlib1g-dev \
libbz2-dev \
libreadline-dev \
libsqlite3-dev curl \
libncursesw5-dev \
xz-utils \
tk-dev \
libxml2-dev \
libxmlsec1-dev \
libffi-dev \
liblzma-dev \
hipblaslt-dev \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Add to get ppa
RUN apt-get update
RUN apt-get install -y software-properties-common
# Install rocm pkgs
RUN apt-get update --allow-insecure-repositories && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
rocm-dev rocm-libs rccl && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install ROCM
ARG ROCM_VERSION=6.0.0
ARG CUSTOM_INSTALL
ARG ROCM_PATH=/opt/rocm-${ROCM_VERSION}
ENV ROCM_PATH=${ROCM_PATH}
COPY ${CUSTOM_INSTALL} /${CUSTOM_INSTALL}
COPY setup.rocm.sh /setup.rocm.sh
RUN /setup.rocm.sh $ROCM_VERSION
# Set up paths
ENV HCC_HOME=$ROCM_PATH/hcc
ENV HIP_PATH=$ROCM_PATH/hip
ENV HIP_PATH=$ROCM_PATH/
ENV OPENCL_ROOT=$ROCM_PATH/opencl
ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
ENV PATH="$ROCM_PATH/bin:${PATH}"
ENV PATH="$OPENCL_ROOT/bin:${PATH}"
# Add target file to help determine which device(s) to build for
RUN bash -c 'echo -e "gfx900\ngfx906\ngfx908\ngfx90a\ngfx1030" >> ${ROCM_PATH}/bin/target.lst'
# Need to explicitly create the $ROCM_PATH/.info/version file to work around what seems to be a bazel bug
# The env vars being set via --action_env in .bazelrc and .tf_configure.bazelrc files are sometimes
# not getting set in the build command being spawned by bazel (in theory this should not happen)
# As a consequence ROCM_PATH is sometimes not set for the hipcc commands.
# When hipcc invokes hcc, it specifies $ROCM_PATH/.../include dirs via the `-isystem` options
# If ROCM_PATH is not set, it defaults to /opt/rocm, and as a consequence a dependency is generated on the
# header files included within `/opt/rocm`, which then leads to bazel dependency errors
# Explicitly creating the $ROCM_PATH/.info/version allows ROCM path to be set correctly, even when ROCM_PATH
# is not explicitly set, and thus avoids the eventual bazel dependency error.
# The bazel bug needs to be root-caused and addressed, but that is out of our control and may take a long time
# to come to fruition, so implementing the workaround to make do till then
# Filed https://github.com/bazelbuild/bazel/issues/11163 for tracking this
RUN touch ${ROCM_PATH}/.info/version
ENV PATH="/root/bin:/root/.local/bin:$PATH"
# Install python3.9
# Install pyenv with different python versions
ARG PYTHON_VERSION=3.10.0
RUN git clone https://github.com/pyenv/pyenv.git /pyenv
ENV PYENV_ROOT /pyenv
ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
RUN pyenv install $PYTHON_VERSION
RUN eval "$(pyenv init -)" && pyenv local ${PYTHON_VERSION} && pip3 install --upgrade --force-reinstall setuptools pip && pip install numpy setuptools build wheel six auditwheel scipy pytest pytest-rerunfailures matplotlib absl-py
RUN eval "$(pyenv init -)" && pyenv local ${PYTHON_VERSION} && pip3 install --upgrade --force-reinstall setuptools pip && pip install numpy setuptools build wheel six auditwheel scipy pytest pytest-rerunfailures matplotlib absl-py flatbuffers hypothesis

@ -52,13 +52,15 @@ fi
#Export JAX_ROCM_VERSION so that it is appended to the wheel name
export JAXLIB_RELEASE=1
rocm_version=$(cat /opt/rocm/.info/version | cut -d "-" -f 1)
export JAX_ROCM_VERSION=${rocm_version//./}
#Build and install wheel
python3 ./build/build.py --enable_rocm --rocm_path=${ROCM_PATH} --bazel_options=--override_repository=xla=${XLA_CLONE_DIR}
JAX_RELEASE=1 python -m build
pip3 install --force-reinstall dist/*.whl # installs jaxlib (includes XLA)
pip3 install --force-reinstall . # installs jax
#This is for CI to read without having to start the container again
if [ -v CI_RUN ]; then

@ -26,7 +26,7 @@
#
# COMMAND: Command to be executed in the docker container
#
# ROCM_DEB_REPO_VERSION: ROCm debian repo version
# ROCM_VERSION: ROCm repo version
#
# ROCM_PATH: ROCM path in the docker container
#
@ -48,8 +48,13 @@ DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.ms"
DOCKER_CONTEXT_PATH="${SCRIPT_DIR}"
KEEP_IMAGE="--rm"
KEEP_CONTAINER="--rm"
ROCM_DEB_REPO_VERSION="5.6" #default for now is 5.6
ROCM_PATH="/opt/rocm-5.6.0"
PYTHON_VERSION="3.10.0"
ROCM_VERSION="6.0.0" #Point to latest release
BASE_DOCKER="ubuntu:20.04"
CUSTOM_INSTALL=""
#BASE_DOCKER="compute-artifactory.amd.com:5000/rocm-plus-docker/compute-rocm-rel-6.0:91-ubuntu-20.04-stg2"
#CUSTOM_INSTALL="custom_install_dummy.sh"
#ROCM_PATH="/opt/rocm-5.6.0"
POSITIONAL_ARGS=()
RUNTIME_FLAG=1
@ -77,14 +82,14 @@ while [[ $# -gt 0 ]]; do
KEEP_CONTAINER=""
shift 1
;;
--rocm_deb_repo_version)
ROCM_DEB_REPO_VERSION="$2"
shift 2
;;
--rocm_path)
ROCM_PATH="$2"
--rocm_version)
ROCM_VERSION="$2"
shift 2
;;
#--rocm_path)
# ROCM_PATH="$2"
# shift 2
# ;;
*)
POSITIONAL_ARGS+=("$1")
@ -133,12 +138,15 @@ echo "Python Version (${PYTHON_VERSION})"
if [[ "${RUNTIME_FLAG}" -eq 1 ]]; then
echo "Building (runtime) container (${DOCKER_IMG_NAME}) with Dockerfile($DOCKERFILE_PATH)..."
docker build --target rt_build --tag ${DOCKER_IMG_NAME} \
--build-arg PYTHON_VERSION=$PYTHON_VERSION --build-arg ROCM_DEB_REPO="http://repo.radeon.com/rocm/apt/"$ROCM_DEB_REPO_VERSION --build-arg ROCM_PATH=$ROCM_PATH\
--build-arg PYTHON_VERSION=$PYTHON_VERSION --build-arg ROCM_VERSION=$ROCM_VERSION \
--build-arg CUSTOM_INSTALL=$CUSTOM_INSTALL \
--build-arg BASE_DOCKER=$BASE_DOCKER \
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}"
else
echo "Building (CI) container (${DOCKER_IMG_NAME}) with Dockerfile($DOCKERFILE_PATH)..."
docker build --target ci_build --tag ${DOCKER_IMG_NAME} \
--build-arg PYTHON_VERSION=$PYTHON_VERSION \
--build-arg BASE_DOCKER=$BASE_DOCKER \
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}"
fi

@ -0,0 +1,36 @@
# Custom install script: installs only the base build tooling (no ROCm
# packages and no ROCm apt repository are added here).
# NOTE(review): presumably this is the script selected through CUSTOM_INSTALL
# for base images that already ship ROCm -- confirm against the caller.

# Make /tmp world-writable with the sticky bit set.
chmod 1777 /tmp

# Bootstrap the tools needed by the later steps. apt-get is used instead of
# apt because apt's command-line interface is not meant for scripts.
DEBIAN_FRONTEND=noninteractive apt-get --allow-unauthenticated update
DEBIAN_FRONTEND=noninteractive apt-get install -y wget software-properties-common
DEBIAN_FRONTEND=noninteractive apt-get clean all

# Install the build dependencies (each package listed once, sorted), then
# drop the apt caches and package lists in the same step.
apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    build-essential \
    clang-6.0 \
    clang-format-6.0 \
    curl \
    g++-multilib \
    git \
    libbz2-dev \
    libffi-dev \
    liblzma-dev \
    libncursesw5-dev \
    libnuma-dev \
    libreadline-dev \
    libsqlite3-dev \
    libssl-dev \
    libxml2-dev \
    libxmlsec1-dev \
    pciutils \
    python-is-python3 \
    python3-pip \
    software-properties-common \
    tk-dev \
    vim \
    virtualenv \
    wget \
    xz-utils \
    zlib1g-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

100
build/rocm/setup.rocm.sh Executable file

@ -0,0 +1,100 @@
#!/usr/bin/env bash
#==============================================================================
#
# setup.rocm.sh: Prepare the ROCm installation on the container.
# Usage: setup.rocm.sh <ROCM_VERSION>
#
# Environment variables read by this script:
#   ROCM_PATH           ROCm install prefix; defaults to /opt/rocm-<ROCM_VERSION>.
#   CUSTOM_INSTALL      Name of a script located at /<CUSTOM_INSTALL>. When that
#                       file exists it is run instead of the apt-based install.
#   GPU_DEVICE_TARGETS  Space-separated gfx targets appended to
#                       $ROCM_PATH/bin/target.lst; a default list is used when
#                       unset or empty.
#==============================================================================
set -x

# Add the ROCm package repo location
ROCM_VERSION=$1 # e.g. 5.7.0
ROCM_PATH=${ROCM_PATH:-/opt/rocm-${ROCM_VERSION}}
ROCM_DEB_REPO_HOME=https://repo.radeon.com/rocm/apt/
ROCM_BUILD_NAME=ubuntu
ROCM_BUILD_NUM=main

# Adjust the ROCm repo location.
# Initial releases don't have the trailing '.0' in the repo directory name.
# For example ROCm 5.7.0 is at https://repo.radeon.com/rocm/apt/5.7/
# Only a three-component version ending in ".0" is shortened, so a version
# already given as "6.0" is kept as-is instead of being cut down to "6".
case "${ROCM_VERSION}" in
    *.*.0) ROCM_VERS=${ROCM_VERSION%.0} ;;
    *)     ROCM_VERS=${ROCM_VERSION} ;;
esac
ROCM_DEB_REPO=${ROCM_DEB_REPO_HOME}${ROCM_VERS}/

# With CUSTOM_INSTALL unset or empty the path tested is "/", which is not a
# regular file, so the default apt-based install below is taken.
if [ ! -f "/${CUSTOM_INSTALL}" ]; then
    # Add rocm repository
    chmod 1777 /tmp
    DEBIAN_FRONTEND=noninteractive apt-get --allow-unauthenticated update
    # apt-get is used instead of apt: apt's CLI is not meant for scripts.
    DEBIAN_FRONTEND=noninteractive apt-get install -y wget software-properties-common
    DEBIAN_FRONTEND=noninteractive apt-get clean all
    wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -

    # Repos other than the official radeon.com one are not signed with the key
    # imported above, so they are marked as trusted.
    if [[ "$ROCM_DEB_REPO" == https://repo.radeon.com/rocm/* ]]; then
        echo "deb [arch=amd64] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list
    else
        echo "deb [arch=amd64 trusted=yes] $ROCM_DEB_REPO $ROCM_BUILD_NAME $ROCM_BUILD_NUM" > /etc/apt/sources.list.d/rocm.list
    fi

    # Install rocm and other packages (each package listed once; build
    # tooling first, ROCm packages second, both groups sorted). The apt
    # caches and package lists are removed in the same step.
    apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        clang-6.0 \
        clang-format-6.0 \
        curl \
        g++-multilib \
        git \
        libbz2-dev \
        libffi-dev \
        liblzma-dev \
        libncursesw5-dev \
        libnuma-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        libxml2-dev \
        libxmlsec1-dev \
        pciutils \
        python-is-python3 \
        python3-pip \
        software-properties-common \
        tk-dev \
        vim \
        virtualenv \
        wget \
        xz-utils \
        zlib1g-dev \
        hipblas-dev \
        hipcub-dev \
        hipfft-dev \
        hipsolver-dev \
        hipsparse-dev \
        miopen-hip \
        miopen-hip-dev \
        rccl-dev \
        rocblas \
        rocblas-dev \
        rocfft-dev \
        rocm-dev \
        rocm-libs \
        rocprim-dev \
        rocrand-dev \
        rocsolver-dev && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*
else
    bash "/${CUSTOM_INSTALL}"
fi

# Ensure the ROCm target list is set up
GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100"}

# Report the effective settings in the build log.
echo "$ROCM_VERSION"
echo "$ROCM_DEB_REPO"
echo "$ROCM_PATH"
echo "$GPU_DEVICE_TARGETS"

# GPU_DEVICE_TARGETS is deliberately left unquoted: word splitting turns the
# space-separated list into one target per line.
# shellcheck disable=SC2086
printf '%s\n' ${GPU_DEVICE_TARGETS} | tee -a "$ROCM_PATH/bin/target.lst"

# Make sure the version marker file exists under the ROCm prefix (touch does
# not modify the contents of an existing file).
touch "${ROCM_PATH}/.info/version"