Merge pull request #13613 from ROCmSoftwarePlatform:rocm_rt_build

PiperOrigin-RevId: 510440289
This commit is contained in:
jax authors 2023-02-17 08:40:28 -08:00
commit edff87eb07
4 changed files with 114 additions and 21 deletions

80
build/rocm/Dockerfile.ms Normal file
View File

@ -0,0 +1,80 @@
################################################################################
# Stage rt_build: starts from the ROCm development image; the instructions that
# follow add the build tools, Python and the JAX sources on top of it.
FROM rocm/dev-ubuntu-20.04:5.4-complete AS rt_build
# MAINTAINER is deprecated; the same contact is recorded as a label instead.
LABEL maintainer="Rahul Batra<rahbatra@amd.com>"
################################################################################
# ROCm install prefix. Overridable with --build-arg; re-exported through ENV
# below so that it is also set in containers started from the image.
ARG ROCM_PATH=/opt/rocm-5.4.0
# Build-time only: keeps package installation from prompting during the build.
ARG DEBIAN_FRONTEND=noninteractive
ENV HOME=/root/
ENV ROCM_PATH=$ROCM_PATH
# Install the OS-level build and utility packages (listed alphabetically), then
# drop the apt caches in the same layer so they are not stored in the image.
RUN apt-get update --allow-insecure-repositories \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        clang-6.0 \
        clang-format-6.0 \
        curl \
        g++-multilib \
        git \
        libnuma-dev \
        pciutils \
        python3-pip \
        software-properties-common \
        vim \
        virtualenv \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# ROCm component locations, all derived from ROCM_PATH.
ENV HCC_HOME=$ROCM_PATH/hcc \
    HIP_PATH=$ROCM_PATH/hip \
    OPENCL_ROOT=$ROCM_PATH/opencl
# Prepend the ROCm tool directories to the inherited PATH. Lookup order:
# OpenCL, ROCm, HCC, HIP, then whatever PATH held before.
ENV PATH="$OPENCL_ROOT/bin:$ROCM_PATH/bin:$HCC_HOME/bin:$HIP_PATH/bin:${PATH}"
# Append the GPU architectures to build for, one per line, to the target file
# that helps determine which device(s) to build for.
RUN printf '%s\n' gfx900 gfx906 gfx908 gfx90a gfx1030 >> "${ROCM_PATH}/bin/target.lst"
# Workaround for what seems to be a bazel bug, tracked at
# https://github.com/bazelbuild/bazel/issues/11163 :
#   * Env vars set via --action_env in the .bazelrc and .tf_configure.bazelrc
#     files are sometimes not set in the build commands spawned by bazel (in
#     theory this should not happen), so ROCM_PATH is sometimes unset for the
#     hipcc commands.
#   * When hipcc invokes hcc, it passes $ROCM_PATH/.../include dirs via the
#     `-isystem` options. With ROCM_PATH unset it defaults to /opt/rocm, which
#     creates a dependency on the header files under `/opt/rocm` and then leads
#     to bazel dependency errors.
#   * Explicitly creating $ROCM_PATH/.info/version allows the ROCm path to be
#     set correctly even when ROCM_PATH is not explicitly set, and so avoids
#     the eventual bazel dependency error.
# The bazel bug needs to be root-caused and fixed upstream, which is out of our
# control and may take a long time, so this workaround stays until then.
RUN touch "${ROCM_PATH}/.info/version"
# Put the per-user bin directories under /root ahead of the existing PATH.
ENV PATH="/root/bin:/root/.local/bin:$PATH"
# Install python3.9 from the deadsnakes PPA.
# apt-get is used rather than apt, whose command-line interface is not meant
# for scripted use. The PPA is added with an explicit -y, and the package lists
# are removed in the same layer so they do not end up in the image.
RUN add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y \
        python-is-python3 \
        python3-pip \
        python3.9-dev \
        python3.9-distutils \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Register python3.9 as an alternative for /usr/bin/python3.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
# Refresh pip and setuptools first, then install the Python packages.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir --upgrade --force-reinstall setuptools pip
RUN pip3 install --no-cache-dir absl-py numpy==1.20.0 scipy wheel six setuptools pytest pytest-rerunfailures matplotlib
# Get jax so it can be built with ROCm.
# The destination is spelled out because the ci_build stage uses WORKDIR /jax;
# without it the checkout location would depend on the working directory
# inherited from the base image.
RUN git clone https://github.com/google/jax.git /jax
################################################################################
# Stage ci_build: extends rt_build and runs the ROCm build and test scripts
# from the JAX checkout while the image is being built.
FROM rt_build AS ci_build
################################################################################
# All scripts below are invoked relative to the JAX checkout.
WORKDIR /jax
# Each script is its own RUN so each gets its own cached layer.
RUN ./build/rocm/build_rocm.sh
RUN ./build/rocm/run_single_gpu.py
RUN ./build/rocm/run_multi_gpu.sh

View File

@ -1,23 +1,23 @@
# JAX Builds on ROCm
This directory contains files and setup instructions t0 build and test JAX for ROCm in Docker environment. You can build, test and run JAX on ROCm yourself!
This directory contains files and setup instructions to build and test JAX for ROCm in a Docker environment (runtime and CI). You can build, test and run JAX on ROCm yourself!
***
### Build JAX-ROCm in docker
### Build JAX-ROCm in docker for the runtime
1. Install Docker: Follow the [instructions on the docker website](https://docs.docker.com/engine/installation/).
2. Build JAX by running the following command from JAX root folder.
2. Build a runtime JAX-ROCm docker container and keep this image by running the following command.
./build/rocm/ci_build.sh --keep_image bash -c "./build/rocm/build_rocm.sh"
./build/rocm/ci_build.sh --keep_image --runtime bash -c "./build/rocm/build_rocm.sh"
3. Launch a container: If the build was successful, there should be a docker image with name "jax_ci.rocm" in list of docker images (use "docker images" command to list them).
```
sudo docker run -it --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --entrypoint /bin/bash jax_ci.rocm:latest
```
3. To launch a JAX-ROCm container: If the build was successful, there should be a docker image with the name "jax-rocm:latest" in the list of docker images (use the "docker images" command to list them).
```
sudo docker run -it --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --entrypoint /bin/bash jax-rocm:latest
```
***
### Build and Test JAX-ROCm in docker (suitable for CI jobs)
### Build and Test JAX-ROCm in docker for CI jobs
This folder has all the scripts necessary to build and run tests for JAX-ROCm.
The following command will build JAX on ROCm and run all the tests inside docker (script should be called from JAX root folder).
```
./build/rocm/ci_build.sh bash -c "./build/rocm/build_rocm.sh&&./build/rocm/run_single_gpu.py&&build/rocm/run_multi_gpu.sh"
./build/rocm/ci_build.sh
```

View File

@ -29,6 +29,7 @@ then
cd -
fi
python3 ./build/build.py --enable_rocm --rocm_path=${ROCM_PATH} --bazel_options=--override_repository=org_tensorflow=/tmp/tensorflow-upstream
pip3 install --force-reinstall dist/*.whl # installs jaxlib (includes XLA)
pip3 install --force-reinstall . # installs jax

View File

@ -30,11 +30,13 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/build_common.sh"
CONTAINER_TYPE="rocm"
DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.rocm"
DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile.ms"
DOCKER_CONTEXT_PATH="${SCRIPT_DIR}"
KEEP_IMAGE="--rm"
POSITIONAL_ARGS=()
RUNTIME_FLAG=0
while [[ $# -gt 0 ]]; do
case $1 in
--dockerfile)
@ -46,6 +48,10 @@ while [[ $# -gt 0 ]]; do
KEEP_IMAGE=""
shift 1
;;
--runtime)
RUNTIME_FLAG=1
shift 1
;;
*)
POSITIONAL_ARGS+=("$1")
shift
@ -67,13 +73,12 @@ function upsearch (){
cd .. && upsearch "$1"
}
# Set up WORKSPACE and BUILD_TAG. Jenkins will set them for you or we pick
# reasonable defaults if you run it outside of Jenkins.
# Set up WORKSPACE.
WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}"
BUILD_TAG="${BUILD_TAG:-jax_ci}"
BUILD_TAG="${BUILD_TAG:-jax}"
# Determine the docker image name
DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}"
# Determine the docker image name and BUILD_TAG.
DOCKER_IMG_NAME="${BUILD_TAG}_${CONTAINER_TYPE}"
# Under Jenkins matrix build, the build tag may contain characters such as
# commas (,) and equal signs (=), which are not valid inside docker image names.
@ -89,9 +94,15 @@ echo "BUILD_TAG: ${BUILD_TAG}"
echo " (docker container name will be ${DOCKER_IMG_NAME})"
echo ""
echo "Building container (${DOCKER_IMG_NAME})..."
docker build -t ${DOCKER_IMG_NAME} \
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}"
if [[ "${RUNTIME_FLAG}" -eq 1 ]]; then
echo "Building (runtime) container (${DOCKER_IMG_NAME}) with Dockerfile($DOCKERFILE_PATH)..."
docker build --target rt_build --tag ${DOCKER_IMG_NAME} \
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}"
else
echo "Building (CI) container (${DOCKER_IMG_NAME}) with Dockerfile($DOCKERFILE_PATH)..."
docker build --target ci_build --tag ${DOCKER_IMG_NAME} \
-f "${DOCKERFILE_PATH}" "${DOCKER_CONTEXT_PATH}"
fi
# Check docker build status
if [[ $? != "0" ]]; then
@ -115,7 +126,8 @@ if [[ "${KEEP_IMAGE}" != "--rm" ]] && [[ $? == "0" ]]; then
echo "Committing the docker container as jax-rocm"
docker stop ${DOCKER_IMG_NAME}
docker commit ${DOCKER_IMG_NAME} jax-rocm
docker rm ${DOCKER_IMG_NAME}
docker rm ${DOCKER_IMG_NAME} # remove this temp container
docker rmi ${DOCKER_IMG_NAME} # remove this temp image
fi
echo "ROCm build was successful!"
echo "Jax-ROCm build was successful!"