From 245913e964d41f9ab5ab0336252cc119fdcb610c Mon Sep 17 00:00:00 2001
From: ben0i0d
Date: Sat, 9 Nov 2024 07:25:03 +0800
Subject: [PATCH] add rocm

---
NOTE(review): this patch text was restored from a copy whose line breaks and
indentation had been collapsed; context-line whitespace is a best-effort
reconstruction and should be confirmed against the tree before applying. The
`index` blob ids are the original ones and no longer match the two amended
Dockerfiles.

 .gitea/workflows/Flow.yaml           | 21 +++++++++++++++++++--
 README.md                            | 16 ++++++++++++----
 README_CN.md                         | 14 +++++++++++---
 cuda/base/Dockerfile                 |  4 +++-
 rocm/.devcontainer/devcontainer.json | 19 +++++++++++++++++++
 rocm/Dockerfile                      | 31 +++++++++++++++++++++++++++++++
 6 files changed, 95 insertions(+), 10 deletions(-)
 create mode 100644 rocm/.devcontainer/devcontainer.json
 create mode 100644 rocm/Dockerfile

diff --git a/.gitea/workflows/Flow.yaml b/.gitea/workflows/Flow.yaml
index 89a3a24..c483fad 100644
--- a/.gitea/workflows/Flow.yaml
+++ b/.gitea/workflows/Flow.yaml
@@ -12,6 +12,8 @@ jobs:
         uses: https://eoelab.org:1027/actions/build-push-action@v6
         with:
           context: cuda/base
+          build-args: |
+            DEBIAN_MIRROR=mirrors.ustc.edu.cn
           tags: eoelab.org:1027/${{ gitea.repository }}:cuda_base
 
   CUDA_Runtime:
@@ -32,15 +34,30 @@ jobs:
     steps:
       - name: checkout code
         uses: https://eoelab.org:1027/actions/checkout@v4
-      - name: CUDA_Runtime build
+      - name: CUDA_Devel build
         uses: https://eoelab.org:1027/actions/build-push-action@v6
         with:
           context: cuda/devel
           tags: eoelab.org:1027/${{ gitea.repository }}:cuda_devel
 
+  ROCM:
+    runs-on: runner
+    steps:
+      - name: checkout code
+        uses: https://eoelab.org:1027/actions/checkout@v4
+      - name: ROCM build
+        uses: https://eoelab.org:1027/actions/build-push-action@v6
+        with:
+          context: rocm
+          build-args: |
+            UBUNTU_MIRROR=mirrors.ustc.edu.cn
+            http_proxy=http://192.168.2.2:7890
+            https_proxy=http://192.168.2.2:7890
+          tags: eoelab.org:1027/${{ gitea.repository }}:rocm
+
   Push:
     runs-on: runner
-    needs: [CUDA_Devel]
+    needs: [CUDA_Devel, ROCM]
     steps:
       - name: login gitea_registry
         uses: https://eoelab.org:1027/actions/login-action@v3
diff --git a/README.md b/README.md
index abcc2ed..f1121e0 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Denv
+# gpu
 English | [中文](README_CN.md)
 
 **THIS doc for non-CN USER**
@@ -7,17 +7,25 @@ English | [中文](README_CN.md)
 
 **Our Gitea: https://eoelab.org:1027/ben0i0d/gpu**
 
+## Prerequisites
+* ROCM : amdgpu-dkms -> Docker containers share the kernel with the host OS. Therefore, the ROCm kernel-mode driver (amdgpu-dkms) must be installed on the host.
+* CUDA : nvidia-container-toolkit/nvidia-docker2
+
 ## Upstream
 * NVIDIA: https://gitlab.com/nvidia/container-images/cuda
 * ROCM: https://github.com/ROCm/ROCm-docker
 
 ## Platform
-* OS : debian 12
+* OS : debian 12 / ubuntu 24.04
 * ARCH : x86_64
 
 ## Version
 * cuda: 12.4
 * rocm: 6.2
 
-**Mirror source**
-* apt ustc:https://mirrors.ustc.edu.cn/help/debian.html
\ No newline at end of file
+## Usage
+* ROCM `docker run --device /dev/kfd --device /dev/dri --security-opt seccomp=unconfined IMAGE`
+
+## Mirror source
+* debian ustc:https://mirrors.ustc.edu.cn/help/debian.html
+* ubuntu ustc:https://mirrors.ustc.edu.cn/help/ubuntu.html
\ No newline at end of file
diff --git a/README_CN.md b/README_CN.md
index 3a7554d..c58f453 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -1,4 +1,4 @@
-# nvidia-cuda-image
+# gpu
 English | [中文](README_CN.md)
 
 **为中国用户的文档**
@@ -7,19 +7,27 @@ English | [中文](README_CN.md)
 
 **Gitea: https://eoelab.org:1027/ben0i0d/gpu**
 
+## 先决条件
+* ROCM : amdgpu-dkms -> Docker 容器与主机操作系统共享内核。因此,主机上必须安装 ROCm 内核模式驱动程序 ( amdgpu-dkms )
+* CUDA : nvidia-container-toolkit/nvidia-docker2
+
 ## 上游
 * NVIDIA: https://gitlab.com/nvidia/container-images/cuda
 * ROCM: https://github.com/ROCm/ROCm-docker
 
 ## 目标平台
-* OS : debian 12(bookworm)
+* OS : debian 12 / ubuntu 24.04
 * ARCH : x86_64
 
 ## 版本
 * cuda: 12.4
 * rocm: 6.2
 
+## 使用方法
+* ROCM `docker run --device /dev/kfd --device /dev/dri --security-opt seccomp=unconfined IMAGE`
+
 ## 镜像源
-* apt ustc:https://mirrors.ustc.edu.cn/help/debian.html
+* debian ustc:https://mirrors.ustc.edu.cn/help/debian.html
+* ubuntu ustc:https://mirrors.ustc.edu.cn/help/ubuntu.html
 
 
diff --git a/cuda/base/Dockerfile b/cuda/base/Dockerfile
index 8a117da..cc81edc 100644
--- a/cuda/base/Dockerfile
+++ b/cuda/base/Dockerfile
@@ -1,5 +1,7 @@
 FROM debian:bookworm-slim
 
+ARG DEBIAN_MIRROR=deb.debian.org
+
 ARG NV_CUDA_CUDART_VERSION=12.4.127-1 \
     NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-4 \
     NVARCH=x86_64
@@ -10,7 +12,7 @@ ENV NVIDIA_VISIBLE_DEVICES=all \
     PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} \
     LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
 
-RUN sed -i 's/deb.debian.org/mirrors.ustc.edu.cn/g' /etc/apt/sources.list.d/debian.sources && \
+RUN sed -i "s/deb.debian.org/${DEBIAN_MIRROR}/g" /etc/apt/sources.list.d/debian.sources && \
     apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && \
     curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.1-1_all.deb && \
     dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb &&\
diff --git a/rocm/.devcontainer/devcontainer.json b/rocm/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..46d2e41
--- /dev/null
+++ b/rocm/.devcontainer/devcontainer.json
@@ -0,0 +1,19 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/debian
+{
+	"name": "rocm",
+	// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
+	"image": "eoelab.org:1027/ben0i0d/gpu:rocm",
+	"runArgs": ["--device","/dev/kfd", "--device","/dev/dri", "--security-opt", "seccomp=unconfined"]
+	// Features to add to the dev container. More info: https://containers.dev/features.
+	// "features": {},
+
+	// Use 'forwardPorts' to make a list of ports inside the container available locally.
+	// "forwardPorts": [],
+
+	// Configure tool-specific properties.
+	// "customizations": {},
+
+	// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
+	// "remoteUser": "root"
+}
\ No newline at end of file
diff --git a/rocm/Dockerfile b/rocm/Dockerfile
new file mode 100644
index 0000000..4e1046d
--- /dev/null
+++ b/rocm/Dockerfile
@@ -0,0 +1,31 @@
+FROM ubuntu:24.04
+
+# Host substituted for archive.ubuntu.com in the APT sources. The default is the
+# stock host, so a build without --build-arg UBUNTU_MIRROR still gets valid sources.
+ARG UBUNTU_MIRROR=archive.ubuntu.com
+
+# ROCm release; selects both the amdgpu and the rocm APT repository paths below.
+ARG VERSION=6.2.4
+
+ENV PATH="${PATH}:/opt/rocm/bin" \
+    DEBIAN_FRONTEND=noninteractive
+
+# switch to mirror
+RUN sed -i "s@//.*archive.ubuntu.com@//${UBUNTU_MIRROR}@g" /etc/apt/sources.list.d/ubuntu.sources && \
+    apt-get update && apt-get install -y --no-install-recommends ca-certificates wget gnupg2 && \
+    # add ROCm GPG key; fetch to a file first so a failed download aborts the build
+    # (in a pipeline /bin/sh only reports the exit status of the last command)
+    mkdir --parents --mode=0755 /etc/apt/keyrings && \
+    wget -nv https://repo.radeon.com/rocm/rocm.gpg.key -O /tmp/rocm.gpg.key && \
+    gpg --dearmor < /tmp/rocm.gpg.key > /etc/apt/keyrings/rocm.gpg && \
+    rm /tmp/rocm.gpg.key && \
+    # add ROCm repository
+    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/amdgpu/${VERSION}/ubuntu noble main" > /etc/apt/sources.list.d/amdgpu.list && \
+    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${VERSION} noble main" > /etc/apt/sources.list.d/rocm.list && \
+    printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' > /etc/apt/preferences.d/rocm-pin-600 && \
+    # Install ROCm-ml-libraries
+    apt-get update && apt-get install -y --no-install-recommends rocm-ml-libraries && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    # Configure the system linker by indicating where to find the shared objects, then rebuild its cache
+    printf '/opt/rocm/lib\n/opt/rocm/lib64\n' > /etc/ld.so.conf.d/rocm.conf && \
+    ldconfig