From 3175f13c5957317e15e1654bd48c1fd9957f0797 Mon Sep 17 00:00:00 2001 From: Mathew Odden Date: Thu, 25 Jul 2024 15:08:15 -0500 Subject: [PATCH] Add internal release support to get_rocm.py --- build/rocm/tools/get_rocm.py | 277 +++++++++++++++++++++++++++++++---- 1 file changed, 248 insertions(+), 29 deletions(-) diff --git a/build/rocm/tools/get_rocm.py b/build/rocm/tools/get_rocm.py index 4cc4a4682..60ecb7f01 100644 --- a/build/rocm/tools/get_rocm.py +++ b/build/rocm/tools/get_rocm.py @@ -7,14 +7,27 @@ import argparse +import json import logging +import os +import sys import subprocess +import urllib.request LOG = logging.getLogger(__name__) -def which_linux(): +def latest_rocm(): + dat = urllib.request.urlopen( + "https://api.github.com/repos/rocm/rocm/releases/latest" + ).read() + rd = json.loads(dat) + _, ver_str = rd["tag_name"].split("-") + return ver_str + + +def os_release_meta(): try: os_rel = open("/etc/os-release").read() @@ -25,33 +38,229 @@ def which_linux(): v = v.strip('"') kvs[k] = v - print(kvs) + return kvs except OSError: pass -rocm_package_names = [ - "libdrm-amdgpu", - "rocm-dev", - "rocm-ml-sdk", - "miopen-hip ", - "miopen-hip-devel", - "rocblas", - "rocblas-devel", - "rocsolver-devel", - "rocrand-devel", - "rocfft-devel", - "hipfft-devel", - "hipblas-devel", - "rocprim-devel", - "hipcub-devel", - "rccl-devel", - "hipsparse-devel", - "hipsolver-devel", -] +class System(object): + + def __init__(self, pkgbin, rocm_package_list): + self.pkgbin = pkgbin + self.rocm_package_list = rocm_package_list + + def install_packages(self, package_specs): + cmd = [ + self.pkgbin, + "install", + "-y", + ] + cmd.extend(package_specs) + + env = dict(os.environ) + if self.pkgbin == "apt": + env["DEBIAN_FRONTEND"] = "noninteractive" + + LOG.info("Running %r" % cmd) + subprocess.check_call(cmd, env=env) + + def install_rocm(self): + self.install_packages(self.rocm_package_list) -def install_rocm_el8(rocm_version_str): +UBUNTU = System( + pkgbin="apt", + rocm_package_list=[ + "rocm-dev", + "rocm-libs", + ], +) + + +RHEL8 = System( + pkgbin="dnf", + rocm_package_list=[ + "libdrm-amdgpu", + "rocm-dev", + "rocm-ml-sdk", + "miopen-hip ", + "miopen-hip-devel", + "rocblas", + "rocblas-devel", + "rocsolver-devel", + "rocrand-devel", + "rocfft-devel", + "hipfft-devel", + "hipblas-devel", + "rocprim-devel", + "hipcub-devel", + "rccl-devel", + "hipsparse-devel", + "hipsolver-devel", + ], +) + + +def get_system(): + md = os_release_meta() + + if md["ID"] == "ubuntu": + return UBUNTU + + if md["ID"] in ["almalinux", "rhel", "fedora", "centos"]: + if md["PLATFORM_ID"] == "platform:el8": + return RHEL8 + + raise Exception("No system for %r" % md) + + +def _setup_internal_repo(system, rocm_version, job_name, build_num): + # wget is required by amdgpu-repo + system.install_packages(["wget"]) + + install_amdgpu_installer_internal(rocm_version) + + amdgpu_build = ( + urllib.request.urlopen( + "http://rocm-ci.amd.com/job/%s/%s/artifact/amdgpu_kernel_info.txt" + % (job_name, build_num) + ) + .read() + .decode("utf8") + .strip() + ) + + cmd = [ + "amdgpu-repo", + "--amdgpu-build=%s" % amdgpu_build, + "--rocm-build=%s/%s" % (job_name, build_num), + ] + LOG.info("Running %r" % cmd) + subprocess.check_call(cmd) + + cmd = [ + "amdgpu-install", + "--no-dkms", + "--usecase=rocm", + "-y", + ] + + env = dict(os.environ) + if system.pkgbin == "apt": + env["DEBIAN_FRONTEND"] = "noninteractive" + + LOG.info("Running %r" % cmd) + subprocess.check_call(cmd, env=env) + + +def install_rocm(rocm_version, job_name=None, build_num=None): + s = get_system() + + if job_name and build_num: + _setup_internal_repo(s, rocm_version, job_name, build_num) + else: + if s == RHEL8: + setup_repos_el8(rocm_version) + elif s == UBUNTU: + setup_repos_ubuntu(rocm_version) + else: + raise Exception("Platform not supported") + + s.install_rocm() + + +def install_amdgpu_installer_internal(rocm_version): + """ + Download and install the "amdgpu-installer" package from internal builds + on the current system. + """ + md = os_release_meta() + url, fn = _build_installer_url(rocm_version, md) + + try: + # download installer + LOG.info("Downloading from %s", url) + urllib.request.urlretrieve(url, filename=fn) + + system = get_system() + + cmd = [system.pkgbin, "install", "-y", "./%s" % fn] + subprocess.check_call(cmd) + finally: + try: + os.remove(fn) + except FileNotFoundError: + pass + + +def _build_installer_url(rocm_version, metadata): + md = metadata + + if isinstance(rocm_version, str): + parts = rocm_version.split(".") + rv = type("Version", (), {})() + rv.major = parts[0] + rv.minor = parts[1] + + if len(parts) > 2: + rv.rev = parts[2] + else: + rv = rocm_version + + base_url = "http://artifactory-cdn.amd.com/artifactory/list" + + if md["ID"] == "ubuntu": + fmt = "amdgpu-install-internal_%(rocm_major)s.%(rocm_minor)s-%(os_version)s-1_all.deb" + package_name = fmt % { + "rocm_major": rv.major, + "rocm_minor": rv.minor, + "os_version": md["VERSION_ID"], + } + + url = "%s/amdgpu-deb/%s" % (base_url, package_name) + elif md.get("PLATFORM_ID") == "platform:el8": + fmt = "amdgpu-install-internal-%(rocm_major)s.%(rocm_minor)s_%(os_version)s-1.noarch.rpm" + package_name = fmt % { + "rocm_major": rv.major, + "rocm_minor": rv.minor, + "os_version": "8", + } + + url = "%s/amdgpu-rpm/rhel/%s" % (base_url, package_name) + else: + raise Exception("Platform not supported: %r" % md) + + return url, package_name + + +def setup_repos_ubuntu(rocm_version_str): + + s = get_system() + s.install_packages(["wget", "sudo", "gnupg"]) + + md = os_release_meta() + codename = md["VERSION_CODENAME"] + + keyadd = "wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -" + subprocess.check_call(keyadd, shell=True) + + with open("/etc/apt/sources.list.d/amdgpu.list", "w") as fd: + fd.write( + ("deb [arch=amd64] " "https://repo.radeon.com/amdgpu/%s/ubuntu %s main\n") + % (rocm_version_str, codename) + ) + + with open("/etc/apt/sources.list.d/rocm.list", "w") as fd: + fd.write( + ("deb [arch=amd64] " "https://repo.radeon.com/rocm/apt/%s %s main\n") + % (rocm_version_str, codename) + ) + + # update indexes + subprocess.check_call(["apt-get", "update"]) + + +def setup_repos_el8(rocm_version_str): with open("/etc/yum.repos.d/rocm.repo", "w") as rfd: rfd.write( @@ -78,21 +287,31 @@ gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key """ ) - cmd = ["dnf", "install", "-y"] - cmd.extend(rocm_package_names) - LOG.info("Running %r" % cmd) - subprocess.run(cmd, check=True) - def parse_args(): p = argparse.ArgumentParser() - p.add_argument("--rocm-version", help="ROCm version to install", default="6.1.1") + p.add_argument("--rocm-version", help="ROCm version to install", default="latest") + p.add_argument("--job-name", default=None) + p.add_argument("--build-num", default=None) return p.parse_args() def main(): args = parse_args() - install_rocm_el8(args.rocm_version) + if args.rocm_version == "latest": + try: + rocm_version = latest_rocm() + print("Latest ROCm release: %s" % rocm_version) + except Exception: + print( + "Latest ROCm lookup failed. Please use '--rocm-version' to specify a version instead.", + file=sys.stderr, + ) + sys.exit(-1) + else: + rocm_version = args.rocm_version + + install_rocm(rocm_version, job_name=args.job_name, build_num=args.build_num) if __name__ == "__main__":