# Mirror of https://github.com/ROCm/jax.git
# Synced 2025-04-16 11:56:07 +00:00
# 96 lines, 3.5 KiB, YAML
# Cloud TPU CI (presubmit)
#
# This job currently runs as a non-blocking presubmit. It is experimental and is
# being tested to reach a stable state before we enable it as a blocking presubmit.
name: CI - Cloud TPU (presubmit)

# Events that start this workflow.
on:
  # Manual runs; the single input is forwarded to the "Wait For Connection"
  # step so a run can optionally pause for a remote connection.
  workflow_dispatch:
    inputs:
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: choice
        required: true
        default: 'no'
        options:
          - 'yes'
          - 'no'
  # Pull requests that target main.
  pull_request:
    branches:
      - main
  # Pushes to main and to any release branch.
  push:
    branches:
      - main
      - 'release/**'

# The project settings should already restrict the token to read-only; the
# permissions are repeated here so they are both documented and enforced.
permissions:
  contents: read

# One concurrency group per workflow and per PR branch / pushed ref:
# `github.head_ref` is used when set, otherwise the full `github.ref`.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  # Don't cancel in-progress jobs for main/release branches. `github.ref` is
  # the fully qualified ref (e.g. `refs/heads/main`), so it must be compared
  # against that form; comparing against the bare name `main` never matches
  # and would let pushes to main cancel each other.
  cancel-in-progress: ${{ !contains(github.ref, 'release/') && github.ref != 'refs/heads/main' }}

jobs:
  # Builds jaxlib at head against XLA at head, installs libtpu, and runs the
  # TPU pytest script on each TPU runner listed in the matrix.
  cloud-tpu-test:
    # Skip the job when the repository the event belongs to is a fork.
    if: github.event.repository.fork == false
    # Begin Presubmit Naming Check - name modification requires internal check to be updated
    strategy:
      fail-fast: false # don't cancel all jobs on failure
      matrix:
        tpu: [
          {type: "v5e-8", cores: "8", runner: "linux-x86-ct5lp-224-8tpu"}
        ]
        python-version: ["3.10"]
    name: "TPU test (jaxlib=head, ${{ matrix.tpu.type }})"
    # End Presubmit Naming Check github-tpu-presubmits
    env:
      # Interpreter name used by the steps below, e.g. `python3.10`.
      JAXCI_PYTHON: python${{ matrix.python-version }}
      # Core count of the selected matrix entry; presumably read by
      # ci/run_pytest_tpu.sh - confirm against that script.
      JAXCI_TPU_CORES: ${{ matrix.tpu.cores }}

    runs-on: ${{ matrix.tpu.runner }}
    # NOTE(review): `:latest` is a floating tag, so the build image can change
    # between runs; consider pinning a digest if reproducibility matters.
    container: "us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest"

    timeout-minutes: 60

    defaults:
      run:
        # -e: stop at the first failing command; -x: echo commands as they run.
        shell: bash -ex {0}
    steps:
      # https://opensource.google/documentation/reference/github/services#actions
      # mandates using a specific commit for non-Google actions. We use
      # https://github.com/sethvargo/ratchet to pin specific versions.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      # Checkout XLA at head, if we're building jaxlib at head.
      - name: Checkout XLA at head
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: openxla/xla
          path: xla
      # We need to mark the GitHub workspace as safe as otherwise git commands will fail.
      - name: Mark GitHub workspace as safe
        run: |
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
      - name: Install JAX test requirements
        run: |
          $JAXCI_PYTHON -m uv pip install -U -r build/test-requirements.txt -r build/collect-profile-requirements.txt
      - name: Build jaxlib at head with latest XLA
        run: |
          # Build and install jaxlib at head
          $JAXCI_PYTHON build/build.py build --wheels=jaxlib \
            --python_version=${{ matrix.python-version }} \
            --bazel_options=--config=rbe_linux_x86_64 \
            --local_xla_path="$(pwd)/xla" \
            --verbose

          # Install libtpu
          $JAXCI_PYTHON -m uv pip install --pre libtpu \
            -f https://storage.googleapis.com/jax-releases/libtpu_releases.html
      # Halt for testing
      # NOTE(review): this action is referenced by branch (`@main`) rather than
      # by commit; presumably exempt from the pinning rule above as a
      # Google-owned action - confirm.
      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}
      - name: Install jaxlib wheel and run tests
        run: ./ci/run_pytest_tpu.sh