Add core count to tpu nightly fix v5 job

The current job assumes a 4-core TPU. Modify the matrix so that the core count can be defined for each TPU type.
This commit is contained in:
Michael Hudgins 2024-05-15 13:46:40 -04:00 committed by GitHub
parent 4ccac4c6ce
commit 3015699966
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -25,12 +25,16 @@ jobs:
fail-fast: false # don't cancel all jobs on failure
matrix:
jaxlib-version: ["pypi_latest", "nightly", "nightly+oldest_supported_libtpu"]
tpu-type: ["v3-8", "v4-8", "v5e-4"]
name: "TPU test (jaxlib=${{ matrix.jaxlib-version }}, ${{ matrix.tpu-type }})"
tpu: [
{type: "v3-8", core: "4"},
{type: "v4-8", core: "4"},
{type: "v5e-8", core: "8"}
]
name: "TPU test (jaxlib=${{ matrix.jaxlib-version }}, ${{ matrix.tpu.type }})"
env:
LIBTPU_OLDEST_VERSION_DATE: 20240228
ENABLE_PJRT_COMPATIBILITY: ${{ matrix.jaxlib-version == 'nightly+oldest_supported_libtpu' }}
runs-on: ["self-hosted", "tpu", "${{ matrix.tpu-type }}"]
runs-on: ["self-hosted", "tpu", "${{ matrix.tpu.type }}"]
timeout-minutes: 120
defaults:
run:
@ -84,7 +88,7 @@ jobs:
PY_COLORS: 1
run: |
# Run single-accelerator tests in parallel
JAX_ENABLE_TPU_XDIST=true python3 -m pytest -n=4 --tb=short \
# Worker count (-n) comes from the matrix entry's "core" value for this TPU type
JAX_ENABLE_TPU_XDIST=true python3 -m pytest -n=${{ matrix.tpu.core }} --tb=short \
--maxfail=20 -m "not multiaccelerator" tests examples
# Run multi-accelerator across all chips
python3 -m pytest --tb=short --maxfail=20 -m "multiaccelerator" tests
@ -95,5 +99,5 @@ jobs:
curl --location --request POST '${{ secrets.BUILD_CHAT_WEBHOOK }}' \
--header 'Content-Type: application/json' \
--data-raw "{
'text': '\"$GITHUB_WORKFLOW\", jaxlib/libtpu version \"${{ matrix.jaxlib-version }}\", TPU type ${{ matrix.tpu-type }} job failed, timed out, or was cancelled: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID'
'text': '\"$GITHUB_WORKFLOW\", jaxlib/libtpu version \"${{ matrix.jaxlib-version }}\", TPU type ${{ matrix.tpu.type }} job failed, timed out, or was cancelled: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID'
}"