mirror of
https://github.com/ROCm/jax.git
synced 2025-04-14 10:56:06 +00:00

This change also marks multiaccelerator test files in a way pytest can understand (if pytest is installed). By running single-device tests on a single TPU chip, the time to run the test suite drops from 1h 45m to 35m (both timings include slow tests). I tried using bazel at first, which already supported parallel execution across TPU cores, but somehow it still takes 2h 20m! I'm not sure why it's so slow. It appears that bazel creates many new test processes over time, whereas pytest reuses the number of processes initially specified; starting and stopping the TPU runtime takes a few seconds, so that may be adding up. It also appears that single-process bazel is slower than single-process pytest, which I haven't looked into yet.
58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
# Copyright 2021 The JAX Authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# https://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Tests for cross host device transfer."""
|
|
|
|
from absl.testing import absltest
|
|
import contextlib
|
|
import unittest
|
|
import numpy as np
|
|
|
|
import jax
|
|
from jax._src import test_util as jtu
|
|
|
|
from jax.config import config
|
|
|
|
# Runs at import time. Presumably lets JAX config options be supplied as absl
# command-line flags -- confirm against the jax.config documentation.
config.parse_flags_with_absl()
|
|
|
|
# pytest is optional: when it is importable, tag every test in this module
# with the `multiaccelerator` mark; otherwise leave the module unmarked.
try:
  import pytest
  pytestmark = pytest.mark.multiaccelerator
except ImportError:
  pass
|
|
|
|
|
|
class RemoteTransferTest(jtu.JaxTestCase):
  """Exercises the cross-host send/receive buffer API between two local devices."""

  # TODO(jheek): this test crashes on multi-GPU.
  @jtu.skip_on_devices("gpu")
  def test_remote_transfer(self):
    """Sends a buffer from one local device to another and compares the two.

    The test is skipped when fewer than two local devices are available, when
    `config.jax_array` is enabled (the buffer then has no `xla_shape` method),
    or when the first local device's client reports a libtpu platform version.

    Raises:
      unittest.SkipTest: if any of the skip conditions above holds.
    """
    local_devices = jax.local_devices()
    # Guard on the *local* device list, because that is what gets unpacked
    # below. `jax.device_count()` counts devices across all processes, so it
    # can be >= 2 while this process only has one addressable device, which
    # would turn the unpacking into a ValueError instead of a skip.
    if len(local_devices) < 2:
      raise unittest.SkipTest("Remote transfer requires at least 2 devices")
    if config.jax_array:
      raise unittest.SkipTest("Array does not have xla_shape method since "
                              "it is deprecated.")
    dev_a, dev_b = local_devices[:2]
    if "libtpu" in dev_a.client.platform_version:
      raise unittest.SkipTest("Test does not yet work on cloud TPU")

    send_buf = jax.device_put(np.ones((32,)), dev_a)
    shapes = [send_buf.xla_shape()]
    # One shape was requested, so exactly one (tag, buffer) pair is expected;
    # the trailing comma unpacks that single pair.
    (tag, recv_buf), = dev_b.client.make_cross_host_receive_buffers(
        shapes, dev_b)
    status, dispatched = send_buf.copy_to_remote_device(tag)
    self.assertIsNone(status)
    self.assertTrue(dispatched)
    self.assertArraysEqual(send_buf, recv_buf)
|
|
|
|
|
|
# Script entry point: run this module's tests through absl using the loader
# provided by jtu.
if __name__ == '__main__':
  loader = jtu.JaxTestLoader()
  absltest.main(testLoader=loader)
|