rocm_jax/tests/transfer_guard_test.py
Hyeontaek Lim beaa00c460 Implement the JAX transfer guard API
Adds `--jax_transfer_guard` flag and `jax.transfer_guard()` context manager that allows logging or disallowing unintended transfers.

The API distinguishes between two types of transfers:
* explicit transfers: `jax.device_put*()` and `jax.device_get()` calls.
* implicit transfers: Other transfers (e.g., printing a `DeviceArray`).

The transfer guard can take an action based on its guard level:

* "allow": Silently allow all transfers (default; same as the previous behavior).
* "log": Log and allow implicit transfers. Silently allow explicit transfers.
* "disallow": Disallow implicit transfers. Silently allow explicit transfers.
* "log_explicit": Log and allow all transfers.
* "disallow_explicit": Disallow all transfers.

The API also allows fine-grained control of the transfer guard level for individual transfer directions. The corresponding flag and context manager names are suffixed with the transfer direction:

* "host_to_device": Converting a Python value into a `DeviceBuffer`.
* "device_to_device": Copying a `DeviceBuffer` to a different device.
* "device_to_host": Fetching the value of a `DeviceBuffer`.

Example:
```
x = jnp.array(1)
y = jnp.array(2)
z = jnp.array(3)

print(x)  # No error
with jax.transfer_guard("disallow"):
  print(x)  # No error; x is already fetched
  print(jax.device_get(y))  # No error
  print(z)  # Error!
```

PiperOrigin-RevId: 428590081
2022-02-14 13:11:49 -08:00

250 lines
8.7 KiB
Python

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for transfer guards."""
import contextlib
import pickle
from absl.testing import absltest
from absl.testing import parameterized
import numpy as np
import jax
import jax._src.test_util as jtu
import jax.numpy as jnp
from jax.config import config
config.parse_flags_with_absl()
def _host_to_device_funcs():
  """Generates host-to-device transfer functions.

  Returns:
    A list of `(function name, is an explicit transfer?, function)` tuples.
    Each function takes no arguments; the function name is only used to label
    failures in the assertion helpers.
  """
  return [
      # (function name, is an explicit transfer?, function)
      ("host_to_device_jax_device_put", True,
       lambda: jax.device_put(np.ones(10))),
      ("host_to_device_jax_jit", False,
       lambda: jax.jit(lambda x: x)(np.ones(1))),
      # The label is spelled like the `jnp.ones(1)` call it wraps, which is
      # also the name the non-parameterized tests use for this transfer.
      ("host_to_device_jnp_ones", False, lambda: jnp.ones(1)),
  ]
def _device_to_device_funcs():
  """Builds the functions that move an array from one device to another.

  Returns:
    A list of `(function name, is an explicit transfer?, function)` tuples,
    or an empty list when fewer than two local devices are present.
  """
  num_local_devices = len(jax.local_devices())
  if num_local_devices < 2:
    # A device-to-device copy needs a second device as its destination.
    return []

  # Creating the inputs with `jnp.ones` is itself a host-to-device transfer,
  # so it is done under an explicit "allow" guard.
  with jax.transfer_guard_host_to_device("allow"):
    put_input = jnp.ones(1)
    jit_input = jnp.ones(1)

  def copy_with_device_put():
    # The target device is looked up when the function runs, not here.
    return jax.device_put(put_input, device=jax.local_devices()[1])

  def copy_with_jit():
    identity_on_second = jax.jit(lambda x: x, device=jax.local_devices()[1])
    return identity_on_second(jit_input)

  return [
      # (function name, is an explicit transfer?, function)
      ("device_to_device_jax_device_put", True, copy_with_device_put),
      ("device_to_device_jax_jit", False, copy_with_jit),
  ]
def _device_to_host_funcs():
  """Builds the functions that fetch the value of a device array to the host.

  Returns:
    A list of `(function name, is an explicit transfer?, function)` tuples,
    or an empty list on the CPU backend.
  """
  backend_is_cpu = jax.default_backend() == "cpu"
  if backend_is_cpu:
    # Reading an array on the CPU backend does not incur a transfer.
    return []

  # Every function gets its own array. Creating them is a host-to-device
  # transfer, hence the explicit "allow" guard.
  with jax.transfer_guard_host_to_device("allow"):
    (for_device_get, for_asarray, for_async_copy, for_np_add, for_str,
     for_pickle) = (jnp.ones(1) for _ in range(6))

  named_transfers = []
  # (function name, is an explicit transfer?, function)
  named_transfers.append(
      ("device_to_host_jax_device_get", True,
       lambda: jax.device_get(for_device_get)))
  named_transfers.append(
      ("device_to_host_np_asarray", False, lambda: np.asarray(for_asarray)))
  named_transfers.append(
      ("device_to_host_copy_to_host_async", False,
       lambda: for_async_copy.copy_to_host_async()))
  named_transfers.append(
      ("device_to_host_np_add", False, lambda: np.add(for_np_add, 1)))
  named_transfers.append(
      ("device_to_host_str", False, lambda: str(for_str)))
  named_transfers.append(
      ("device_to_host_pickle_dumps", False,
       lambda: pickle.dumps(for_pickle)))
  return named_transfers
def _all_funcs():
  """Collects the transfer functions of every direction into one list.

  Returns:
    The host-to-device entries, then the device-to-device entries, then the
    device-to-host entries, as a single new list.
  """
  collected = []
  for generate in (_host_to_device_funcs, _device_to_device_funcs,
                   _device_to_host_funcs):
    collected.extend(generate())
  return collected
# Parameter sets reused by several parameterized tests. Each entry is
# (test name suffix, transfer function generator, guard context manager).
_COMMON_TEST_PARAMETERS = [
    (
        "host_to_device",
        _host_to_device_funcs,
        jax.transfer_guard_host_to_device,
    ),
    (
        "device_to_device",
        _device_to_device_funcs,
        jax.transfer_guard_device_to_device,
    ),
    (
        "device_to_host",
        _device_to_host_funcs,
        jax.transfer_guard_device_to_host,
    ),
    (
        "all",
        _all_funcs,
        jax.transfer_guard,
    ),
]
# With an XLA extension older than version 58 the test class is left empty
# (presumably because the transfer guard is unavailable there -- confirm).
if jax._src.lib.xla_extension_version < 58:

  class TransferGuardTest(jtu.JaxTestCase):
    pass

else:

  class TransferGuardTest(jtu.JaxTestCase):
    # `jtu.JaxTestCase` reads `_default_config` to update the JAX config for
    # every test case. `--jax_enable_checks` is turned off here because it can
    # fetch the value of device arrays early, which would make the
    # device-to-host tests unexpectedly incur no transfer.
    _default_config = {"jax_enable_checks": False}

    @contextlib.contextmanager
    def assertAllows(self, func_name):
      """Context manager that requires its body to finish without raising.

      Args:
        func_name: Label of the transfer being run; used in the error message.

      Raises:
        RuntimeError: If the body raises any exception (chained as the cause).
      """
      try:
        yield
      except Exception as err:  # pylint: disable=broad-except
        raise RuntimeError(
            f"Expected a transfer to be allowed while running: {func_name}"
        ) from err

    @contextlib.contextmanager
    def assertLogs(self, func_name):
      """Context manager for transfers expected to be logged and allowed.

      Args:
        func_name: Label of the transfer being run; used in the error message.
      """
      # NOTE(review): this name likely shadows `unittest.TestCase.assertLogs`
      # with a different signature -- confirm against the base class.
      # Until Abseil offers an interface to capture logs, only the "allowed"
      # half of the contract is checked.
      allowed = self.assertAllows(func_name)
      with allowed:
        yield

    @contextlib.contextmanager
    def assertDisallows(self, func_name):
      """Context manager that requires its body to raise an exception.

      Args:
        func_name: Label of the transfer being run; used in the error message.

      Raises:
        RuntimeError: If the body completes without raising.
      """
      try:
        # `assertRaises` swallows the expected exception and raises when the
        # body finishes cleanly; that failure is re-labelled below.
        with self.assertRaises(Exception):
          yield
      except Exception as err:  # pylint: disable=broad-except
        raise RuntimeError(
            f"Expected a transfer to be disallowed while running: {func_name}"
        ) from err

    def test_simple(self):
      """Checks "allow", "log" and "disallow" on an implicit transfer."""
      with jax.transfer_guard("allow"), self.assertAllows(
          "host_to_device_jnp_ones"):
        jnp.ones(1)
      with jax.transfer_guard("log"), self.assertLogs(
          "host_to_device_jnp_ones"):
        jnp.ones(1)
      with jax.transfer_guard("disallow"), self.assertDisallows(
          "host_to_device_jnp_ones"):
        jnp.ones(1)

    def test_nesting(self):
      # An inner guard overrides the outer one only while it is active.
      with jax.transfer_guard("disallow"):
        with jax.transfer_guard("allow"), self.assertAllows(
            "host_to_device_jnp_ones"):
          jnp.ones(1)
        with self.assertDisallows("host_to_device_jnp_ones"):
          jnp.ones(1)

    def test_mixed_nesting(self):
      # A direction-specific guard nested with the all-directions guard, in
      # both nesting orders.
      with jax.transfer_guard_host_to_device("disallow"):
        with jax.transfer_guard("allow"), self.assertAllows(
            "host_to_device_jnp_ones"):
          jnp.ones(1)
        with self.assertDisallows("host_to_device_jnp_ones"):
          jnp.ones(1)

      with jax.transfer_guard("disallow"):
        with jax.transfer_guard_host_to_device("allow"), self.assertAllows(
            "host_to_device_jnp_ones"):
          jnp.ones(1)
        with self.assertDisallows("host_to_device_jnp_ones"):
          jnp.ones(1)

    @parameterized.named_parameters(*_COMMON_TEST_PARAMETERS)
    def test_allow_by_default(self, func_generator, _):
      # No guard is installed: every transfer must go through.
      for name, _, run_transfer in func_generator():
        with self.assertAllows(name):
          run_transfer()

    @parameterized.named_parameters(*_COMMON_TEST_PARAMETERS)
    def test_allow(self, func_generator, jax_transfer_guard):
      for name, _, run_transfer in func_generator():
        with jax_transfer_guard("allow"), self.assertAllows(name):
          run_transfer()

    @parameterized.named_parameters(*_COMMON_TEST_PARAMETERS)
    def test_log(self, func_generator, jax_transfer_guard):
      # "log" is silent for explicit transfers and logs implicit ones.
      for name, explicit, run_transfer in func_generator():
        expectation = self.assertAllows if explicit else self.assertLogs
        with jax_transfer_guard("log"), expectation(name):
          run_transfer()

    @parameterized.named_parameters(*_COMMON_TEST_PARAMETERS)
    def test_disallow(self, func_generator, jax_transfer_guard):
      # "disallow" rejects implicit transfers only.
      for name, explicit, run_transfer in func_generator():
        expectation = self.assertAllows if explicit else self.assertDisallows
        with jax_transfer_guard("disallow"), expectation(name):
          run_transfer()

    @parameterized.named_parameters(
        ("device_to_host", _device_to_host_funcs,
         jax.transfer_guard_device_to_host),
        ("all", _device_to_host_funcs, jax.transfer_guard),
    )
    def test_disallow_ignores_arrays_on_cpu(self, func_generator,
                                            jax_transfer_guard):
      for name, _, run_transfer in func_generator():
        # First run: transfer the device array to host while it is allowed.
        with jax_transfer_guard("allow"):
          run_transfer()
        # Second run: no error, because the array already has a value on host
        # and no new transfer will occur.
        with jax_transfer_guard("disallow"), self.assertAllows(name):
          run_transfer()

    @parameterized.named_parameters(*_COMMON_TEST_PARAMETERS)
    def test_log_explicit(self, func_generator, jax_transfer_guard):
      # "log_explicit" logs and allows explicit and implicit transfers alike.
      for name, _, run_transfer in func_generator():
        with jax_transfer_guard("log_explicit"), self.assertLogs(name):
          run_transfer()

    @parameterized.named_parameters(*_COMMON_TEST_PARAMETERS)
    def test_disallow_explicit(self, func_generator, jax_transfer_guard):
      # "disallow_explicit" rejects explicit and implicit transfers alike.
      for name, _, run_transfer in func_generator():
        with jax_transfer_guard("disallow_explicit"), self.assertDisallows(
            name):
          run_transfer()
if __name__ == "__main__":
  # Run this module's tests through absltest with the loader from `jtu`.
  _test_loader = jtu.JaxTestLoader()
  absltest.main(testLoader=_test_loader)