# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # flake8: noqa from contextlib import contextmanager import functools import itertools as it import os import re import unittest from itertools import product, permutations from typing import (Tuple, List, NamedTuple, Dict, Generator, Sequence, Set, Any, Hashable, Iterable, Iterator, Union, Optional) from unittest import SkipTest, skip, skipIf import numpy as np from absl.testing import absltest from absl.testing import parameterized from functools import partial import jax import jax.numpy as jnp import jax.scipy as jscipy from jax import test_util as jtu from jax import vmap from jax import lax from jax import core from jax.core import NamedShape, JaxprTypeError from jax.experimental import maps from jax.experimental.maps import Mesh, mesh, xmap, serial_loop, SerialLoop from jax.errors import JAXTypeError from jax.lib import xla_bridge from jax._src.util import curry, unzip2, split_list, prod from jax._src.lax.lax import DotDimensionNumbers from jax._src.lax.parallel import pgather from jax.interpreters import batching, pxla from jax.config import config config.parse_flags_with_absl() # TODO(mattjj): de-duplicate setUpModule and tearDownModule with pmap_test.py # Run all tests with 8 CPU devices. def setUpModule(): global prev_xla_flags prev_xla_flags = os.getenv("XLA_FLAGS") flags_str = prev_xla_flags or "" # Don't override user-specified device count, or other XLA flags. if "xla_force_host_platform_device_count" not in flags_str: os.environ["XLA_FLAGS"] = (flags_str + " --xla_force_host_platform_device_count=8") # Clear any cached backends so new CPU backend will pick up the env var. xla_bridge.get_backend.cache_clear() # Reset to previous configuration in case other test modules will be run. def tearDownModule(): if prev_xla_flags is None: del os.environ["XLA_FLAGS"] else: os.environ["XLA_FLAGS"] = prev_xla_flags xla_bridge.get_backend.cache_clear() # -------------------- Itertools helpers -------------------- def partitions(s, k): for indices in product(range(k), repeat=len(s)): outs = [[] for _ in range(k)] for i, elt in zip(indices, s): outs[i].append(elt) yield outs def powerset(s): s = list(s) return it.chain.from_iterable(it.combinations(s, r) for r in range(len(s)+1)) # -------------------- vmap test helpers -------------------- ensure_bdim_p = core.Primitive('ensure_bdim') ensure_bdim_p.def_abstract_eval(lambda x, **kwargs: core.raise_to_shaped(x)) def _ensure_bdim_batcher(frame, vals_in, dims_in, axis_name, bdim): v, = vals_in d, = dims_in assert d is not batching.not_mapped return jnp.moveaxis(v, d, bdim), bdim batching.collective_rules[ensure_bdim_p] = _ensure_bdim_batcher batching.primitive_batchers[ensure_bdim_p] = lambda v, d: (v[0], d[0]) core.axis_substitution_rules[ensure_bdim_p] = partial(jax._src.lax.parallel._subst_all_names_in_param, 'axis_name') def ensure_bdim(x, axis_name, bdim): return ensure_bdim_p.bind(x, axis_name=(axis_name,), bdim=bdim) # -------------------- Axis resources generation -------------------- AxisResources = Dict[str, Union[str, Tuple[str, ...]]] def schedules(sizes: Dict[str, int] ) -> Generator[Tuple[AxisResources, jtu.MeshSpec], None, None]: """Test utility generating xmap parallel schedules from logical names & sizes. Args: sizes: dict mapping logical axis name to its corresponding size. Returns: A generator producing finitely many values, where each value is a pair in which the first element is a value suitable for xmap's axis_resources argument and the second element is a list of pairs with the first element representing a generated physical mesh axis name and the second element representing a corresponding generated mesh axis size. The generated mesh names/sizes can be used to define a physical mesh in tests. This function doesn't generate schedules which map distinct logical axis names to the same parallel resource name. It only generates parallel resources; the rest are implicitly left for vectorization. Parallel resource names are generated by prepending an 'r', 'r1', or 'r2' to the corresponding logical name. Examples: >>> for sched in schedules({'i': 2, 'j': 4}): ... print(sched) ({}, []) ({'i': 'ri'}, [('ri', 1)]) ({'i': 'ri'}, [('ri', 2)]) ({'i': ('r1i', 'r2i')}, [('r1i', 1), ('r2i', 1)]) ({'i': ('r1i', 'r2i')}, [('r1i', 1), ('r2i', 2)]) ({'i': ('r1i', 'r2i')}, [('r1i', 2), ('r2i', 1)]) ({'j': 'rj'}, [('rj', 1)]) ({'j': 'rj'}, [('rj', 2)]) ({'j': 'rj'}, [('rj', 4)]) ({'j': ('r1j', 'r2j')}, [('r1j', 1), ('r2j', 1)]) ({'j': ('r1j', 'r2j')}, [('r1j', 1), ('r2j', 2)]) ({'j': ('r1j', 'r2j')}, [('r1j', 1), ('r2j', 4)]) ({'j': ('r1j', 'r2j')}, [('r1j', 2), ('r2j', 1)]) ({'j': ('r1j', 'r2j')}, [('r1j', 2), ('r2j', 2)]) ({'j': ('r1j', 'r2j')}, [('r1j', 4), ('r2j', 1)]) ({'i': 'ri', 'j': 'rj'}, [('ri', 1), ('rj', 1)]) ({'i': 'ri', 'j': 'rj'}, [('ri', 1), ('rj', 2)]) ({'i': 'ri', 'j': 'rj'}, [('ri', 1), ('rj', 4)]) ({'i': 'ri', 'j': 'rj'}, [('ri', 2), ('rj', 1)]) ({'i': 'ri', 'j': 'rj'}, [('ri', 2), ('rj', 2)]) ({'i': 'ri', 'j': 'rj'}, [('ri', 2), ('rj', 4)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 1), ('r1j', 1), ('r2j', 1)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 1), ('r1j', 1), ('r2j', 2)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 1), ('r1j', 1), ('r2j', 4)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 1), ('r1j', 2), ('r2j', 1)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 1), ('r1j', 2), ('r2j', 2)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 1), ('r1j', 4), ('r2j', 1)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 2), ('r1j', 1), ('r2j', 1)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 2), ('r1j', 1), ('r2j', 2)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 2), ('r1j', 1), ('r2j', 4)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 2), ('r1j', 2), ('r2j', 1)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 2), ('r1j', 2), ('r2j', 2)]) ({'i': 'ri', 'j': ('r1j', 'r2j')}, [('ri', 2), ('r1j', 4), ('r2j', 1)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 1), ('r1i', 1), ('r2i', 1)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 1), ('r1i', 1), ('r2i', 2)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 1), ('r1i', 2), ('r2i', 1)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 2), ('r1i', 1), ('r2i', 1)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 2), ('r1i', 1), ('r2i', 2)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 2), ('r1i', 2), ('r2i', 1)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 4), ('r1i', 1), ('r2i', 1)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 4), ('r1i', 1), ('r2i', 2)]) ({'j': 'rj', 'i': ('r1i', 'r2i')}, [('rj', 4), ('r1i', 2), ('r2i', 1)]) """ def divisors(n: int) -> List[int]: return [m for m in range(1, n + 1) if not n % m] def divisors2(n: int) -> Iterator[Tuple[int, int]]: for k1 in divisors(n): for k2 in divisors(n // k1): yield (k1, k2) # choose a subset of logical axis names to map to parallel resources for names in powerset(sizes): # partition that set of logical axis names into two subsets: one subset to # map to one parallel resource axis and a second subset to map to two # parallel resource axes. for names1, names2 in partitions(names, 2): # to avoid generating too many complex cases, we skip generating cases # where more than one logical axis name is to be mapped to two parallel # resource axes. comment out this line to generate more complex tests. if len(names2) > 1: continue # make up parallel resource axis names for each logical axis axis_resources1 = ((name, 'r' + name) for name in names1) axis_resources2 = ((name, ('r1' + name, 'r2' + name)) for name in names2) axis_resources = dict(it.chain(axis_resources1, axis_resources2)) # make up sizes for each resource axis, where the size must divide the # corresponding logical axis for mesh_sizes1 in product(*(divisors(sizes[n]) for n in names1)): for mesh_sizes2 in product(*(divisors2(sizes[n]) for n in names2)): mesh_data1 = (('r' + name, size) for name, size in zip(names1, mesh_sizes1)) mesh_data2 = (pair for name, (size1, size2) in zip(names2, mesh_sizes2) for pair in [('r1' + name, size1), ('r2' + name, size2)]) mesh_data = list(it.chain(mesh_data1, mesh_data2)) yield axis_resources, mesh_data class XMapTestCase(jtu.BufferDonationTestCase): pass # A mixin that enables SPMD lowering tests class SPMDTestMixin: def setUp(self): if jtu.device_under_test() not in ['tpu', 'gpu']: raise SkipTest super().setUp() jax.experimental.maps.make_xmap_callable.cache_clear() self.old_lowering_flag = jax.experimental.maps.EXPERIMENTAL_SPMD_LOWERING jax.experimental.maps.EXPERIMENTAL_SPMD_LOWERING = True def tearDown(self): jax.experimental.maps.make_xmap_callable.cache_clear() jax.experimental.maps.EXPERIMENTAL_SPMD_LOWERING = self.old_lowering_flag class XMapTest(XMapTestCase): def testBasic(self): local_devices = list(jax.local_devices()) if len(local_devices) < 4: raise SkipTest("Test requires at least 4 local devices") def f(a, b): return a * 2, b * 4 devices = np.array(local_devices[:4]).reshape((2, 2)) with mesh(devices, ('x', 'y')): fm = xmap(f, in_axes=[{0: 'a', 1: 'b'}, ['c', ...]], out_axes=[{0: 'a', 1: 'b'}, ['c', ...]], axis_resources={'a': 'x', 'b': 'y', 'c': 'x'}) ashape = (16, 8, 5) a = jnp.arange(np.prod(ashape)).reshape(ashape) bshape = (2, 7) b = jnp.arange(np.prod(bshape)).reshape(bshape) c, d = fm(a, b) self.assertAllClose(c, a * 2) self.assertAllClose(d, b * 4) @jtu.with_mesh([('x', 2), ('y', 2)]) def testCollectiveReduce(self): fm = xmap(lambda a, b: (lax.psum(a * 2, 'a'), b * 4), in_axes=[['a', 'b', ...], {0: 'c'}], out_axes=[['b', ...], {0: 'c'}], axis_resources={'a': 'x', 'b': 'y', 'c': 'x'}) ashape = (16, 8, 5) a = jnp.arange(np.prod(ashape)).reshape(ashape) bshape = (2, 7) b = jnp.arange(np.prod(bshape)).reshape(bshape) c, d = fm(a, b) self.assertAllClose(c, (a * 2).sum(0)) self.assertAllClose(d, b * 4) @jtu.with_mesh([('x', 2), ('y', 2)]) def testCollectivePermute2D(self): perm = np.array([3, 1, 2, 0]) x = jnp.arange(4).reshape((2, 2)) result = xmap(lambda x: lax.pshuffle(x, ('i', 'j'), perm), in_axes=['i', 'j', ...], out_axes=['i', 'j', ...], axis_resources={'i': 'x', 'j': 'y'})(x).reshape((-1,)) self.assertAllClose(result, perm) def testCollectivePermute1D(self): perm = np.array([3, 1, 2, 0]) x = jnp.arange(4) result = xmap(lambda x: lax.pshuffle(x, 'i', perm), in_axes=['i', ...], out_axes=['i', ...])(x) self.assertAllClose(result, perm) def testCollectiveAllGather(self): x = jnp.arange(4) result = xmap(lambda x: lax.all_gather(x, 'i') + lax.axis_index('i'), in_axes=['i', ...], out_axes=['i', ...])(x) self.assertAllClose(result, x + x[jnp.newaxis].T) @jtu.with_mesh([('x', 2), ('y', 2)]) def testOneLogicalTwoMeshAxesBasic(self): def f(v): return lax.psum(v * 2, 'a'), v * 4 fm = xmap(f, in_axes=['a', ...], out_axes=[{}, {1: 'a'}], axis_resources={'a': ('x', 'y')}) vshape = (4, 5) v = jnp.arange(np.prod(vshape)).reshape(vshape) ans, ans2 = fm(v) self.assertAllClose(ans, (v * 2).sum(0)) self.assertAllClose(ans2, v.T * 4) @jtu.with_mesh([('x', 2), ('y', 2)]) def testOneLogicalTwoMeshAxesSharding(self): def f(v): return v * 4 fxy = xmap(f, in_axes=['a', ...], out_axes={1: 'a'}, axis_resources={'a': ('x', 'y')}) fyx = xmap(f, in_axes=['a', ...], out_axes={1: 'a'}, axis_resources={'a': ('y', 'x')}) vshape = (4, 5) v = jnp.arange(np.prod(vshape)).reshape(vshape) zxy = fxy(v) self.assertEqual( zxy.sharding_spec, pxla.ShardingSpec((pxla.NoSharding(), pxla.Chunked((2, 2))), (pxla.ShardedAxis(0), pxla.ShardedAxis(1)))) zyx = fyx(v) self.assertEqual( zyx.sharding_spec, pxla.ShardingSpec((pxla.NoSharding(), pxla.Chunked((2, 2))), (pxla.ShardedAxis(1), pxla.ShardedAxis(0)))) @jtu.with_mesh([('x', 2), ('y', 2)]) def testSkipFirstMeshDim(self): def run(axis_resources): return xmap(lambda x: x * 2, in_axes=['i', ...], out_axes=['i', ...], axis_resources=axis_resources)(jnp.ones((4,))) self.assertAllClose(run({'i': 'x'}), run({'i': 'y'})) def testCaching(self): def f(x): assert python_should_be_executing return x * 2 devices = np.array(jax.local_devices()[:2]) if devices.size < 2: raise SkipTest("Test requires 2 devices") x = np.arange(8).reshape((2, 2, 2)) with mesh(devices, ('x',)): python_should_be_executing = True xmap(f, in_axes=['a', ...], out_axes=['a', ...], axis_resources={'a': 'x'})(x) python_should_be_executing = False xmap(f, in_axes=['a', ...], out_axes=['a', ...], axis_resources={'a': 'x'})(x) with mesh(devices, ('x',)): python_should_be_executing = False xmap(f, in_axes=['a', ...], out_axes=['a', ...], axis_resources={'a': 'x'})(x) @parameterized.named_parameters( {"testcase_name": name, "mesh": mesh, "axis_resources": axis_resources} for name, mesh, axis_resources in ( ('OneToOne', (('x', 2), ('y', 2)), (('a', 'y'), ('b', 'x'))), ('Multiple', (('x', 2), ('y', 2), ('z', 2)), (('a', 'y'), ('b', ('x', 'z')))), )) @jtu.with_mesh_from_kwargs def testNestedMesh(self, mesh, axis_resources): @partial(xmap, in_axes={1: 'a'}, out_axes=({0: 'a'}, {}), axis_resources=dict([axis_resources[0]])) def f(x): y = x * 2 @partial(xmap, in_axes={0: 'b'}, out_axes=({1: 'b'}, {}), axis_resources=dict([axis_resources[1]])) def h(y): # Multiply by a constant array to better exercise the partial_eval rule return jnp.sin(y) * np.arange(y.size), lax.psum(y, ('a', 'b')) return h(y) xshape = (4, 2, 5) x = jnp.arange(np.prod(xshape)).reshape(xshape) y = f(x) self.assertAllClose(y, ((jnp.sin(x * 2) * np.arange(xshape[-1])).transpose((1, 2, 0)), (x * 2).sum((0, 1)))) self.assertEqual(y[0].sharding_spec.sharding, (pxla.Chunked([2]), pxla.NoSharding(), pxla.NoSharding())) self.assertEqual(y[0].sharding_spec.mesh_mapping, (pxla.Replicated(2), pxla.ShardedAxis(0)) + (pxla.Replicated(2),) * (len(mesh) - 2)) if maps.EXPERIMENTAL_SPMD_LOWERING: hlo = jax.xla_computation(f)(x).as_hlo_text() # Make sure that there are non-partial sharding specs in the HLO self.assertRegex(hlo, r"sharding={devices=\[[0-9,]+\][0-9,]+}") @jtu.with_and_without_mesh def testMultipleCalls(self, mesh, axis_resources): def f(x, y): assert x.shape == y.shape == (3, 5) return jnp.tensordot(x, y, axes=([1], [1])) f_mapped = xmap(f, in_axes=(['i', ...], ['j', ...]), out_axes=['i', 'j', ...], axis_resources=dict(axis_resources)) x = jnp.arange(30).reshape(2, 3, 5) expected = jnp.einsum('imk,jnk->ijmn', x, x) for i in range(10): self.assertAllClose(f_mapped(x, x), expected) @jtu.with_and_without_mesh @jtu.skip_on_devices("cpu") # In/out aliasing not supported on CPU. def testBufferDonation(self, mesh, axis_resources): shard = lambda x: x if axis_resources: shard = xmap(lambda x: x, in_axes=['i', ...], out_axes=['i', ...], axis_resources=dict(axis_resources)) f = xmap(lambda x, y: x + y * 4, in_axes=['i', ...], out_axes=['i', ...], axis_resources=dict(axis_resources), donate_argnums=0) # The multiplications below disable some optimizations that prevent reuse x = shard(jnp.zeros((2, 5)) * 4) y = shard(jnp.ones((2, 5)) * 2) f(x, y) self.assertNotDeleted(y) self.assertDeleted(x) def testControlFlow(self): x = jnp.arange(5) xmap(lambda x: lax.fori_loop(0, 10, lambda _, x: lax.psum(x, 'i'), x), in_axes=['i', ...], out_axes=['i', ...])(x) @jtu.with_and_without_mesh def testAxisSizes(self, mesh, axis_resources): result = xmap(lambda: lax.axis_index('i'), in_axes=(), out_axes=['i', ...], axis_sizes={'i': 6}, axis_resources=dict(axis_resources))() self.assertAllClose(result, jnp.arange(6, dtype=result.dtype)) def testCollectiveOverNoName(self): result = xmap(lambda: lax.psum(jnp.array(2) ** 2, 'i'), in_axes={}, out_axes={}, axis_sizes={'i': 4})() self.assertEqual(result, 16) def VmapOfXmapCases(s): xmap_in_axes = ([{}] + [{i: 'x'} for i in range(3)] + [{i: 'x', j: 'y'} for i in range(4) for j in range(4) if i != j]) for xmap_dim_x, xmap_dim_y in s(product(xmap_in_axes, repeat=2)): xmap_axes = sorted(set(xmap_dim_x.values()) | set(xmap_dim_y.values())) num_axes = len(xmap_axes) if xmap_axes is None: continue xmap_out_axes = [dict(zip(dims, xmap_axes)) for dims in permutations(range(2 + num_axes), num_axes)] for xmap_dim_z in s(xmap_out_axes): for vmap_dim_x in s([*range(2 + len(xmap_dim_x)), None]): for vmap_dim_y in s([*range(2 + len(xmap_dim_y)), None]): if vmap_dim_x is None and vmap_dim_y is None: continue for vmap_dim_result in s(range(3)): for vmap_dim_z in s(range(2 + len(xmap_axes))): for vmap_as_xmap in s([False, True]): yield {"testcase_name": f"_xin={(sorted(xmap_dim_x.items()), sorted(xmap_dim_y.items()))}_" f"xout={sorted(xmap_dim_z.items())}_vin={(vmap_dim_x, vmap_dim_y)}_" f"vout={vmap_dim_z}_vresult={vmap_dim_result}_vmap_as_xmap={vmap_as_xmap}", "xmap_in_axes": (xmap_dim_x, xmap_dim_y), "xmap_out_axes": xmap_dim_z, "vmap_in_axes": (vmap_dim_x, vmap_dim_y), "vmap_out_axes": vmap_dim_z, "vmap_result_axis": vmap_dim_result, "vmap_as_xmap": vmap_as_xmap} @parameterized.named_parameters(jtu.named_cases_from_sampler(VmapOfXmapCases)) def testNestedMap(self, xmap_in_axes, xmap_out_axes, vmap_in_axes, vmap_out_axes, vmap_result_axis, vmap_as_xmap): """Test various vmap(xmap) and xmap(xmap) combinations. The outer map always introduces a single dimension, the inner map introduces one or two. """ (xin_x, xin_y) = xmap_in_axes (vin_x, vin_y) = vmap_in_axes vmap_size = 7 xmap_sizes = {'x': 11, 'y': 13} xshape = [2, 3] yshape = [3, 5] zshape = [2, 5] xind = ['n', 'k'] yind = ['k', 'm'] zind = ['n', 'm'] f = lambda x, y: ensure_bdim(jnp.einsum('nk,km->nm', x, y), 'v', vmap_result_axis) for pos, name in sorted(xin_x.items()): xshape.insert(pos, xmap_sizes[name]) xind.insert(pos, name) for pos, name in sorted(xin_y.items()): yshape.insert(pos, xmap_sizes[name]) yind.insert(pos, name) for pos, name in sorted(xmap_out_axes.items()): zshape.insert(pos, xmap_sizes[name]) zind.insert(pos, name) if vin_x is not None: xshape.insert(vin_x, vmap_size) xind.insert(vin_x, 'v') if vin_y is not None: yshape.insert(vin_y, vmap_size) yind.insert(vin_y, 'v') zshape.insert(vmap_out_axes, vmap_size) zind.insert(vmap_out_axes, 'v') if vmap_as_xmap: do_vmap = partial(xmap, in_axes=({vin_x: 'v'} if vin_x is not None else {}, {vin_y: 'v'} if vin_y is not None else {}), out_axes={vmap_out_axes: 'v'}) else: do_vmap = partial(vmap, in_axes=vmap_in_axes, out_axes=vmap_out_axes, axis_name='v') fm = do_vmap(xmap(f, in_axes=xmap_in_axes, out_axes=xmap_out_axes)) fref = partial(jnp.einsum, f"{''.join(xind)},{''.join(yind)}->{''.join(zind)}") rng = np.random.RandomState(0) x = rng.randn(*xshape) y = rng.randn(*yshape) self.assertAllClose(fm(x, y), fref(x, y)) def testJVP(self): f = xmap(lambda x, y: jnp.cos(lax.dot(x, jnp.sin(y), precision=lax.Precision.HIGHEST)), in_axes=[['i', ...], {}], out_axes=['i', ...]) x = jnp.arange(12, dtype=jnp.float32).reshape((3, 4)) / 100 y = jnp.arange(20, dtype=jnp.float32).reshape((4, 5)) / 100 jtu.check_grads(f, (x, y), order=2, modes=['fwd']) @jtu.with_and_without_mesh def testNamedShape(self, mesh, axis_resources): x = np.arange(4,) y = 2 f = xmap(lambda x, y: (x + y, y * lax.axis_index('i')), in_axes=(['i', ...], {}), out_axes=(['i', ...], ['i', ...]), axis_resources=dict(axis_resources)) z, w = f(x, y) self.assertEqual(z.aval.named_shape, {}) self.assertEqual(w.aval.named_shape, {}) @jtu.with_and_without_mesh def testBroadcast(self, mesh, axis_resources): x = jnp.asarray(2.0) f = xmap(lambda x: x, in_axes={}, out_axes=['i'], axis_sizes={'i': 4}, axis_resources=dict(axis_resources)) self.assertAllClose(f(x), jnp.asarray([2.0, 2.0, 2.0, 2.0])) def testNestedBroadcast(self): x = jnp.asarray(2.0) f = xmap(lambda x: x, in_axes={}, out_axes=['i'], axis_sizes={'i': 4}) g = xmap(f, in_axes={}, out_axes=['j', ...], axis_sizes={'j': 7}) self.assertAllClose(g(x), jnp.tile(x.reshape((1, 1)), (7, 4))) @serial_loop('l', 4) def testLoopBasic(self): x = jnp.arange(16) y = xmap(lambda x: x + 4, in_axes=['i'], out_axes=['i'], axis_resources={'i': 'l'})(x) self.assertAllClose(y, x + 4) @jtu.with_mesh([('x', 2)]) @serial_loop('l', 4) def testLoopWithMesh(self): x = jnp.arange(16) y = xmap(lambda x: x + 4, in_axes=['i'], out_axes=['i'], axis_resources={'i': ('x', 'l')})(x) self.assertAllClose(y, x + 4) def testLoopAnonBasic(self): x = jnp.arange(16) y = xmap(lambda x: x + 4, in_axes=['i'], out_axes=['i'], axis_resources={'i': SerialLoop(4)})(x) self.assertAllClose(y, x + 4) @jtu.with_mesh([('x', 2)]) def testLoopAnonWithMesh(self): x = jnp.arange(16) y = xmap(lambda x: x + 4, in_axes=['i'], out_axes=['i'], axis_resources={'i': ('x', SerialLoop(4))})(x) self.assertAllClose(y, x + 4) class XMapTestSPMD(SPMDTestMixin, XMapTest): """Re-executes all basic tests with the SPMD partitioner enabled""" skipped_tests = { "CollectivePermute2D" # vmap of multidimensional permute not implemented yet } def setUp(self): for skipped_name in self.skipped_tests: if skipped_name in self._testMethodName: raise SkipTest super().setUp() @jtu.with_mesh([('x', 2), ('y', 2), ('z', 2)]) def testNestedMeshSPMD(self): h = xmap(lambda y: (jnp.sin(y) * np.arange(y.size), lax.psum(y, ('a', 'b', 'c'))), in_axes={0: 'c'}, out_axes=({1: 'c'}, {}), axis_resources={'c': 'z'}) f = xmap(lambda x: h(x * 2), in_axes=[None, 'a', 'b', ...], out_axes=(['a', 'b', ...], {}), axis_resources={'a': 'x', 'b': 'y'}) xshape = (8, 2, 4, 5) x = jnp.arange(np.prod(xshape)).reshape(xshape) y = f(x) hlo = jax.xla_computation(f)(x).as_hlo_text() match = re.search(r"sharding={devices=\[([0-9,]+)\][0-9,]+}", hlo) self.assertIsNot(match, None) tile_factors = [int(s) for s in match.group(1).split(',')] self.assertEqual(set(tile_factors), {1, 2}) class NamedNumPyTest(XMapTestCase): @parameterized.named_parameters(jtu.cases_from_list( {"testcase_name": f"_{reduction.__name__}_axes={axes}_i={mapped_axis}", "reduction": reduction, "axes": axes, "mapped_axis": mapped_axis} for reduction in (jnp.sum, jnp.max, jnp.min, jnp.mean, jnp.var, jnp.std, jscipy.special.logsumexp) for axes in (0, 'i', (1,), ('i',), (0, 1), (0, 'i'), ('i', 0)) for mapped_axis in range(3))) def testReductions(self, reduction, axes, mapped_axis): axes_t = axes if isinstance(axes, tuple) else (axes,) reduces_i = 'i' in axes_t ref_red = partial(reduction, axis=tuple(mapped_axis if a == 'i' else a + (a >= mapped_axis) for a in axes_t)) mapped_axis_after_red = mapped_axis - sum(axis < mapped_axis if axis != 'i' else 0 for axis in axes_t) xmap_red = xmap(lambda x: reduction(x, axes), in_axes={mapped_axis: 'i'}, out_axes=({} if 'i' in axes_t else {mapped_axis_after_red: 'i'})) rng = np.random.RandomState(0) x = rng.randn(2, 5, 6) self.assertAllClose(ref_red(x), xmap_red(x)) class NamedRandomTest(XMapTestCase): @curry def parameterize_by_sampler(extra, f, subset): if extra is None: extra = [("", {})] else: extra = list(extra) subset_fn = jtu.cases_from_list if subset else lambda x: x return parameterized.named_parameters(subset_fn( {"testcase_name": name + extra_name, "distr_sample": sample, **extra_kwargs} for name, sample in [ ("Uniform", jax.random.uniform), ("Normal", jax.random.normal), ("Bernoulli", partial(jax.random.bernoulli, p=0.5)), ("TruncatedNormal", partial(jax.random.truncated_normal, lower=-2, upper=2)), ] for extra_name, extra_kwargs in extra))(f) @parameterize_by_sampler(None, subset=False) def testSamplerSharding(self, distr_sample): def sample(shape, map_size): return xmap(lambda: distr_sample(jax.random.PRNGKey(0), shape=shape), in_axes=(), out_axes=[None, 'i', ...], axis_sizes={'i': map_size})() replicated = sample((3,), 4) self.assertTrue((replicated[:,[0]] == replicated).all()) sharded = sample(NamedShape(3, i=4), 4) self.assertFalse((sharded[:,[0]] == sharded[:,1:]).all(1).any()) error = "The shape of axis i was specified as 4, but it really is 5" with self.assertRaisesRegex(ValueError, error): sample(NamedShape(3, i=4), 5) @parameterize_by_sampler( ((f"_mesh={mesh}_resources={sorted(axis_resources.items())}", {"axis_resources": tuple(axis_resources.items()), "mesh": tuple(mesh)}) for axis_resources, mesh in schedules({'i': 4, 'j': 6})), subset=True) @jtu.with_mesh_from_kwargs def testSamplerResourceIndependence(self, distr_sample, axis_resources, mesh): def sample(axis_resources): return xmap(lambda: distr_sample(jax.random.PRNGKey(0), shape=NamedShape(3, i=4, j=6)), in_axes=(), out_axes=['i', 'j', ...], axis_sizes={'i': 4, 'j': 6}, axis_resources=axis_resources)() self.assertAllClose(sample({}), sample(dict(axis_resources))) class NamedNNTest(XMapTestCase): def testOneHot(self): f = xmap(lambda x: jax.nn.one_hot([1, 2, 0], 3, axis='i'), in_axes=['i', ...], out_axes=['i', ...]) expected = jnp.array([[0., 1., 0.], [0., 0., 1.], [1., 0., 0.]]).T self.assertAllClose(f(jnp.ones((3,))), expected) def testOneHotOutOfBound(self): f = xmap(lambda x: jax.nn.one_hot([-1, 3], 3, axis='i'), in_axes=['i', ...], out_axes=['i', ...]) self.assertAllClose(f(jnp.ones((3,))), jnp.zeros((3, 2))) def testOneHotAxisSizeMismatch(self): f = xmap(lambda x: jax.nn.one_hot([-1, 3], 3, axis='i'), in_axes=['i', ...], out_axes=['i', ...]) with self.assertRaisesRegex(ValueError, "to match the size of axis i, but 3 != 5"): f(jnp.ones((5,))) @parameterized.named_parameters(jtu.cases_from_list( {"testcase_name": f"_map_in={map_in}_map_out={map_out}_fan={fan}_distr={distr}", "map_in": map_in, "map_out": map_out, "fan": fan, "distr": distr} for map_in, map_out in [(True, False), (False, True), (True, True)] for fan in ['fan_in', 'fan_out', 'fan_avg'] for distr in ['uniform', 'normal', 'truncated_normal'])) def testVarianceScaling(self, map_in, map_out, fan, distr): shape = (80, 50, 7) fan_in, fan_out = jax._src.nn.initializers._compute_fans( NamedShape(*shape), 0, 1) key = jax.random.PRNGKey(0) base_scaling = partial(jax.nn.initializers.variance_scaling, 100, fan, distr) ref_sampler = lambda: base_scaling(in_axis=0, out_axis=1)(key, shape) if map_in and map_out: out_axes=['i', 'o', ...] named_shape = NamedShape(shape[2], i=shape[0], o=shape[1]) xmap_sampler = lambda: base_scaling(in_axis='i', out_axis='o')(key, named_shape) elif map_in: out_axes = ['i', ...] named_shape = NamedShape(shape[1], shape[2], i=shape[0]) xmap_sampler = lambda: base_scaling(in_axis='i', out_axis=0)(key, named_shape) elif map_out: out_axes = [None, 'o', ...] named_shape = NamedShape(shape[0], shape[2], o=shape[1]) xmap_sampler = lambda: base_scaling(in_axis=0, out_axis='o')(key, named_shape) mapped_sampler = xmap(xmap_sampler, in_axes=(), out_axes=out_axes, axis_sizes={'i': shape[0], 'o': shape[1]}) self.assertAllClose(jnp.var(mapped_sampler()), jnp.var(ref_sampler()), atol=1e-4, rtol=2e-2) class NewPrimitiveTest(XMapTestCase): def testGatherPositional(self): x = jnp.arange(27).reshape((9, 3)) idx = jnp.array([1, 2, 1, 0]).reshape((2, 2)) self.assertAllClose(pgather(x, idx, 0), x[idx.ravel()].reshape((2, 2, 3))) x_explode = x.reshape((3, 3, 3)) self.assertAllClose(pgather(x, idx, 0), pgather(x_explode, idx, (0, 1))) @jtu.with_and_without_mesh def testGather(self, mesh, axis_resources): if axis_resources and not jax.experimental.maps.EXPERIMENTAL_SPMD_LOWERING: raise SkipTest("pgather over mesh axes without SPMD lowering not implemented") x = jnp.arange(12, dtype=np.float32).reshape((4, 3)) y = jnp.arange(35).reshape((5, 7)) % 3 f = xmap(lambda src, idx: pgather(src, idx, 'j'), in_axes=(['i', 'j'], ['k', 'm']), out_axes=['i', 'k', 'm'], axis_resources=dict(axis_resources)) f_ref = lambda x, y: x[:, y.reshape((-1,))].reshape((4, 5, 7)) self.assertAllClose(f(x, y), f_ref(x, y)) class NewPrimitiveTestSPMD(SPMDTestMixin, NewPrimitiveTest): pass AxisIndices = Tuple[int, ...] MatchedAxisIndices = Tuple[AxisIndices, AxisIndices] AxisNames = Tuple[str, ...] class PdotTestSpec: # The axis indices stored by a PdotTestSpec are all positional indices # *before* taking mapping into account. map_cont: MatchedAxisIndices pos_cont: MatchedAxisIndices map_batch: MatchedAxisIndices pos_batch: MatchedAxisIndices all_names: AxisNames contract_names: AxisNames batch_names: AxisNames def __init__(self, map_cont, pos_cont, map_batch, pos_batch): self.map_cont = map_cont self.pos_cont = pos_cont self.map_batch = map_batch self.pos_batch = pos_batch names = gen_axis_names() self.contract_names = [next(names) for _ in range(len(map_cont[0]))] self.batch_names = [next(names) for _ in range(len(map_batch[0]))] self.all_names = self.contract_names + self.batch_names @property def dot_general_dim_nums(self): lhs_contract = (*self.map_cont[0], *self.pos_cont[0]) rhs_contract = (*self.map_cont[1], *self.pos_cont[1]) lhs_batch = (*self.map_batch[0], *self.pos_batch[0]) rhs_batch = (*self.map_batch[1], *self.pos_batch[1]) return (lhs_contract, rhs_contract), (lhs_batch, rhs_batch) @property def pos_contract_after_mapping(self): lhs = [i - sum(j < i for j in self._lhs_mapped) for i in self.pos_cont[0]] rhs = [i - sum(j < i for j in self._rhs_mapped) for i in self.pos_cont[1]] return (lhs, rhs) @property def pos_batch_after_mapping(self): lhs = [i - sum(j < i for j in self._lhs_mapped) for i in self.pos_batch[0]] rhs = [i - sum(j < i for j in self._rhs_mapped) for i in self.pos_batch[1]] return (lhs, rhs) @property def _lhs_mapped(self): return {*self.map_cont[0], *self.map_batch[0]} @property def _rhs_mapped(self): return {*self.map_cont[1], *self.map_batch[1]} @property def lhs_in_axes(self): axis_indices = [*self.map_cont[0], *self.map_batch[0]] return dict(zip(axis_indices, self.all_names)) @property def rhs_in_axes(self): axis_indices = [*self.map_cont[1], *self.map_batch[1]] return dict(zip(axis_indices, self.all_names)) def all_pdot_specs(lhs_shape, rhs_shape): for matching in axis_matchings(lhs_shape, rhs_shape): for lists in partitions(matching, 4): yield PdotTestSpec(*map(unzip2, lists)) def axis_matchings(lhs_shape, rhs_shape): def helper(start, exc1, exc2): yield () for i in range(start, len(lhs_shape)): d1 = lhs_shape[i] if i not in exc1: for j, d2 in enumerate(rhs_shape): if d1 == d2 and j not in exc2: for matches in helper(i + 1, exc1 | {i}, exc2 | {j}): yield ((i, j), *matches) return helper(0, set(), set()) def gen_axis_names(): names = 'ijkl' for n in it.count(1): for chars in product(names, repeat=n): yield ''.join(chars) def schedules_from_pdot_spec( spec: PdotTestSpec, lhs_shape: Tuple[int], rhs_shape: Tuple[int] ) -> Generator[Tuple[AxisResources, jtu.MeshSpec], None, None]: logical_sizes = { name: shape[ax] for shape, in_axes in [(lhs_shape, spec.lhs_in_axes), (rhs_shape, spec.rhs_in_axes)] for ax, name in in_axes.items()} yield from schedules(logical_sizes) class PDotTests(XMapTestCase): @jtu.with_mesh([('r1', 2)]) def testPdotBasic(self): def f(x, y): return lax.pdot(x, y, 'i') f_mapped = xmap(f, in_axes=[{1: 'i'}, {0: 'i'}], out_axes={}, axis_resources={'i': 'r1'}) rng = np.random.RandomState(0) x = rng.randn(3, 8) y = rng.randn(8, 5) z = f_mapped(x, y) self.assertAllClose(z, jnp.dot(x, y)) @jtu.with_mesh([('r1', 2)]) def testPdotBatching(self): def f(x, y): return lax.pdot(x, y, 'i') rng = np.random.RandomState(0) x = rng.randn(2, 3, 8) y = rng.randn(2, 8, 5) f_mapped = xmap(f, in_axes=[{0: 'j', 2: 'i'}, {0: 'j', 1: 'i'}], out_axes=['j', ...], axis_resources={'i': 'r1'}) z = f_mapped(x, y) self.assertAllClose(z, jnp.einsum('nij,njk->nik', x, y)) @jtu.with_mesh([('r1', 2)]) def testPdotBatchingShardUncontractedDim(self): def f(x, y): return lax.pdot(x, y, 'i') rng = np.random.RandomState(0) x = rng.randn(2, 3, 8) y = rng.randn(2, 8, 5) f_mapped = xmap(f, in_axes=[{0: 'j', 2: 'i'}, {0: 'j', 1: 'i'}], out_axes=['j', ...], axis_resources={'j': 'r1'}) z = f_mapped(x, y) self.assertAllClose(z, jnp.einsum('nij,njk->nik', x, y)) @parameterized.named_parameters(jtu.named_cases_from_sampler(lambda s: ({ "testcase_name": f"_{next(test_counter)}", "lhs_shape": lhs_shape, "rhs_shape": rhs_shape, "pdot_spec": pdot_spec, "axis_resources": axis_resources, "mesh_data": mesh_data } for test_counter in [it.count()] for lhs_shape, rhs_shape in s(product([(2,), (2, 4, 2, 1)], repeat=2)) for pdot_spec in s(all_pdot_specs(lhs_shape, rhs_shape)) for axis_resources, mesh_data in s(schedules_from_pdot_spec( pdot_spec, lhs_shape, rhs_shape)) ))) def testPdotSystematic(self, lhs_shape, rhs_shape, pdot_spec, axis_resources, mesh_data): rng = jtu.rand_default(self.rng()) lhs = rng(lhs_shape, np.float32) rhs = rng(rhs_shape, np.float32) def pdot_fun(x, y): # print(f'pdot(x:{x.aval.str_short()}, y:{y.aval.str_short()},\n' # f' axis_name={contract_names},\n' # f' pos_contract={spec.pos_contract_after_mapping}\n' # f' pos_batch={spec.pos_batch_after_mapping})') return jax.lax.pdot(x, y, axis_name=pdot_spec.contract_names, pos_batch=pdot_spec.pos_batch_after_mapping, pos_contract=pdot_spec.pos_contract_after_mapping) fun = xmap(pdot_fun, in_axes=[pdot_spec.lhs_in_axes, pdot_spec.rhs_in_axes], out_axes=[*pdot_spec.batch_names, ...], axis_resources=axis_resources) with jtu.with_mesh(mesh_data): result = fun(lhs, rhs) expected = lax.dot_general(lhs, rhs, pdot_spec.dot_general_dim_nums) tol = 1e-1 if jtu.device_under_test() == "tpu" else None self.assertAllClose(result, expected, check_dtypes=False, atol=tol, rtol=tol) @parameterized.named_parameters(jtu.named_cases_from_sampler(lambda s: ({ "testcase_name": f"_{next(test_counter)}", "lhs_shape": lhs_shape, "rhs_shape": rhs_shape, "pdot_spec": pdot_spec, "axis_resources": axis_resources, "mesh_data": mesh_data } for test_counter in [it.count()] for lhs_shape, rhs_shape in s(product([(2,), (2, 4, 2, 1)], repeat=2)) for pdot_spec in s(all_pdot_specs(lhs_shape, rhs_shape)) for axis_resources, mesh_data in s(schedules_from_pdot_spec( pdot_spec, lhs_shape, rhs_shape)) ))) def testPdotVJPSystematic(self, lhs_shape, rhs_shape, pdot_spec, axis_resources, mesh_data): rng = jtu.rand_default(self.rng()) lhs = rng(lhs_shape, np.float32) rhs = rng(rhs_shape, np.float32) expected_out, ref_vjp = jax.vjp( lambda x, y: lax.dot_general(x, y, pdot_spec.dot_general_dim_nums), lhs, rhs) out_bar = rng(expected_out.shape, np.float32) expected_lhs, expected_rhs = ref_vjp(out_bar) def pdot_fun(x, y, out_bar): pdot = partial(jax.lax.pdot, axis_name=pdot_spec.contract_names, pos_batch=pdot_spec.pos_batch_after_mapping, pos_contract=pdot_spec.pos_contract_after_mapping) _, pdot_vjp = jax.vjp(pdot, x, y) return pdot_vjp(out_bar) fun = xmap(pdot_fun, in_axes=[pdot_spec.lhs_in_axes, pdot_spec.rhs_in_axes, [*pdot_spec.batch_names, ...]], out_axes=(pdot_spec.lhs_in_axes, pdot_spec.rhs_in_axes), axis_resources=axis_resources) with jtu.with_mesh(mesh_data): lhs_bar, rhs_bar = fun(lhs, rhs, out_bar) tol = 1e-1 if jtu.device_under_test() == "tpu" else None self.assertAllClose(lhs_bar, expected_lhs, check_dtypes=False, atol=tol, rtol=tol) self.assertAllClose(rhs_bar, expected_rhs, check_dtypes=False, atol=tol, rtol=tol) def test_xeinsum_vector_dot(self): rng = np.random.RandomState(0) x = rng.randn(3) y = rng.randn(3) out = xmap(partial(jnp.einsum, '{i},{i}->'), in_axes=(['i'], ['i']), out_axes=[])(x, y) expected = np.einsum('i,i->', x, y) self.assertAllClose(out, expected, check_dtypes=False) def test_xeinsum_outer_product(self): rng = np.random.RandomState(0) x = rng.randn(3) y = rng.randn(3) out = xmap(partial(jnp.einsum, '{i},{j}->{i,j}'), in_axes=(['i'], ['j']), out_axes=['i', 'j'])(x, y) expected = np.einsum('i,j->ij', x, y) self.assertAllClose(out, expected, check_dtypes=True) def test_xeinsum_matmul(self): rng = np.random.RandomState(0) x = rng.randn(3, 4) y = rng.randn(4, 5) def check(spec): out = xmap(partial(jnp.einsum, spec), in_axes=(['i', 'j'], ['j', 'k']), out_axes=['i', 'k'])(x, y) expected = np.einsum('ij,jk->ik', x, y) tol = 1e-1 if jtu.device_under_test() == "tpu" else None self.assertAllClose(out, expected, check_dtypes=True, atol=tol, rtol=tol) check('{i,j},{j,k}->{i,k}') check('{i,j},{k,j}->{k,i}') # order of named axes in the spec doesn't matter! check('{j},{k,j}->{k}') check('{i,j},{j}->{i}') check('{j},{j}->{}') def test_xeinsum_no_named_axes_vector_dot(self): rng = np.random.RandomState(0) x = rng.randn(3) y = rng.randn(3) out = jnp.einsum('i,i->', x, y, _use_xeinsum=True) expected = np.einsum('i,i->', x, y) self.assertAllClose(out, expected, check_dtypes=False) def test_xeinsum_no_named_axes_batch_vector_dot(self): rng = np.random.RandomState(0) x = rng.randn(3, 2) y = rng.randn(3, 2) out = jnp.einsum('ij,ij->i', x, y, _use_xeinsum=True) expected = np.einsum('ij,ij->i', x, y) self.assertAllClose(out, expected, check_dtypes=True) def test_xeinsum_no_named_axes_reduce_sum(self): rng = np.random.RandomState(0) x = rng.randn(3) y = rng.randn() out = jnp.einsum('i,->', x, y, _use_xeinsum=True) expected = np.einsum('i,->', x, y) self.assertAllClose(out, expected, check_dtypes=True) class XMapErrorTest(jtu.JaxTestCase): @jtu.with_mesh([('x', 2)]) def testRepeatedAxisResource(self): def f(v): return v * 4 with self.assertRaisesRegex(ValueError, r"distinct resources.*specified \('x', 'x'\) for axis a"): fxy = xmap(f, in_axes=['a', ...], out_axes=['a', ...], axis_resources={'a': ('x', 'x')}) @jtu.with_mesh([('x', 2)]) def testNestedDifferentResources(self): @partial(xmap, in_axes={0: 'a'}, out_axes={0: 'a'}, axis_resources={'a': 'x'}) def f(x): with mesh(np.empty((), dtype=np.object_), ()): @partial(xmap, in_axes={0: 'b'}, out_axes={0: 'b'}) def h(x): return x return h(x) xshape = (2, 5, 6) x = jnp.arange(np.prod(xshape)).reshape(xshape) with self.assertRaisesRegex(RuntimeError, "Changing the physical mesh is not allowed.*"): f(x) def testEmptyArgumentTrees(self): with self.assertRaisesRegex(ValueError, "Failed to infer size of axes: i."): xmap(lambda x: x, in_axes=['i', ...], out_axes=['i', ...])({}) @jtu.with_mesh([('x', 2), ('y', 2)]) def testAxesNotDivisibleByResources(self): with self.assertRaisesRegex(ValueError, r"Size of axis i \(5\) is not divisible.*" r"\(\('x', 'y'\), 4 in total\)"): xmap(lambda x: x, in_axes=['i', ...], out_axes=['i', ...], axis_sizes={'i': 5}, axis_resources={'i': ('x', 'y')})({}) def testInconsistentAxisSizes(self): x5 = jnp.arange(5) x6 = jnp.arange(6) error = (r"The size of axis i was previously inferred to be 5, but found an " r"argument of shape \(6,\) with in_axes specification \['i', ...\]. " r"Shape mismatch occurs in dimension 0: 6 != 5") with self.assertRaisesRegex(ValueError, error): xmap(lambda x, y: x, in_axes=(['i', ...], ['i', ...]), out_axes=['i', ...])(x5, x6) with self.assertRaisesRegex(ValueError, error): xmap(lambda x: x, in_axes=['i', ...], out_axes=['i', ...], axis_sizes={'i': 5})(x6) def testInAxesRankError(self): error = (r"One of xmap arguments has an in_axes specification of \['i', 'j', ...\], " r"which implies that it has at least 2 dimensions, but the argument has rank 1") with self.assertRaisesRegex(ValueError, error): xmap(lambda x: x, in_axes=['i', 'j', ...], out_axes=['j', 'i', ...])(jnp.ones((5,))) def testOutAxesRankError(self): error = (r"One of xmap outputs has an out_axes specification of {1: 'i'}, " r"which requires the result of the xmapped function to have at least " r"1 positional dimensions, but it only has 0") with self.assertRaisesRegex(ValueError, error): xmap(lambda x: x, in_axes=['i', ...], out_axes={1: 'i'})(jnp.ones((5,))) def testNegativeAxes(self): with self.assertRaisesRegex(ValueError, "xmap doesn't support negative axes in in_axes"): xmap(lambda x: x, in_axes={-1: 'i'}, out_axes={0: 'i'})(jnp.ones((5,))) with self.assertRaisesRegex(ValueError, "xmap doesn't support negative axes in out_axes"): xmap(lambda x: x, in_axes={0: 'i'}, out_axes={-1: 'i'})(jnp.ones((5,))) def testDictOutAxes(self): # see issue #6410 out = xmap(lambda x: x, in_axes=[...], out_axes={"a": [...]})({"a": 1}) self.assertEqual(out, {"a": 1}) def testListAxesRankAssertion(self): error = (r"xmap argument has an in_axes specification of \['i', None\], which " r"asserts that it should be of rank 2, but the argument has rank 1 " r"\(and shape \(5,\)\)") with self.assertRaisesRegex(ValueError, error): xmap(lambda x: x, in_axes=['i', None], out_axes=['i', None])(jnp.ones((5,))) error = (r"xmap output has an out_axes specification of \['i', None\], which " r"asserts that it should be of rank 2, but the output has rank 3 " r"\(and shape \(5, 2, 2\)\)") with self.assertRaisesRegex(ValueError, error): xmap(lambda x: x.reshape((2, 2)), in_axes=['i', None], out_axes=['i', None])(jnp.ones((5, 4))) def testReturnExtraMappedAxes(self): fm = xmap(lambda x, y: x + y, in_axes=(['a', ...], ['b', ...]), out_axes=['a', ...]) x = np.arange(12).reshape((4, 3)) y = np.arange(6).reshape((2, 3)) error = (r"One of xmap results has an out_axes specification of \['a', ...\], but " r"is actually mapped along more axes defined by this xmap call: b") with self.assertRaisesRegex(TypeError, error): fm(x, y) @jtu.with_mesh([('x', 2)]) def testResourceConflictArgs(self): fm = xmap(lambda x: lax.psum(x, ('a', 'b')), in_axes=['a', 'b'], out_axes=[], axis_resources={'a': 'x', 'b': 'x'}) x = np.arange(16).reshape(4, 4) error = (r"Axes `a` and `b` are both mapped to the resource `x`, but they " r"coincide in the named_shape of an input to an xmapped function " r"") with self.assertRaisesRegex(JAXTypeError, error): fm(x) @jtu.with_mesh([('x', 2)]) def testResourceConflictInner(self): fm = xmap(lambda x, y: x + y, in_axes=(['a', ...], ['b', ...]), out_axes=['a', 'b', ...], axis_resources={'a': 'x', 'b': 'x'}) x = np.arange(12).reshape(4, 3) y = np.arange(6).reshape(2, 3) error = (r"Axes `a` and `b` are both mapped to the resource `x`, but they " r"coincide in the named_shape.*primitive add created at") with self.assertRaisesRegex(JAXTypeError, error): fm(x, y) @jtu.with_mesh([('x', 2)]) def testResourceConflictOut(self): fm = xmap(lambda x, y: x, in_axes=(['a', ...], ['b', ...]), out_axes=['a', 'b', ...], axis_resources={'a': 'x', 'b': 'x'}) x = np.arange(12).reshape(4, 3) y = np.arange(6).reshape(2, 3) error = (r"One of xmapped function \(\) outputs is broadcast along axis " r"`b` which is assigned to resources `x`, but the output is already " r"partitioned along `x`, because its named shape contains `a`") with self.assertRaisesRegex(JAXTypeError, error): fm(x, y) @jtu.with_mesh([('x', 2)]) def testResourceConflictNestArgs(self): f = xmap(lambda x: x, in_axes=['i'], out_axes=['i'], axis_resources={'i': 'x'}) h = xmap(f, in_axes=['j', ...], out_axes=['j', ...], axis_resources={'j': 'x'}) x = np.arange(16).reshape((4, 4)) error = (r"Axes `i` and `j` are both mapped to the resource `x`, but they " r"coincide in the named_shape of an input to an xmapped function " r" \(xmap called at .*\)") with self.assertRaisesRegex(JAXTypeError, error): h(x) @jtu.with_mesh([('x', 2)]) def testResourceConflictNestInner(self): f = xmap(lambda x: lax.axis_index('i') + x, in_axes=[], out_axes=['i'], axis_sizes={'i': 4}, axis_resources={'i': 'x'}) h = xmap(f, in_axes=['j', ...], out_axes=['j', ...], axis_resources={'j': 'x'}) x = np.arange(4) error = (r"Axes `i` and `j` are both mapped to the resource `x`, but they " r"coincide in the named_shape of a value returned from a primitive " r"add created at .*") with self.assertRaisesRegex(JAXTypeError, error): h(x) @jtu.with_mesh([('x', 2)]) def testResourceConflictNestOut(self): f = xmap(lambda x: x, in_axes=[], out_axes=['i'], axis_sizes={'i': 4}, axis_resources={'i': 'x'}) h = xmap(f, in_axes=['j', ...], out_axes=['j', ...], axis_resources={'j': 'x'}) x = np.arange(4) error = (r"One of xmapped function \(\) outputs is broadcast along " r"axis `i` which is assigned to resources `x`, but the output is " r"already partitioned along `x`, because its named shape contains `j`") with self.assertRaisesRegex(JAXTypeError, error): h(x) @serial_loop('l', 2) def testResourceConflictArgsLoop(self): fm = xmap(lambda x: x, in_axes=['a', 'b'], out_axes=['a', 'b'], axis_resources={'a': 'l', 'b': 'l'}) x = np.arange(16).reshape(4, 4) error = (r"Axes `a` and `b` are both mapped to the resource `l`, but they " r"coincide in the named_shape of an input to an xmapped function " r"") with self.assertRaisesRegex(JAXTypeError, error): fm(x) @serial_loop('l', 2) def testLoopCollectives(self): fm = xmap(lambda x: lax.psum(x, 'i'), in_axes=['i'], out_axes=[], axis_resources={'i': 'l'}) x = np.arange(16) error = (r"Named axes with loop resources assigned to them cannot be " r"referenced inside the xmapped computation \(e.g. in " r"collectives\), but `i` violates that rule") with self.assertRaisesRegex(RuntimeError, error): fm(x) class NamedAutodiffTests(jtu.JaxTestCase): def testVjpReduceAxes(self): def f(w, x): return jnp.sin(jnp.dot(x, w)) def vjp_f(w, x, gy): _, pullback = jax.vjp(f, w, x) return pullback(gy) def vjp_f_reduced(w, x, gy): _, pullback = jax.vjp(f, w, x, reduce_axes=('batch',)) return pullback(gy) w = np.arange(12, dtype=np.float32).reshape(3, 4) x = np.arange(6, dtype=np.float32).reshape(2, 3) gy = np.arange(8, dtype=np.float32).reshape(2, 4) # per-example error = (r"One of xmap results has an out_axes specification of {}, but is " r"actually mapped along more axes defined by this xmap call: " r"batch") with self.assertRaisesRegex(TypeError, error): xmap(vjp_f, in_axes=({}, {0: 'batch'}, {0: 'batch'}), out_axes=({}, {0: 'batch'}))(w, x, gy) out = xmap(vjp_f, in_axes=({}, {0: 'batch'}, {0: 'batch'}), out_axes=({0: 'batch'}, {0: 'batch'}))(w, x, gy) expected = vmap(vjp_f, in_axes=(None, 0, 0), out_axes=(0, 0))(w, x, gy) self.assertAllClose(out, expected, check_dtypes=True) # reduced out = xmap(vjp_f_reduced, in_axes=({}, {0: 'batch'}, {0: 'batch'}), out_axes=({}, {0: 'batch'}))(w, x, gy) # the reduced VJP is also the VJP when using a positional batch axis expected = vjp_f(w, x, gy) self.assertAllClose(out, expected, check_dtypes=True) def testVjpReduceAxesCollective(self): # lax.psum has the wrong transpose, so test with a corrected version for now @functools.partial(jax.custom_vjp, nondiff_argnums=(1,)) def psum_idrev(x, axis_name: Optional[AxisNames] = None): if axis_name is None: return x return jax.lax.psum(x, axis_name) def psum_idrev_fwd(x, axis_name): return psum_idrev(x, axis_name), None def psum_idrev_bwd(axis_name, res, g): del axis_name, res return (g,) psum_idrev.defvjp(psum_idrev_fwd, psum_idrev_bwd) def f_named(w, x): return psum_idrev(jnp.sin(jnp.dot(x, w)).sum(), 'batch') def f_positional(w, x): return jnp.sin(jnp.dot(x, w)).sum() w = np.arange(12, dtype=np.float32).reshape(3, 4) x = np.arange(6, dtype=np.float32).reshape(2, 3) # forward out = xmap(f_named, in_axes=({}, {0: 'batch'}), out_axes={})(w, x) expected = f_positional(w, x) self.assertAllClose(out, expected, check_dtypes=True) # gradient out = xmap(jax.grad(f_named, (0, 1), reduce_axes=('batch',)), in_axes=({}, {0: 'batch'}), out_axes=({}, {0: 'batch'}))(w, x) expected = jax.grad(f_positional, (0, 1))(w, x) self.assertAllClose(out, expected, check_dtypes=True) if __name__ == '__main__': absltest.main(testLoader=jtu.JaxTestLoader())