# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import operator as op
import numpy as np
from typing import (Sequence, Tuple, Callable, Union, Optional, cast, List,
NamedTuple, Mapping, TYPE_CHECKING)
from jax import core
from jax._src import abstract_arrays
from jax._src import ad_util
from jax._src import api_util
from jax._src import dispatch
from jax._src import dtypes
from jax._src.lax import lax as lax_internal
from jax._src.config import config
from jax._src.util import prod, safe_zip
from jax._src.lib import xla_client as xc
from jax._src.api import device_put
from jax._src.numpy.ndarray import ndarray
from jax.interpreters import pxla, xla, mlir
from jax.experimental.sharding import (
Sharding, SingleDeviceSharding, XLACompatibleSharding, PmapSharding,
device_replica_id_map)
Shape = Tuple[int, ...]
Device = xc.Device
DeviceArray = xc.Buffer
Index = Tuple[slice, ...]
ArrayLike = Union[np.ndarray, DeviceArray]
class _ArrayFastPathArgs(NamedTuple):
devices_indices_map: Mapping[Device, Optional[Index]]
addressable_device_assignment: Sequence[Device]
class Shard:
"""A single data shard of an Array.
Attributes:
device : Which device this shard resides on.
index : The index into the global array of this shard.
replica_id : Integer id indicating which replica of the global array this
shard is part of. Always 0 for fully sharded data
      (i.e. when there's only 1 replica).
data : The data of this shard. None if ``device`` is non-local.
"""
def __init__(self, device: Device, sharding: Sharding, global_shape: Shape,
data: Optional[Array] = None,
_fast_path_args: Optional[_ArrayFastPathArgs] = None):
self.device = device
self._sharding = sharding
self._global_shape = global_shape
self.data = data
self._fast_path_args = _fast_path_args
def __repr__(self):
try:
return (f'Shard(device={repr(self.device)}, index={self.index}, '
f'replica_id={self.replica_id}, data={self.data})')
except ValueError:
return f'Shard(device={repr(self.device)}, data={self.data})'
@property
def index(self) -> Index:
if self._fast_path_args is None:
try:
device_indices_map_fn = self._sharding.devices_indices_map
except AttributeError:
raise ValueError('Cannot calculate indices from sharding: '
f'{self._sharding}. Please create a device to index '
'mapping for your sharding.') from None
index = device_indices_map_fn(self._global_shape)[self.device]
else:
index = self._fast_path_args.devices_indices_map[self.device]
assert index is not None
return index
@property
def replica_id(self) -> int:
return device_replica_id_map(self._sharding, self._global_shape)[self.device]
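# A minimal sketch of how `Shard` objects are typically inspected (an
# illustrative example, assuming the `jax_array` config flag is enabled so
# that jax.numpy produces `Array` values; the devices shown depend on the
# backend):
#
#   import jax
#   import jax.numpy as jnp
#
#   arr = jnp.arange(8.)
#   for shard in arr.addressable_shards:  # see `Array.addressable_shards`
#     print(shard.device, shard.index, shard.replica_id)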
def _reconstruct_array(fun, args, arr_state, aval_state):
"""Method to reconstruct a device array from a serialized state."""
np_value = fun(*args)
np_value.__setstate__(arr_state)
jnp_value = device_put(np_value)
jnp_value.aval = jnp_value.aval.update(**aval_state)
return jnp_value
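# A minimal sketch of the pickling round trip that `_reconstruct_array` and
# `Array.__reduce__` (below) enable, assuming a default device is available:
#
#   import pickle
#   import jax.numpy as jnp
#
#   arr = jnp.ones((2, 2))
#   restored = pickle.loads(pickle.dumps(arr))
#   assert (restored == arr).all()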
@pxla.use_cpp_class(xc.Array if xc._version >= 92 else None)
class Array:
# TODO(yashkatariya): Add __slots__ here.
@pxla.use_cpp_method
def __init__(self, aval: core.ShapedArray, sharding: Sharding,
arrays: Union[Sequence[DeviceArray], Sequence[Array]],
committed: bool, _skip_checks: bool = False,
_fast_path_args: Optional[_ArrayFastPathArgs] = None):
# NOTE: the actual implementation of the constructor is moved to C++.
self.aval = aval
self._sharding = sharding
# Extract DeviceArrays from arrays with `SingleDeviceSharding` to keep the
# code handling `self._arrays` simpler.
# TODO(yashkatariya): This will be slower as it will happen during
    # `__init__` in a single-controller environment. Make it lazy.
self._arrays: List[DeviceArray] = [a if isinstance(a, DeviceArray) else a._arrays[0]
for a in arrays]
# See https://jax.readthedocs.io/en/latest/faq.html#controlling-data-and-computation-placement-on-devices
# for what committed means.
self._committed = committed
# Optionally precomputed for performance.
self._fast_path_args = _fast_path_args
self._npy_value = None
if not _skip_checks or config.jax_enable_checks:
self._check()
# Don't rearrange if skip_checks is enabled because this assumes that the
# input buffers are already arranged properly. This usually happens when
    # Arrays are created as the output of a JAX transformation
    # (like pjit, xmap, etc.).
if not _skip_checks:
self._rearrange()
def _check(self):
ss = self.sharding.shard_shape(self.shape)
for db in self._arrays:
if db.shape != ss:
raise ValueError(
f"Expected shard shape {ss} doesn't match the buffer "
f"shape {db.shape} for buffer: {db}")
for db in self._arrays:
if db.dtype != self.dtype:
raise ValueError(
"Input buffers to `Array` must have matching dtypes. "
f"Got {db.dtype}, expected {self.dtype} for buffer: {db}")
def _rearrange(self):
# Rearrange arrays based on the device assignment.
# TODO(yashkatariya): Add a similar check for shardings that are not
    # XLACompatibleSharding, but leave the rearrangement to
    # XLACompatibleSharding only.
if isinstance(self.sharding, XLACompatibleSharding):
if self._fast_path_args is None:
addressable_da = cast(XLACompatibleSharding, self.sharding)._addressable_device_assignment
else:
addressable_da = self._fast_path_args.addressable_device_assignment
if len(self._arrays) != len(addressable_da):
raise ValueError(
f"Expected {len(addressable_da)} per-device arrays "
"(this is how many devices are addressable by the sharding), but "
f"got {len(self._arrays)}")
device_to_buffer = {db.device().id: db for db in self._arrays}
try:
self._arrays = [device_to_buffer[device.id]
for device in addressable_da]
except KeyError as e:
array_device_ids = set(a.device().id for a in self._arrays)
addressable_device_ids = set(d.id for d in addressable_da)
        diff = array_device_ids - addressable_device_ids
raise ValueError(
f"Some per-device arrays are placed on devices {diff}, which are "
f"not used in the specified sharding {self.sharding}") from e
@property
def shape(self) -> Shape:
return self.aval.shape
@property
def dtype(self):
return self.aval.dtype
@property
def ndim(self):
return len(self.shape)
@property
def size(self):
return prod(self.shape)
@property
def sharding(self):
return self._sharding
def __str__(self):
return str(self._value)
def __len__(self):
try:
return self.shape[0]
except IndexError as err:
raise TypeError("len() of unsized object") from err # same as numpy error
def __bool__(self):
return bool(self._value)
def __nonzero__(self):
return bool(self._value)
def __float__(self):
return self._value.__float__()
def __int__(self):
return self._value.__int__()
def __complex__(self):
return self._value.__complex__()
def __hex__(self):
assert self.ndim == 0, 'hex only works on scalar values'
return hex(self._value) # type: ignore
def __oct__(self):
assert self.ndim == 0, 'oct only works on scalar values'
return oct(self._value) # type: ignore
def __index__(self):
return op.index(self._value)
def tobytes(self, order="C"):
return self._value.tobytes(order)
def tolist(self):
return self._value.tolist()
def __format__(self, format_spec):
# Simulates behavior of https://github.com/numpy/numpy/pull/9883
if self.ndim == 0:
return format(self._value[()], format_spec)
else:
return format(self._value, format_spec)
def __getitem__(self, idx):
from jax._src.numpy import lax_numpy
self._check_if_deleted()
if dispatch.is_single_device_sharding(self.sharding):
return lax_numpy._rewriting_take(self, idx)
    # TODO(yashkatariya): Make this work for other Shardings too, wherever
    # it's possible to avoid data movement.
elif isinstance(self.sharding, PmapSharding):
if not isinstance(idx, tuple):
cidx = (idx,) + (slice(None),) * (len(self.shape) - 1)
else:
cidx = idx + (slice(None),) * (len(self.shape) - len(idx))
if self._npy_value is None:
if self._fast_path_args is None:
indices = tuple(self.sharding.devices_indices_map(self.shape).values())
else:
indices = tuple(self._fast_path_args.devices_indices_map.values())
try:
buf_idx = indices.index(cidx)
except ValueError:
buf_idx = None
if buf_idx is not None:
buf = self._arrays[buf_idx]
aval = core.ShapedArray(buf.xla_shape().dimensions(), self.dtype)
return Array(aval, SingleDeviceSharding(buf.device()), [buf],
committed=False, _skip_checks=True)
return lax_numpy._rewriting_take(self, idx)
else:
# TODO(yashkatariya): Don't bounce to host and use `_rewriting_take` or
# the fast path (see PmapSharding branch above) after b/245667823 is
# fixed.
return self._value[idx]
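  # An illustrative sketch of the `PmapSharding` fast path above (assumes more
  # than one local device, e.g. via the
  # `--xla_force_host_platform_device_count` XLA flag on CPU):
  #
  #   import jax
  #   import jax.numpy as jnp
  #
  #   out = jax.pmap(lambda x: x * 2)(jnp.arange(jax.local_device_count()))
  #   shard = out[0]  # served from the matching per-device buffer, no copy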
def __iter__(self):
if self.ndim == 0:
raise TypeError("iteration over a 0-d array") # same as numpy error
else:
assert self.is_fully_replicated() or self.is_fully_addressable()
if dispatch.is_single_device_sharding(self.sharding):
return (sl for chunk in self._chunk_iter(100) for sl in chunk._unstack()) # type: ignore
elif isinstance(self.sharding, PmapSharding):
return (self[i] for i in range(self.shape[0])) # type: ignore
else:
# TODO(yashkatariya): Don't bounce to host and use `_chunk_iter` path
# here after b/245667823 is fixed.
return (self._value[i] for i in range(self.shape[0]))
def item(self):
if dtypes.issubdtype(self.dtype, np.complexfloating):
return complex(self)
elif dtypes.issubdtype(self.dtype, np.floating):
return float(self)
elif dtypes.issubdtype(self.dtype, np.integer):
return int(self)
elif dtypes.issubdtype(self.dtype, np.bool_):
return bool(self)
else:
raise TypeError(self.dtype)
def is_fully_replicated(self) -> bool:
return self.shape == self._arrays[0].shape
def __repr__(self):
prefix = '{}('.format(self.__class__.__name__.lstrip('_'))
if self.aval is not None and self.aval.weak_type:
dtype_str = f'dtype={self.dtype.name}, weak_type=True)'
else:
dtype_str = f'dtype={self.dtype.name})'
if self.is_fully_addressable() or self.is_fully_replicated():
line_width = np.get_printoptions()["linewidth"]
s = np.array2string(self._value, prefix=prefix, suffix=',',
separator=', ', max_line_width=line_width)
last_line_len = len(s) - s.rfind('\n') + 1
sep = ' '
if last_line_len + len(dtype_str) + 1 > line_width:
sep = ' ' * len(prefix)
return f"{prefix}{s},{sep}{dtype_str}"
else:
return f"{prefix}{self.shape}, {dtype_str}"
def is_fully_addressable(self) -> bool:
return self.sharding.is_fully_addressable()
def __array__(self, dtype=None, context=None):
return np.asarray(self._value, dtype=dtype)
def __dlpack__(self):
from jax.dlpack import to_dlpack # pylint: disable=g-import-not-at-top
return to_dlpack(self)
def __reduce__(self):
fun, args, arr_state = self._value.__reduce__() # type: ignore
aval_state = {'weak_type': self.aval.weak_type,
'named_shape': self.aval.named_shape}
return (_reconstruct_array, (fun, args, arr_state, aval_state))
def unsafe_buffer_pointer(self):
assert len(self._arrays) == 1
return self._arrays[0].unsafe_buffer_pointer()
@property
def __cuda_array_interface__(self):
assert len(self._arrays) == 1
return self._arrays[0].__cuda_array_interface__ # pytype: disable=attribute-error # bind-properties
# TODO(yashkatariya): Remove this method when everyone is using devices().
def device(self) -> Device:
self._check_if_deleted()
device_set = self.sharding.device_set
if len(device_set) == 1:
single_device, = device_set
return single_device
    raise ValueError('Array spans more than one device. '
                     'Please use `.devices()` instead.')
def devices(self) -> List[Device]:
self._check_if_deleted()
return list(self.sharding.device_set)
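  # An illustrative contrast between the two accessors above (the devices
  # returned depend on the backend and sharding):
  #
  #   arr.devices()  # always valid: a list of every device in the sharding
  #   arr.device()   # only valid when the array lives on exactly one device;
  #                  # raises ValueError otherwise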
@pxla.maybe_cached_property
def addressable_shards(self) -> Sequence[Shard]:
self._check_if_deleted()
out = []
for db in self._arrays:
db = pxla._set_aval(db)
device = db.device()
# Wrap the device arrays in `Array` until C++ returns an Array instead
# of a DA.
array = Array(db.aval, SingleDeviceSharding(device), [db],
committed=self._committed, _skip_checks=True)
out.append(Shard(
device, self.sharding, self.shape, array, self._fast_path_args))
return out
def delete(self):
if self._arrays is None:
return
for buf in self._arrays:
buf.delete()
self._arrays = None
self._npy_value = None
def is_deleted(self):
if self._arrays is None:
return True
# This path is taken when a view of `Array` is created and the original
# Array is deleted. In that case, the buffers the view represents also get
# deleted.
return any(buf.is_deleted() for buf in self._arrays)
def _check_if_deleted(self):
if self._arrays is None:
raise RuntimeError("Array has been deleted.")
@pxla.use_cpp_method
def block_until_ready(self):
self._check_if_deleted()
for db in self._arrays:
db.block_until_ready()
return self
def copy_to_host_async(self):
self._check_if_deleted()
if self._npy_value is None:
try:
self.addressable_shards[0].replica_id
replica_id_exists = True
except ValueError:
replica_id_exists = False
for s in self.addressable_shards:
if not replica_id_exists or s.replica_id == 0:
s.data._arrays[0].copy_to_host_async() # pytype: disable=attribute-error
@property
def _value(self) -> np.ndarray:
self._check_if_deleted()
if self._npy_value is None:
if self.is_fully_replicated():
self._npy_value = np.asarray(self._arrays[0]) # type: ignore
return cast(np.ndarray, self._npy_value)
if not self.is_fully_addressable():
raise RuntimeError("Fetching value for `jax.Array` that spans "
"non-addressable devices is not possible. You can use "
"`jax.experimental.multihost_utils.process_allgather` "
"for this use case.")
self.copy_to_host_async()
npy_value = np.empty(self.shape, self.dtype)
try:
self.addressable_shards[0].replica_id
replica_id_exists = True
except ValueError:
replica_id_exists = False
for s in self.addressable_shards:
if not replica_id_exists or s.replica_id == 0:
npy_value[s.index] = np.asarray(s.data._arrays[0]) # type: ignore # [union-attr]
self._npy_value = npy_value # type: ignore
# https://docs.python.org/3/library/typing.html#typing.cast
return cast(np.ndarray, self._npy_value)
# Explicitly set `Array` to be unhashable, same as what device_array.py does.
setattr(Array, "__hash__", None)
setattr(Array, "__array_priority__", 100)
def make_array_from_callback(shape: Shape, sharding: Sharding,
data_callback: Callable[[Optional[Index]], ArrayLike]) -> Array:
device_to_index_map = sharding.devices_indices_map(shape)
# Use addressable_devices here instead of `_addressable_device_assignment`
# because `_addressable_device_assignment` is only available on
# `XLACompatibleSharding` and this function is supposed to work for every
# `Sharding`.
arrays = [
device_put(data_callback(device_to_index_map[device]), device)
for device in sharding.addressable_devices
]
aval = core.ShapedArray(shape, arrays[0].dtype, weak_type=False)
return Array(aval, sharding, arrays, committed=True)
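# A minimal sketch of `make_array_from_callback` usage (assumes a
# single-process setup where every device in the sharding is addressable;
# `SingleDeviceSharding` is imported at the top of this file):
#
#   import numpy as np
#   import jax
#
#   shape = (8, 2)
#   global_data = np.arange(np.prod(shape)).reshape(shape)
#   sharding = SingleDeviceSharding(jax.devices()[0])
#   arr = make_array_from_callback(
#       shape, sharding, lambda index: global_data[index])
#   assert arr.shape == shape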
core.pytype_aval_mappings[Array] = abstract_arrays.canonical_concrete_aval
xla.pytype_aval_mappings[Array] = op.attrgetter('aval')
xla.canonicalize_dtype_handlers[Array] = pxla.identity
api_util._shaped_abstractify_handlers[Array] = op.attrgetter('aval')
ad_util.jaxval_adders[Array] = lax_internal.add
ad_util.jaxval_zeros_likers[Array] = lax_internal.zeros_like_array
ndarray.register(Array)
def _array_mlir_constant_handler(val, canonicalize_types=True):
return mlir.ir_constants(val._value,
canonicalize_types=canonicalize_types)
mlir.register_constant_handler(Array, _array_mlir_constant_handler)
def _device_put_array(x, device: Optional[Device]):
# TODO(yashkatariya): Remove this restriction and the round trip via host
# once lowering to XLA goes through `lower_mesh_computation`.
assert x.is_fully_addressable()
if dispatch.is_single_device_sharding(x.sharding):
x = dispatch._copy_device_array_to_device(pxla._set_aval(x._arrays[0]), device)
return (x,)
else:
# Round trip via host if x is sharded. SDA also does a round trip via host.
return dispatch._device_put_array(x._value, device)
dispatch.device_put_handlers[Array] = _device_put_array
def _array_pmap_shard_arg(x, devices, indices, mode):
if dispatch.is_single_device_sharding(x.sharding):
return pxla._shard_device_array(x, devices, indices, mode)
if x._fast_path_args is None:
x_indices = tuple(x.sharding.devices_indices_map(x.shape).values())
else:
x_indices = tuple(x._fast_path_args.devices_indices_map.values())
  # If the sharding of the Array does not match pmap's sharding, take the
  # slow path, which is similar to what SDA does. This slow-path reroute
  # only happens for `pmap`.
if indices == x_indices:
return [buf if buf.device() == d else buf.copy_to_device(d)
for buf, d in safe_zip(x._arrays, devices)]
else:
return pxla._shard_sharded_device_array_slow_path(x, devices, indices, mode)
def _array_shard_arg(x, devices, indices, mode):
if mode == pxla.InputsHandlerMode.pmap:
return _array_pmap_shard_arg(x, devices, indices, mode)
else:
if dispatch.is_single_device_sharding(x.sharding):
return [buf if buf.device() == d else buf.copy_to_device(d)
for buf, d in safe_zip(x._arrays, devices)]
    # If the Array has a PmapSharding, do a round trip via the host. This
    # happens when an input Array with a PmapSharding takes the jit path,
    # i.e. `apply_primitive` or `xla_callable_uncached`. `jit(pmap)` is the
    # most common case where this happens.
elif isinstance(x.sharding, PmapSharding):
return pxla.device_put(x._value, devices, replicate=True)
else:
return x._arrays
pxla.shard_arg_handlers[Array] = _array_shard_arg
def _array_global_result_handler(global_aval, out_sharding, committed,
is_out_sharding_from_xla):
if global_aval.dtype == dtypes.float0:
return lambda _: np.zeros(global_aval.shape, dtypes.float0) # type: ignore
if core.is_opaque_dtype(global_aval.dtype):
return global_aval.dtype._rules.global_sharded_result_handler(
global_aval, out_sharding, committed, is_out_sharding_from_xla)
# Calculate the indices and addressable device assignment once during
# compilation and pass it to the constructor.
_array_fast_path_args = _ArrayFastPathArgs(
out_sharding.devices_indices_map(global_aval.shape),
out_sharding._addressable_device_assignment)
return lambda bufs: Array(global_aval, out_sharding, bufs,
committed=committed, _skip_checks=True,
_fast_path_args=_array_fast_path_args)
pxla.global_result_handlers[(core.ShapedArray, pxla.OutputType.Array)] = _array_global_result_handler
pxla.global_result_handlers[(core.ConcreteArray, pxla.OutputType.Array)] = _array_global_result_handler
pxla.global_result_handlers[(core.AbstractToken, pxla.OutputType.Array)] = lambda *_: lambda *_: core.token
# Only used for Arrays that come out of pmap.
def _array_local_result_handler(aval, sharding, indices):
if core.is_opaque_dtype(aval.dtype):
return aval.dtype._rules.local_sharded_result_handler(
aval, sharding, indices)
# Calculate the indices and addressable device assignment once during
# compilation and pass it to the constructor.
_array_fast_path_args = _ArrayFastPathArgs(
sharding.devices_indices_map(aval.shape),
sharding._addressable_device_assignment)
return lambda bufs: Array(aval, sharding, bufs, committed=True,
_skip_checks=True, _fast_path_args=_array_fast_path_args)
pxla.local_result_handlers[(core.ShapedArray, pxla.OutputType.Array)] = _array_local_result_handler
pxla.local_result_handlers[(core.ConcreteArray, pxla.OutputType.Array)] = _array_local_result_handler