rocm_jax/jax/_src/array.py

# Copyright 2021 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import operator as op
import numpy as np
from typing import Sequence, Tuple, Callable, Union, Optional, cast, List

from jax import core
from jax._src import abstract_arrays
from jax._src import ad_util
from jax._src import api_util
from jax._src import basearray
from jax._src import dispatch
from jax._src import dtypes
from jax._src.lax import lax as lax_internal
from jax._src.config import config
from jax._src.util import prod, safe_zip
from jax._src.lib import xla_client as xc
from jax._src.api import device_put
from jax._src.typing import ArrayLike
from jax.interpreters import pxla, xla, mlir
from jax._src.sharding import (
    Sharding, SingleDeviceSharding, XLACompatibleSharding, PmapSharding,
    device_replica_id_map)

# Shape of an array, as a tuple of ints.
Shape = Tuple[int, ...]
# Alias for the XLA client device type.
Device = xc.Device
# Alias for the XLA client buffer type; `ArrayImpl` keeps a list of these.
DeviceArray = xc.Buffer
# Tuple of slices locating a shard inside the global array.
Index = Tuple[slice, ...]


class Shard:
  """One per-device piece of an Array.

  Attributes:
    device : The device holding this shard.
    index : Where this shard sits inside the global array.
    replica_id : Integer id of the replica of the global array this shard
      belongs to. Always 0 for fully sharded data
      (i.e. when there is only 1 replica).
    data : The shard's data. None if ``device`` is non-local.
  """

  def __init__(self, device: Device, sharding: Sharding, global_shape: Shape,
               data: Optional[ArrayImpl] = None):
    self.data = data
    self.device = device
    self._global_shape = global_shape
    self._sharding = sharding

  def __repr__(self):
    # `index` raises ValueError when the sharding cannot produce a
    # device-to-index mapping; fall back to the short form in that case.
    try:
      described = (f'Shard(device={self.device!r}, index={self.index}, '
                   f'replica_id={self.replica_id}, data={self.data})')
    except ValueError:
      described = f'Shard(device={self.device!r}, data={self.data})'
    return described

  @property
  def index(self) -> Index:
    """Index of this shard in the global array.

    Raises:
      ValueError: if the sharding has no `devices_indices_map` attribute.
    """
    try:
      indices_map_fn = self._sharding.devices_indices_map
    except AttributeError:
      raise ValueError('Cannot calculate indices from sharding: '
                       f'{self._sharding}. Please create a device to index '
                       'mapping for your sharding.') from None
    shard_index = indices_map_fn(self._global_shape)[self.device]
    assert shard_index is not None
    return shard_index

  @property
  def replica_id(self) -> int:
    """Replica id of this shard's device, looked up from the sharding."""
    replica_ids = device_replica_id_map(self._sharding, self._global_shape)
    return replica_ids[self.device]


def _reconstruct_array(fun, args, arr_state, aval_state):
  """Rebuilds an array from the pieces produced by `ArrayImpl.__reduce__`.

  Args:
    fun: Callable that creates the host array when called with `args`.
    args: Positional arguments for `fun`.
    arr_state: State passed to the host array's `__setstate__`.
    aval_state: Keyword arguments applied to the result's aval via `update`.

  Returns:
    The value returned by `device_put`, with its aval updated.
  """
  host_array = fun(*args)
  host_array.__setstate__(arr_state)
  restored = device_put(host_array)
  restored.aval = restored.aval.update(**aval_state)
  return restored


def _single_device_array_from_buf(buf, committed):
  """Wraps one device buffer in an `ArrayImpl` with a `SingleDeviceSharding`.

  Validation is skipped (`_skip_checks=True`).
  """
  buf_with_aval = pxla._set_aval(buf)
  single_sharding = SingleDeviceSharding(buf_with_aval.device())
  return ArrayImpl(buf_with_aval.aval, single_sharding, [buf_with_aval],
                   committed=committed, _skip_checks=True)


@pxla.use_cpp_class(xc.ArrayImpl if xc._version >= 97 else None)
class ArrayImpl(basearray.Array):
  """Concrete array made of per-device buffers plus a sharding.

  Attributes:
    aval: Abstract value carrying the global shape, dtype and weak type.
    _sharding: The sharding passed at construction.
    _arrays: The per-device buffers; set to None by `delete()`.
    _committed: The `committed` flag passed at construction.
    _npy_value: Host copy cached by `_value`, or None if not fetched yet.
  """
  # TODO(yashkatariya): Add __slots__ here.

  aval: core.ShapedArray
  _sharding: Sharding
  _arrays: List[DeviceArray]
  _committed: bool
  _skip_checks: bool
  _npy_value: Optional[np.ndarray]

  @pxla.use_cpp_method
  def __init__(self, aval: core.ShapedArray, sharding: Sharding,
               arrays: Union[Sequence[DeviceArray], Sequence[ArrayImpl]],
               committed: bool, _skip_checks: bool = False):
    """Builds the array from per-device buffers.

    Args:
      aval: Abstract value of the global array.
      sharding: Sharding describing how the array is laid out.
      arrays: Per-device buffers, or `ArrayImpl`s whose first buffer is used.
      committed: Whether the array is committed (see the FAQ link below).
      _skip_checks: If True, validation and rearranging only run when
        `config.jax_enable_checks` is set.
    """
    # NOTE: the actual implementation of the constructor is moved to C++.

    self.aval = aval
    self._sharding = sharding
    # Extract DeviceArrays from arrays with `SingleDeviceSharding` to keep the
    # code handling `self._arrays` simpler.
    # TODO(yashkatariya): This will be slower as it will happen during
    # `__init__` on single controller environment. Make it lazy.
    self._arrays = [a if isinstance(a, DeviceArray) else a._arrays[0] for a in arrays]
    # See https://jax.readthedocs.io/en/latest/faq.html#controlling-data-and-computation-placement-on-devices
    # for what committed means.
    self._committed = committed
    self._npy_value = None

    # Don't rearrange if skip_checks is enabled because this assumes that the
    # input buffers are already arranged properly. This usually happens when
    # Array's are created as output of a JAX transformation
    # (like pjit, xmap, etc).
    if not _skip_checks or config.jax_enable_checks:
      self._check_and_rearrange()

  def _check_and_rearrange(self):
    """Validates the buffers against aval/sharding and reorders them.

    Raises:
      ValueError: if a buffer's dtype differs from the array dtype, if the
        number or device ids of the buffers do not match the sharding's
        addressable devices, or if a buffer's shape is not the shard shape.
    """
    for db in self._arrays:
      if db.dtype != self.dtype:
        raise ValueError(
            "Input buffers to `Array` must have matching dtypes. "
            f"Got {db.dtype}, expected {self.dtype} for buffer: {db}")

    device_id_to_buffer = {db.device().id: db for db in self._arrays}

    addressable_dev = self.sharding.addressable_devices
    if len(self._arrays) != len(addressable_dev):
      raise ValueError(
          f"Expected {len(addressable_dev)} per-device arrays "
          "(this is how many devices are addressable by the sharding), but "
          f"got {len(self._arrays)}")

    array_device_ids = set(device_id_to_buffer)
    addressable_device_ids = {d.id for d in addressable_dev}
    # The device ids of the sharding and of `_arrays` must be the same set.
    if array_device_ids != addressable_device_ids:
      dev_in_sharding_not_in_arrays = addressable_device_ids - array_device_ids
      dev_in_arrays_not_in_sharding = array_device_ids - addressable_device_ids
      err_msg = (
          "Addressable devices and per-device arrays devices do not match.")
      if dev_in_sharding_not_in_arrays:
        err_msg += (f" Sharding contains devices {dev_in_sharding_not_in_arrays} "
                    "that are not present in per-device arrays.")
      if dev_in_arrays_not_in_sharding:
        err_msg += (f" Per-device arrays contain devices {dev_in_arrays_not_in_sharding} "
                    "that are not present in the sharding.")
      raise ValueError(err_msg)

    ss = self.sharding.shard_shape(self.shape)
    for db in self._arrays:
      if db.shape != ss:
        raise ValueError(
            f"Expected shard shape {ss} doesn't match the buffer "
            f"shape {db.shape} for buffer: {db}")

    # Rearrange arrays based on the device assignment.
    if isinstance(self.sharding, XLACompatibleSharding):
      addressable_da = self.sharding._addressable_device_assignment
      self._arrays = [device_id_to_buffer[device.id] for device in addressable_da]

  @property
  def shape(self) -> Shape:
    """Global shape, taken from the aval."""
    return self.aval.shape

  @property
  def dtype(self):
    """Dtype, taken from the aval."""
    return self.aval.dtype

  @property
  def ndim(self):
    """Number of dimensions of the global shape."""
    return len(self.shape)

  @property
  def size(self):
    """Product of the global shape's dimensions."""
    return prod(self.shape)

  @property
  def sharding(self):
    """The sharding passed at construction."""
    return self._sharding

  def __str__(self):
    return str(self._value)

  def __len__(self):
    try:
      return self.shape[0]
    except IndexError as err:
      raise TypeError("len() of unsized object") from err  # same as numpy error

  def __bool__(self):
    return bool(self._value)

  def __nonzero__(self):
    return bool(self._value)

  def __float__(self):
    return self._value.__float__()

  def __int__(self):
    return self._value.__int__()

  def __complex__(self):
    return self._value.__complex__()

  def __hex__(self):
    assert self.ndim == 0, 'hex only works on scalar values'
    return hex(self._value)  # type: ignore

  def __oct__(self):
    assert self.ndim == 0, 'oct only works on scalar values'
    return oct(self._value)  # type: ignore

  def __index__(self):
    return op.index(self._value)

  def tobytes(self, order="C"):
    """Returns the bytes of the host value, delegating to its `tobytes`."""
    return self._value.tobytes(order)

  def tolist(self):
    """Returns the host value converted with its `tolist`."""
    return self._value.tolist()

  def __format__(self, format_spec):
    # Simulates behavior of https://github.com/numpy/numpy/pull/9883
    if self.ndim == 0:
      return format(self._value[()], format_spec)
    else:
      return format(self._value, format_spec)

  def __getitem__(self, idx):
    """Indexes the array; the strategy depends on the sharding type."""
    from jax._src.numpy import lax_numpy
    self._check_if_deleted()

    if dispatch.is_single_device_sharding(self.sharding):
      return lax_numpy._rewriting_take(self, idx)
    # TODO(yashkatariya): Make it work for other Shardings too wherever its
    # possible to not do data movement.
    elif isinstance(self.sharding, PmapSharding):
      # Pad the index with full slices so it can be compared against the
      # per-device indices of the sharding.
      if not isinstance(idx, tuple):
        cidx = (idx,) + (slice(None),) * (len(self.shape) - 1)
      else:
        cidx = idx + (slice(None),) * (len(self.shape) - len(idx))
      if self._npy_value is None:
        indices = tuple(self.sharding.devices_indices_map(self.shape).values())
        try:
          buf_idx = indices.index(cidx)
        except ValueError:
          buf_idx = None
        if buf_idx is not None:
          # The request is exactly one shard: return its buffer directly.
          buf = self._arrays[buf_idx]
          aval = core.ShapedArray(buf.shape, self.dtype)
          return ArrayImpl(aval, SingleDeviceSharding(buf.device()), [buf],
                           committed=False, _skip_checks=True)
      return lax_numpy._rewriting_take(self, idx)
    else:
      # TODO(yashkatariya): Don't bounce to host and use `_rewriting_take` or
      # the fast path (see PmapSharding branch above) after uneven
      # partitioning support is added
      return device_put(self._value[idx])

  def __iter__(self):
    if self.ndim == 0:
      raise TypeError("iteration over a 0-d array")  # same as numpy error
    else:
      assert self.is_fully_replicated or self.is_fully_addressable
      if dispatch.is_single_device_sharding(self.sharding):
        return (sl for chunk in self._chunk_iter(100) for sl in chunk._unstack())  # type: ignore
      elif isinstance(self.sharding, PmapSharding):
        return (self[i] for i in range(self.shape[0]))  # type: ignore
      else:
        # TODO(yashkatariya): Don't bounce to host and use `_chunk_iter` path
        # here after uneven partitioning support is added.
        return (device_put(self._value[i]) for i in range(self.shape[0]))

  def item(self):
    """Converts the array to a Python scalar chosen by its dtype.

    Raises:
      TypeError: if the dtype is not complex, floating, integer or bool.
    """
    if dtypes.issubdtype(self.dtype, np.complexfloating):
      return complex(self)
    elif dtypes.issubdtype(self.dtype, np.floating):
      return float(self)
    elif dtypes.issubdtype(self.dtype, np.integer):
      return int(self)
    elif dtypes.issubdtype(self.dtype, np.bool_):
      return bool(self)
    else:
      raise TypeError(self.dtype)

  @property
  def is_fully_replicated(self) -> bool:
    """True when the first buffer has the same shape as the global array."""
    return self.shape == self._arrays[0].shape

  def __repr__(self):
    prefix = 'Array('
    if self.aval is not None and self.aval.weak_type:
      dtype_str = f'dtype={self.dtype.name}, weak_type=True)'
    else:
      dtype_str = f'dtype={self.dtype.name})'

    if self.is_fully_addressable or self.is_fully_replicated:
      line_width = np.get_printoptions()["linewidth"]
      s = np.array2string(self._value, prefix=prefix, suffix=',',
                          separator=', ', max_line_width=line_width)
      # NOTE(review): by operator precedence this is `len(s) - rfind + 1`,
      # not `len(s) - (rfind + 1)`; confirm which one is intended.
      last_line_len = len(s) - s.rfind('\n') + 1
      sep = ' '
      if last_line_len + len(dtype_str) + 1 > line_width:
        sep = ' ' * len(prefix)
      return f"{prefix}{s},{sep}{dtype_str}"
    else:
      # The values cannot be fetched here, so only shape and dtype are shown.
      return f"{prefix}{self.shape}, {dtype_str}"

  @pxla.maybe_cached_property
  def is_fully_addressable(self) -> bool:
    """Whether the sharding reports itself as fully addressable."""
    return self.sharding.is_fully_addressable

  def __array__(self, dtype=None, context=None):
    return np.asarray(self._value, dtype=dtype)

  def __dlpack__(self):
    from jax.dlpack import to_dlpack  # pylint: disable=g-import-not-at-top
    return to_dlpack(self)

  def __reduce__(self):
    # Pickle through the host value and carry the aval fields that
    # `_reconstruct_array` re-applies after `device_put`.
    fun, args, arr_state = self._value.__reduce__()  # type: ignore
    aval_state = {'weak_type': self.aval.weak_type,
                  'named_shape': self.aval.named_shape}
    return (_reconstruct_array, (fun, args, arr_state, aval_state))

  def unsafe_buffer_pointer(self):
    """Returns the buffer pointer of the only buffer (single buffer only)."""
    assert len(self._arrays) == 1
    return self._arrays[0].unsafe_buffer_pointer()

  @property
  def __cuda_array_interface__(self):
    assert len(self._arrays) == 1
    return self._arrays[0].__cuda_array_interface__  # pytype: disable=attribute-error  # bind-properties

  # TODO(yashkatariya): Remove this method when everyone is using devices().
  def device(self) -> Device:
    """Returns the single device of the sharding.

    Raises:
      RuntimeError: if the array has been deleted.
      ValueError: if the sharding's device set does not have exactly one
        device.
    """
    self._check_if_deleted()
    device_set = self.sharding.device_set
    if len(device_set) == 1:
      single_device, = device_set
      return single_device
    raise ValueError('Length of devices is greater than 1. '
                     'Please use `.devices()`.')

  def devices(self) -> List[Device]:
    """Returns the sharding's device set as a list."""
    self._check_if_deleted()
    return list(self.sharding.device_set)

  # TODO(https://github.com/google/jax/issues/12380): Remove this when DA is
  # deleted.
  @property
  def device_buffer(self) -> ArrayImpl:
    """The only buffer, wrapped as a single-device array.

    Raises:
      RuntimeError: if the array has been deleted.
      ValueError: if the array does not hold exactly one buffer.
    """
    self._check_if_deleted()
    if len(self._arrays) == 1:
      return _single_device_array_from_buf(self._arrays[0], self._committed)
    raise ValueError('Length of buffers is greater than 1. Please use '
                     '`.device_buffers` instead.')

  # TODO(https://github.com/google/jax/issues/12380): Remove this when SDA is
  # deleted.
  @property
  def device_buffers(self) -> Sequence[ArrayImpl]:
    """Every buffer, each wrapped as a single-device array."""
    self._check_if_deleted()
    return [_single_device_array_from_buf(a, self._committed)
            for a in self._arrays]

  def addressable_data(self, index: int) -> ArrayImpl:
    """Returns buffer number `index`, wrapped as a single-device array."""
    self._check_if_deleted()
    return _single_device_array_from_buf(self._arrays[index], self._committed)

  @pxla.maybe_cached_property
  def addressable_shards(self) -> Sequence[Shard]:
    """One `Shard` per buffer, in the order of `_arrays`."""
    self._check_if_deleted()
    out = []
    for db in self._arrays:
      # Wrap the device arrays in `Array` until C++ returns an Array instead
      # of a DA.
      array = _single_device_array_from_buf(db, self._committed)
      out.append(Shard(db.device(), self.sharding, self.shape, array))
    return out

  def delete(self):
    """Deletes every buffer and drops the cached host value.

    Does nothing if `delete()` has already run on this array.
    """
    if self._arrays is None:
      return
    for buf in self._arrays:
      buf.delete()
    self._arrays = None
    self._npy_value = None

  def is_deleted(self):
    """Returns True if this array or any of its buffers has been deleted."""
    if self._arrays is None:
      return True
    # This path is taken when a view of `Array` is created and the original
    # Array is deleted. In that case, the buffers the view represents also get
    # deleted.
    return any(buf.is_deleted() for buf in self._arrays)

  def _check_if_deleted(self):
    """Raises RuntimeError if the array can no longer be used.

    Goes through `is_deleted()` so that buffers deleted through another array
    sharing them are caught here too, instead of failing later when a buffer
    is read.
    """
    if self.is_deleted():
      raise RuntimeError("Array has been deleted.")

  @pxla.use_cpp_method
  def block_until_ready(self):
    """Blocks on every buffer and returns `self`."""
    self._check_if_deleted()
    for db in self._arrays:
      db.block_until_ready()
    return self

  def copy_to_host_async(self):
    """Starts an asynchronous device-to-host copy of the needed shards.

    Does nothing when the host value is already cached. Only shards with
    replica id 0 are copied; if replica ids cannot be computed, every
    addressable shard is copied.
    """
    self._check_if_deleted()
    if self._npy_value is None:
      # Probe once whether the sharding can produce replica ids at all.
      try:
        self.addressable_shards[0].replica_id
        replica_id_exists = True
      except ValueError:
        replica_id_exists = False

      for s in self.addressable_shards:
        if not replica_id_exists or s.replica_id == 0:
          s.data._arrays[0].copy_to_host_async()  # pytype: disable=attribute-error

  @property
  def _value(self) -> np.ndarray:
    """Read-only host copy of the whole array, cached after the first fetch.

    Raises:
      RuntimeError: if the array has been deleted, or if it is neither fully
        replicated nor fully addressable.
    """
    self._check_if_deleted()

    if self._npy_value is None:
      if self.is_fully_replicated:
        # Any single buffer already holds the full value.
        self._npy_value = np.asarray(self._arrays[0])  # type: ignore
        self._npy_value.flags.writeable = False
        return cast(np.ndarray, self._npy_value)

      if not self.is_fully_addressable:
        raise RuntimeError("Fetching value for `jax.Array` that spans "
                           "non-addressable devices is not possible. You can use "
                           "`jax.experimental.multihost_utils.process_allgather` "
                           "for this use case.")

      self.copy_to_host_async()
      npy_value = np.empty(self.shape, self.dtype)

      try:
        self.addressable_shards[0].replica_id
        replica_id_exists = True
      except ValueError:
        replica_id_exists = False

      # Fill the host array shard by shard, using replica 0 only (or every
      # shard when replica ids are unavailable).
      for s in self.addressable_shards:
        if not replica_id_exists or s.replica_id == 0:
          npy_value[s.index] = np.asarray(s.data._arrays[0])  # type: ignore  # [union-attr]
      self._npy_value = npy_value  # type: ignore
      self._npy_value.flags.writeable = False
    # https://docs.python.org/3/library/typing.html#typing.cast
    return cast(np.ndarray, self._npy_value)

# Explicitly set to be unhashable. Same as what device_array.py does.
setattr(ArrayImpl, "__hash__", None)
# NOTE(review): presumably makes NumPy defer to ArrayImpl in mixed
# ndarray/ArrayImpl operations — confirm.
setattr(ArrayImpl, "__array_priority__", 100)

def make_array_from_callback(
    shape: Shape, sharding: Sharding,
    data_callback: Callable[[Optional[Index]], ArrayLike]) -> ArrayImpl:
  """Builds a committed array by asking `data_callback` for each shard.

  Args:
    shape: Global shape of the resulting array.
    sharding: Sharding of the resulting array.
    data_callback: Called with the index of each addressable device; its
      result is placed on that device with `device_put`.

  Returns:
    An `ArrayImpl` whose dtype is taken from the first per-device array.
  """
  index_for_device = sharding.devices_indices_map(shape)
  # Use addressable_devices here instead of `_addressable_device_assignment`
  # because `_addressable_device_assignment` is only available on
  # `XLACompatibleSharding` and this function is supposed to work for every
  # `Sharding`.
  per_device_arrays = []
  for dev in sharding.addressable_devices:
    shard_data = data_callback(index_for_device[dev])
    per_device_arrays.append(device_put(shard_data, dev))
  global_aval = core.ShapedArray(shape, per_device_arrays[0].dtype,
                                 weak_type=False)
  return ArrayImpl(global_aval, sharding, per_device_arrays, committed=True)


def make_array_from_single_device_arrays(
    shape: Shape, sharding: Sharding, arrays: Sequence[ArrayImpl]) -> ArrayImpl:
  """Assembles a committed array from existing single-device arrays.

  The dtype is taken from the first element of `arrays`.
  """
  # All input arrays should be committed. Checking it is expensive on
  # single-controller systems.
  element_dtype = arrays[0].dtype
  global_aval = core.ShapedArray(shape, element_dtype, weak_type=False)
  return ArrayImpl(global_aval, sharding, arrays, committed=True)


# Register ArrayImpl in the type-keyed handler tables of core, xla, api_util
# and ad_util. The xla and api_util abstractification entries simply read the
# stored `aval`.
core.pytype_aval_mappings[ArrayImpl] = abstract_arrays.canonical_concrete_aval
xla.pytype_aval_mappings[ArrayImpl] = op.attrgetter('aval')
xla.canonicalize_dtype_handlers[ArrayImpl] = pxla.identity
api_util._shaped_abstractify_handlers[ArrayImpl] = op.attrgetter('aval')
ad_util.jaxval_adders[ArrayImpl] = lax_internal.add
ad_util.jaxval_zeros_likers[ArrayImpl] = lax_internal.zeros_like_array
if xc._version >= 96:
  # TODO(jakevdp) replace this with true inheritance at the C++ level.
  basearray.Array.register(ArrayImpl)


def _array_mlir_constant_handler(val, canonicalize_types=True):
  """Lowers an `ArrayImpl` to MLIR constants via its host value."""
  host_value = val._value
  return mlir.ir_constants(host_value, canonicalize_types=canonicalize_types)
mlir.register_constant_handler(ArrayImpl, _array_mlir_constant_handler)


def _device_put_array(x, device: Optional[Device]):
  """`device_put` handler for `ArrayImpl`.

  Single-device arrays are copied buffer-to-device; anything else goes through
  the host value.
  """
  if not dispatch.is_single_device_sharding(x.sharding):
    # Round trip via host if x is sharded. SDA also does a round trip via host.
    return dispatch._device_put_array(x._value, device)
  source_buf = pxla._set_aval(x._arrays[0])
  moved = dispatch._copy_device_array_to_device(source_buf, device)
  return (moved,)

dispatch.device_put_handlers[ArrayImpl] = _device_put_array


def _array_pmap_shard_arg(x, devices, indices, mode):
  """Shards `x` for pmap.

  Buffers are reused (copied to the target device when needed) if `x`'s
  addressable indices already equal `indices`; otherwise the slow path is
  taken.
  """
  if dispatch.is_single_device_sharding(x.sharding):
    return pxla._shard_device_array(x, devices, indices, mode)

  # If the sharding of Array does not match pmap's sharding then take the slow
  # path which is similar to what SDA does. This slow path reroute only happens
  # for `pmap`.
  current_indices = tuple(
      x.sharding.addressable_devices_indices_map(x.shape).values())
  if indices == current_indices:
    placed = []
    for buf, d in safe_zip(x._arrays, devices):
      placed.append(buf if buf.device() == d else buf.copy_to_device(d))
    return placed
  return pxla._shard_sharded_device_array_slow_path(x, devices, indices, mode)


def _array_rest_shard_arg(x: ArrayImpl, devices, indices, mode):
  x_indices = x.sharding.addressable_devices_indices_map(x.shape).values()
  if not x.is_fully_addressable:
    if tuple(x_indices) == tuple(indices):
      return x._arrays
    else:
      return NotImplementedError("Cannot reshard an input that is not fully "
                                 "addressable")
  else:
    if tuple(x_indices) == tuple(indices):
      return [buf if buf.device() == d else buf.copy_to_device(d)
              for buf, d in safe_zip(x._arrays, devices)]
    # Resharding starts here:
    if isinstance(x.sharding, PmapSharding):
      return pxla.device_put(x._value, devices, replicate=True)
    if dispatch.is_single_device_sharding(x.sharding):
      return pxla._shard_device_array(x, devices, indices, mode)
    else:
      return pxla._shard_sharded_device_array_slow_path(x, devices, indices, mode)


def _array_shard_arg(x, devices, indices, mode):
  """Dispatches to the pmap or the non-pmap shard-arg handler by `mode`."""
  handler = (_array_pmap_shard_arg if mode == pxla.InputsHandlerMode.pmap
             else _array_rest_shard_arg)
  return handler(x, devices, indices, mode)
pxla.shard_arg_handlers[ArrayImpl] = _array_shard_arg


def _array_global_result_handler(global_aval, out_sharding, committed,
                                 is_out_sharding_from_xla):
  """Returns a callable that turns output buffers into a result value.

  float0 avals yield a NumPy zeros array, opaque dtypes delegate to their own
  rules, and everything else is wrapped in an `ArrayImpl` without checks.
  """
  if global_aval.dtype == dtypes.float0:
    def _float0_zeros(_):
      return np.zeros(global_aval.shape, dtypes.float0)  # type: ignore
    return _float0_zeros

  if core.is_opaque_dtype(global_aval.dtype):
    opaque_rules = global_aval.dtype._rules
    return opaque_rules.global_sharded_result_handler(
        global_aval, out_sharding, committed, is_out_sharding_from_xla)

  def _wrap_buffers(bufs):
    return ArrayImpl(global_aval, out_sharding, bufs,
                     committed=committed, _skip_checks=True)
  return _wrap_buffers
pxla.global_result_handlers[(core.ShapedArray, pxla.OutputType.Array)] = _array_global_result_handler
pxla.global_result_handlers[(core.ConcreteArray, pxla.OutputType.Array)] = _array_global_result_handler
pxla.global_result_handlers[(core.AbstractToken, pxla.OutputType.Array)] = lambda *_: lambda *_: core.token


# Only used for Arrays that come out of pmap.
def _array_local_result_handler(aval, sharding, indices):
  """Returns a callable that wraps output buffers in a committed `ArrayImpl`.

  Opaque dtypes delegate to their own local sharded result handler.
  """
  if core.is_opaque_dtype(aval.dtype):
    opaque_rules = aval.dtype._rules
    return opaque_rules.local_sharded_result_handler(aval, sharding, indices)

  def _wrap_buffers(bufs):
    return ArrayImpl(aval, sharding, bufs, committed=True,
                     _skip_checks=True)
  return _wrap_buffers
pxla.local_result_handlers[(core.ShapedArray, pxla.OutputType.Array)] = _array_local_result_handler
pxla.local_result_handlers[(core.ConcreteArray, pxla.OutputType.Array)] = _array_local_result_handler
-												Change JAX's copyright attribution from "Google LLC" to "The JAX Authors.".

See https://opensource.google/documentation/reference/releasing/contributions#copyright for more details.

PiperOrigin-RevId: 476167538

											
										
										
											2022-09-22 12:26:48 -07:00
+								# Copyright 2021 The JAX Authors.
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								#
 								# Licensed under the Apache License, Version 2.0 (the "License");
 								# you may not use this file except in compliance with the License.
 								# You may obtain a copy of the License at
 								#
 								#     https://www.apache.org/licenses/LICENSE-2.0
 								#
 								# Unless required by applicable law or agreed to in writing, software
 								# distributed under the License is distributed on an "AS IS" BASIS,
 								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								# See the License for the specific language governing permissions and
 								# limitations under the License.
 								from __future__ import annotations
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								import operator as op
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								import numpy as np
-												Only copy_to_device if the indices match. Otherwise reshard the array if its uncommitted. This is important where you have 1 process per device.

PiperOrigin-RevId: 475418561

											
										
										
											2022-09-19 16:58:46 -07:00
+								from typing import Sequence, Tuple, Callable, Union, Optional, cast, List
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Add pjit support for `Array`. `Array` takes the same codepath as GDA so there are very little modifications to pjit. Add handlers aval, shard_args and result handlers for Array.

PiperOrigin-RevId: 454160854

											
										
										
											2022-06-10 07:31:43 -07:00
+								from jax import core
-												Array bug: make pytype_aval_mapping return a ConcreteShapedArray

											
										
										
											2022-08-18 12:31:30 -07:00
+								from jax._src import abstract_arrays
-												Make all pmap tests pass with Array! I am skipping all soft pmap tests for now.

PiperOrigin-RevId: 467264992

											
										
										
											2022-08-12 12:09:22 -07:00
+								from jax._src import ad_util
-												add an optional fastpath for api_util.shaped_abstractify

also add a benchmark for it, 8.7ms -> 0.2ms on my machine

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2022-07-27 10:54:54 -07:00
+								from jax._src import api_util
-												Add initial jax.Array base class for instance checks & annotation

											
										
										
											2022-09-23 09:59:46 -07:00
+								from jax._src import basearray
-												Make `jnp.array` return `jax.Array`. Add input and result handlers for `jax.Array`. Also added tests for `add` under jit.

TODO:
* Don't allow `x + y` if `jax.Array` is not fully addressable.
* Figure out how to use the already written tests with Array. Might be able to follow the path taken by SDA.
PiperOrigin-RevId: 457034779

											
										
										
											2022-06-24 10:04:31 -07:00
+								from jax._src import dispatch
-												Make checkify tests pass with Array and add methods on Array that are present on DA.

PiperOrigin-RevId: 468058909

											
										
										
											2022-08-16 16:51:26 -07:00
+								from jax._src import dtypes
-												Make all pmap tests pass with Array! I am skipping all soft pmap tests for now.

PiperOrigin-RevId: 467264992

											
										
										
											2022-08-12 12:09:22 -07:00
+								from jax._src.lax import lax as lax_internal
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								from jax._src.config import config
-												Pmap should output SDA like `Array`s to maintain the current behavior exactly. Split the shard_arg_handler for `Array` based on whether the mode is pmap or pjit. Why do this? The doc below explains more about the context.

PiperOrigin-RevId: 466849614

											
										
										
											2022-08-10 20:11:06 -07:00
+								from jax._src.util import prod, safe_zip
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								from jax._src.lib import xla_client as xc
 								from jax._src.api import device_put
-												Add initial jax.Array base class for instance checks & annotation

											
										
										
											2022-09-23 09:59:46 -07:00
+								from jax._src.typing import ArrayLike
-												Make all pmap tests pass with Array! I am skipping all soft pmap tests for now.

PiperOrigin-RevId: 467264992

											
										
										
											2022-08-12 12:09:22 -07:00
+								from jax.interpreters import pxla, xla, mlir
-												Move `array.py` and `sharding.py` from `experimental/` to `_src/`.

PiperOrigin-RevId: 477201711

											
										
										
											2022-09-27 10:06:10 -07:00
+								from jax._src.sharding import (
-												Remove the fallback to `lower_xla_callable` that exist for `jit(pmap)` cases when `Array` was enabled and add minimal support to `lower_sharding_computation`.

The `jit(pmap)` codepath is added to `lower_sharding_computation` to delete the `lower_xla_callable` codepath when `jax.Array` is enabled by default. This will help in cleaning up the codebase and get rid of tech debt.

* Round trip through host for `Array`'s that have PmapSharding and come through the `jit` path (exactly like SDAs).

* For other cases i.e. when `num_replicas > 1`, default to the `_execute_replicated` path in dispatch.py from `lower_sharding_computation`. This is exactly same to what happens in `lower_xla_callable`.

PiperOrigin-RevId: 471033420

											
										
										
											2022-08-30 10:45:29 -07:00
+								    Sharding, SingleDeviceSharding, XLACompatibleSharding, PmapSharding,
 								    device_replica_id_map)
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
 								Shape = Tuple[int, ...]
 								Device = xc.Device
 								DeviceArray = xc.Buffer
 								Index = Tuple[slice, ...]
-												Add the Shard class from GDA to `jax.Array` to make the management of shards easier and less verbose.

PiperOrigin-RevId: 453330349

											
										
										
											2022-06-06 18:44:45 -07:00
+								class Shard:
 								  """A single data shard of an Array.
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if an indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
+								  Attributes:
-												Add the Shard class from GDA to `jax.Array` to make the management of shards easier and less verbose.

PiperOrigin-RevId: 453330349

											
										
										
											2022-06-06 18:44:45 -07:00
+								    device : Which device this shard resides on.
 								    index : The index into the global array of this shard.
 								    replica_id : Integer id indicating which replica of the global array this
 								      shard is part of. Always 0 for fully sharded data
 								      (i.e. when there’s only 1 replica).
 								    data : The data of this shard. None if ``device`` is non-local.
 								  """
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if an indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
 								  def __init__(self, device: Device, sharding: Sharding, global_shape: Shape,
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
+								               data: Optional[ArrayImpl] = None):
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if an indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
+								    self.device = device
 								    self._sharding = sharding
 								    self._global_shape = global_shape
 								    self.data = data
-												Add `__repr__` to Shard since it's not a dataclass anymore

PiperOrigin-RevId: 456463979

											
										
										
											2022-06-22 02:25:34 -07:00
+								  def __repr__(self):
 								    try:
 								      return (f'Shard(device={repr(self.device)}, index={self.index}, '
 								              f'replica_id={self.replica_id}, data={self.data})')
 								    except ValueError:
 								      return f'Shard(device={repr(self.device)}, data={self.data})'
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if an indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
+								  @property
 								  def index(self) -> Index:
-												Remove fast_path_args from Array and add `id` checks to Sharding's `__eq__` method as a fast shortcut.

Also the C++ pjit path should help optimize the dispatch path.

PiperOrigin-RevId: 475163903

											
										
										
											2022-09-18 15:35:18 -07:00
+								    try:
 								      device_indices_map_fn = self._sharding.devices_indices_map
 								    except AttributeError:
 								      raise ValueError('Cannot calculate indices from sharding: '
 								                       f'{self._sharding}. Please create a device to index '
 								                       'mapping for your sharding.') from None
 								    index = device_indices_map_fn(self._global_shape)[self.device]
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if an indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
+								    assert index is not None
 								    return index
 								  @property
 								  def replica_id(self) -> int:
-												Remove `device_replica_id_map` from the Sharding interface because the standalone function should be more than enough to use. The major use-cases of this are checkpointing and accessing `addressable_shards`, and both already call the standalone function, so they keep working.

PiperOrigin-RevId: 470820443

											
										
										
											2022-08-29 14:49:17 -07:00
+								    return device_replica_id_map(self._sharding, self._global_shape)[self.device]
-												Add the Shard class from GDA to `jax.Array` to make the management of shards easier and less verbose.

PiperOrigin-RevId: 453330349

											
										
										
											2022-06-06 18:44:45 -07:00
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
def _reconstruct_array(fun, args, arr_state, aval_state):
  """Rebuild a device array from the pieces of a serialized state.

  ``fun(*args)`` produces a value whose ``__setstate__`` is called with
  ``arr_state``. That value is passed to ``device_put`` and the aval of the
  result is replaced by a copy updated with ``aval_state``.
  """
  host_value = fun(*args)
  host_value.__setstate__(arr_state)
  restored = device_put(host_value)
  updated_aval = restored.aval.update(**aval_state)
  restored.aval = updated_aval
  return restored
-												Use C++ Array in pmap path and move PmapSharding to cpp

PiperOrigin-RevId: 474151089

											
										
										
											2022-09-13 16:18:31 -07:00
-												Return single device Arrays from `.device_buffer` and `.device_buffers`.

PiperOrigin-RevId: 476449591

											
										
										
											2022-09-23 13:29:47 -07:00
+								def _single_device_array_from_buf(buf, committed):
 								  db = pxla._set_aval(buf)
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
+								  return ArrayImpl(db.aval, SingleDeviceSharding(db.device()), [db],
 								                   committed=committed, _skip_checks=True)
-												Return single device Arrays from `.device_buffer` and `.device_buffers`.

PiperOrigin-RevId: 476449591

											
										
										
											2022-09-23 13:29:47 -07:00
-												Improve the checks done in `Array` and apply them to all `Sharding`s rather than just `XLACompatibleSharding`.

Also check the symmetric difference of sharding and `_arrays` devices.

PiperOrigin-RevId: 478017409

											
										
										
											2022-09-30 09:55:25 -07:00
+								@pxla.use_cpp_class(xc.ArrayImpl if xc._version >= 97 else None)
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
+								class ArrayImpl(basearray.Array):
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								  # TODO(yashkatariya): Add __slots__ here.
-												[typing] add class-level declarations of Array members.

This fixes some pytype errors associated with the changes in #12421

											
										
										
											2022-09-21 12:51:32 -07:00
+								  aval: core.ShapedArray
 								  _sharding: Sharding
 								  _arrays: List[DeviceArray]
 								  _committed: bool
 								  _skip_checks: bool
 								  _npy_value: Optional[np.ndarray]
-												Use C++ Array in pmap path and move PmapSharding to cpp

PiperOrigin-RevId: 474151089

											
										
										
											2022-09-13 16:18:31 -07:00
+								  @pxla.use_cpp_method
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								  def __init__(self, aval: core.ShapedArray, sharding: Sharding,
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
+								               arrays: Union[Sequence[DeviceArray], Sequence[ArrayImpl]],
-												Remove fast_path_args from Array and add `id` checks to Sharding's `__eq__` method as a fast shortcut.

Also the C++ pjit path should help optimize the dispatch path.

PiperOrigin-RevId: 475163903

											
										
										
											2022-09-18 15:35:18 -07:00
+								               committed: bool, _skip_checks: bool = False):
-												Introduce class PyArray that contains the data members of python Array.

A few key methods are implemented in C++ while the rest are still implemented in Python and added to the class later. A class decorator, @use_cpp_array, is added to add python methods to xc.Array.

PiperOrigin-RevId: 473075244

											
										
										
											2022-09-08 13:47:57 -07:00
+								    # NOTE: the actual implementation of the constructor is moved to C++.
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								    self.aval = aval
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								    self._sharding = sharding
-												Return `Array` from `jax.device_put` if `config.jax_array` is enabled.

PiperOrigin-RevId: 456531510

											
										
										
											2022-06-22 09:20:26 -07:00
+								    # Extract DeviceArrays from arrays with `SingleDeviceSharding` to keep the
 								    # code handling `self._arrays` simpler.
 								    # TODO(yashkatariya): This will be slower as it will happen during
 								    # `__init__` on single controller environment. Make it lazy.
-												[typing] add class-level declarations of Array members.

This fixes some pytype errors associated with the changes in #12421

											
										
										
											2022-09-21 12:51:32 -07:00
+								    self._arrays = [a if isinstance(a, DeviceArray) else a._arrays[0] for a in arrays]
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								    # See https://jax.readthedocs.io/en/latest/faq.html#controlling-data-and-computation-placement-on-devices
 								    # for what committed means.
 								    self._committed = committed
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								    self._npy_value = None
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Enable fast path in the Array constructor. This means that the rearranging of `_arrays` according to the device_assignment won't happen when fastpath is enabled because we assume that jax transformations will return the right arrangement.

PiperOrigin-RevId: 469492283

											
										
										
											2022-08-23 10:19:59 -07:00
+								    # Don't rearrange if skip_checks is enabled because this assumes that the
 								    # input buffers are already arranged properly. This usually happens when
 								    # Array's are created as output of a JAX transformation
 								    # (like pjit, xmap, etc).
-												Improve the checks done in `Array` and apply them to all `Sharding`s rather than just `XLACompatibleSharding`.

Also check the symmetric difference of sharding and `_arrays` devices.

PiperOrigin-RevId: 478017409

											
										
										
											2022-09-30 09:55:25 -07:00
+								    if not _skip_checks or config.jax_enable_checks:
 								      self._check_and_rearrange()
 								  def _check_and_rearrange(self):
 								    for db in self._arrays:
 								      if db.dtype != self.dtype:
 								        raise ValueError(
 								            "Input buffers to `Array` must have matching dtypes. "
 								            f"Got {db.dtype}, expected {self.dtype} for buffer: {db}")
 								    device_id_to_buffer = {db.device().id: db for db in self._arrays}
 								    addressable_dev = self.sharding.addressable_devices
 								    if len(self._arrays) != len(addressable_dev):
 								      raise ValueError(
 								          f"Expected {len(addressable_dev)} per-device arrays "
 								          "(this is how many devices are addressable by the sharding), but "
 								          f"got {len(self._arrays)}")
 								    array_device_ids = set(device_id_to_buffer.keys())
 								    addressable_device_ids = set(d.id for d in addressable_dev)
 								    # Calculate a symmetric difference because the device ids between sharding
 								    # and _arrays should match.
 								    diff = set(array_device_ids) ^ set(addressable_device_ids)
 								    if diff:
 								      dev_in_sharding_not_in_arrays = set(addressable_device_ids) - set(array_device_ids)
 								      dev_in_arrays_not_in_sharding = set(array_device_ids) - set(addressable_device_ids)
 								      err_msg = (
 								          "Addressable devices and per-device arrays devices do not match.")
 								      if dev_in_sharding_not_in_arrays:
 								        err_msg += (f" Sharding contains devices {dev_in_sharding_not_in_arrays} "
 								                    "that are not present in per-device arrays.")
 								      if dev_in_arrays_not_in_sharding:
 								        err_msg += (f" Per-device arrays contain devices {dev_in_arrays_not_in_sharding} "
 								                    "that are not present in the sharding.")
 								      raise ValueError(err_msg)
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Introduce class PyArray that contains the data members of python Array.

A few key methods is implemented in C++ while the rest are still implmemented in python and added to the class later. A class decorator, @use_cpp_array, is added to add python methods to xc.Array.

PiperOrigin-RevId: 473075244

											
										
										
											2022-09-08 13:47:57 -07:00
+								    ss = self.sharding.shard_shape(self.shape)
 								    for db in self._arrays:
 								      if db.shape != ss:
 								        raise ValueError(
 								            f"Expected shard shape {ss} doesn't match the buffer "
 								            f"shape {db.shape} for buffer: {db}")
 								    # Rearrange arrays based on the device assignment.
 								    if isinstance(self.sharding, XLACompatibleSharding):
-												Remove fast_path_args from Array and add `id` checks to Sharding's `__eq__` method as a fast shortcut.

Also the C++ pjit path should help optimize the dispatch path.

PiperOrigin-RevId: 475163903

											
										
										
											2022-09-18 15:35:18 -07:00
+								      addressable_da = self.sharding._addressable_device_assignment
-												Improve the checks done in `Array` and apply them to all `Sharding`s rather than just `XLACompatibleSharding`.

Also check the symmetric difference of sharding and `_arrays` devices.

PiperOrigin-RevId: 478017409

											
										
										
											2022-09-30 09:55:25 -07:00
+								      self._arrays = [device_id_to_buffer[device.id] for device in addressable_da]
-												Introduce class PyArray that contains the data members of python Array.

A few key methods is implemented in C++ while the rest are still implmemented in python and added to the class later. A class decorator, @use_cpp_array, is added to add python methods to xc.Array.

PiperOrigin-RevId: 473075244

											
										
										
											2022-09-08 13:47:57 -07:00
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								  @property
 								  def shape(self) -> Shape:
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								    return self.aval.shape
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Make `jnp.array` return `jax.Array`. Add input and result handlers for `jax.Array`. Also added tests for `add` under jit.

TODO:
* Don't allow `x + y` if `jax.Array` is not fully addressable.
* Figure out how to use the already written tests with Array. Might be able to follow the path taken by SDA.
PiperOrigin-RevId: 457034779

											
										
										
											2022-06-24 10:04:31 -07:00
+								  @property
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								  def dtype(self):
 								    return self.aval.dtype
-												Make `jnp.array` return `jax.Array`. Add input and result handlers for `jax.Array`. Also added tests for `add` under jit.

TODO:
* Don't allow `x + y` if `jax.Array` is not fully addressable.
* Figure out how to use the already written tests with Array. Might be able to follow the path taken by SDA.
PiperOrigin-RevId: 457034779

											
										
										
											2022-06-24 10:04:31 -07:00
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								  @property
 								  def ndim(self):
 								    return len(self.shape)
 								  @property
 								  def size(self):
 								    return prod(self.shape)
 								  @property
 								  def sharding(self):
 								    return self._sharding
-												Make checkify tests pass with Array and add methods on Array that are present on DA.

PiperOrigin-RevId: 468058909

											
										
										
											2022-08-16 16:51:26 -07:00
+								  def __str__(self):
 								    return str(self._value)
 								  def __len__(self):
 								    try:
 								      return self.shape[0]
 								    except IndexError as err:
 								      raise TypeError("len() of unsized object") from err  # same as numpy error
 								  def __bool__(self):
 								    return bool(self._value)
 								  def __nonzero__(self):
 								    return bool(self._value)
 								  def __float__(self):
 								    return self._value.__float__()
 								  def __int__(self):
 								    return self._value.__int__()
 								  def __complex__(self):
 								    return self._value.__complex__()
 								  def __hex__(self):
 								    assert self.ndim == 0, 'hex only works on scalar values'
 								    return hex(self._value)  # type: ignore
 								  def __oct__(self):
 								    assert self.ndim == 0, 'oct only works on scalar values'
 								    return oct(self._value)  # type: ignore
 								  def __index__(self):
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								    return op.index(self._value)
-												Make checkify tests pass with Array and add methods on Array that are present on DA.

PiperOrigin-RevId: 468058909

											
										
										
											2022-08-16 16:51:26 -07:00
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
+								  def tobytes(self, order="C"):
-												Make checkify tests pass with Array and add methods on Array that are present on DA.

PiperOrigin-RevId: 468058909

											
										
										
											2022-08-16 16:51:26 -07:00
+								    return self._value.tobytes(order)
 								  def tolist(self):
 								    return self._value.tolist()
 								  def __format__(self, format_spec):
 								    # Simulates behavior of https://github.com/numpy/numpy/pull/9883
 								    if self.ndim == 0:
 								      return format(self._value[()], format_spec)
 								    else:
 								      return format(self._value, format_spec)
-												Make `__getitem__` work for PmapSharding just like SDA works. DA is already covered with the current implementation.

Added TODOs to take fast path for indices wherever it is possible to do that. If a correct index is passed during getitem and if that index exists on `Array`, then the fast path is taken (see the test in this CL).

PiperOrigin-RevId: 473342504

											
										
										
											2022-09-09 14:24:39 -07:00
+								  def __getitem__(self, idx):
 								    from jax._src.numpy import lax_numpy
 								    self._check_if_deleted()
-												Bounce to host for any sharding that's not PmapSharding or a sharding with a single device for `__iter__` and `__getitem__`.

PiperOrigin-RevId: 473402857

											
										
										
											2022-09-09 20:41:12 -07:00
+								    if dispatch.is_single_device_sharding(self.sharding):
 								      return lax_numpy._rewriting_take(self, idx)
-												Make `__getitem__` work for PmapSharding just like SDA works. DA is already covered with the current implementation.

Added TODOs to take fast path for indices wherever it is possible to do that. If a correct index is passed during getitem and if that index exists on `Array`, then the fast path is taken (see the test in this CL).

PiperOrigin-RevId: 473342504

											
										
										
											2022-09-09 14:24:39 -07:00
+								    # TODO(yashkatariya): Make it work for other Shardings too wherever its
 								    # possible to not do data movement.
-												Bounce to host for any sharding that's not PmapSharding or a sharding with a single device for `__iter__` and `__getitem__`.

PiperOrigin-RevId: 473402857

											
										
										
											2022-09-09 20:41:12 -07:00
+								    elif isinstance(self.sharding, PmapSharding):
 								      if not isinstance(idx, tuple):
 								        cidx = (idx,) + (slice(None),) * (len(self.shape) - 1)
-												Make `__getitem__` work for PmapSharding just like SDA works. DA is already covered with the current implementation.

Added TODOs to take fast path for indices wherever it is possible to do that. If a correct index is passed during getitem and if that index exists on `Array`, then the fast path is taken (see the test in this CL).

PiperOrigin-RevId: 473342504

											
										
										
											2022-09-09 14:24:39 -07:00
+								      else:
-												Bounce to host for any sharding that's not PmapSharding or a sharding with a single device for `__iter__` and `__getitem__`.

PiperOrigin-RevId: 473402857

											
										
										
											2022-09-09 20:41:12 -07:00
+								        cidx = idx + (slice(None),) * (len(self.shape) - len(idx))
 								      if self._npy_value is None:
-												Remove fast_path_args from Array and add `id` checks to Sharding's `__eq__` method as a fast shortcut.

Also the C++ pjit path should help optimize the dispatch path.

PiperOrigin-RevId: 475163903

											
										
										
											2022-09-18 15:35:18 -07:00
+								        indices = tuple(self.sharding.devices_indices_map(self.shape).values())
-												Bounce to host for any sharding that's not PmapSharding or a sharding with a single device for `__iter__` and `__getitem__`.

PiperOrigin-RevId: 473402857

											
										
										
											2022-09-09 20:41:12 -07:00
+								        try:
 								          buf_idx = indices.index(cidx)
 								        except ValueError:
 								          buf_idx = None
 								        if buf_idx is not None:
 								          buf = self._arrays[buf_idx]
-												Return single device Arrays from `.device_buffer` and `.device_buffers`.

PiperOrigin-RevId: 476449591

											
										
										
											2022-09-23 13:29:47 -07:00
+								          aval = core.ShapedArray(buf.shape, self.dtype)
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
+								          return ArrayImpl(aval, SingleDeviceSharding(buf.device()), [buf],
 								                           committed=False, _skip_checks=True)
-												Bounce to host for any sharding that's not PmapSharding or a sharding with a single device for `__iter__` and `__getitem__`.

PiperOrigin-RevId: 473402857

											
										
										
											2022-09-09 20:41:12 -07:00
+								      return lax_numpy._rewriting_take(self, idx)
 								    else:
 								      # TODO(yashkatariya): Don't bounce to host and use `_rewriting_take` or
-												Create `Array`s from `__getitem__` and `__iter__`. This is done by `device_put`ting from the host to default device which is suboptimal. But there is a TODO to fix this!

PiperOrigin-RevId: 478691051

											
										
										
											2022-10-03 22:28:26 -07:00
+								      # the fast path (see PmapSharding branch above) after uneven
 								      # partitioning support is added
 								      return device_put(self._value[idx])
-												Make `__getitem__` work for PmapSharding just like SDA works. DA is already covered with the current implementation.

Added TODOs to take fast path for indices wherever it is possible to do that. If a correct index is passed during getitem and if that index exists on `Array`, then the fast path is taken (see the test in this CL).

PiperOrigin-RevId: 473342504

											
										
										
											2022-09-09 14:24:39 -07:00
-												Make checkify tests pass with Array and add methods on Array that are present on DA.

PiperOrigin-RevId: 468058909

											
										
										
											2022-08-16 16:51:26 -07:00
+								  def __iter__(self):
 								    if self.ndim == 0:
 								      raise TypeError("iteration over a 0-d array")  # same as numpy error
 								    else:
-												Make `is_fully_replicated` and `is_fully_addressable` a property rather than a method.

Why?

1. Because it's easier to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so it's fast). Why cache `is_fully_addressable`? Because it's very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								      assert self.is_fully_replicated or self.is_fully_addressable
-												Bounce to host for any sharding that's not PmapSharding or a sharding with a single device for `__iter__` and `__getitem__`.

PiperOrigin-RevId: 473402857

											
										
										
											2022-09-09 20:41:12 -07:00
+								      if dispatch.is_single_device_sharding(self.sharding):
 								        return (sl for chunk in self._chunk_iter(100) for sl in chunk._unstack())  # type: ignore
 								      elif isinstance(self.sharding, PmapSharding):
-												Make `__getitem__` work for PmapSharding just like SDA works. DA is already covered with the current implementation.

Added TODOs to take fast path for indices wherever it is possible to do that. If a correct index is passed during getitem and if that index exists on `Array`, then the fast path is taken (see the test in this CL).

PiperOrigin-RevId: 473342504

											
										
										
											2022-09-09 14:24:39 -07:00
+								        return (self[i] for i in range(self.shape[0]))  # type: ignore
-												Make `__iter__` of `Array` behave like DA when there is a SingleDeviceSharding and like SDA when there is a non-trivial sharding.

This is important because when `Array` contains more than 1 shard, each shard can be on a different device and those things need to be preserved when iterating over `Array`.

PiperOrigin-RevId: 471695841

											
										
										
											2022-09-01 19:53:58 -07:00
+								      else:
-												Bounce to host for any sharding that's not PmapSharding or a sharding with a single device for `__iter__` and `__getitem__`.

PiperOrigin-RevId: 473402857

											
										
										
											2022-09-09 20:41:12 -07:00
+								        # TODO(yashkatariya): Don't bounce to host and use `_chunk_iter` path
-												Create `Array`s from `__getitem__` and `__iter__`. This is done by `device_put`ting from the host to default device which is suboptimal. But there is a TODO to fix this!

PiperOrigin-RevId: 478691051

											
										
										
											2022-10-03 22:28:26 -07:00
+								        # here after uneven partitioning support is added.
 								        return (device_put(self._value[i]) for i in range(self.shape[0]))
-												Make checkify tests pass with Array and add methods on Array that are present on DA.

PiperOrigin-RevId: 468058909

											
										
										
											2022-08-16 16:51:26 -07:00
 								  def item(self):
 								    """Return this array's value as a built-in Python scalar.

 								    The dtype is tested, in order, against complex floating, floating,
 								    integer and boolean kinds; the first match decides whether the array
 								    is converted with ``complex``, ``float``, ``int`` or ``bool``.

 								    Raises:
 								      TypeError: if the dtype belongs to none of those kinds. The dtype is
 								        passed as the exception argument.
 								    """
 								    # Order matters: it mirrors the precedence of the dtype checks.
 								    conversions = (
 								        (np.complexfloating, complex),
 								        (np.floating, float),
 								        (np.integer, int),
 								        (np.bool_, bool),
 								    )
 								    for abstract_kind, to_python in conversions:
 								      if dtypes.issubdtype(self.dtype, abstract_kind):
 								        return to_python(self)
 								    raise TypeError(self.dtype)
-												Make `is_fully_replicated` and `is_fully_addressable` a property rather than a method.

Why?

1. Because it's easier to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so it's fast). Why cache `is_fully_addressable`? Because it's very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								  @property
-												`__repr__` if an Array is fully replicated. Its the same for `_value` so it makes sense to do the same for `__repr__`.

PiperOrigin-RevId: 469892350

											
										
										
											2022-08-24 20:41:48 -07:00
+								  def is_fully_replicated(self) -> bool:
 								    return self.shape == self._arrays[0].shape
-												Add `__repr__` to `Array`. It works exactly as it does for DA and SDA when it is fully addressable. Otherwise it works like GDA.

TODO is adding weak_type support in general and to `__repr__`.

PiperOrigin-RevId: 455680796

											
										
										
											2022-06-17 13:11:52 -07:00
+								  def __repr__(self):
-												Use `Array` in `__repr__` instead of the class name which is `ArrayImpl`.

PiperOrigin-RevId: 477465432

											
										
										
											2022-09-28 08:57:07 -07:00
+								    prefix = 'Array('
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								    if self.aval is not None and self.aval.weak_type:
 								      dtype_str = f'dtype={self.dtype.name}, weak_type=True)'
 								    else:
 								      dtype_str = f'dtype={self.dtype.name})'
-												Add `__repr__` to `Array`. It works exactly as it does for DA and SDA when it is fully addressable. Otherwise it works like GDA.

TODO is adding weak_type support in general and to `__repr__`.

PiperOrigin-RevId: 455680796

											
										
										
											2022-06-17 13:11:52 -07:00
-												Make `is_fully_replicated` and `is_fully_addressable` a property rather than a method.

Why?

1. Because it's easier to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so it's fast). Why cache `is_fully_addressable`? Because it's very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								    if self.is_fully_addressable or self.is_fully_replicated:
-												Add `__repr__` to `Array`. It works exactly as it does for DA and SDA when it is fully addressable. Otherwise it works like GDA.

TODO is adding weak_type support in general and to `__repr__`.

PiperOrigin-RevId: 455680796

											
										
										
											2022-06-17 13:11:52 -07:00
+								      line_width = np.get_printoptions()["linewidth"]
 								      s = np.array2string(self._value, prefix=prefix, suffix=',',
 								                          separator=', ', max_line_width=line_width)
 								      last_line_len = len(s) - s.rfind('\n') + 1
 								      sep = ' '
 								      if last_line_len + len(dtype_str) + 1 > line_width:
 								        sep = ' ' * len(prefix)
 								      return f"{prefix}{s},{sep}{dtype_str}"
 								    else:
-												Allow `to_py()` on Array if the sharding is fully replicated.

PiperOrigin-RevId: 469617855

											
										
										
											2022-08-23 19:48:59 -07:00
+								      return f"{prefix}{self.shape}, {dtype_str}"
-												Add `__repr__` to `Array`. It works exactly as it does for DA and SDA when it is fully addressable. Otherwise it works like GDA.

TODO is adding weak_type support in general and to `__repr__`.

PiperOrigin-RevId: 455680796

											
										
										
											2022-06-17 13:11:52 -07:00
-												Make `is_fully_replicated` and `is_fully_addressable` a property rather than a method.

Why?

1. Because it's easier to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so it's fast). Why cache `is_fully_addressable`? Because it's very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								  @pxla.maybe_cached_property
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								  def is_fully_addressable(self) -> bool:
-												Make `is_fully_replicated` and `is_fully_addressable` a property rather than a method.

Why?

1. Because it's easier to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so it's fast). Why cache `is_fully_addressable`? Because it's very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								    return self.sharding.is_fully_addressable
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Add context to `__array__` of Array just like DA.

PiperOrigin-RevId: 469874479

											
										
										
											2022-08-24 18:27:40 -07:00
+								  def __array__(self, dtype=None, context=None):
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								    return np.asarray(self._value, dtype=dtype)
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
+								  def __dlpack__(self):
 								    # Export this array by delegating to `jax.dlpack.to_dlpack` and
 								    # returning whatever that function returns.
 								    # NOTE(review): the import is function-local (hence the pylint
 								    # suppression); presumably this avoids an import cycle with
 								    # `jax.dlpack` -- confirm.
 								    from jax.dlpack import to_dlpack  # pylint: disable=g-import-not-at-top
 								    return to_dlpack(self)
 								  def __reduce__(self):
-												Check if the buffer shape matches the expected shard shape by Array.

PiperOrigin-RevId: 470732792

											
										
										
											2022-08-29 09:00:03 -07:00
+								    fun, args, arr_state = self._value.__reduce__()  # type: ignore
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
+								    aval_state = {'weak_type': self.aval.weak_type,
 								                  'named_shape': self.aval.named_shape}
 								    return (_reconstruct_array, (fun, args, arr_state, aval_state))
-												Make jit == pjit. This means that the lowering and execution paths of jit and pjit are merged.

A fallback to `lower_xla_callable` is taken when pmap appears in the jaxpr during the jit lowering path.

Added support for `keep_unused`, `committed` and `core.Token` to pxla.py.

PiperOrigin-RevId: 470896270

											
										
										
											2022-08-29 22:02:32 -07:00
+								  def unsafe_buffer_pointer(self):
 								    # Forward to the `unsafe_buffer_pointer()` of the only underlying
 								    # buffer. Only valid when `_arrays` holds exactly one buffer; any other
 								    # count trips the assertion below.
 								    assert len(self._arrays) == 1
 								    return self._arrays[0].unsafe_buffer_pointer()
 								  @property
 								  def __cuda_array_interface__(self):
 								    """Return the `__cuda_array_interface__` of the single underlying buffer.

 								    Only valid when `_arrays` holds exactly one buffer; any other count
 								    fails the assertion.
 								    """
 								    assert len(self._arrays) == 1
 								    return self._arrays[0].__cuda_array_interface__  # pytype: disable=attribute-error  # bind-properties
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								  # TODO(yashkatariya): Remove this method when everyone is using devices().
 								  def device(self) -> Device:
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
+								    self._check_if_deleted()
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								    device_set = self.sharding.device_set
 								    if len(device_set) == 1:
 								      single_device, = device_set
 								      return single_device
 								    raise ValueError('Length of devices is greater than 1. '
 								                     'Please use `.devices()`.')
 								  def devices(self) -> List[Device]:
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
+								    self._check_if_deleted()
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								    return list(self.sharding.device_set)
-												Some miscellaneous changes to make tests pass when jax.Array is enabled by default.

1. Add `device_buffer` and `device_buffers` fields to Array as a backwards compatible change for DA and SDA.
2. Support PartitionSpecs as input to in_axis_resources and out_axis_resources when jax_array is enabled as a backwards compatible change since all user code uses this currently. Create a MeshPspecSharding internally.
3. Some tests changes to make them pass

PiperOrigin-RevId: 474642889

											
										
										
											2022-09-15 13:26:57 -07:00
+								  # TODO(https://github.com/google/jax/issues/12380): Remove this when DA is
 								  # deleted.
 								  @property
-												Fix the type annotation of return type of `device_buffer` and `device_buffers` which return `ArrayImpl` instead of DeviceArray.

PiperOrigin-RevId: 480181798

											
										
										
											2022-10-10 14:44:28 -07:00
+								  def device_buffer(self) -> ArrayImpl:
-												Some miscellaneous changes to make tests pass when jax.Array is enabled by default.

1. Add `device_buffer` and `device_buffers` fields to Array as a backwards compatible change for DA and SDA.
2. Support PartitionSpecs as input to in_axis_resources and out_axis_resources when jax_array is enabled as a backwards compatible change since all user code uses this currently. Create a MeshPspecSharding internally.
3. Some tests changes to make them pass

PiperOrigin-RevId: 474642889

											
										
										
											2022-09-15 13:26:57 -07:00
+								    self._check_if_deleted()
 								    if len(self._arrays) == 1:
-												Return single device Arrays from `.device_buffer` and `.device_buffers`.

PiperOrigin-RevId: 476449591

											
										
										
											2022-09-23 13:29:47 -07:00
+								      return _single_device_array_from_buf(self._arrays[0], self._committed)
-												Some miscellaneous changes to make tests pass when jax.Array is enabled by default.

1. Add `device_buffer` and `device_buffers` fields to Array as a backwards compatible change for DA and SDA.
2. Support PartitionSpecs as input to in_axis_resources and out_axis_resources when jax_array is enabled as a backwards compatible change since all user code uses this currently. Create a MeshPspecSharding internally.
3. Some tests changes to make them pass

PiperOrigin-RevId: 474642889

											
										
										
											2022-09-15 13:26:57 -07:00
+								    raise ValueError('Length of buffers is greater than 1. Please use '
 								                     '`.device_buffers` instead.')
 								  # TODO(https://github.com/google/jax/issues/12380): Remove this when SDA is
 								  # deleted.
 								  @property
-												Fix the type annotation of return type of `device_buffer` and `device_buffers` which return `ArrayImpl` instead of DeviceArray.

PiperOrigin-RevId: 480181798

											
										
										
											2022-10-10 14:44:28 -07:00
+								  def device_buffers(self) -> Sequence[ArrayImpl]:
-												Some miscellaneous changes to make tests pass when jax.Array is enabled by default.

1. Add `device_buffer` and `device_buffers` fields to Array as a backwards compatible change for DA and SDA.
2. Support PartitionSpecs as input to in_axis_resources and out_axis_resources when jax_array is enabled as a backwards compatible change since all user code uses this currently. Create a MeshPspecSharding internally.
3. Some tests changes to make them pass

PiperOrigin-RevId: 474642889

											
										
										
											2022-09-15 13:26:57 -07:00
+								    self._check_if_deleted()
-												Return single device Arrays from `.device_buffer` and `.device_buffers`.

PiperOrigin-RevId: 476449591

											
										
										
											2022-09-23 13:29:47 -07:00
+								    return [_single_device_array_from_buf(a, self._committed)
 								            for a in self._arrays]
-												Some miscellaneous changes to make tests pass when jax.Array is enabled by default.

1. Add `device_buffer` and `device_buffers` fields to Array as a backwards compatible change for DA and SDA.
2. Support PartitionSpecs as input to in_axis_resources and out_axis_resources when jax_array is enabled as a backwards compatible change since all user code uses this currently. Create a MeshPspecSharding internally.
3. Some tests changes to make them pass

PiperOrigin-RevId: 474642889

											
										
										
											2022-09-15 13:26:57 -07:00
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
+								  def addressable_data(self, index: int) -> ArrayImpl:
-												Add addressable_data to Array (similar to GDA) to aid in transition and also in auto spmd partitioner mode, always convert to MeshPspecSharding.

PiperOrigin-RevId: 475972534

											
										
										
											2022-09-21 18:18:57 -07:00
+								    self._check_if_deleted()
-												Return single device Arrays from `.device_buffer` and `.device_buffers`.

PiperOrigin-RevId: 476449591

											
										
										
											2022-09-23 13:29:47 -07:00
+								    return _single_device_array_from_buf(self._arrays[index], self._committed)
-												Add addressable_data to Array (similar to GDA) to aid in transition and also in auto spmd partitioner mode, always convert to MeshPspecSharding.

PiperOrigin-RevId: 475972534

											
										
										
											2022-09-21 18:18:57 -07:00
-												Add the Shard class from GDA to `jax.Array` to make the management of shards easier and less verbose.

PiperOrigin-RevId: 453330349

											
										
										
											2022-06-06 18:44:45 -07:00
+								  @pxla.maybe_cached_property
 								  def addressable_shards(self) -> Sequence[Shard]:
-												Fix the _check_if_deleted check that was merged at the wrong place by the cider merging machinery.

PiperOrigin-RevId: 454912448

											
										
										
											2022-06-14 11:23:07 -07:00
+								    self._check_if_deleted()
-												Add the Shard class from GDA to `jax.Array` to make the management of shards easier and less verbose.

PiperOrigin-RevId: 453330349

											
										
										
											2022-06-06 18:44:45 -07:00
+								    out = []
 								    for db in self._arrays:
 								      # Wrap the device arrays in `Array` until C++ returns an Array instead
 								      # of a DA.
-												Return single device Arrays from `.device_buffer` and `.device_buffers`.

PiperOrigin-RevId: 476449591

											
										
										
											2022-09-23 13:29:47 -07:00
+								      array = _single_device_array_from_buf(db, self._committed)
 								      out.append(Shard(db.device(), self.sharding, self.shape, array))
-												Add the Shard class from GDA to `jax.Array` to make the management of shards easier and less verbose.

PiperOrigin-RevId: 453330349

											
										
										
											2022-06-06 18:44:45 -07:00
+								    return out
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								  def delete(self):
 								    # Delete every underlying buffer and drop this object's references.
 								    # Calling it again is a no-op: once `_arrays` is None we return early.
 								    if self._arrays is None:
 								      return
 								    for buf in self._arrays:
 								      buf.delete()
 								    # `_arrays is None` is the marker other methods test to detect deletion;
 								    # the cached host copy in `_npy_value` is cleared as well.
 								    self._arrays = None
 								    self._npy_value = None
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
+								  def is_deleted(self):
-												Enable `testArrayCopy` now that it's fixed.

PiperOrigin-RevId: 473088085

											
										
										
											2022-09-08 14:39:12 -07:00
+								    if self._arrays is None:
 								      return True
 								    # This path is taken when a view of `Array` is created and the original
 								    # Array is deleted. In that case, the buffers the view represents also get
 								    # deleted.
 								    return any(buf.is_deleted() for buf in self._arrays)
-												Add support for interoperability via dlpack for Array and also make pickle_tests and lax_numpy_test pass with Array.

PiperOrigin-RevId: 468568917

											
										
										
											2022-08-18 15:58:40 -07:00
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								  def _check_if_deleted(self):
 								    if self._arrays is None:
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
+								      raise RuntimeError("Array has been deleted.")
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
-												Use C++ Array in pmap path and move PmapSharding to cpp

PiperOrigin-RevId: 474151089

											
										
										
											2022-09-13 16:18:31 -07:00
+								  @pxla.use_cpp_method
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								  def block_until_ready(self):
 								    # Call `block_until_ready()` on each underlying buffer in turn, then
 								    # return `self` so calls can be chained.
 								    # `_check_if_deleted()` runs first, so a deleted array raises instead
 								    # of iterating over `None`.
 								    self._check_if_deleted()
 								    for db in self._arrays:
 								      db.block_until_ready()
 								    return self
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								  def copy_to_host_async(self):
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								    self._check_if_deleted()
 								    if self._npy_value is None:
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
+								      try:
 								        self.addressable_shards[0].replica_id
 								        replica_id_exists = True
 								      except ValueError:
 								        replica_id_exists = False
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								      for s in self.addressable_shards:
-												Fix the _check_if_deleted check that was merged at the wrong place by the cider merging machinery.

PiperOrigin-RevId: 454912448

											
										
										
											2022-06-14 11:23:07 -07:00
+								        if not replica_id_exists or s.replica_id == 0:
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if an indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
+								          s.data._arrays[0].copy_to_host_async()  # pytype: disable=attribute-error
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								  @property
-												Adding `jax.Array` to jax.experimental. Its pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
+								  def _value(self) -> np.ndarray:
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								    self._check_if_deleted()
-												Allow `to_py()` on Array if the sharding is fully replicated.

PiperOrigin-RevId: 469617855

											
										
										
											2022-08-23 19:48:59 -07:00
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								    if self._npy_value is None:
-												Make `is_fully_replicated` and `is_fully_addressable` a property rather than a method.

Why?

1. Because it's easier to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so it's fast). Why cache `is_fully_addressable`? Because it's very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								      if self.is_fully_replicated:
-												`__repr__` if an Array is fully replicated. Its the same for `_value` so it makes sense to do the same for `__repr__`.

PiperOrigin-RevId: 469892350

											
										
										
											2022-08-24 20:41:48 -07:00
+								        self._npy_value = np.asarray(self._arrays[0])  # type: ignore
-												Some miscellaneous changes to make tests pass when jax.Array is enabled by default.

1. Add `device_buffer` and `device_buffers` fields to Array as a backwards compatible change for DA and SDA.
2. Support PartitionSpecs as input to in_axis_resources and out_axis_resources when jax_array is enabled as a backwards compatible change since all user code uses this currently. Create a MeshPspecSharding internally.
3. Some tests changes to make them pass

PiperOrigin-RevId: 474642889

											
										
										
											2022-09-15 13:26:57 -07:00
+								        self._npy_value.flags.writeable = False
-												`__repr__` if an Array is fully replicated. Its the same for `_value` so it makes sense to do the same for `__repr__`.

PiperOrigin-RevId: 469892350

											
										
										
											2022-08-24 20:41:48 -07:00
+								        return cast(np.ndarray, self._npy_value)
-												Allow `to_py()` on Array if the sharding is fully replicated.

PiperOrigin-RevId: 469617855

											
										
										
											2022-08-23 19:48:59 -07:00
-												Make `is_fully_replicated` and `is_fully_addressable` a property rather than a method.

Why?

1. Because it's easier to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so it's fast). Why cache `is_fully_addressable`? Because it's very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								      if not self.is_fully_addressable:
-												Allow `to_py()` on Array if the sharding is fully replicated.

PiperOrigin-RevId: 469617855

											
										
										
											2022-08-23 19:48:59 -07:00
+								        raise RuntimeError("Fetching value for `jax.Array` that spans "
 								                           "non-addressable devices is not possible. You can use "
 								                           "`jax.experimental.multihost_utils.process_allgather` "
 								                           "for this use case.")
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								      self.copy_to_host_async()
 								      npy_value = np.empty(self.shape, self.dtype)
-												Make `Shard` a normal class making `index` and `replica_id` properties on that class. Raise an error if an indices cannot be calculated from a sharding.

PiperOrigin-RevId: 454899275

											
										
										
											2022-06-14 10:34:19 -07:00
 								      try:
 								        self.addressable_shards[0].replica_id
 								        replica_id_exists = True
 								      except ValueError:
 								        replica_id_exists = False
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								      for s in self.addressable_shards:
-												Fix the _check_if_deleted check that was merged at the wrong place by the cider merging machinery.

PiperOrigin-RevId: 454912448

											
										
										
											2022-06-14 11:23:07 -07:00
+								        if not replica_id_exists or s.replica_id == 0:
-												[JAX] Deprecate .to_py() property on arrays. Implement __array__ instead.

.to_py() was something of an accidental export from the JAX array classes. There are other mechanisms to turn a JAX array into a NumPy array, including `np.asarray(x)` and `jax.device_get(x)`. Deprecate this mechanism because it is redundant.

PiperOrigin-RevId: 469984029

											
										
										
											2022-08-25 07:27:54 -07:00
+								          npy_value[s.index] = np.asarray(s.data._arrays[0])  # type: ignore  # [union-attr]
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								      self._npy_value = npy_value  # type: ignore
-												Some miscellaneous changes to make tests pass when jax.Array is enabled by default.

1. Add `device_buffer` and `device_buffers` fields to Array as a backwards compatible change for DA and SDA.
2. Support PartitionSpecs as input to in_axis_resources and out_axis_resources when jax_array is enabled as a backwards compatible change since all user code uses this currently. Create a MeshPspecSharding internally.
3. Some tests changes to make them pass

PiperOrigin-RevId: 474642889

											
										
										
											2022-09-15 13:26:57 -07:00
+								      self._npy_value.flags.writeable = False
-												Add `__array__` (for device_get), `_npy_value`, `block_until_ready`, `delete` and `_check_if_deleted` to Array.

PiperOrigin-RevId: 454741685

											
										
										
											2022-06-13 18:07:55 -07:00
+								    # https://docs.python.org/3/library/typing.html#typing.cast
 								    return cast(np.ndarray, self._npy_value)
-												Adding `jax.Array` to jax.experimental. It's pretty much the same as GDA (without the performance optimization for now).

Currently, jax.Array takes DeviceArrays in `assemble_array` because device_put returns a DA. In the future (with IFRT), it will return an `Array`.

`addressable_shards` wraps DA into jax.Array with a `SingleDeviceSharding`.

PiperOrigin-RevId: 453319811

											
										
										
											2022-06-06 17:31:20 -07:00
-												Add weak type support to Array. Also make all api_test.py tests pass with Array. I have disabled the `float0` test for now until I investigate.

PiperOrigin-RevId: 468264910

											
										
										
											2022-08-17 12:25:14 -07:00
# ArrayImpl is explicitly made unhashable, the same as what device_array.py
# does.
setattr(ArrayImpl, "__hash__", None)
# Class-level `__array_priority__` for ArrayImpl.
setattr(ArrayImpl, "__array_priority__", 100)
-												Add fast path args to Array similar to GDA to speed up initialization and other operations like calculating indices and addressable_device_assignment.

This is important because looping over 1000s of devices is extremely expensive during runtime and throttles the performance (all these optimizations were applied to GDA when integrating it into PAX and are applicable to Array as well). This will also be helpful for single-controller environments.

Also even hashing and __eq__ checks when you have 1000s of devices is going to be slow and will show up in xprof as a slowdown (I have seen this before).

PiperOrigin-RevId: 471366295

											
										
										
											2022-08-31 15:06:58 -07:00
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
def make_array_from_callback(
    shape: Shape, sharding: Sharding,
    data_callback: Callable[[Optional[Index]], ArrayLike]) -> ArrayImpl:
  """Builds an `ArrayImpl` by calling `data_callback` once per addressable device.

  Args:
    shape: Global shape of the array being created.
    sharding: Sharding whose `devices_indices_map(shape)` supplies the index
      handed to `data_callback` for each device.
    data_callback: Called with the index for a device; its result is placed on
      that device with `device_put`.

  Returns:
    A committed `ArrayImpl` whose dtype is taken from the first per-device
    array and whose `weak_type` is False.
  """
  # The indices are computed once up front and looked up per device below.
  index_by_device = sharding.devices_indices_map(shape)
  # Use addressable_devices here instead of `_addressable_device_assignment`
  # because `_addressable_device_assignment` is only available on
  # `XLACompatibleSharding` and this function is supposed to work for every
  # `Sharding`.
  per_device_arrays = []
  for dev in sharding.addressable_devices:
    shard_data = data_callback(index_by_device[dev])
    per_device_arrays.append(device_put(shard_data, dev))
  global_aval = core.ShapedArray(
      shape, per_device_arrays[0].dtype, weak_type=False)
  return ArrayImpl(global_aval, sharding, per_device_arrays, committed=True)
-												Add pjit support for `Array`. `Array` takes the same codepath as GDA so there are very few modifications to pjit. Add handlers aval, shard_args and result handlers for Array.

PiperOrigin-RevId: 454160854

											
										
										
											2022-06-10 07:31:43 -07:00
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
def make_array_from_single_device_arrays(
    shape: Shape, sharding: Sharding, arrays: Sequence[ArrayImpl]) -> ArrayImpl:
  """Wraps already-placed per-device `arrays` into one committed `ArrayImpl`.

  Args:
    shape: Global shape of the resulting array.
    sharding: Sharding passed through to the `ArrayImpl` constructor.
    arrays: Per-device arrays; the dtype of the first one is used for the
      global aval.

  Returns:
    An `ArrayImpl` constructed with `committed=True` and `weak_type=False`.
  """
  # All input arrays should be committed. Checking it is expensive on
  # single-controller systems.
  first_array = arrays[0]
  global_aval = core.ShapedArray(shape, first_array.dtype, weak_type=False)
  return ArrayImpl(global_aval, sharding, arrays, committed=True)
-												Add `make_array_from_single_device_arrays` to prepare to rename of the concrete `Array` to `ArrayImpl`.

PiperOrigin-RevId: 476965287

											
										
										
											2022-09-26 12:43:13 -07:00
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
# Abstract-value lookups for ArrayImpl.
core.pytype_aval_mappings[ArrayImpl] = abstract_arrays.canonical_concrete_aval
xla.pytype_aval_mappings[ArrayImpl] = op.attrgetter('aval')
api_util._shaped_abstractify_handlers[ArrayImpl] = op.attrgetter('aval')

# Dtype canonicalization is the identity for ArrayImpl.
xla.canonicalize_dtype_handlers[ArrayImpl] = pxla.identity

# Addition / zeros-like rules used by ad_util.
ad_util.jaxval_adders[ArrayImpl] = lax_internal.add
ad_util.jaxval_zeros_likers[ArrayImpl] = lax_internal.zeros_like_array

if xc._version >= 96:
  # TODO(jakevdp) replace this with true inheritance at the C++ level.
  basearray.Array.register(ArrayImpl)
-												Make all pmap tests pass with Array! I am skipping all soft pmap tests for now.

PiperOrigin-RevId: 467264992

											
										
										
											2022-08-12 12:09:22 -07:00
 								def _array_mlir_constant_handler(val, canonicalize_types=True):
 								  return mlir.ir_constants(val._value,
 								                           canonicalize_types=canonicalize_types)
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
# Use `_array_mlir_constant_handler` for ArrayImpl constants.
mlir.register_constant_handler(ArrayImpl, _array_mlir_constant_handler)
-												Add pjit support for `Array`. `Array` takes the same codepath as GDA so there are very few modifications to pjit. Add handlers aval, shard_args and result handlers for Array.

PiperOrigin-RevId: 454160854

											
										
										
											2022-06-10 07:31:43 -07:00
-												Make `jnp.array` return `jax.Array`. Add input and result handlers for `jax.Array`. Also added tests for `add` under jit.

TODO:
* Don't allow `x + y` if `jax.Array` is not fully addressable.
* Figure out how to use the already written tests with Array. Might be able to follow the path taken by SDA.
PiperOrigin-RevId: 457034779

											
										
										
											2022-06-24 10:04:31 -07:00
 								def _device_put_array(x, device: Optional[Device]):
-												Fix Forward. The fix is on the user's end. Original PR: https://github.com/google/jax/pull/12217

Co-authored-by: Matthew Johnson <mattjj@google.com>
Co-authored-by: Yash Katariya <yashkatariya@google.com>
PiperOrigin-RevId: 472999907

											
										
										
											2022-09-08 08:49:12 -07:00
+								  if dispatch.is_single_device_sharding(x.sharding):
-												Make `astype` work for `Array` that are sharded. The current behavior is the same as SDA i.e. it round trips via host.

PiperOrigin-RevId: 457797458

											
										
										
											2022-06-28 12:48:39 -07:00
+								    x = dispatch._copy_device_array_to_device(pxla._set_aval(x._arrays[0]), device)
 								    return (x,)
 								  else:
 								    # Round trip via host if x is sharded. SDA also does a round trip via host.
 								    return dispatch._device_put_array(x._value, device)
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
# Route `device_put` of ArrayImpl values through `_device_put_array`.
dispatch.device_put_handlers[ArrayImpl] = _device_put_array
-												Make `jnp.array` return `jax.Array`. Add input and result handlers for `jax.Array`. Also added tests for `add` under jit.

TODO:
* Don't allow `x + y` if `jax.Array` is not fully addressable.
* Figure out how to use the already written tests with Array. Might be able to follow the path taken by SDA.
PiperOrigin-RevId: 457034779

											
										
										
											2022-06-24 10:04:31 -07:00
-												Make eager pmap tests pass with `Array`. Also add a slow path for Array in `pmap` similar to what SDA has. This is required for eager pmap. Adding a slow path removes the need for doing sharding checks in api.py because SDA doesn't do those checks and if the sharding does not match with pmap sharding, then it just defaults to the slow path (exactly like SDA).

PiperOrigin-RevId: 468843310

											
										
										
											2022-08-19 21:36:43 -07:00
def _array_pmap_shard_arg(x, devices, indices, mode):
  """Shard-arg handler for `ArrayImpl` inputs when running under pmap.

  Single-device arrays are delegated to `pxla._shard_device_array`. Otherwise
  the existing buffers are reused (copied to the target device when they live
  elsewhere) only if `indices` equals the array's own addressable indices.
  """
  if dispatch.is_single_device_sharding(x.sharding):
    return pxla._shard_device_array(x, devices, indices, mode)
  # If the sharding of Array does not match pmap's sharding then take the slow
  # path which is similar to what SDA does. This slow path reroute only happens
  # for `pmap`.
  index_map = x.sharding.addressable_devices_indices_map(x.shape)
  x_indices = tuple(index_map.values())
  if indices == x_indices:
    placed = []
    for buf, dev in safe_zip(x._arrays, devices):
      placed.append(buf if buf.device() == dev else buf.copy_to_device(dev))
    return placed
  return pxla._shard_sharded_device_array_slow_path(x, devices, indices, mode)
-												make device_put(prngkeyarray, sharding) for Array

Co-authored-by: Yash Katariya <yashkatariya@google.com>
Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2022-10-07 16:48:34 -07:00
+								def _array_rest_shard_arg(x: ArrayImpl, devices, indices, mode):
 								  x_indices = x.sharding.addressable_devices_indices_map(x.shape).values()
-												Make `is_fully_replicated` and `is_fully_addressble` a property rather than a method.

Why?

1. Because it's easy to cache a property than a method with only the `self` argument. (See below for article)

2. There's no harm in making them a property because both of them return a bool without any side-effects and are cached (so its fast). Why cache `is_fully_addressable`? Because its very expensive to calculate when you have 1000s of devices.

PiperOrigin-RevId: 479850850

											
										
										
											2022-10-08 19:23:32 -07:00
+								  if not x.is_fully_addressable:
-												Merge pull request #12705 from mattjj:fix-prng-key-array-device-put

PiperOrigin-RevId: 479813689

											
										
										
											2022-10-08 11:39:05 -07:00
+								    if tuple(x_indices) == tuple(indices):
 								      return x._arrays
 								    else:
 								      return NotImplementedError("Cannot reshard an input that is not fully "
 								                                 "addressable")
-												Enable the debugging_primitives pjit(xmap) case. Also don't check for sharding mismatch when the array is not committed. Check the device assignment only for committed arrays.

PiperOrigin-RevId: 474598597

											
										
										
											2022-09-15 10:33:31 -07:00
+								  else:
-												Merge pull request #12705 from mattjj:fix-prng-key-array-device-put

PiperOrigin-RevId: 479813689

											
										
										
											2022-10-08 11:39:05 -07:00
+								    if tuple(x_indices) == tuple(indices):
 								      return [buf if buf.device() == d else buf.copy_to_device(d)
 								              for buf, d in safe_zip(x._arrays, devices)]
 								    # Resharding starts here:
 								    if isinstance(x.sharding, PmapSharding):
 								      return pxla.device_put(x._value, devices, replicate=True)
-												Enable the debugging_primitives pjit(xmap) case. Also don't check for sharding mismatch when the array is not committed. Check the device assignment only for committed arrays.

PiperOrigin-RevId: 474598597

											
										
										
											2022-09-15 10:33:31 -07:00
+								    if dispatch.is_single_device_sharding(x.sharding):
 								      return pxla._shard_device_array(x, devices, indices, mode)
 								    else:
-												Merge pull request #12705 from mattjj:fix-prng-key-array-device-put

PiperOrigin-RevId: 479813689

											
										
										
											2022-10-08 11:39:05 -07:00
+								      return pxla._shard_sharded_device_array_slow_path(x, devices, indices, mode)
-												Enable the debugging_primitives pjit(xmap) case. Also don't check for sharding mismatch when the array is not committed. Check the device assignment only for committed arrays.

PiperOrigin-RevId: 474598597

											
										
										
											2022-09-15 10:33:31 -07:00
-												Make eager pmap tests pass with `Array`. Also add a slow path for Array in `pmap` similar to what SDA has. This is required for eager pmap. Adding a slow path removes the need for doing sharding checks in api.py because SDA doesn't do those checks and if the sharding does not match with pmap sharding, then it just defaults to the slow path (exactly like SDA).

PiperOrigin-RevId: 468843310

											
										
										
											2022-08-19 21:36:43 -07:00
def _array_shard_arg(x, devices, indices, mode):
  """Dispatches to the pmap handler or the general handler based on `mode`."""
  if mode == pxla.InputsHandlerMode.pmap:
    handler = _array_pmap_shard_arg
  else:
    handler = _array_rest_shard_arg
  return handler(x, devices, indices, mode)
-												Rename the concrete class `Array` to `ArrayImpl`

PiperOrigin-RevId: 477017236

											
										
										
											2022-09-26 16:17:26 -07:00
# ArrayImpl inputs are sharded through `_array_shard_arg`.
pxla.shard_arg_handlers[ArrayImpl] = _array_shard_arg
-												Add pjit support for `Array`. `Array` takes the same codepath as GDA so there are very few modifications to pjit. Add handlers aval, shard_args and result handlers for Array.

PiperOrigin-RevId: 454160854

											
										
										
											2022-06-10 07:31:43 -07:00
-												Add support to handle arbitrary shardings to KeyArray. Resolve all the TODOs that were created before.

Co-authored-by: Roy Frostig <frostig@google.com>
PiperOrigin-RevId: 471443690

											
										
										
											2022-08-31 22:53:32 -07:00
def _array_global_result_handler(global_aval, out_sharding, committed,
                                 is_out_sharding_from_xla):
  """Returns a callable that turns output buffers into a global result.

  * float0 avals: the callable ignores its argument and returns a NumPy zeros
    array of `global_aval.shape`.
  * opaque dtypes: the dtype's own `_rules.global_sharded_result_handler` is
    used, receiving all four arguments.
  * otherwise: the buffers are wrapped in an `ArrayImpl` with
    `_skip_checks=True`.
  """
  if global_aval.dtype == dtypes.float0:
    def _float0_result(_):
      return np.zeros(global_aval.shape, dtypes.float0)  # type: ignore
    return _float0_result

  if core.is_opaque_dtype(global_aval.dtype):
    opaque_rules = global_aval.dtype._rules
    return opaque_rules.global_sharded_result_handler(
        global_aval, out_sharding, committed, is_out_sharding_from_xla)

  def _wrap_buffers(bufs):
    return ArrayImpl(global_aval, out_sharding, bufs,
                     committed=committed, _skip_checks=True)
  return _wrap_buffers
-												Pmap should output SDA like `Array`s to maintain the current behavior exactly. Split the shard_arg_handler for `Array` based on whether the mode is pmap or pjit. Why do this? The doc below explains more about the context.

PiperOrigin-RevId: 466849614

											
										
										
											2022-08-10 20:11:06 -07:00
# ShapedArray and ConcreteArray outputs share the same global result handler.
pxla.global_result_handlers[(core.ShapedArray, pxla.OutputType.Array)] = _array_global_result_handler
pxla.global_result_handlers[(core.ConcreteArray, pxla.OutputType.Array)] = _array_global_result_handler
# Token outputs: the handler ignores all arguments and yields `core.token`.
pxla.global_result_handlers[(core.AbstractToken, pxla.OutputType.Array)] = lambda *_: lambda *_: core.token
-												Pmap should output SDA like `Array`s to maintain the current behavior exactly. Split the shard_arg_handler for `Array` based on whether the mode is pmap or pjit. Why do this? The doc below explains more about the context.

PiperOrigin-RevId: 466849614

											
										
										
											2022-08-10 20:11:06 -07:00
-												Add fast path args to Array similar to GDA to speed up initialization and other operations like calculating indices and addressable_device_assignment.

This is important because looping over 1000s of devices is extremely expensive during runtime and throttles the performance (all these optimizations were applied to GDA when integrating it into PAX and are applicable to Array as well). This will also be helpful for single-controller environments.

Also even hashing and __eq__ checks when you have 1000s of devices is going to be slow and will show up in xprof as a slowdown (I have seen this before).

PiperOrigin-RevId: 471366295

											
										
										
											2022-08-31 15:06:58 -07:00
# Only used for Arrays that come out of pmap.
def _array_local_result_handler(aval, sharding, indices):
  """Returns a callable that wraps output buffers into a committed `ArrayImpl`.

  For opaque dtypes the dtype's own `_rules.local_sharded_result_handler` is
  used instead. `indices` is only consumed on that opaque-dtype path.
  """
  if core.is_opaque_dtype(aval.dtype):
    opaque_rules = aval.dtype._rules
    return opaque_rules.local_sharded_result_handler(aval, sharding, indices)

  def _wrap_buffers(bufs):
    return ArrayImpl(aval, sharding, bufs, committed=True, _skip_checks=True)
  return _wrap_buffers
-												Pmap should output SDA like `Array`s to maintain the current behavior exactly. Split the shard_arg_handler for `Array` based on whether the mode is pmap or pjit. Why do this? The doc below explains more about the context.

PiperOrigin-RevId: 466849614

											
										
										
											2022-08-10 20:11:06 -07:00
# ShapedArray and ConcreteArray outputs share the same local result handler.
pxla.local_result_handlers[(core.ShapedArray, pxla.OutputType.Array)] = _array_local_result_handler
pxla.local_result_handlers[(core.ConcreteArray, pxla.OutputType.Array)] = _array_local_result_handler