rocm_jax/jax/_src/pjit.py

# Copyright 2021 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import inspect
import logging
import weakref
import numpy as np
from typing import (Callable, Sequence, Tuple, Union, cast, List, Optional,
                    Iterable, NamedTuple, Any)
import itertools as it
from functools import partial, lru_cache
import threading
import warnings

from jax._src import core
from jax._src import stages
from jax._src import dispatch
from jax._src import mesh as mesh_lib
from jax._src import linear_util as lu
from jax._src import op_shardings
from jax._src import sharding_impls
from jax._src import source_info_util
from jax._src import traceback_util
from jax._src import api
from jax._src import xla_bridge as xb
from jax._src.api_util import (
    argnums_partial_except, flatten_axes, flatten_fun, flatten_fun_nokwargs,
    donation_vector, shaped_abstractify, check_callable, resolve_argnums,
    argnames_partial_except, debug_info, result_paths, jaxpr_debug_info)
from jax._src.errors import JAXTypeError
from jax._src.interpreters import partial_eval as pe
from jax._src.partition_spec import PartitionSpec
from jax._src.interpreters import xla

from jax._src.config import config
from jax._src.interpreters import ad
from jax._src.interpreters import batching
from jax._src.interpreters import mlir
from jax._src.interpreters import pxla
from jax._src.lib.mlir import ir
from jax._src.lib.mlir.dialects import func as func_dialect
from jax._src.lib import xla_client as xc
from jax._src.sharding_impls import (
    NamedSharding, XLACompatibleSharding, GSPMDSharding,
    XLADeviceAssignment, SingleDeviceSharding, PmapSharding,
    AUTO, UNSPECIFIED, UnspecifiedValue,
    ParsedPartitionSpec, SpecSync, get_single_pspec, is_auto, is_unspecified,
    is_unspecified_or_auto, prepare_axis_resources, parse_flatten_op_sharding)
from jax._src.traceback_util import api_boundary
from jax._src.tree_util import (
    tree_map, tree_flatten, tree_unflatten, treedef_is_leaf, tree_structure,
    treedef_tuple, broadcast_prefix, all_leaves,
    prefix_errors, generate_key_paths)
from jax._src.util import (
    HashableFunction, safe_map, safe_zip, wraps,
    distributed_debug_log, split_list, weakref_lru_cache,
    merge_lists, flatten, unflatten)

# Within this module `map` and `zip` are rebound to the `safe_map`/`safe_zip`
# helpers imported from jax._src.util; the Python builtins remain reachable as
# `unsafe_map` and `unsafe_zip`.
map, unsafe_map = safe_map, map
zip, unsafe_zip = safe_zip, zip

# Register this file with traceback_util's exclusion mechanism.
traceback_util.register_exclusion(__file__)

# Type aliases for the sharding values handled by this module. The
# "MinusUnspecified" variants omit `UnspecifiedValue` from the union.
PjitSharding = Union[GSPMDSharding, UnspecifiedValue, AUTO]
PjitShardingMinusUnspecified = Union[GSPMDSharding, AUTO]
MeshSharding = Union[NamedSharding, UnspecifiedValue, AUTO]
MeshShardingMinusUnspecified = Union[NamedSharding, AUTO]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def _try_infer_args(f, tree):
  dummy_args = tree_unflatten(tree, [False] * tree.num_leaves)
  try:
    return inspect.signature(f).bind(*dummy_args)
  except (TypeError, ValueError):
    return None


def _find_arg_mismatch(arg_list, fails, fun_name):
  first_err, second_err = fails
  mismatched_args_msg = []
  for name, inp_da, aval in arg_list:
    if first_err.m_type == pxla.MismatchType.ARG_SHARDING:
      if first_err.da == inp_da:
        mismatched_args_msg.append(
            (f"argument {name} of {fun_name} with shape {aval.str_short()} and "
             f"{first_err._dev_ids_plat_str}"))
        break

  for name, inp_da, aval in arg_list:
    if second_err.m_type == pxla.MismatchType.ARG_SHARDING:
      if second_err.da == inp_da:
        mismatched_args_msg.append(
            (f"argument {name} of {fun_name} with shape {aval.str_short()} and "
             f"{second_err._dev_ids_plat_str}"))
        break
  return mismatched_args_msg

# TODO(yashkatariya): Try to use debug_info that is populated in
# common_infer_params.
def _get_arg_names(fun, in_tree, args_flat):
  """Builds a display name for every flat argument of ``fun``.

  Args:
    fun: the user function whose signature supplies parameter names.
    in_tree: pytree definition used to unflatten ``args_flat``.
    args_flat: the flat argument leaves.

  Returns:
    A list with one string per leaf. When the signature of ``fun`` can be
    bound, each string is the parameter name selected by the ``idx`` of the
    first key-path entry, followed by the string form of the remaining
    key-path entries. Otherwise every entry is the empty string.
  """
  sig = _try_infer_args(fun, in_tree)
  keyed_leaves = generate_key_paths(tree_unflatten(in_tree, args_flat))
  # Parameter names in signature order; only needed when binding succeeded.
  param_names = None if sig is None else list(sig.arguments)

  names = []
  for key_path, _ in keyed_leaves:
    head, *tail = key_path
    if param_names is None:
      names.append('')
      continue
    suffix = ''.join(str(k) for k in tail)
    names.append(f'{param_names[head.idx]}{suffix}')
  return names


def _device_assignment_mismatch_error(fun_name, fails, args_flat, api_name,
                                      arg_names):
  """Formats the message for an incompatible-devices error.

  Args:
    fun_name: name of the user function.
    fails: pair of mismatch records taken from the raised error.
    args_flat: flat arguments of the call.
    api_name: ``'jit'`` or ``'pjit'``; interpolated into the message.
    arg_names: display names matching ``args_flat`` one-to-one.

  Returns:
    The error message string. Arguments identified as the source of a mismatch
    are described by name; any remaining mismatch record is rendered through
    its ``_str(api_name)`` method.
  """
  # Arguments without a `sharding` attribute get a device assignment of None.
  arg_list = [
      (name,
       arg.sharding._device_assignment if hasattr(arg, 'sharding') else None,
       shaped_abstractify(arg))
      for arg, name in zip(args_flat, arg_names)]

  arg_msgs = _find_arg_mismatch(arg_list, fails, fun_name)
  num_found = len(arg_msgs)

  if num_found == 2:
    first, second = arg_msgs  # pylint: disable=unbalanced-tuple-unpacking
    extra_msg = f" Got {first} and {second}"
  elif num_found == 1:
    first, second = fails
    # Report the record that is not already covered by ARG_SHARDING.
    if first.m_type == pxla.MismatchType.ARG_SHARDING:
      left = second
    else:
      left = first
    extra_msg = f" Got {arg_msgs[0]} and{left._str(api_name)}"
  else:
    first, second = fails
    extra_msg = f" Got{first._str(api_name)} and{second._str(api_name)}"
  return f"Received incompatible devices for {api_name}ted computation.{extra_msg}"


def _python_pjit_helper(fun, infer_params_fn, *args, **kwargs):
  """Infers parameters, binds ``pjit_p`` and rebuilds the output pytree.

  Args:
    fun: the user function; used only to build error messages.
    infer_params_fn: callable returning the 6-tuple of inferred call data.
    *args: positional arguments of the call.
    **kwargs: keyword arguments of the call.

  Returns:
    A tuple ``(outs, out_flat, out_tree, args_flat)``.

  Raises:
    ValueError: if binding raises ``pxla.DeviceAssignmentMismatchError``. The
      original exception context is suppressed.
  """
  inferred = infer_params_fn(*args, **kwargs)
  args_flat, _, params, in_tree, out_tree, _ = inferred

  for flat_arg in args_flat:
    dispatch.check_arg(flat_arg)

  try:
    out_flat = pjit_p.bind(*args_flat, **params)
  except pxla.DeviceAssignmentMismatchError as e:
    fails, = e.args
    # With no resource env the call is reported as `jit`, otherwise as `pjit`.
    api_name = 'jit' if params['resource_env'] is None else 'pjit'
    arg_names = _get_arg_names(fun, in_tree, args_flat)
    fun_name = getattr(fun, '__qualname__', getattr(fun, '__name__', str(fun)))
    raise ValueError(
        _device_assignment_mismatch_error(
            fun_name, fails, args_flat, api_name, arg_names)) from None

  return tree_unflatten(out_tree, out_flat), out_flat, out_tree, args_flat


def _python_pjit(fun: Callable, infer_params_fn):
  """Wraps ``fun`` with the pure-Python pjit dispatch path.

  Args:
    fun: the user function.
    infer_params_fn: callable that infers the bind parameters for a call.

  Returns:
    A wrapper around ``fun``. When ``config.jax_disable_jit`` is set it calls
    ``fun`` directly; otherwise it returns the unflattened outputs of
    ``_python_pjit_helper``. The wrapper carries a ``clear_cache`` attribute
    that evicts ``fun`` from the ``_create_pjit_jaxpr`` cache.
  """

  @wraps(fun)
  @api_boundary
  def wrapped(*args, **kwargs):
    if not config.jax_disable_jit:
      outs, *_ = _python_pjit_helper(fun, infer_params_fn, *args, **kwargs)
      return outs
    return fun(*args, **kwargs)

  def _python_pjit_evict_fn():
    _create_pjit_jaxpr.evict_function(fun)  # type: ignore

  wrapped.clear_cache = _python_pjit_evict_fn
  return wrapped


def _get_fastpath_data(executable, out_tree, args_flat, out_flat):
  use_fastpath = (
      executable is not None and
      isinstance(executable, pxla.MeshExecutable) and
      isinstance(executable.unsafe_call, pxla.ExecuteReplicated) and
      # No effects in computation
      not executable.unsafe_call.ordered_effects and
      not executable.unsafe_call.has_unordered_effects and
      not executable.unsafe_call.has_host_callbacks and
      all(isinstance(x, xc.ArrayImpl) for x in out_flat)
  )

  if use_fastpath:
    out_avals = [o.aval for o in out_flat]
    out_committed = [o._committed for o in out_flat]
    kept_var_bitvec = [i in executable._kept_var_idx
                       for i in range(len(args_flat))]
    fastpath_data = pxla.MeshExecutableFastpathData(
        executable.xla_executable, out_tree, executable._in_shardings,
        executable._out_shardings, out_avals, out_committed, kept_var_bitvec)
  else:
    fastpath_data = None
  return fastpath_data


class _MostRecentPjitCallExecutable(threading.local):
  """Thread-local holder with a single ``value`` slot, initially ``None``."""

  def __init__(self):
    self.value = None

# Module-level instance of the thread-local holder above.
_most_recent_pjit_call_executable = _MostRecentPjitCallExecutable()


def _read_most_recent_pjit_call_executable():
  """Returns the stored most-recent executable and resets the slot to ``None``.

  The value is read from the thread-local ``_most_recent_pjit_call_executable``
  holder, so a second call without an intervening write returns ``None``.
  """
  slot = _most_recent_pjit_call_executable
  executable, slot.value = slot.value, None
  return executable


def _cpp_pjit_evict_fn(self):
  """Clears the caches held for a C++-dispatched pjit function.

  ``_cpp_pjit`` installs this function as ``clear_cache`` on the type of the
  object it returns, so ``self`` is that object.

  Args:
    self: an object providing ``_clear_cache()`` and a ``_fun`` attribute.
  """
  # Clear the object's own cache first.
  self._clear_cache()
  # Then evict the wrapped Python function from the _create_pjit_jaxpr cache.
  _create_pjit_jaxpr.evict_function(self._fun)  # type: ignore


# Module-level cache that `_cpp_pjit` passes to the C++ dispatch path for
# functions without explicit shardings.
# The entries are doubled here from the default 4096 because _pjit_call_impl
# also has a cpp dispatch path and that would double the number of entries in
# the global shared cache.
_cpp_pjit_cache = xc._xla.PjitFunctionCache(capacity=8192)


def _cpp_pjit(fun: Callable, infer_params_fn, static_argnums, static_argnames,
              donate_argnums, pjit_has_explicit_sharding):
  """Wraps ``fun`` with the C++ pjit dispatch path.

  Args:
    fun: the user function.
    infer_params_fn: callable that infers the bind parameters for a call.
    static_argnums: forwarded to ``xc._xla.pjit``.
    static_argnames: forwarded to ``xc._xla.pjit``.
    donate_argnums: forwarded to ``xc._xla.pjit``.
    pjit_has_explicit_sharding: when true, a fresh ``PjitFunctionCache`` is
      created for this function instead of using the module-level
      ``_cpp_pjit_cache``.

  Returns:
    The C++ pjit callable, updated with ``wraps(fun)``, with ``_fun`` set to
    ``fun``. ``clear_cache`` is assigned on the callable's type.
  """

  @api_boundary
  def cache_miss(*args, **kwargs):
    # Run the Python path, then package the executable it recorded (if any).
    outs, out_flat, out_tree, args_flat = _python_pjit_helper(
        fun, infer_params_fn, *args, **kwargs)
    fastpath_data = _get_fastpath_data(
        _read_most_recent_pjit_call_executable(), out_tree, args_flat,
        out_flat)
    return outs, fastpath_data

  global_cache = (xc._xla.PjitFunctionCache() if pjit_has_explicit_sharding
                  else _cpp_pjit_cache)
  fun_name = getattr(fun, "__name__", "<unnamed function>")  # type: ignore
  cpp_pjit_f = xc._xla.pjit(  # type: ignore
      fun_name, fun, cache_miss, static_argnums, static_argnames,  # type: ignore
      donate_argnums, global_cache)  # type: ignore

  cpp_pjitted_f = wraps(fun)(cpp_pjit_f)
  cpp_pjitted_f._fun = fun
  # Note: this assigns on the type, not on the individual instance.
  type(cpp_pjitted_f).clear_cache = _cpp_pjit_evict_fn
  return cpp_pjitted_f


def _resolve_axis_resources_and_shardings_arg(
    in_shardings, out_shardings, in_axis_resources, out_axis_resources):
  """Reconciles the deprecated ``*_axis_resources`` with ``*_shardings``.

  Args:
    in_shardings: value passed as ``in_shardings``.
    out_shardings: value passed as ``out_shardings``.
    in_axis_resources: value passed as the deprecated ``in_axis_resources``.
    out_axis_resources: value passed as the deprecated ``out_axis_resources``.

  Returns:
    A pair ``(final_in_shardings, final_out_shardings)``. For each side the
    deprecated argument is used when it was specified, otherwise the
    ``*_shardings`` argument is used.

  Raises:
    ValueError: if both the deprecated and the new argument are specified for
      the same side.
  """
  in_resources_given = not is_unspecified(in_axis_resources)
  out_resources_given = not is_unspecified(out_axis_resources)

  if in_resources_given and not is_unspecified(in_shardings):
    raise ValueError(
        'Setting both in_shardings and in_axis_resources is not '
        'allowed. in_axis_resources is deprecated. Please use in_shardings.')
  if out_resources_given and not is_unspecified(out_shardings):
    raise ValueError(
        'Setting both out_shardings and out_axis_resources is not '
        'allowed. out_axis_resources is deprecated. Please use out_shardings.')

  # Either deprecated argument being used triggers the same warning.
  if in_resources_given or out_resources_given:
    warnings.warn(
        'in_axis_resources and out_axis_resources are deprecated. Please use '
        'in_shardings and out_shardings as their replacement.',
        DeprecationWarning)

  final_in_shardings = in_axis_resources if in_resources_given else in_shardings
  final_out_shardings = (
      out_axis_resources if out_resources_given else out_shardings)
  return final_in_shardings, final_out_shardings


def pre_infer_params(fun, in_shardings, out_shardings,
                     donate_argnums, static_argnums, static_argnames, device,
                     backend, abstracted_axes):
  """Validates and canonicalizes jit/pjit arguments that are fixed per wrapper.

  Args:
    fun: the user function; must pass ``check_callable``.
    in_shardings: user-provided input shardings (a list is turned into a
      tuple).
    out_shardings: user-provided output shardings.
    donate_argnums: user-provided donated argument indices.
    static_argnums: user-provided static argument indices.
    static_argnames: user-provided static argument names.
    device: deprecated device argument, or ``None``.
    backend: deprecated backend argument, or ``None``.
    abstracted_axes: abstracted axes spec, or ``None``.

  Returns:
    A tuple ``(in_shardings, out_shardings, donate_argnums, static_argnums,
    static_argnames)`` after ``prepare_axis_resources`` and ``resolve_argnums``
    have been applied.

  Raises:
    ValueError: if ``abstracted_axes`` is truthy without
      ``config.jax_dynamic_shapes``; if both ``device`` and ``backend`` are
      given; or if either is given together with specified shardings.
  """
  if abstracted_axes and not config.jax_dynamic_shapes:
    raise ValueError("abstracted_axes must be used with --jax_dynamic_shapes")

  check_callable(fun)

  device_given = device is not None
  backend_given = backend is not None
  if backend_given or device_given:
    # The deprecation warning is emitted before any of the checks below.
    warnings.warn(
        'backend and device argument on jit is deprecated. You can use a '
        '`jax.sharding.Mesh` context manager or device_put the arguments '
        'before passing them to `jit`. Please see '
        'https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html '
        'for more information.', DeprecationWarning)
    if device_given and backend_given:
      raise ValueError("can't specify both a device and a backend for jit, "
                       f"got {device=} and {backend=}")
    if not is_unspecified(in_shardings):
      raise ValueError('If backend or device is specified on jit, then '
                       'in_shardings should not be specified.')
    if not is_unspecified(out_shardings):
      raise ValueError('If backend or device is specified on jit, then '
                       'out_shardings should not be specified.')

  # To be a tree prefix of the positional args tuple, in_shardings can never be
  # a list: if it is not a leaf, it must be a tuple of trees. Users expect
  # tuples and lists to be treated essentially interchangeably, so lists are
  # canonicalized to tuples here rather than raising an error.
  # https://github.com/google/jax/issues/2367
  if isinstance(in_shardings, list):
    in_shardings = tuple(in_shardings)

  in_shardings, _, _ = prepare_axis_resources(in_shardings, 'in_shardings')
  out_shardings, _, _ = prepare_axis_resources(out_shardings, 'out_shardings')

  resolved_argnums = resolve_argnums(
      fun, donate_argnums, static_argnums, static_argnames)
  donate_argnums, static_argnums, static_argnames = resolved_argnums

  return (in_shardings, out_shardings, donate_argnums, static_argnums,
          static_argnames)


def post_infer_params(fun, infer_params_fn, static_argnums, static_argnames,
                      donate_argnums, abstracted_axes,
                      pjit_has_explicit_sharding):
  """Builds the dispatching wrapper for ``fun`` and attaches ``lower`` to it.

  Args:
    fun: the user function.
    infer_params_fn: callable that infers the bind parameters for a call.
    static_argnums: forwarded to the C++ dispatch wrapper.
    static_argnames: forwarded to the C++ dispatch wrapper.
    donate_argnums: forwarded to the C++ dispatch wrapper.
    abstracted_axes: when not ``None`` the Python dispatch path is used instead
      of the C++ one.
    pjit_has_explicit_sharding: forwarded to ``_cpp_pjit``.

  Returns:
    The wrapped function, with a ``lower`` attribute that lowers a call without
    executing it and returns a ``stages.Lowered``.
  """
  if abstracted_axes is not None:
    wrapped = _python_pjit(fun, infer_params_fn)
  else:
    wrapped = _cpp_pjit(fun, infer_params_fn, static_argnums, static_argnames,
                        donate_argnums, pjit_has_explicit_sharding)

  @api_boundary
  def lower(*args, **kwargs):
    # This private keyword is consumed here and never reaches the user function.
    _experimental_lowering_platform = kwargs.pop(
        '_experimental_lowering_platform', None)
    (args_flat, flat_global_in_avals, params, in_tree, out_tree,
     call_donate_argnums) = infer_params_fn(*args, **kwargs)
    resource_env = params['resource_env']
    mesh = resource_env.physical_mesh if resource_env is not None else None
    try:
      in_shardings = _resolve_in_shardings(
          args_flat, params['in_shardings'], params['out_shardings'], mesh)
      lowering = _pjit_lower(
          params['jaxpr'], in_shardings, params['out_shardings'],
          resource_env, params['donated_invars'], params['name'],
          params['keep_unused'], params['inline'], always_lower=True,
          lowering_platform=_experimental_lowering_platform)
    except pxla.DeviceAssignmentMismatchError as e:
      fails, = e.args
      api_name = 'jit' if resource_env is None else 'pjit'
      arg_names = _get_arg_names(fun, in_tree, args_flat)
      fun_name = getattr(fun, '__qualname__', getattr(fun, '__name__', str(fun)))
      raise ValueError(
          _device_assignment_mismatch_error(
              fun_name, fails, args_flat, api_name, arg_names)) from None

    # Without kwargs, `in_tree` covers positional args only, so pair it with
    # the treedef of an empty dict before handing it to `Lowered`.
    args_kwargs_in_tree = (
        in_tree if kwargs
        else treedef_tuple([in_tree, tree_flatten({})[1]]))

    return stages.Lowered.from_flat_info(
        lowering, args_kwargs_in_tree, flat_global_in_avals,
        call_donate_argnums, out_tree)

  wrapped.lower = lower
  return wrapped


def _pjit_explicit_sharding(in_shardings, out_shardings, device,
                            backend) -> bool:
  in_shardings_flat, _ = tree_flatten(in_shardings)
  out_shardings_flat, _ = tree_flatten(out_shardings)
  return (device is not None or
          backend is not None or
          any(not is_unspecified(i) for i in in_shardings_flat) or
          any(not is_unspecified(i) for i in out_shardings_flat))


class PjitInfo(NamedTuple):
  """Bundle of per-wrapper settings handed to ``common_infer_params``.

  ``common_infer_params`` unpacks an instance positionally, so the field order
  below must stay in sync with that unpacking.
  """
  # The user function being jitted.
  fun: Callable
  # Input shardings as produced by `pre_infer_params`.
  in_shardings: Any
  # Output shardings as produced by `pre_infer_params`.
  out_shardings: Any
  # Indices of positional arguments treated as static.
  static_argnums: Tuple[int, ...]
  # Names of keyword arguments treated as static.
  static_argnames: Tuple[str, ...]
  # Indices of arguments whose buffers are donated.
  donate_argnums: Tuple[int, ...]
  # Deprecated `device` argument, or None.
  device: Optional[xc.Device]
  # Deprecated `backend` argument, or None.
  backend: Optional[str]
  # Forwarded as the `keep_unused` bind parameter.
  keep_unused: bool
  # Forwarded as the `inline` bind parameter.
  inline: bool
  # Resource environment for the call; None makes the call report as 'jit'.
  resource_env: Any
  # Abstracted axes spec, or None.
  abstracted_axes: Optional[Any]


def common_infer_params(pjit_info_args, *args, **kwargs):
  """Infers the flat arguments and the ``pjit_p`` bind parameters for a call.

  Args:
    pjit_info_args: a 12-element sequence unpacked in ``PjitInfo`` field order.
    *args: positional arguments of the call.
    **kwargs: keyword arguments of the call.

  Returns:
    A 6-tuple ``(consts + args_flat, in_type, params, in_tree, out_tree(),
    donate_argnums)`` where ``params`` is the dict of keyword parameters later
    passed to ``pjit_p.bind``.

  Raises:
    ValueError: if kwargs are passed while ``in_shardings`` is specified, or if
      ``backend``/``device`` is combined with a non-empty mesh.
    OverflowError: re-raised with the argument location when abstractifying an
      argument overflows.
  """
  (fun, user_in_shardings, user_out_shardings, static_argnums, static_argnames,
   donate_argnums, device, backend, keep_unused, inline,
   resource_env, abstracted_axes) = pjit_info_args

  if kwargs and not is_unspecified(user_in_shardings):
    raise ValueError(
        "pjit does not support kwargs when in_shardings is specified.")

  if resource_env is not None:
    pjit_mesh = resource_env.physical_mesh
  else:
    pjit_mesh = None

  if (backend or device) and pjit_mesh is not None and not pjit_mesh.empty:
    raise ValueError(
        "Mesh context manager should not be used with jit when backend or "
        "device is also specified as an argument to jit.")

  axes_specs = _flat_axes_specs(abstracted_axes, *args, **kwargs)

  # A missing resource env means the call came through `jit`, not `pjit`.
  jit_name = 'jit' if resource_env is None else 'pjit'
  dbg = debug_info(jit_name, fun, args, kwargs, static_argnums, static_argnames)
  f = lu.wrap_init(fun)
  f, res_paths = result_paths(f)
  f, dyn_args = argnums_partial_except(f, static_argnums, args,
                                       allow_invalid=True)
  del args

  # TODO(yashkatariya): Merge the nokwargs and kwargs path. One blocker is
  # flatten_axes which if kwargs are present in the treedef (even empty {}),
  # leads to wrong expansion.
  if kwargs:
    f, dyn_kwargs = argnames_partial_except(f, static_argnames, kwargs)
    explicit_args, in_tree = tree_flatten((dyn_args, dyn_kwargs))
    flat_fun, out_tree = flatten_fun(f, in_tree)
  else:
    explicit_args, in_tree = tree_flatten(dyn_args)
    flat_fun, out_tree = flatten_fun_nokwargs(f, in_tree)
    dyn_kwargs = ()
  del kwargs

  # Donation is turned off entirely when the jax_debug_nans flag is set.
  if donate_argnums and not config.jax_debug_nans:
    donated_invars = donation_vector(donate_argnums, dyn_args, dyn_kwargs)
  else:
    donated_invars = (False,) * len(explicit_args)

  # If backend or device is set as an arg on jit, then resolve them to
  # in_shardings and out_shardings as if user passed in in_shardings
  # and out_shardings.
  device_or_backend_set = False
  if backend or device:
    in_shardings = out_shardings = _create_sharding_with_device_backend(
        device, backend)
    device_or_backend_set = True
  else:
    in_shardings = tree_map(
        lambda x: _create_sharding_for_array(pjit_mesh, x, 'in_shardings',
                                             jit_name),
        user_in_shardings)
    out_shardings = tree_map(
        lambda x: _create_sharding_for_array(pjit_mesh, x, 'out_shardings',
                                             jit_name),
        user_out_shardings)

  del user_in_shardings, user_out_shardings

  if config.jax_dynamic_shapes:
    in_type = pe.infer_lambda_input_type(axes_specs, explicit_args)
    # Keep only the avals of entries flagged as explicit.
    in_avals = tuple(a for a, e in in_type if e)
  else:
    avals = []
    for i, a in enumerate(explicit_args):
      try:
        avals.append(shaped_abstractify(a))
      except OverflowError as e:
        # Point the user at the offending argument; fall back to the flat
        # index when no debug info is available.
        arg_path = (f"argument path is {dbg.arg_names[i]}" if dbg
                    else f"flattened argument number is {i}")
        raise OverflowError(
          "An overflow was encountered while parsing an argument to a jitted "
          f"computation, whose {arg_path}."
        ) from e
    in_type = in_avals = tuple(avals)

  canonicalized_in_shardings_flat = _process_in_axis_resources(
      hashable_pytree(in_shardings), in_avals, in_tree, resource_env, dbg,
      device_or_backend_set)

  jaxpr, consts, canonicalized_out_shardings_flat = _pjit_jaxpr(
      flat_fun, hashable_pytree(out_shardings), in_type, dbg,
      device_or_backend_set, HashableFunction(out_tree, closure=()),
      HashableFunction(res_paths, closure=()))

  assert len(explicit_args) == len(canonicalized_in_shardings_flat)

  if config.jax_dynamic_shapes:
    implicit_args = _extract_implicit_args(in_type, explicit_args)
  else:
    implicit_args = []
  args_flat = [*implicit_args, *explicit_args]

  # Implicit args and consts are prepended to the argument list, so they get
  # UNSPECIFIED shardings and are never donated.
  num_extra_args = len(implicit_args) + len(consts)
  canonicalized_in_shardings_flat = \
      (UNSPECIFIED,) * num_extra_args + canonicalized_in_shardings_flat
  donated_invars = (False,) * num_extra_args + donated_invars
  assert (len(canonicalized_in_shardings_flat) == len(donated_invars) ==
          len(consts) + len(args_flat))

  # in_shardings and out_shardings here are all GSPMDSharding.
  params = dict(
      jaxpr=jaxpr,
      in_shardings=canonicalized_in_shardings_flat,
      out_shardings=canonicalized_out_shardings_flat,
      resource_env=resource_env,
      donated_invars=donated_invars,
      name=getattr(flat_fun, '__name__', '<unnamed function>'),
      keep_unused=keep_unused,
      inline=inline,
  )
  return (consts + args_flat, in_type, params, in_tree, out_tree(),
          donate_argnums)

def _extract_implicit_args(
  in_type: Sequence[Tuple[core.AbstractValue, bool]],
  explicit_args: Sequence[Any]
) -> Sequence[core.Tracer]:
  """
  Given an input type and explicitly-passed arguments (per the user-facing API
  calling convention), extract implicit axis size arguments from shapes of
  explicit arguments (for the trace-time / jaxpr-level calling convention).
  """
  # First, using `in_type` construct a list to represent the full argument list,
  # leaving the implicit arguments as None placeholders for now.
  explicit_args_ = iter(explicit_args)
  args = [next(explicit_args_) if expl else None for _, expl in in_type]
  assert next(explicit_args_, None) is None
  del explicit_args, explicit_args_

  # Next, populate the implicit arguments using the DBIdxs in `in_type`.
  for i, (aval, explicit) in enumerate(in_type):
    if not explicit or not isinstance(aval, core.DShapedArray):
      continue  # can't populate an implicit argument
    arg = args[i]
    assert arg is not None
    for d1, d2 in zip(aval.shape, arg.aval.shape):
      if isinstance(d1, core.DBIdx):
        if args[d1.val] is None:
          args[d1.val] = d2
        assert core.same_referent(args[d1.val], d2)
  assert all(x is not None for x in args)
  return [x for x, (_, e) in zip(args, in_type) if not e]  # type: ignore

def _flat_axes_specs(abstracted_axes, *args, **kwargs
                     ) -> Optional[List[pe.AbstractedAxesSpec]]:
  """Broadcasts ``abstracted_axes`` as a prefix over the positional arguments.

  Args:
    abstracted_axes: the abstracted axes spec, or ``None``.
    *args: positional arguments of the call.
    **kwargs: keyword arguments of the call; not supported here.

  Returns:
    ``None`` when ``abstracted_axes`` is ``None``, otherwise the result of
    ``broadcast_prefix`` over ``args``.

  Raises:
    NotImplementedError: if ``abstracted_axes`` is given together with kwargs.
  """
  if abstracted_axes is None:
    return None
  if kwargs:
    raise NotImplementedError

  def ax_leaf(l):
    # A spec leaf is a dict whose values are all leaves, or a tuple whose
    # entries are all leaves when None counts as a leaf.
    if isinstance(l, dict):
      return all_leaves(l.values())
    if isinstance(l, tuple):
      return all_leaves(l, lambda x: x is None)
    return False

  return broadcast_prefix(abstracted_axes, args, ax_leaf)


# in_shardings and out_shardings can't be None as the default value
# because `None` means that the input is fully replicated.
def pjit(
    fun: Callable,
    in_shardings=UNSPECIFIED,
    out_shardings=UNSPECIFIED,
    in_axis_resources=UNSPECIFIED,
    out_axis_resources=UNSPECIFIED,
    static_argnums: Union[int, Sequence[int], None] = None,
    static_argnames: Union[str, Iterable[str], None] = None,
    donate_argnums: Union[int, Sequence[int]] = (),
    keep_unused: bool = False,
    device: Optional[xc.Device] = None,
    backend: Optional[str] = None,
    inline: bool = False,
    abstracted_axes: Optional[Any] = None,
) -> stages.Wrapped:
  """Makes ``fun`` compiled and automatically partitioned across multiple devices.

  NOTE: This function is now equivalent to :py:func:`jax.jit`; please use that
  instead.

  The returned function has semantics equivalent to those of ``fun``, but is
  compiled to an XLA computation that runs across multiple devices
  (e.g. multiple GPUs or multiple TPU cores). This can be useful if the jitted
  version of ``fun`` would not fit in a single device's memory, or to speed up
  ``fun`` by running each operation in parallel across multiple devices.

  The partitioning over devices happens automatically based on the
  propagation of the input partitioning specified in ``in_shardings`` and
  the output partitioning specified in ``out_shardings``. The resources
  specified in those two arguments must refer to mesh axes, as defined by
  the :py:func:`jax.sharding.Mesh` context manager. Note that the mesh
  definition at :func:`~pjit` application time is ignored, and the returned function
  will use the mesh definition available at each call site.

  Inputs to a :func:`~pjit`'d function will be automatically partitioned across devices
  if they're not already correctly partitioned based on ``in_shardings``.
  In some scenarios, ensuring that the inputs are already correctly pre-partitioned
  can increase performance. For example, if passing the output of one
  :func:`~pjit`'d function to another :func:`~pjit`'d function (or the same
  :func:`~pjit`'d function in a loop), make sure the relevant
  ``out_shardings`` match the corresponding ``in_shardings``.

  .. note::
    **Multi-process platforms:** On multi-process platforms such as TPU pods,
    :func:`~pjit` can be used to run computations across all available devices across
    processes. To achieve this, :func:`~pjit` is designed to be used in SPMD Python
    programs, where every process is running the same Python code such that all
    processes run the same :func:`~pjit`'d function in the same order.

    When running in this configuration, the mesh should contain devices across
    all processes. However, any input argument dimensions partitioned over
    multi-process mesh axes should be of size equal to the corresponding *local*
    mesh axis size, and outputs will be similarly sized according to the local
    mesh. ``fun`` will still be executed across *all* devices in the mesh,
    including those from other processes, and will be given a global view of the
    data spread across multiple processes as a single array. However, outside
    of :func:`~pjit` every process only "sees" its local piece of the input and output,
    corresponding to its local sub-mesh.

    This means that each process's participating local devices must form a
    _contiguous_ local sub-mesh within the full global mesh. A contiguous
    sub-mesh is one where all of its devices are adjacent within the global
    mesh, and form a rectangular prism.

    The SPMD model also requires that the same multi-process :func:`~pjit`'d
    functions must be run in the same order on all processes, but they can be
    interspersed with arbitrary operations running in a single process.

  Args:
    fun: Function to be compiled. Should be a pure function, as side-effects may
      only be executed once. Its arguments and return value should be arrays,
      scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
      Positional arguments indicated by ``static_argnums`` can be anything at
      all, provided they are hashable and have an equality operation defined.
      Static arguments are included as part of a compilation cache key, which is
      why hash and equality operators must be defined.
    in_shardings: Pytree of structure matching that of arguments to ``fun``,
      with all actual arguments replaced by resource assignment specifications.
      It is also valid to specify a pytree prefix (e.g. one value in place of a
      whole subtree), in which case the leaves get broadcast to all values in
      that subtree.

      The ``in_shardings`` argument is optional. JAX will infer the shardings
      from the input :py:class:`jax.Array`'s, and defaults to replicating the input
      if the sharding cannot be inferred.

      The valid resource assignment specifications are:

      - :py:class:`XLACompatibleSharding`, which will decide how the value
        will be partitioned. With this, using a mesh context manager is not
        required.
      - For backwards compatibility, in_shardings still supports ingesting
        :py:class:`PartitionSpec` and :py:obj:`None`. These 2 options can
        *only* be used with the mesh context manager.

        - :py:obj:`None`, in which case the value will be replicated on all devices
        - :py:class:`PartitionSpec`, a tuple of length at most equal to the rank
          of the partitioned value. Each element can be a :py:obj:`None`, a mesh
          axis or a tuple of mesh axes, and specifies the set of resources assigned
          to partition the value's dimension matching its position in the spec.

      The size of every dimension has to be a multiple of the total number of
      resources assigned to it.
    out_shardings: Like ``in_shardings``, but specifies resource
      assignment for function outputs.
      The ``out_shardings`` argument is optional. If not specified, :py:func:`jax.jit`
      will use GSPMD's sharding propagation to determine how to shard the outputs.
    in_axis_resources: (Deprecated) Please use in_shardings.
    out_axis_resources: (Deprecated) Please use out_shardings.
    static_argnums: An optional int or collection of ints that specify which
      positional arguments to treat as static (compile-time constant).
      Operations that only depend on static arguments will be constant-folded in
      Python (during tracing), and so the corresponding argument values can be
      any Python object.

      Static arguments should be hashable, meaning both ``__hash__`` and
      ``__eq__`` are implemented, and immutable. Calling the jitted function
      with different values for these constants will trigger recompilation.
      Arguments that are not arrays or containers thereof must be marked as
      static.

      If ``static_argnums`` is not provided, no arguments are treated as static.
    static_argnames: An optional string or collection of strings specifying
      which named arguments to treat as static (compile-time constant). See the
      comment on ``static_argnums`` for details. If not
      provided but ``static_argnums`` is set, the default is based on calling
      ``inspect.signature(fun)`` to find corresponding named arguments.
    donate_argnums: Specify which argument buffers are "donated" to the computation.
      It is safe to donate argument buffers if you no longer need them once the
      computation has finished. In some cases XLA can make use of donated
      buffers to reduce the amount of memory needed to perform a computation,
      for example recycling one of your input buffers to store a result. You
      should not reuse buffers that you donate to a computation, JAX will raise
      an error if you try to.
      For more details on buffer donation see the `FAQ <https://jax.readthedocs.io/en/latest/faq.html#buffer-donation>`_.
    keep_unused: If `False` (the default), arguments that JAX determines to be
      unused by `fun` *may* be dropped from resulting compiled XLA executables.
      Such arguments will not be transferred to the device nor provided to the
      underlying executable. If `True`, unused arguments will not be pruned.
    device: This argument is deprecated. Please put your arguments on the
      device you want before passing them to jit.
      Optional, the Device the jitted function will run on. (Available devices
      can be retrieved via :py:func:`jax.devices`.) The default is inherited
      from XLA's DeviceAssignment logic and is usually to use
      ``jax.devices()[0]``.
    backend: This argument is deprecated. Please put your arguments on the
      backend you want before passing them to jit.
      Optional, a string representing the XLA backend: ``'cpu'``, ``'gpu'``, or
      ``'tpu'``.
    inline: Forwarded unchanged as the ``inline`` parameter of the underlying
      pjit computation. Defaults to ``False``.
    abstracted_axes: Optional abstracted axes specification. A truthy value
      requires the ``jax_dynamic_shapes`` config flag (a ``ValueError`` is
      raised otherwise). When this is not ``None`` the Python dispatch path is
      used instead of the C++ one.

  Returns:
    A wrapped version of ``fun``, set up for just-in-time compilation and
    automatically partitioned by the mesh available at each call site.

  For example, a convolution operator can be automatically partitioned over
  an arbitrary set of devices by a single :func:`~pjit` application:

  >>> import jax
  >>> import jax.numpy as jnp
  >>> import numpy as np
  >>> from jax.sharding import Mesh, PartitionSpec
  >>> from jax.experimental.pjit import pjit
  >>>
  >>> x = jnp.arange(8, dtype=jnp.float32)
  >>> f = pjit(lambda x: jax.numpy.convolve(x, jnp.asarray([0.5, 1.0, 0.5]), 'same'),
  ...         in_shardings=None, out_shardings=PartitionSpec('devices'))
  >>> with Mesh(np.array(jax.devices()), ('devices',)):
  ...   print(f(x))  # doctest: +SKIP
  [ 0.5  2.   4.   6.   8.  10.  12.  10. ]
  """
  # Fold the deprecated *_axis_resources arguments into *_shardings.
  in_shardings, out_shardings = _resolve_axis_resources_and_shardings_arg(
      in_shardings, out_shardings, in_axis_resources, out_axis_resources)

  # Validate and canonicalize everything that is fixed for this wrapper.
  (in_shardings, out_shardings, donate_argnums, static_argnums,
   static_argnames) = pre_infer_params(
       fun, in_shardings, out_shardings, donate_argnums,
       static_argnums, static_argnames, device, backend, abstracted_axes)

  def infer_params(*args, **kwargs):
    # Putting this outside of wrapped would make resources lexically scoped
    resource_env = mesh_lib.thread_resources.env
    pjit_info_args = PjitInfo(
          fun=fun, in_shardings=in_shardings,
          out_shardings=out_shardings, static_argnums=static_argnums,
          static_argnames=static_argnames, donate_argnums=donate_argnums,
          device=device, backend=backend, keep_unused=keep_unused,
          inline=inline, resource_env=resource_env,
          abstracted_axes=abstracted_axes)
    return common_infer_params(pjit_info_args, *args, **kwargs)

  # Passed on to `post_infer_params`, which hands it to the C++ dispatch
  # wrapper.
  has_explicit_sharding = _pjit_explicit_sharding(
      in_shardings, out_shardings, device, backend)
  return post_infer_params(fun, infer_params, static_argnums, static_argnames,
                           donate_argnums, abstracted_axes,
                           has_explicit_sharding)


def hashable_pytree(pytree):
  """Wraps `pytree` in a `HashableFunction` thunk that rebuilds it on call.

  The pytree is flattened once. The treedef and the tuple of leaves are both
  captured by the thunk and passed as the `closure` of the returned
  `HashableFunction`.
  """
  leaves, structure = tree_flatten(pytree)
  frozen_leaves = tuple(leaves)
  return HashableFunction(
      lambda: tree_unflatten(structure, frozen_leaves),
      closure=(structure, frozen_leaves))


@lru_cache(maxsize=4096)
def _create_mesh_pspec_sharding_from_parsed_pspec(mesh, x):
  """Builds a mesh/pspec sharding from the parsed partition spec `x`.

  Unspecified or auto entries are returned unchanged. Results are memoized
  by `lru_cache` on `(mesh, x)`.
  """
  if not is_unspecified_or_auto(x):
    return pxla.create_mesh_pspec_sharding(mesh, x.user_spec, x)
  return x


def _create_sharding_for_array(mesh, x, name, api_name):
  """Converts one user-provided sharding entry into a concrete sharding.

  Args:
    mesh: Mesh used to interpret a parsed partition spec, or `None`.
    x: The entry to convert.
    name: Name of the argument `x` came from. It is used in error messages
      and selects the extra hint for 'in_shardings' / 'out_shardings'.
    api_name: Name of the calling API, used in the empty-mesh error message.

  Returns:
    `x` itself when it is already an `XLACompatibleSharding` or is
    unspecified/auto; otherwise the sharding built from `mesh` and the parsed
    partition spec `x`.

  Raises:
    RuntimeError: If `mesh` is `None` or empty while `x` still needs a mesh.
  """
  if isinstance(x, XLACompatibleSharding) or is_unspecified_or_auto(x):
    return x

  if mesh is None:
    base = ('jax.jit only supports `XLACompatibleSharding`s being passed to'
            f' {name}. Looks like you are passing either `PartitionSpec` or `None`'
            ' which is not allowed in jax.jit.\n')
    if name == 'in_shardings':
      note = (f'Note that {name} argument is optional. JAX will infer the shardings'
              " from the input jax.Array's and will default to replicating the"
              ' input if the sharding cannot be inferred.')
    elif name == 'out_shardings':
      note = (f'Note that {name} is optional. If not specified, jax.jit will'
              " use GSPMD's sharding propagation to figure out what the sharding"
              ' of the output(s) should be.')
    else:
      note = ''
    raise RuntimeError(base + note)

  if mesh.empty:
    raise RuntimeError(
        f'{api_name} requires a non-empty mesh if you are passing'
        f' `PartitionSpec`s or `None` to {name}! Is a mesh defined at the call'
        f' site? Alternatively, provide `XLACompatibleSharding`s to {name} and'
        ' then the mesh context manager is not required.')

  # A nice user error is raised in prepare_axis_resources.
  assert isinstance(x, ParsedPartitionSpec), x
  return _create_mesh_pspec_sharding_from_parsed_pspec(mesh, x)


def _create_sharding_with_device_backend(device, backend):
  if device is not None:
    assert backend is None
    out = SingleDeviceSharding(device)
  elif backend is not None:
    assert device is None
    out = SingleDeviceSharding(
        xb.get_backend(backend).get_default_device_assignment(1)[0])
  return out


def flatten_axis_resources(what, tree, shardings, tupled_args):
  """Flattens `shardings` against `tree`, with detailed prefix errors.

  Args:
    what: Human-readable name of the specification, used in error messages.
    tree: Treedef the shardings must be a tree prefix of.
    shardings: The user-provided sharding specification.
    tupled_args: Whether `tree` represents a tuple of positional arguments.

  Returns:
    A tuple with the flattened shardings when `flatten_axes` succeeds.

  Raises:
    ValueError: With a targeted message when `tupled_args` is set and
      `shardings` is not a tuple of the right length.
    Exception: Otherwise, the first error reported by `prefix_errors`.
  """
  try:
    flat = flatten_axes(what, tree, shardings, tupled_args=tupled_args)
    return tuple(flat)
  except ValueError:
    pass  # Fall through and produce a more helpful tree prefix error.

  # A leaf is always a valid prefix, so after a prefix error the shardings
  # cannot be a leaf.
  assert not treedef_is_leaf(tree_structure(shardings))

  # Compare the exact type instead of using isinstance because of namedtuples.
  is_plain_tuple = type(shardings) is tuple
  if tupled_args and (not is_plain_tuple or
                      len(shardings) != len(tree.children())):
    # The shardings are meant to line up with the args tuple, but while they
    # form a non-leaf pytree, they are either not a tuple or have the wrong
    # length.
    num_args = len(tree.children())
    msg = (f"{what} specification must be a tree prefix of the positional "
           f"arguments tuple passed to the `pjit`-decorated function. In "
           f"particular, {what} must either be a None, a PartitionSpec, or "
           f"a tuple of length equal to the number of positional arguments.")
    # TODO(mattjj,apaszke): disable implicit list casts, remove 'or list' below
    if not is_plain_tuple:
      msg += f" But {what} is not a tuple: got {type(shardings)} instead."
    elif len(shardings) != num_args:
      msg += (f" But {what} is the wrong length: got a tuple or list of length "
              f"{len(shardings)} for an args tuple of length "
              f"{num_args}.")

    # Extra hint: maybe the user just forgot to wrap the shardings in a
    # singleton tuple.
    if num_args == 1:
      try:
        flatten_axes(what, tree, (shardings,))
      except ValueError:
        pass  # That's not the issue.
      else:
        msg += (f" Given the corresponding argument being "
                f"passed, it looks like {what} might need to be wrapped in "
                f"a singleton tuple.")

    raise ValueError(msg)

  # Only the treedef is available here, not the full pytree, so build a
  # placeholder tree to compare against. Revise this in callers?
  placeholder_tree = tree_unflatten(tree, [PytreeLeaf()] * tree.num_leaves)
  mismatches = prefix_errors(shardings, placeholder_tree)
  if mismatches:
    # Only report the first disagreement found.
    raise mismatches[0](what)

  # No tree prefix error could be found; this should be unreachable.
  assert False, "Please open a bug report!"

class PytreeLeaf:
  """Placeholder leaf whose repr reads "pytree leaf"."""

  def __repr__(self):
    return "pytree leaf"


@lru_cache(maxsize=4096)
def _process_in_axis_resources(in_shardings_thunk, in_avals, in_tree,
                               resource_env, debug_info, device_or_backend_set):
  """Flattens, validates and canonicalizes the input shardings.

  Args:
    in_shardings_thunk: Zero-argument callable returning the user's
      in_shardings.
    in_avals: Flat abstract values of the inputs.
    in_tree: Treedef of the inputs, used to flatten the shardings.
    resource_env: Not read in the body; it still participates in the
      `lru_cache` key.
    debug_info: Optional debug info; its `arg_names` label error messages.
    device_or_backend_set: Forwarded to `to_gspmd_sharding`.

  Returns:
    A tuple with one entry per input: unspecified/auto entries are kept
    as-is, everything else is converted with `to_gspmd_sharding`.
  """
  user_in_shardings = in_shardings_thunk()
  # Only take the shortcut when the original in_shardings are unspecified. AUTO
  # goes through flatten_axis_resources.
  if is_unspecified(user_in_shardings):
    flat_shardings = (user_in_shardings,) * len(in_avals)
  else:
    flat_shardings = flatten_axis_resources(
        "pjit in_shardings", in_tree, user_in_shardings, tupled_args=True)

  if not config.jax_dynamic_shapes:
    arg_names = debug_info.arg_names if debug_info is not None else None
    pjit_check_aval_sharding(flat_shardings, in_avals, arg_names,
                             "pjit arguments", allow_uneven_sharding=False)

  def canonicalize(sharding, aval):
    if is_unspecified_or_auto(sharding):
      return sharding
    return to_gspmd_sharding(sharding, aval.ndim, device_or_backend_set)

  return tuple(canonicalize(s, aval)
               for s, aval in zip(flat_shardings, in_avals))


@lu.cache
def _create_pjit_jaxpr(fun, in_type, debug_info, out_paths):
  """Traces `fun` to a closed jaxpr for pjit.

  Args:
    fun: The wrapped function to trace.
    in_type: Input type information handed to the tracer.
    debug_info: Optional debug info; when truthy it is finalized with
      `pe.debug_info_final` and passed to tracing.
    out_paths: Zero-argument callable producing the result paths attached to
      the jaxpr's debug info (only called when dynamic shapes are disabled).

  Returns:
    A `(closed_jaxpr, final_consts, global_out_avals)` triple. When any
    constant is a tracer, the constants are converted to jaxpr inputs and
    returned as `final_consts`; otherwise they are closed over and
    `final_consts` is empty.
  """
  trace_msg = (
      "Finished tracing + transforming {fun_name} for pjit in {elapsed_time} sec")
  with dispatch.log_elapsed_time(
      trace_msg, fun_name=fun.__name__, event=dispatch.JAXPR_TRACE_EVENT):
    pe_debug = debug_info and pe.debug_info_final(fun, debug_info.traced_for)
    if not config.jax_dynamic_shapes:
      jaxpr, global_out_avals, consts = pe.trace_to_jaxpr_dynamic(
          fun, in_type, debug_info=pe_debug)
    else:
      jaxpr, global_out_avals, consts = pe.trace_to_jaxpr_dynamic2(
          lu.annotate(fun, in_type), debug_info=pe_debug)

  if not config.jax_dynamic_shapes:
    jaxpr = jaxpr_debug_info(jaxpr, debug_info, out_paths())

  has_tracer_consts = any(isinstance(c, core.Tracer) for c in consts)
  if not has_tracer_consts:
    return core.ClosedJaxpr(jaxpr, consts), [], global_out_avals

  lifted = pe.close_jaxpr(pe.convert_constvars_jaxpr(jaxpr))
  return lifted, consts, global_out_avals


@lru_cache(maxsize=4096)
def _check_and_canonicalize_out_shardings(
    out_shardings_thunk, out_tree, out_type, debug_info, device_or_backend_set):
  """Flattens, validates and canonicalizes the output shardings.

  Args:
    out_shardings_thunk: Zero-argument callable returning the user's
      out_shardings.
    out_tree: Zero-argument callable returning the output treedef.
    out_type: Flat abstract values of the outputs.
    debug_info: Optional debug info; its `result_paths` label error messages.
    device_or_backend_set: Forwarded to `to_gspmd_sharding`.

  Returns:
    A tuple with one entry per output: unspecified/auto entries are kept
    as-is, everything else is converted with `to_gspmd_sharding`.
  """
  user_out_shardings = out_shardings_thunk()
  # TODO(yashkatariya): Remove the if branch and fix flatten_axis_resources
  # instead. This condition exists because flatten_axis_resources passes in an
  # `object()` while unflattening which breaks assertion is user defined
  # pytrees (which shouldn't exist but they do).
  broadcast_directly = (
      is_unspecified(user_out_shardings) or
      isinstance(user_out_shardings, XLACompatibleSharding))
  if broadcast_directly:
    flat_shardings = (user_out_shardings,) * len(out_type)
  else:
    flat_shardings = flatten_axis_resources(
        "pjit out_shardings", out_tree(), user_out_shardings,
        tupled_args=False)

  if not config.jax_dynamic_shapes:
    result_names = debug_info.result_paths if debug_info is not None else None
    pjit_check_aval_sharding(
        flat_shardings, out_type, result_names,
        "pjit outputs", allow_uneven_sharding=False)

  def canonicalize(sharding, aval):
    if is_unspecified(sharding) or is_auto(sharding):
      return sharding
    return to_gspmd_sharding(sharding, aval.ndim, device_or_backend_set)

  return tuple(canonicalize(o, aval)
               for o, aval in zip(flat_shardings, out_type))


def _pjit_jaxpr(fun, out_shardings_thunk, in_type, debug_info,
                device_or_backend_set, out_tree, result_paths):
  """Traces `fun` and canonicalizes its output shardings.

  Tracing is delegated to `_create_pjit_jaxpr`; the output shardings are
  checked against the traced output types by
  `_check_and_canonicalize_out_shardings`, using the debug info stored on the
  traced jaxpr.

  Returns:
    A `(closed_jaxpr, final_consts, canonicalized_out_shardings_flat)` triple.
  """
  closed_jaxpr, final_consts, out_type = _create_pjit_jaxpr(
      fun, in_type, debug_info, result_paths)
  traced_debug_info = closed_jaxpr.jaxpr.debug_info
  out_shardings_flat = _check_and_canonicalize_out_shardings(
      out_shardings_thunk, out_tree, tuple(out_type), traced_debug_info,
      device_or_backend_set)
  return closed_jaxpr, final_consts, out_shardings_flat


def pjit_check_aval_sharding(
    shardings, flat_avals, names: Optional[Tuple[str, ...]],
    what_aval: str, allow_uneven_sharding: bool):
  """Checks that every sharding is compatible with its abstract value.

  Args:
    shardings: Flat shardings; unspecified/auto entries are skipped.
    flat_avals: Flat abstract values matching `shardings`.
    names: Optional labels (pytree key paths) used in error messages.
    what_aval: Description of what is being checked, e.g. "pjit arguments".
    allow_uneven_sharding: When false, each dimension size must be divisible
      by the number of ways that dimension is sharded.

  Raises:
    ValueError: If a sharding rejects the shape of its aval, or if a
      dimension is not evenly divisible while uneven sharding is disallowed.
  """
  labels = names if names is not None else [''] * len(shardings)
  for aval, s, label in zip(flat_avals, shardings, labels):
    if is_unspecified_or_auto(s):
      continue
    name_str = f' with pytree key path {label}' if label else ''
    shape = aval.shape
    rank = len(shape)
    try:
      # Shardings may optionally implement `is_compatible_aval` in order to
      # raise a more meaningful error.
      if hasattr(s, 'is_compatible_aval'):
        s.is_compatible_aval(shape)
      else:
        s._to_xla_op_sharding(rank)
    except ValueError as e:
      raise ValueError(
          f'One of {what_aval}{name_str} is incompatible with its sharding '
          f'annotation {s}: {str(e)}')
    # The `OpSharding` proto tells how many ways each dimension of the aval is
    # sharded. This approach works across all XLACompatibleSharding.
    op_sharding = s._to_xla_op_sharding(rank)
    assert op_sharding is not None
    num_ways_dim_sharded, _ = op_shardings.get_num_ways_dim_sharded(
        cast(xc.OpSharding, op_sharding))
    if allow_uneven_sharding:
      continue
    for i, size in enumerate(num_ways_dim_sharded):
      if shape[i] % size != 0:
        raise ValueError(f"One of {what_aval}{name_str} was given the sharding "
                         f"of {s}, which implies that "
                         f"the global size of its dimension {i} should be "
                         f"divisible by {size}, but it is equal to {shape[i]} "
                         f"(full shape: {shape})")


# -------------------- pjit rules --------------------

# The `pjit` primitive, created as a `core.AxisPrimitive` and flagged as
# producing multiple results. The rules below are registered against it.
pjit_p = core.AxisPrimitive("pjit")
pjit_p.multiple_results = True


def _resolve_in_shardings(
    args, pjit_in_shardings: Sequence[PjitSharding],
    out_shardings: Sequence[PjitSharding],
    pjit_mesh: Optional[pxla.Mesh]) -> Sequence[PjitSharding]:
  """Resolves the input shardings of a pjit call against the actual args.

  Args:
    args: The flat call arguments. Arguments with a `sharding` attribute
      contribute their sharding; all others are treated as unspecified and
      uncommitted.
    pjit_in_shardings: The input shardings requested on pjit, one per arg.
    out_shardings: The output shardings requested on pjit; only used for the
      device-assignment consistency check.
    pjit_mesh: Optional mesh whose devices (when the mesh is non-empty) are
      passed to the device-assignment check.

  Returns:
    `pjit_in_shardings` unchanged when device/backend was set on the
    shardings; otherwise a tuple with one resolved sharding per argument.

  Raises:
    ValueError: If an argument's sharding is not an `XLACompatibleSharding`,
      if a non-fully-replicated sharding is requested for a numpy input in a
      multi-process run, or if a committed argument's sharding does not match
      the requested pjit sharding.
    NotImplementedError: If an uncommitted argument is sharded over multiple
      devices and no pjit sharding was specified for it.
  """
  # If True, means that device or backend is set by the user on pjit and it
  # has the same semantics as device_put i.e. doesn't matter which device the
  # arg is on, reshard it to the device mentioned. So don't do any of the
  # checks and just return the pjit_in_shardings directly. `shard_args` will
  # handle the resharding.
  if pxla.check_device_backend_on_shardings(pjit_in_shardings):
    return pjit_in_shardings

  # Collect the shardings of committed arguments so that their device
  # assignment can be validated together with the pjit shardings below.
  committed_arg_shardings = []
  for a in args:
    if hasattr(a, 'sharding'):
      arg_s = a.sharding
      if not isinstance(arg_s, XLACompatibleSharding):
        raise ValueError(f'One of the argument to pjit got sharding {arg_s} '
                         'which is not a subclass of XLACompatibleSharding.')
      # Don't consider PmapSharding inputs as committed. They will get resharded
      # unconditionally.
      if isinstance(arg_s, PmapSharding):
        continue
      # Arguments without a `_committed` attribute are treated as committed.
      if getattr(a, '_committed', True):
        committed_arg_shardings.append((arg_s, pxla.MismatchType.ARG_SHARDING, None))

  # Check if the device_assignment across inputs, outputs and arguments is the
  # same.
  pxla._get_and_check_device_assignment(
      it.chain(
          committed_arg_shardings,
          [(i, pxla.MismatchType.IN_SHARDING, None) for i in pjit_in_shardings],
          [(o, pxla.MismatchType.OUT_SHARDING, None) for o in out_shardings]),
      (None if pjit_mesh is None or pjit_mesh.empty else list(pjit_mesh.devices.flat)))

  resolved_in_shardings = []
  for arg, pjit_in_s in zip(args, pjit_in_shardings):
    # Arguments without a `sharding` attribute count as unspecified and
    # uncommitted.
    arg_s, committed = ((arg.sharding, getattr(arg, '_committed', True))
                        if hasattr(arg, 'sharding') else (UNSPECIFIED, False))
    if is_unspecified(pjit_in_s):
      # No sharding was requested on pjit: derive it from the argument.
      if is_unspecified(arg_s):
        resolved_in_shardings.append(arg_s)
      else:
        if committed:
          # If the arg has a PmapSharding, then reshard it unconditionally.
          if isinstance(arg_s, PmapSharding):
            resolved_in_shardings.append(UNSPECIFIED)
          else:
            resolved_in_shardings.append(to_gspmd_sharding(
                cast(XLACompatibleSharding, arg_s), arg.ndim))
        else:
          # Uncommitted arguments are only supported on a single device.
          if dispatch.is_single_device_sharding(arg_s):
            resolved_in_shardings.append(UNSPECIFIED)
          else:
            raise NotImplementedError('Having uncommitted Array sharded on '
                                      'multiple devices is not supported.')
    else:
      # A sharding was requested on pjit: validate the argument against it.
      if (isinstance(arg, np.ndarray) and
          not pjit_in_s.is_fully_replicated and  # type: ignore
          xb.process_count() > 1):
        raise ValueError(
            'Passing non-trivial shardings for numpy '
            'inputs is not allowed. To fix this error, either specify a '
            'replicated sharding explicitly or use '
            '`jax.experimental.multihost_utils.host_local_array_to_global_array(...)` '
            'to convert your host local numpy inputs to a jax.Array which you '
            'can pass to pjit. '
            'If the numpy input is the same on each process, then you can use '
            '`jax.make_array_from_callback(...) to create a `jax.Array` which '
            'you can pass to pjit. '
            'Please see the jax.Array migration guide for more information '
            'https://jax.readthedocs.io/en/latest/jax_array_migration.html#handling-of-host-local-inputs-to-pjit-like-batch-etc. '
            f'Got arg shape: {arg.shape}, arg value: {arg}')
      if not is_unspecified(arg_s):
        # A committed, non-pmap argument must already carry a sharding that is
        # equivalent to the one requested on pjit.
        if (committed and
            not isinstance(arg_s, PmapSharding) and
            not op_shardings.are_op_shardings_equal(
                pjit_in_s._to_xla_op_sharding(arg.ndim),  # type: ignore
                arg_s._to_xla_op_sharding(arg.ndim))):
          # Report the sharding the user originally passed, when available.
          op =  getattr(pjit_in_s, '_original_sharding', pjit_in_s)
          raise ValueError('Sharding passed to pjit does not match the sharding '
                           'on the respective arg. '
                           f'Got pjit sharding: {op},\n'
                           f'arg sharding: {arg_s} for arg shape: {arg.shape}, '
                           f'arg value: {arg}')
      resolved_in_shardings.append(pjit_in_s)

  return tuple(resolved_in_shardings)


def _pjit_call_impl_python(
    *args, jaxpr, in_shardings, out_shardings, resource_env, donated_invars,
    name, keep_unused, inline):
  """Resolves shardings, lowers and compiles `jaxpr`, then runs it on `args`.

  The compiled executable is also recorded in
  `_most_recent_pjit_call_executable.value`.

  Returns:
    A pair `(outputs, compiled)` where `outputs` is the result of
    `compiled.unsafe_call(*args)`.

  Raises:
    FloatingPointError: If the compiled call raised `FloatingPointError`.
      Either re-running the jaxpr via `core.jaxpr_as_fun` raises on its own,
      or a `FloatingPointError` with an explanatory message is raised here.
  """
  global _most_recent_pjit_call_executable

  # When there is no resource_env, no mesh is passed to the resolution step.
  in_shardings = _resolve_in_shardings(
      args, in_shardings, out_shardings,
      resource_env.physical_mesh if resource_env is not None else None)

  compiled = _pjit_lower(
      jaxpr, in_shardings, out_shardings, resource_env,
      donated_invars, name, keep_unused, inline,
      always_lower=False, lowering_platform=None).compile()
  _most_recent_pjit_call_executable.value = compiled
  # This check is expensive so only do it if enable_checks is on.
  if compiled._auto_spmd_lowering and config.jax_enable_checks:
    pxla.check_gda_or_array_xla_sharding_match(args, compiled._in_shardings,
                                               jaxpr.jaxpr.debug_info)
  if config.jax_distributed_debug:
    # Defensively only perform fingerprint logic if debug logging is enabled
    # NOTE(skyewm): I didn't benchmark this
    fingerprint = None
    if hasattr(compiled.runtime_executable(), "fingerprint"):
      fingerprint = compiled.runtime_executable().fingerprint
    if fingerprint is not None:
      fingerprint = fingerprint.hex()
    distributed_debug_log(("Running pjit'd function", name),
                          ("in_shardings", in_shardings),
                          ("out_shardings", out_shardings),
                          ("abstract args", map(xla.abstractify, args)),
                          ("fingerprint", fingerprint))
  try:
    return compiled.unsafe_call(*args), compiled
  except FloatingPointError:
    assert config.jax_debug_nans or config.jax_debug_infs  # compiled_fun can only raise in this case

    # Re-run the computation by evaluating the jaxpr directly; the result is
    # discarded, the call is only made for the error it may raise.
    _ = core.jaxpr_as_fun(jaxpr)(*args)  # may raise, not return

    # If control reaches this line, we got an invalid value on the output of
    # `compiled` but not when evaluating the jaxpr on the same arguments.
    # Let's tell the user.
    msg = ("An invalid value was encountered in the output of the "
           f"`jit`-decorated function {name}. Because "
           "config.jax_debug_nans and/or config.jax_debug_infs is set, the "
           "de-optimized function (i.e., the function as if the `jit` "
           "decorator were removed) was called in an attempt to get a more "
           "precise error message. However, the de-optimized function did not "
           "produce invalid values during its execution. This behavior can "
           "result from `jit` optimizations causing the invalid value to be "
           "produced. It may also arise from having nan/inf constants as "
           "outputs, like `jax.jit(lambda ...: jax.numpy.nan)(...)`. "
           "\n\n"
           "It may be possible to avoid the invalid value by removing the "
           "`jit` decorator, at the cost of losing optimizations. "
           "\n\n"
           "If you see this error, consider opening a bug report at "
           "https://github.com/google/jax.")
    raise FloatingPointError(msg)


@weakref_lru_cache
def _get_jaxpr_as_fun(jaxpr, in_shardings, out_shardings, resource_env,
                      donated_invars, name, keep_unused, inline):
  """Returns a callable that evaluates `jaxpr` without strongly holding it.

  Only `jaxpr` is used in the body; the remaining parameters just take part
  in the cache key.
  """
  # This function sits under a weakref_lru_cache keyed on the jaxpr. Returning
  # `core.jaxpr_as_fun(jaxpr)` directly would make the cached value hold a
  # strong reference to the jaxpr, defeating the purpose of the weakref cache.
  # The returned function therefore closes over a weak reference only and
  # dereferences it each time it is called.
  jaxpr_ref = weakref.ref(jaxpr)

  def run_jaxpr(*args):
    return core.jaxpr_as_fun(jaxpr_ref())(*args)

  return run_jaxpr


def _pjit_call_impl(*args, jaxpr,
                    in_shardings, out_shardings, resource_env,
                    donated_invars, name, keep_unused, inline):
  """Implementation rule of `pjit_p`, dispatching through `xc._xla.pjit`.

  The C++ pjit object is built with a cache-miss handler that runs
  `_pjit_call_impl_python` on the outer `args` and returns the flat outputs
  together with the fast-path data.
  """
  def call_impl_cache_miss(*args_, **kwargs_):
    # The handler's own arguments are ignored; the enclosing `args` are used.
    outs, executable = _pjit_call_impl_python(
        *args, jaxpr=jaxpr, in_shardings=in_shardings,
        out_shardings=out_shardings, resource_env=resource_env,
        donated_invars=donated_invars, name=name, keep_unused=keep_unused,
        inline=inline)
    fastpath = _get_fastpath_data(
        executable, tree_structure(outs), args, outs)
    return outs, fastpath

  # Prefer the shardings as originally given when they are recorded.
  orig_in_shardings = tuple(
      getattr(s, '_original_sharding', s) for s in in_shardings)
  orig_out_shardings = tuple(
      getattr(s, '_original_sharding', s) for s in out_shardings)
  jaxpr_fun = _get_jaxpr_as_fun(
      jaxpr, orig_in_shardings, orig_out_shardings,
      resource_env, donated_invars, name, keep_unused, inline)
  donated_argnums = [
      idx for idx, donated in enumerate(donated_invars) if donated]
  cpp_pjit_fun = xc._xla.pjit(
      name, jaxpr_fun, call_impl_cache_miss, [], [], donated_argnums,
      _cpp_pjit_cache)
  return cpp_pjit_fun(*args)

pjit_p.def_impl(_pjit_call_impl)


@dataclasses.dataclass(frozen=True)
class SameDeviceAssignmentTuple:
  """Tuple of shardings plus a device assignment, with custom hash/equality.

  `GSPMDSharding` entries are hashed by their `_op_sharding_hash` and compared
  with `op_shardings.are_op_shardings_equal`; all other entries use their own
  hash and `==`.
  """
  shardings: Tuple[PjitSharding, ...]
  # device_assignment is Optional because shardings can contain `AUTO` and in
  # that case `mesh` is compulsory to be used. So in that case
  # `_pjit_lower_cached` cache, resource_env will check against the devices.
  device_assignment: Optional[XLADeviceAssignment]

  def __hash__(self):
    hashable_shardings = tuple(
        s._op_sharding_hash if isinstance(s, GSPMDSharding) else s  # type: ignore
        for s in self.shardings)
    if self.device_assignment is not None:
      return hash((hashable_shardings, *self.device_assignment))
    return hash(hashable_shardings)

  def __eq__(self, other):
    if not isinstance(other, SameDeviceAssignmentTuple):
      return False

    def same_sharding(lhs, rhs):
      # Compare the shardings as originally given when they are recorded.
      lhs = getattr(lhs, "_original_sharding", lhs)
      rhs = getattr(rhs, "_original_sharding", rhs)
      if isinstance(lhs, GSPMDSharding) and isinstance(rhs, GSPMDSharding):
        return op_shardings.are_op_shardings_equal(
            lhs._op_sharding, rhs._op_sharding)
      return lhs == rhs

    matches = [same_sharding(s, o)
               for s, o in zip(self.shardings, other.shardings)]
    return all(matches) and self.device_assignment == other.device_assignment


def _pjit_lower(
    jaxpr: core.ClosedJaxpr,
    in_shardings,
    out_shardings,
    *args, **kwargs):
  """Wraps the shardings for caching and defers to `_pjit_lower_cached`.

  The input and output shardings are each wrapped in a
  `SameDeviceAssignmentTuple` sharing the device assignment computed by
  `_fast_path_get_device_assignment`. Remaining arguments are forwarded
  untouched.
  """
  device_assignment = _fast_path_get_device_assignment(
      it.chain(in_shardings, out_shardings))
  sdat_in = SameDeviceAssignmentTuple(tuple(in_shardings), device_assignment)
  sdat_out = SameDeviceAssignmentTuple(tuple(out_shardings), device_assignment)
  return _pjit_lower_cached(jaxpr, sdat_in, sdat_out, *args, **kwargs)


@weakref_lru_cache
def _pjit_lower_cached(
    jaxpr: core.ClosedJaxpr,
    sdat_in_shardings: SameDeviceAssignmentTuple,
    sdat_out_shardings: SameDeviceAssignmentTuple,
    resource_env,
    donated_invars,
    name: str,
    keep_unused: bool,
    inline: bool,
    always_lower: bool,
    *,
    lowering_platform: Optional[str]):
  """Lowers `jaxpr` with the given shardings; results are cached.

  When `resource_env` is given, the jaxpr is resource-typechecked and the
  environment's physical mesh is used under the 'pjit' API name. Without a
  `resource_env` (the jit wrapper around pjit) there is no mesh and the API
  name is 'jit'.

  Returns:
    The result of `pxla.lower_mesh_computation` when the jaxpr contains an
    `xmap` primitive, otherwise of `pxla.lower_sharding_computation`.
  """
  in_shardings: Tuple[PjitShardingMinusUnspecified, ...] = cast(
      Tuple[PjitShardingMinusUnspecified, ...], sdat_in_shardings.shardings)
  out_shardings: Tuple[PjitSharding, ...] = sdat_out_shardings.shardings

  if resource_env is None:
    # resource_env is `None` in the jit wrapper around pjit.
    mesh, api_name = None, 'jit'
  else:
    pxla.resource_typecheck(jaxpr, resource_env, {}, lambda: "pjit")
    mesh, api_name = resource_env.physical_mesh, 'pjit'

  # `pjit(xmap)` has to go through `lower_mesh_computation` because `xmap`
  # only supports SPMDAxisContext right now.
  if dispatch.jaxpr_has_primitive(jaxpr.jaxpr, 'xmap'):
    return pxla.lower_mesh_computation(
      jaxpr, api_name, name, mesh,
      in_shardings, out_shardings, donated_invars,
      True, jaxpr.in_avals, tiling_method=None,
      lowering_platform=lowering_platform)

  if mesh is None or mesh.empty:
    context_devices = None
  else:
    context_devices = list(mesh.devices.flat)
  return pxla.lower_sharding_computation(
      jaxpr, api_name, name, in_shardings, out_shardings,
      tuple(donated_invars), tuple(jaxpr.in_avals),
      keep_unused=keep_unused, inline=inline, always_lower=always_lower,
      devices_from_context=context_devices,
      lowering_platform=lowering_platform)


def pjit_staging_rule(trace, *args, **params):
  """Custom staging rule for `pjit_p`.

  Three cases are handled:
    * `inline` is set and every in/out sharding is unspecified: the inner
      jaxpr is evaluated directly instead of staging a pjit equation.
    * dynamic shapes are enabled: output tracers are built from `_out_type`,
      resolving input/output dimension references to the corresponding
      argument or previously created output tracer, and the equation is added
      to the trace frame by hand.
    * otherwise: the trace's default primitive processing is used.
  """
  can_inline = (
      params["inline"] and
      all(is_unspecified(i) for i in params["in_shardings"]) and
      all(is_unspecified(o) for o in params["out_shardings"]))
  if can_inline:
    closed_jaxpr = params['jaxpr']
    return core.eval_jaxpr(closed_jaxpr.jaxpr, closed_jaxpr.consts, *args,
                           propagate_source_info=False)

  if not config.jax_dynamic_shapes:
    return trace.default_process_primitive(pjit_p, args, params)

  source_info = source_info_util.current()
  out_tracers = []

  def resolve_dim(d):
    # Input references point at args, output references at earlier outputs.
    if type(d) is core.InDBIdx:
      return args[d.val]
    if type(d) is core.OutDBIdx:
      return out_tracers[d.val]
    return d

  for aval in _out_type(params['jaxpr']):
    if type(aval) is core.DShapedArray:
      shape = [resolve_dim(d) for d in aval.shape]
      aval = aval.update(shape=tuple(core.get_referent(d) for d in shape))
    out_tracers.append(pe.DynamicJaxprTracer(trace, aval, source_info))

  eqn = core.new_jaxpr_eqn(
      map(trace.getvar, args), map(trace.makevar, out_tracers), pjit_p, params,
      params['jaxpr'].effects, source_info)
  trace.frame.add_eqn(eqn)
  return out_tracers
pe.custom_staging_rules[pjit_p] = pjit_staging_rule

# TODO(mattjj): remove/trivialize this when jaxprs have type annotation on them,
# since it's actually not possible in general to infer the type from the term
def _out_type(jaxpr: core.ClosedJaxpr) -> List[core.AbstractValue]:
  """Computes the output avals of `jaxpr`, abstracting dynamic dimensions.

  For every output whose aval is a `core.DShapedArray`, each dimension that is
  one of the jaxpr's input variables is replaced by `core.InDBIdx(i)` (index
  into the inputs), and each dimension that is one of the jaxpr's output
  variables is replaced by `core.OutDBIdx(i)` (index into the outputs). Input
  references take precedence; all other dimensions are kept unchanged.

  Args:
    jaxpr: The closed jaxpr whose output type is computed.

  Returns:
    A list with one abstract value per output variable.
  """
  invars = jaxpr.jaxpr.invars
  outvars = jaxpr.jaxpr.outvars
  in_idx = {v: i for i, v in enumerate(invars)}
  # Index the *output* variables here. Building this map from the inputs (as
  # was done before) made the OutDBIdx branch below unreachable, since any
  # key found in it would already have matched `in_idx`. Outvars may contain
  # non-Var atoms, hence the type filter.
  out_idx = {x: i for i, x in enumerate(outvars) if type(x) is core.Var}

  out = []
  for x in outvars:
    aval = x.aval
    if type(aval) is core.DShapedArray:
      shape = [core.InDBIdx(in_idx[d]) if d in in_idx else
               core.OutDBIdx(out_idx[d]) if d in out_idx else
               d for d in aval.shape]
      aval = aval.update(shape=tuple(shape))
    out.append(aval)
  return out


def _pjit_typecheck(ctx_factory, *in_atoms, jaxpr, **params):
  """Typechecks a `pjit_p` equation by delegating to `core._check_call`.

  The closed jaxpr is unwrapped and passed along as the `call_jaxpr` param
  next to the remaining equation params.
  """
  call_params = {**params, 'call_jaxpr': jaxpr.jaxpr}
  return core._check_call(ctx_factory, pjit_p, in_atoms, call_params)
core.custom_typechecks[pjit_p] = _pjit_typecheck


def _pjit_abstract_eval(*args, jaxpr, out_shardings, resource_env, **_):
  return jaxpr.out_avals, jaxpr.effects
# Register the abstract evaluation rule of `pjit_p`; the rule returns both the
# output avals and the effects.
pjit_p.def_effectful_abstract_eval(_pjit_abstract_eval)


def _pjit_lowering(ctx, *args, name, jaxpr, in_shardings,
                   out_shardings, resource_env, donated_invars,
                   keep_unused, inline):
  """MLIR lowering rule for `pjit_p`.

  The inner jaxpr is lowered to a function and called with the dimension
  variable values, the incoming effect tokens and the operands. The tokens
  returned by the call are stored back on `ctx`.

  Returns:
    The result values of the call that correspond to the jaxpr outputs, i.e.
    without the leading effect tokens.
  """
  effects = list(ctx.tokens_in.effects())
  num_tokens = len(effects)
  result_types = map(mlir.aval_to_ir_types, ctx.avals_out)
  output_types = [mlir.token_type()] * num_tokens + result_types
  flat_output_types = flatten(output_types)

  def op_sharding_or_none(sharding, aval):
    if is_unspecified(sharding):
      return None
    return sharding._to_xla_op_sharding(aval.ndim)

  arg_shardings = [op_sharding_or_none(i, aval)
                   for aval, i in zip(ctx.avals_in, in_shardings)]
  result_shardings = [op_sharding_or_none(o, aval)
                      for aval, o in zip(ctx.avals_out, out_shardings)]

  # TODO(b/228598865): inlined calls cannot have shardings set directly on the
  # inputs or outputs because they are lost during MLIR->HLO conversion.
  # using_sharding_annotation=False means we add an identity operation instead.
  api_name = 'jit' if resource_env is None else 'pjit'
  func = mlir.lower_jaxpr_to_fun(
      ctx.module_context, name, jaxpr, effects, arg_shardings=arg_shardings,
      result_shardings=result_shardings, use_sharding_annotations=False,
      api_name=api_name)

  tokens_in = [ctx.tokens_in.get(eff) for eff in effects]
  call_operands = (*ctx.dim_var_values, *tokens_in, *args)
  call = func_dialect.CallOp(flat_output_types,
                             ir.FlatSymbolRefAttr.get(func.name.value),
                             mlir.flatten_lowering_ir_args(call_operands))

  grouped_results = unflatten(call.results, map(len, output_types))
  tokens, out_nodes = split_list(grouped_results, [num_tokens])
  tokens_out = ctx.tokens_in.update_tokens(mlir.TokenSet(zip(effects, tokens)))
  ctx.set_tokens_out(tokens_out)
  return out_nodes

mlir.register_lowering(pjit_p, _pjit_lowering)


def _pjit_batcher(insert_axis, spmd_axis_name,
                  axis_size, axis_name, main_type,
                  vals_in, dims_in,
                  jaxpr, in_shardings, out_shardings,
                  resource_env, donated_invars, name, keep_unused, inline):
  """Batching rule for `pjit_p`.

  The inner jaxpr is batched with `batching.batch_jaxpr2`, and every sharding
  whose operand/result has a batch dimension is updated via
  `_pjit_batcher_for_sharding`. The primitive is then re-bound with the
  batched jaxpr and shardings.

  Returns:
    A pair `(vals_out, axes_out)` with the batched outputs and their batch
    dimensions.
  """
  batched_jaxpr, axes_out = batching.batch_jaxpr2(
      jaxpr, axis_size, dims_in, axis_name=axis_name,
      spmd_axis_name=spmd_axis_name, main_type=main_type)

  # `insert_axis` is set to True only for some `xmap` uses.
  if insert_axis:
    new_parts = (axis_name,)
  elif spmd_axis_name is None:
    new_parts = ()
  else:
    new_parts = spmd_axis_name

  mesh = None if resource_env is None else resource_env.physical_mesh

  def batch_sharding(sharding, axis, aval):
    # Shardings of unbatched values are left alone.
    if axis is None:
      return sharding
    return _pjit_batcher_for_sharding(
        sharding, axis, new_parts, mesh, aval.ndim)

  batched_in_shardings = tuple(
      batch_sharding(i, axis_in, aval)
      for axis_in, i, aval in zip(dims_in, in_shardings,
                                  batched_jaxpr.in_avals))
  batched_out_shardings = tuple(
      batch_sharding(o, axis_out, aval)
      for axis_out, o, aval in zip(axes_out, out_shardings,
                                   batched_jaxpr.out_avals))

  vals_out = pjit_p.bind(
    *vals_in,
    jaxpr=batched_jaxpr,
    in_shardings=batched_in_shardings,
    out_shardings=batched_out_shardings,
    resource_env=resource_env,
    donated_invars=donated_invars,
    name=name,
    keep_unused=keep_unused,
    inline=inline)
  return vals_out, axes_out

batching.spmd_axis_primitive_batchers[pjit_p] = partial(_pjit_batcher, False)
batching.axis_primitive_batchers[pjit_p] = partial(_pjit_batcher, False, None)
pxla.spmd_primitive_batchers[pjit_p] = partial(_pjit_batcher, True, None)

def _pjit_batcher_for_sharding(
    s: Union[GSPMDSharding, UnspecifiedValue],
    dim: int, val: Tuple[str, ...], mesh, ndim: int):
  """Updates sharding `s` for a new batch dimension inserted at `dim`.

  Args:
    s: The sharding to update; unspecified shardings are returned unchanged.
    dim: Position of the new batch dimension.
    val: Mesh axis names to partition the new dimension over. When empty, the
      new dimension gets a tile assignment size of 1.
    mesh: Mesh used to parse the op sharding when `val` is non-empty; replaced
      by the mesh of `s._original_sharding` when that is a `NamedSharding`.
    ndim: Rank passed on when converting to an op sharding.

  Returns:
    The updated sharding.
  """
  if is_unspecified(s):
    return s

  if val:
    assert isinstance(s, GSPMDSharding)
    original = getattr(s, '_original_sharding', None)
    if isinstance(original, NamedSharding):
      mesh = original.mesh  # type: ignore
    assert mesh is not None and not mesh.empty
    parsed_pspec = parse_flatten_op_sharding(s._op_sharding, mesh)[0]  # type: ignore
    parsed_pspec = parsed_pspec.insert_axis_partitions(dim, val)
    named = NamedSharding._from_parsed_pspec(mesh, parsed_pspec)
    return GSPMDSharding(named._device_assignment,
                         named._to_xla_op_sharding(ndim))

  # No axis names to partition over: a replicated sharding stays as it is.
  if sharding_impls.is_op_sharding_replicated(s._op_sharding):  # type: ignore
    return s
  new_op = s._op_sharding.clone()  # type: ignore
  tile_dims = list(new_op.tile_assignment_dimensions)
  tile_dims.insert(dim, 1)
  new_op.tile_assignment_dimensions = tile_dims
  batched = GSPMDSharding(s._device_assignment, new_op)  # type: ignore
  if not hasattr(s, '_original_sharding'):
    return batched
  vmapped_s, _ = pxla._get_out_sharding_from_orig_sharding(
      [batched], [None], s._original_sharding, None, [False])[0]  # type: ignore
  return to_gspmd_sharding(vmapped_s, ndim)


def _pjit_jvp(primals_in, tangents_in,
              jaxpr, in_shardings, out_shardings,
              resource_env, donated_invars, name, keep_unused, inline):
  """JVP rule for `pjit_p`.

  The JVP jaxpr takes the primals followed by the non-zero tangents. The
  shardings and donation flags of the tangents are copied from their primals.

  Returns:
    A pair `(primals_out, tangents_out)` where symbolically-zero output
    tangents are represented by `ad.Zero` of the matching output aval.
  """
  nz_tangents_in = [type(t) is not ad.Zero for t in tangents_in]
  jaxpr_jvp, nz_tangents_out = ad.jvp_jaxpr(
      jaxpr, nz_tangents_in, instantiate=False)

  def select(mask, xs):
    return tuple(x for keep, x in zip(mask, xs) if keep)

  jvp_in_shardings = (*in_shardings, *select(nz_tangents_in, in_shardings))
  jvp_out_shardings = (*out_shardings,
                       *select(nz_tangents_out, out_shardings))
  jvp_donated_invars = (*donated_invars,
                        *select(nz_tangents_in, donated_invars))

  outputs = pjit_p.bind(
      *primals_in, *select(nz_tangents_in, tangents_in),
      jaxpr=jaxpr_jvp,
      in_shardings=jvp_in_shardings,
      out_shardings=jvp_out_shardings,
      resource_env=resource_env,
      donated_invars=jvp_donated_invars,
      name=name,
      keep_unused=keep_unused,
      inline=inline)

  num_primals_out = len(jaxpr.jaxpr.outvars)
  primals_out, tangents_out = split_list(outputs, [num_primals_out])
  assert len(primals_out) == num_primals_out
  # Interleave the computed tangents with symbolic zeros for the rest.
  remaining_tangents = iter(tangents_out)
  full_tangents_out = [
      next(remaining_tangents) if nz else ad.Zero(aval)
      for nz, aval in zip(nz_tangents_out, jaxpr.out_avals)]
  return primals_out, full_tangents_out
ad.primitive_jvps[pjit_p] = _pjit_jvp


@weakref_lru_cache
def _known_jaxpr_fwd(known_jaxpr: core.ClosedJaxpr,
                     fwds_known: Tuple[Optional[int]]) -> core.ClosedJaxpr:
  """Drops the forwarded outputs from `known_jaxpr`.

  Only the output variables whose entry in `fwds_known` is `None` are kept.
  """
  kept_outvars = [
      outvar
      for outvar, fwd in zip(known_jaxpr.jaxpr.outvars, fwds_known)
      if fwd is None]
  pruned_jaxpr = known_jaxpr.jaxpr.replace(outvars=kept_outvars)
  return known_jaxpr.replace(jaxpr=pruned_jaxpr)


def _pjit_partial_eval(trace, *in_tracers,
                       jaxpr, in_shardings, out_shardings,
                       resource_env, donated_invars, name, keep_unused, inline):
  """Partial-evaluation rule for ``pjit_p``.

  ``jaxpr`` is split by ``pe.partial_eval_jaxpr_nounits`` into a known jaxpr
  and an unknown jaxpr. The known jaxpr is bound to ``pjit_p`` right away; its
  trailing outputs are residuals, which are passed as extra trailing inputs to
  a staged ``pjit_p`` equation wrapping the unknown jaxpr.

  Known outputs that are plain forwards of a known input are removed from the
  known jaxpr and read straight from the inputs instead, but only when the
  corresponding user out_sharding is UNSPECIFIED (a specified out_sharding
  must still go through ``pjit_p``).

  Args:
    trace: the partial-eval trace used to create residual and output tracers.
    *in_tracers: input tracers; ``t.pval.is_known()`` decides known/unknown.
    jaxpr, in_shardings, out_shardings, resource_env, donated_invars, name,
      keep_unused, inline: the parameters of the ``pjit_p`` being evaluated.

  Returns:
    One entry per output of ``jaxpr``: the computed value for known outputs
    and a ``pe.JaxprTracer`` for unknown outputs, merged with ``merge_lists``.
  """
  in_pvals = [t.pval for t in in_tracers]

  known_ins = tuple(pv.is_known() for pv in in_pvals)
  unknown_ins = tuple(not k for k in known_ins)
  known_jaxpr, unknown_jaxpr, unknown_outs, res_avals = pe.partial_eval_jaxpr_nounits(
      jaxpr, unknown_ins, instantiate=False)
  unknown_outs = tuple(unknown_outs)
  known_outs = tuple(not uk for uk in unknown_outs)
  num_residuals = len(res_avals)

  def keep_where(l, should_keep):
    return tuple(x for x, keep in unsafe_zip(l, should_keep) if keep)

  residual_shardings = (UNSPECIFIED,) * num_residuals
  # Compute the known outputs
  known_params = dict(
      jaxpr=known_jaxpr,
      in_shardings=keep_where(in_shardings, known_ins),
      out_shardings=(
          keep_where(out_shardings, known_outs) + residual_shardings),
      resource_env=resource_env,
      donated_invars=keep_where(donated_invars, known_ins),
      name=name,
      keep_unused=keep_unused,
      inline=inline)

  fwds_known = pe._jaxpr_forwarding(known_params['jaxpr'].jaxpr)

  # Only forward the outvars where the out_sharding is UNSPECIFIED.
  # The `known_outs` mask has one entry per output of the *original* jaxpr, so
  # it must be applied to the original `out_shardings`. Applying it to
  # `known_params['out_shardings']` (already filtered by `known_outs` and
  # extended with residual shardings) misaligns mask and shardings whenever
  # known and unknown outputs are interleaved.
  known_user_out_shardings = keep_where(out_shardings, known_outs)
  fwds_known_user = [
      fwd if is_unspecified(os) else None
      for os, fwd in zip(known_user_out_shardings,
                              fwds_known[:len(known_user_out_shardings)])]
  # Forwarding info for the residual outputs (the tail) is kept unchanged.
  fwds_known = fwds_known_user + fwds_known[len(known_user_out_shardings):]
  del fwds_known_user

  # Remove forwarded outvars and out_shardings
  known_params['jaxpr'] = _known_jaxpr_fwd(known_params['jaxpr'], tuple(fwds_known))
  known_out_shardings = tuple(
      s for s, i in zip(known_params['out_shardings'], fwds_known) if i is None)
  known_params['out_shardings'] = known_out_shardings
  del known_out_shardings

  assert len(known_params['out_shardings']) == len(known_params['jaxpr'].out_avals)

  # Bind known things to pjit_p.
  known_inputs = [pv.get_known() for pv in in_pvals if pv.is_known()]
  all_known_outs = pjit_p.bind(*known_inputs, **known_params)

  # Re-insert the forwarded outputs by reading them from the known inputs.
  known_outs_iter = iter(all_known_outs)
  all_known_outs = [next(known_outs_iter)
                    if fwd_idx is None else known_inputs[fwd_idx]
                    for fwd_idx in fwds_known]
  assert next(known_outs_iter, None) is None
  del known_outs_iter, known_inputs

  if num_residuals:
    known_out_vals, residual_vals = \
        split_list(all_known_outs, [len(all_known_outs) - num_residuals])
  else:
    known_out_vals, residual_vals = all_known_outs, ()
  residual_tracers = [trace.new_instantiated_const(residual) for residual in residual_vals]

  # The convention of partial_eval_jaxpr_nounits is to place residual binders
  # at the front of the jaxpr produced, so we move them to the back since both
  # the jaxpr equation built below and the pjit transpose rule assume a
  # residual-inputs-last convention.
  unknown_jaxpr = pe.move_binders_to_back(
      unknown_jaxpr, [True] * num_residuals + [False] * sum(unknown_ins))
  # Prepare unknown tracers
  unknown_params = dict(
      jaxpr=unknown_jaxpr,
      in_shardings=(keep_where(in_shardings, unknown_ins) + residual_shardings),
      out_shardings=keep_where(out_shardings, unknown_outs),
      resource_env=resource_env,
      # Residuals are never donated.
      donated_invars=(keep_where(donated_invars, unknown_ins) +
                      (False,) * num_residuals),
      name=name,
      keep_unused=keep_unused,
      inline=inline)
  unknown_tracers_in = [t for t in in_tracers if not t.pval.is_known()]
  unknown_out_avals = unknown_jaxpr.out_avals
  unknown_tracers_out = [
      pe.JaxprTracer(trace, pe.PartialVal.unknown(aval), None)
      for aval in unknown_out_avals
  ]
  eqn = pe.new_eqn_recipe((*unknown_tracers_in, *residual_tracers),
                          unknown_tracers_out,
                          pjit_p,
                          unknown_params,
                          unknown_jaxpr.effects,
                          source_info_util.current())
  for t in unknown_tracers_out: t.recipe = eqn
  return merge_lists(unknown_outs, known_out_vals, unknown_tracers_out)

pe.custom_partial_eval_rules[pjit_p] = _pjit_partial_eval


def _pjit_partial_eval_custom_params_updater(
    unks_in: Sequence[bool], inst_in: Sequence[bool],
    kept_outs_known: Sequence[bool], kept_outs_staged: Sequence[bool],
    num_res: int, params_known: dict, params_staged: dict
  ) -> Tuple[dict, dict]:
  """Fix up ``pjit_p`` params after ``closed_call_partial_eval_custom_rule``.

  Rewrites ``in_shardings``, ``out_shardings`` and ``donated_invars`` of the
  known and staged params so they line up with the jaxprs stored under
  ``'jaxpr'`` in each dict: the known side gains ``num_res`` residual outputs
  and the staged side gains ``num_res`` leading residual inputs. Residuals get
  UNSPECIFIED shardings and are never donated.

  Returns:
    ``(new_params_known, new_params_staged)``.
  """
  # One UNSPECIFIED sharding per residual (an empty list when there are none).
  res_shardings = [UNSPECIFIED] * num_res

  # Known side: prune inputs according to unks_in, outputs according to
  # kept_outs_known, then append the residual outputs.
  known_donated = pe.partition_list(unks_in, params_known['donated_invars'])[0]
  known_in_shardings = pe.partition_list(unks_in, params_known['in_shardings'])[0]
  known_out_shardings = pe.partition_list(
      kept_outs_known, params_known['out_shardings'])[1]
  new_params_known = dict(
      params_known,
      in_shardings=tuple(known_in_shardings),
      out_shardings=tuple(known_out_shardings) + tuple(res_shardings),
      donated_invars=tuple(known_donated))
  known_jaxpr = params_known['jaxpr']
  assert len(new_params_known['in_shardings']) == len(known_jaxpr.in_avals)
  assert len(new_params_known['out_shardings']) == len(known_jaxpr.out_avals)

  # Staged side: num_res residual inputs are added in front, the remaining
  # inputs are pruned according to inst_in.
  staged_donated = pe.partition_list(inst_in, params_staged['donated_invars'])[1]
  staged_donated = [False] * num_res + staged_donated
  staged_in_shardings = pe.partition_list(inst_in, params_staged['in_shardings'])[1]
  staged_in_shardings = res_shardings + list(staged_in_shardings)
  staged_out_shardings = pe.partition_list(
      kept_outs_staged, params_staged['out_shardings'])[1]
  new_params_staged = dict(
      params_staged,
      in_shardings=tuple(staged_in_shardings),
      out_shardings=tuple(staged_out_shardings),
      donated_invars=tuple(staged_donated))
  staged_jaxpr = params_staged['jaxpr']
  assert len(new_params_staged['in_shardings']) == len(staged_jaxpr.in_avals)
  assert len(new_params_staged['out_shardings']) == len(staged_jaxpr.out_avals)
  return new_params_known, new_params_staged

pe.partial_eval_jaxpr_custom_rules[pjit_p] = \
    partial(pe.closed_call_partial_eval_custom_rule, 'jaxpr',
            _pjit_partial_eval_custom_params_updater)


@lu.cache
def _pjit_transpose_trace(fun, in_avals):
  """Trace ``fun`` at ``in_avals`` and return the result as a ClosedJaxpr."""
  traced_jaxpr, _, traced_consts = pe.trace_to_jaxpr_dynamic(fun, in_avals)
  return core.ClosedJaxpr(traced_jaxpr, traced_consts)


def _pjit_transpose(reduce_axes, cts_in, *primals_in,
                    jaxpr, in_shardings, out_shardings,
                    resource_env, donated_invars, name, keep_unused, inline):
  """Transpose rule for ``pjit_p``.

  Traces ``ad.closed_backward_pass`` of ``jaxpr`` into a new closed jaxpr and
  binds that jaxpr to ``pjit_p``. The inputs of the transposed computation are
  the flattened leaves of ``(primals_in, cts_in)``; its outputs are unflattened
  with the tree structure reported by the traced backward pass.

  Args:
    reduce_axes: forwarded to ``ad.closed_backward_pass``.
    cts_in: cotangents for the outputs of ``jaxpr``.
    *primals_in: primal inputs of the original ``pjit_p``.
    jaxpr, in_shardings, out_shardings, resource_env, donated_invars, name,
      keep_unused, inline: the parameters of the ``pjit_p`` being transposed.
      ``donated_invars`` is not used; nothing is donated in the transposed call.

  Returns:
    The cotangent pytree rebuilt from the outputs of the transposed ``pjit_p``.
  """
  def prune_type(ty, xs, maybe_zeros):
    # Drop every x whose paired entry in maybe_zeros has exactly type `ty`.
    return tuple(x for x, mz in zip(xs, maybe_zeros) if type(mz) is not ty)

  body = lu.wrap_init(ad.closed_backward_pass)
  body = lu.hashable_partial(body, jaxpr, reduce_axes, False)
  # NOTE(review): the name suggests UndefinedPrimal / Zero entries contribute
  # no leaves when flattened -- confirm against their pytree registration.
  primals_and_nz_cts_in, in_treedef = tree_flatten((primals_in, cts_in))
  body, cts_out_treedef_thunk = flatten_fun_nokwargs(body, in_treedef)

  # Inputs of the transposed call: the primals that are not UndefinedPrimal
  # keep their in_sharding, the cotangents that are not Zero take the
  # out_sharding of the output they belong to.
  transpose_in_shardings = (
    *prune_type(ad.UndefinedPrimal, in_shardings, primals_in),
    *prune_type(ad.Zero, out_shardings, cts_in)
  )
  global_cts_in_avals = tuple(core.raise_to_shaped(core.get_aval(ct))
                              for ct in primals_and_nz_cts_in)

  transpose_jaxpr = _pjit_transpose_trace(body, global_cts_in_avals)
  # The thunk is read only after the trace above has been requested.
  cts_out_treedef = cts_out_treedef_thunk()
  # Unflatten with placeholder leaves so that the output tree can be zipped
  # against in_shardings; entries of type ad.Zero are pruned.
  transpose_out_shardings = prune_type(
      ad.Zero,
      in_shardings,
      tree_unflatten(cts_out_treedef, [object()] * cts_out_treedef.num_leaves))

  nz_cts_out = pjit_p.bind(
      *primals_and_nz_cts_in,
      jaxpr=transpose_jaxpr,
      in_shardings=transpose_in_shardings,
      out_shardings=transpose_out_shardings,
      resource_env=resource_env,
      donated_invars=(False,) * len(primals_and_nz_cts_in),
      name=name,
      keep_unused=keep_unused,
      inline=inline)
  return tree_unflatten(cts_out_treedef, nz_cts_out)
ad.reducing_transposes[pjit_p] = _pjit_transpose


@weakref_lru_cache
def _dce_jaxpr_pjit(
    jaxpr: core.ClosedJaxpr, used_outputs: Tuple[bool]
) -> Tuple[core.ClosedJaxpr, List[bool]]:
  """Run ``pe.dce_jaxpr`` on a closed jaxpr.

  Returns:
    The pruned jaxpr re-closed over the original consts, together with the
    used-inputs mask reported by ``pe.dce_jaxpr``.
  """
  pruned_jaxpr, used_inputs = pe.dce_jaxpr(jaxpr.jaxpr, used_outputs)
  closed_pruned = core.ClosedJaxpr(pruned_jaxpr, jaxpr.consts)
  return closed_pruned, used_inputs


def dce_jaxpr_pjit_rule(used_outputs: List[bool], eqn: core.JaxprEqn
                        ) -> Tuple[List[bool], Optional[core.JaxprEqn]]:
  """Dead-code-elimination rule for ``pjit_p`` equations.

  Prunes the inner jaxpr to the outputs marked in ``used_outputs`` and filters
  the per-input / per-output params with the matching masks.

  Returns:
    ``(used_inputs, new_eqn)``. ``new_eqn`` is ``None`` when no input and no
    output is used and the pruned jaxpr has no effects.
  """
  old_params = eqn.params
  dced_jaxpr, used_inputs = _dce_jaxpr_pjit(
      old_params['jaxpr'], tuple(used_outputs))

  def select(items, mask):
    return tuple(item for item, keep in zip(items, mask) if keep)

  new_params = dict(
      old_params,
      jaxpr=dced_jaxpr,
      in_shardings=select(old_params["in_shardings"], used_inputs),
      out_shardings=select(old_params["out_shardings"], used_outputs),
      donated_invars=select(old_params["donated_invars"], used_inputs),
  )

  something_left = (
      any(used_inputs) or any(used_outputs) or dced_jaxpr.effects)
  if not something_left:
    # The whole equation is dead.
    return used_inputs, None

  live_invars = [v for v, used in zip(eqn.invars, used_inputs) if used]
  live_outvars = [v for v, used in zip(eqn.outvars, used_outputs) if used]
  new_eqn = core.new_jaxpr_eqn(
      live_invars, live_outvars,
      eqn.primitive, new_params, dced_jaxpr.effects, eqn.source_info)
  return used_inputs, new_eqn

pe.dce_rules[pjit_p] = dce_jaxpr_pjit_rule


def _check_resources_against_named_axes(what, aval, pos_axis_resources, named_axis_resources):
  """Raise if positional and named axes of ``aval`` share mesh axes.

  Args:
    what: description of the value being checked, used in the error message.
    aval: abstract value whose ``named_shape`` lists its named axes.
    pos_axis_resources: per-dimension collections of mesh axes; ``None``
      entries are ignored.
    named_axis_resources: mapping from named axis to the mesh axes it uses.

  Raises:
    JAXTypeError: if a mesh axis appears both in ``pos_axis_resources`` and in
      the resources of one of the named axes of ``aval``.
  """
  pjit_resources = {
      axis
      for dim_axes in pos_axis_resources if dim_axes is not None
      for axis in dim_axes}
  aval_resources = set()
  for named_axis in aval.named_shape:
    aval_resources.update(named_axis_resources[named_axis])

  overlap = pjit_resources & aval_resources
  if not overlap:
    return
  raise JAXTypeError(
      f"{what} has an axis resources specification of "
      f"{pos_axis_resources.unsynced_user_spec(SpecSync.DIM_PERMUTE)} "
      f"that uses one or more mesh axes already used by xmap to partition "
      f"a named axis appearing in its named_shape (both use mesh axes "
      f"{mesh_lib.show_axes(overlap)})")

def _check_pjit_shardings_against_named_axes(what, avals, shardings,
                                             resource_env,
                                             named_axis_resources):
  """Check each (aval, sharding) pair against the named-axis resources.

  Unspecified and auto shardings are skipped. The parsed partition spec is
  taken from ``s._original_sharding._parsed_pspec`` when available, otherwise
  it is recovered from the op sharding and the physical mesh of
  ``resource_env``; when ``resource_env`` is ``None`` the pair is not checked.
  """
  for aval, s in zip(avals, shardings):
    if is_unspecified(s) or is_auto(s):
      continue
    if (hasattr(s, '_original_sharding') and
        hasattr(s._original_sharding, '_parsed_pspec')):
      parsed_pspec = s._original_sharding._parsed_pspec
    elif resource_env is not None:
      parsed_pspec = parse_flatten_op_sharding(
          s._op_sharding, resource_env.physical_mesh)[0]
    else:
      parsed_pspec = None
    if parsed_pspec is not None:
      _check_resources_against_named_axes(what, aval, parsed_pspec,
                                          named_axis_resources)


def _resource_typing_pjit(avals, params, source_info, resource_env, named_axis_resources):
  """Resource type-checking rule for ``pjit_p``.

  Verifies that the physical mesh does not change inside pjit, then checks the
  input shardings, the body of the jaxpr (via ``pxla.resource_typecheck``) and
  finally the output shardings against ``named_axis_resources``.

  Raises:
    RuntimeError: if both resource envs are set and their physical meshes
      differ.
  """
  jaxpr = params["jaxpr"]
  inner_env = params['resource_env']
  if (resource_env is not None and inner_env is not None and
      resource_env.physical_mesh != inner_env.physical_mesh):
    raise RuntimeError("Changing the physical mesh is not allowed inside pjit.")

  _check_pjit_shardings_against_named_axes(
      "pjit input", jaxpr.in_avals, params['in_shardings'],
      resource_env, named_axis_resources)

  pxla.resource_typecheck(
      jaxpr.jaxpr, resource_env, named_axis_resources,
      lambda: (f"a pjit'ed function {params['name']} "
               f"(pjit called at {source_info_util.summarize(source_info)})"))

  _check_pjit_shardings_against_named_axes(
      "pjit output", jaxpr.out_avals, params['out_shardings'],
      resource_env, named_axis_resources)

pxla.custom_resource_typing_rules[pjit_p] = _resource_typing_pjit


def _pjit_pp_rule(eqn, context, settings):
  """Pretty-print a ``pjit_p`` equation, hiding uninformative params.

  ``inline`` is always hidden. ``donated_invars``, ``in_shardings``,
  ``out_shardings``, ``keep_unused`` and ``resource_env`` are hidden when they
  carry no information (nothing donated, all shardings unspecified,
  ``keep_unused`` false, no or empty physical mesh).
  """
  shown = dict(eqn.params)
  shown.pop('inline')
  if not any(shown['donated_invars']):
    shown.pop('donated_invars')
  for key in ('in_shardings', 'out_shardings'):
    if all(is_unspecified(s) for s in shown[key]):
      shown.pop(key)
  if not shown['keep_unused']:
    shown.pop('keep_unused')
  env = shown['resource_env']
  if env is None or env.physical_mesh.empty:
    shown.pop('resource_env')
  return core._pp_eqn(eqn.replace(params=shown), context, settings)
core.pp_eqn_rules[pjit_p] = _pjit_pp_rule


# -------------------- with_sharding_constraint --------------------

def with_sharding_constraint(x, shardings):
  """Mechanism to constrain the sharding of an Array inside a jitted computation

  This is a strict constraint for the GSPMD partitioner and not a hint. For examples
  of how to use this function, see `Distributed arrays and automatic parallelization`_.

  Args:
    x: PyTree of jax.Arrays which will have their shardings constrained
    shardings: PyTree of sharding specifications. Valid values are the same as for
      the ``in_shardings`` argument of :func:`jax.experimental.pjit`.
  Returns:
    x_with_shardings: PyTree of jax.Arrays with specified sharding constraints.

  .. _Distributed arrays and automatic parallelization: https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html
  """
  leaves, treedef = tree_flatten(x)
  parsed_shardings, _, _ = prepare_axis_resources(
      shardings, "shardings", allow_unconstrained_dims=True)
  del shardings

  # Broadcast the (possibly prefix) shardings tree over the leaves of `x`.
  per_leaf_specs = tuple(
      flatten_axes("with_sharding_constraint shardings", treedef,
                   parsed_shardings))
  del parsed_shardings

  resource_env = mesh_lib.thread_resources.env
  mesh = resource_env.physical_mesh

  shardings_flat = []
  for spec in per_leaf_specs:
    shardings_flat.append(_create_sharding_for_array(
        mesh, spec, 'shardings', 'with_sharding_constraint'))
  del per_leaf_specs

  # Unconstrained dims are only read off NamedShardings.
  unconstrained_dims = []
  for s in shardings_flat:
    if isinstance(s, NamedSharding):
      unconstrained_dims.append(get_unconstrained_dims(s))
    else:
      unconstrained_dims.append({})

  pjit_check_aval_sharding(
      shardings_flat, leaves, None, "with_sharding_constraint arguments",
      allow_uneven_sharding=True)

  constrained = []
  for leaf, leaf_sharding, ud in zip(leaves, shardings_flat,
                                     unconstrained_dims):
    constrained.append(sharding_constraint_p.bind(
        leaf,
        sharding=to_gspmd_sharding(leaf_sharding, leaf.ndim),
        resource_env=resource_env,
        unconstrained_dims=ud))
  return tree_unflatten(treedef, constrained)

def _identity_fn(x):
  """Return ``x`` unchanged."""
  return x

def _sharding_constraint_impl(x, sharding, resource_env, unconstrained_dims):
  """Eager implementation of ``sharding_constraint_p``.

  Returns ``x`` itself when it has a ``sharding`` attribute that is equivalent
  to the requested ``sharding``; otherwise runs a jitted identity function
  with ``out_shardings=sharding``. ``resource_env`` and ``unconstrained_dims``
  are not used here.
  """
  already_satisfied = (
      hasattr(x, 'sharding') and x.sharding.is_equivalent_to(sharding, x.ndim))
  if not already_satisfied:
    # Run a jit here to raise good errors when device assignments don't match.
    return api.jit(_identity_fn, out_shardings=sharding)(x)
  return x


# The primitive bound by `with_sharding_constraint`, one bind per array leaf.
sharding_constraint_p = core.Primitive("sharding_constraint")
sharding_constraint_p.def_impl(_sharding_constraint_impl)
# Abstract evaluation returns the input aval unchanged.
sharding_constraint_p.def_abstract_eval(lambda x, **_: x)
# The transpose re-binds the same primitive, with the same params, on the
# cotangent.
ad.deflinear2(sharding_constraint_p,
              lambda ct, _, **params: (sharding_constraint_p.bind(ct, **params),))

def _sharding_constraint_hlo_lowering(ctx, x_node, *, sharding,
                                      resource_env, unconstrained_dims):
  """Lower ``sharding_constraint_p`` by wrapping ``x_node`` in a sharding op.

  Under an ``SPMDAxisContext`` the sharding is first rebuilt through a
  ``NamedSharding`` so that the axis context is taken into account when the
  op sharding is produced.
  """
  (in_aval,) = ctx.avals_in
  (out_aval,) = ctx.avals_out
  axis_ctx = ctx.module_context.axis_context
  # axis_ctx and manual_axes is *only used with xmap* and xmap only works with
  # NamedSharding. So convert the GSPMDSharding to NamedSharding
  # and then convert it back with the added special axes.
  if isinstance(axis_ctx, sharding_impls.SPMDAxisContext):
    mesh = resource_env.physical_mesh
    parsed_pspec = parse_flatten_op_sharding(sharding._op_sharding, mesh)[0]
    named = NamedSharding._from_parsed_pspec(mesh, parsed_pspec)
    device_assignment = named._device_assignment
    op_sharding_with_axes = named._to_xla_op_sharding(
        in_aval.ndim, axis_ctx=axis_ctx)
    sharding = GSPMDSharding(device_assignment, op_sharding_with_axes)

  wrapped = mlir.wrap_with_sharding_op(
      ctx, x_node, out_aval,
      sharding._to_xla_op_sharding(in_aval.ndim),
      unspecified_dims=unconstrained_dims)
  return [wrapped]
mlir.register_lowering(sharding_constraint_p,
                       _sharding_constraint_hlo_lowering)


def _sharding_constraint_batcher(insert_axis, spmd_axis_name, axis_size,
                                 axis_name, main_type, vals_in, dims_in,
                                 sharding, resource_env, unconstrained_dims):
  """Batching rule for ``sharding_constraint_p``.

  Inserts a new entry for the batch dimension ``d`` into the sharding and
  shifts the unconstrained dimensions that sit at or after ``d``. When no mesh
  axes are assigned to the batch dimension it is marked as unconstrained.

  Returns:
    ``(y, d)``: the constrained batched value and its (unchanged) batch dim.
  """
  (x,) = vals_in
  (d,) = dims_in
  # None means unconstrained in ParsedPartitionSpec
  if insert_axis:
    new_parts = (axis_name,)
  else:
    new_parts = spmd_axis_name

  # Dims at or after the inserted batch dim move one position to the right.
  shifted_unconstrained = set()
  for ud in unconstrained_dims:
    shifted_unconstrained.add(ud + (d <= ud))
  if new_parts is None:
    shifted_unconstrained.add(d)

  batched_sharding = _pjit_batcher_for_sharding(
      sharding, d, new_parts, resource_env.physical_mesh, x.ndim)
  y = sharding_constraint_p.bind(
      x,
      sharding=batched_sharding,
      resource_env=resource_env,
      unconstrained_dims=shifted_unconstrained)
  return y, d
batching.spmd_axis_primitive_batchers[sharding_constraint_p] = partial(
    _sharding_constraint_batcher, False)
batching.axis_primitive_batchers[sharding_constraint_p] = partial(
    _sharding_constraint_batcher, False, None)
pxla.spmd_primitive_batchers[sharding_constraint_p] = partial(
    _sharding_constraint_batcher, True, None)


def _resource_typing_sharding_constraint(avals, params, source_info,
                                         resource_env, named_axis_resources):
  """Resource type-checking rule for ``sharding_constraint_p``.

  Checks that the mesh axes used by ``params['sharding']`` do not overlap with
  the mesh axes used by the named axes of the input aval.

  The parsed partition spec is looked up the same way as in the pjit resource
  typing rule: from the original sharding when it carries ``_parsed_pspec``,
  otherwise by parsing the op sharding against the physical mesh of
  ``resource_env``. When neither source is available (``resource_env`` is
  ``None``) the check is skipped.

  Raises:
    JAXTypeError: propagated from ``_check_resources_against_named_axes``.
  """
  aval, = avals
  sharding = params['sharding']
  # Not every original sharding has a `_parsed_pspec`, so test for the
  # attribute instead of assuming it is present.
  original_sharding = getattr(sharding, '_original_sharding', None)
  if hasattr(original_sharding, '_parsed_pspec'):
    parsed_pspec = original_sharding._parsed_pspec
  elif resource_env is not None:
    parsed_pspec = parse_flatten_op_sharding(
        sharding._op_sharding, resource_env.physical_mesh)[0]
  else:
    parsed_pspec = None
  if parsed_pspec is not None:
    _check_resources_against_named_axes(
      "with_sharding_constraint input", aval, parsed_pspec, named_axis_resources)

pxla.custom_resource_typing_rules[sharding_constraint_p] = \
    _resource_typing_sharding_constraint

# -------------------- helpers --------------------

@lru_cache(maxsize=2048)
def to_gspmd_sharding(s: XLACompatibleSharding, ndim: int,
                      device_or_backend_set: bool = False) -> GSPMDSharding:
  """Convert ``s`` to a ``GSPMDSharding`` for an array of rank ``ndim``.

  A ``GSPMDSharding`` is returned as is. Any other sharding is converted and
  remembered on the result as ``_original_sharding``; when
  ``device_or_backend_set`` is truthy it is also recorded on that original
  sharding as ``_device_backend``. Results are memoized by ``lru_cache``.
  """
  if not isinstance(s, GSPMDSharding):
    converted = GSPMDSharding(s._device_assignment, s._to_xla_op_sharding(ndim))
    converted._original_sharding = s
    if device_or_backend_set:
      converted._original_sharding._device_backend = device_or_backend_set
    return converted
  return s


def get_unconstrained_dims(sharding: NamedSharding):
  """Return the set of dimension indices whose parsed spec entry is ``None``."""
  assert sharding._parsed_pspec is not None
  unconstrained = set()
  for dim, axes in enumerate(sharding._parsed_pspec):
    if axes is None:
      unconstrained.add(dim)
  return unconstrained


def _fast_path_get_device_assignment(
    shardings: Iterable[PjitSharding]) -> Optional[XLADeviceAssignment]:
  """Return the device assignment of the first specified sharding.

  Unspecified shardings are skipped. For an auto sharding the flat device
  tuple of its mesh is returned. ``None`` is returned when every sharding is
  unspecified (or there are none).
  """
  for sharding in shardings:
    if not is_unspecified(sharding):
      return (sharding.mesh._flat_devices_tuple  # type: ignore
              if is_auto(sharding)
              else sharding._device_assignment)  # type: ignore
  return None


def _get_partition_spec(ppspec: Sequence[ParsedPartitionSpec]) -> Sequence[PartitionSpec]:
  """Apply ``get_single_pspec`` to every parsed spec, preserving order."""
  partition_specs = []
  for parsed in ppspec:
    partition_specs.append(get_single_pspec(parsed))
  return partition_specs


def _get_op_sharding_from_executable(
    executable) -> Tuple[Sequence[xc.OpSharding], Sequence[xc.OpSharding]]:
  """Return the parameter and output op shardings reported by ``executable``.

  Either element is an empty list when the executable reports ``None`` for it.
  """
  reported_inputs = executable.get_parameter_shardings()
  in_op_shardings: List[xc.OpSharding] = (
      [] if reported_inputs is None else reported_inputs)

  reported_outputs = executable.get_output_shardings()
  out_op_shardings: List[xc.OpSharding] = (
      [] if reported_outputs is None else reported_outputs)

  return in_op_shardings, out_op_shardings


def _get_ppspec_from_executable(executable, mesh) -> Tuple[Sequence[ParsedPartitionSpec], Sequence[ParsedPartitionSpec]]:
  """Parse the SPMD shardings of the first HLO module of ``executable``.

  Args:
    executable: object exposing ``hlo_modules()``; only the first module is
      inspected.
    mesh: mesh used by ``parse_flatten_op_sharding`` to interpret the op
      shardings.

  Returns:
    ``(in_ppspec, out_ppspec)``: the parsed specs of all parameter shardings
    (flattened into one list) and the parsed specs of the output sharding.
  """
  # Fetch the module once: both shardings are read from the same module, so a
  # second `hlo_modules()` call would only repeat the work.
  hlo_module = executable.hlo_modules()[0]
  input_op_shardings: Sequence[xc.OpSharding] = hlo_module.spmd_parameters_shardings
  output_op_sharding: xc.OpSharding = hlo_module.spmd_output_sharding
  in_ppspec: List[ParsedPartitionSpec] = []
  for s in input_op_shardings:
    in_ppspec.extend(parse_flatten_op_sharding(s, mesh))
  out_ppspec = parse_flatten_op_sharding(output_op_sharding, mesh)
  return in_ppspec, out_ppspec


def _get_pspec_from_executable(
    executable, mesh: pxla.Mesh
) -> Tuple[Tuple[PartitionSpec, ...], Tuple[PartitionSpec, ...]]:
  """Return the input and output PartitionSpecs of ``executable`` as tuples."""
  parsed_in, parsed_out = _get_ppspec_from_executable(executable, mesh)
  out_specs = tuple(_get_partition_spec(parsed_out))
  in_specs = tuple(_get_partition_spec(parsed_in))
  return in_specs, out_specs
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								# Copyright 2021 The JAX Authors.
 								#
 								# Licensed under the Apache License, Version 2.0 (the "License");
 								# you may not use this file except in compliance with the License.
 								# You may obtain a copy of the License at
 								#
 								#     https://www.apache.org/licenses/LICENSE-2.0
 								#
 								# Unless required by applicable law or agreed to in writing, software
 								# distributed under the License is distributed on an "AS IS" BASIS,
 								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								# See the License for the specific language governing permissions and
 								# limitations under the License.
 								import dataclasses
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								import inspect
-												Add logging if we get a C++ cache miss

PiperOrigin-RevId: 531555996

											
										
										
											2023-05-12 11:14:53 -07:00
+								import logging
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								import weakref
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								import numpy as np
 								from typing import (Callable, Sequence, Tuple, Union, cast, List, Optional,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								                    Iterable, NamedTuple, Any)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								import itertools as it
 								from functools import partial, lru_cache
 								import threading
 								import warnings
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								from jax._src import core
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src import stages
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src import dispatch
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src import mesh as mesh_lib
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src import linear_util as lu
-												Rename jax._src.sharding_utils to jax._src.op_shardings.

Move some more op_sharding related helpers to that module.

PiperOrigin-RevId: 522343010

											
										
										
											2023-04-06 08:31:47 -07:00
+								from jax._src import op_shardings
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								from jax._src import sharding_impls
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src import source_info_util
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								from jax._src import traceback_util
-												Add impl rule for with_sharding_constraint so that users can use their functions with and without a jit.

The semantics of eager wsc is the same as within a jit i.e. it will reshard to the given sharding only if the devices are the same and in the same order.

eager wsc won't work as expected with AD transpose because there is no `src` argument to reverse the shardings when transposing and was decided that it is fine for now. jax.device_put should be the API to use for that.

PiperOrigin-RevId: 532858670

											
										
										
											2023-05-17 11:49:31 -07:00
+								from jax._src import api
-												[JAX] Move jax._src.lib.xla_bridge to jax._src.xla_bridge.

Limit jax._src.lib to shims around jaxlib and nothing else.

The goal of this change is to avoid a dependency cycle between the rest of jax and jax._src.lib in a Bazel build. This allows the types for jax._src.lib to be inferred by pytype in isolation without referring to the rest of JAX.

PiperOrigin-RevId: 512922397

											
										
										
											2023-02-28 07:01:14 -08:00
+								from jax._src import xla_bridge as xb
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src.api_util import (
 								    argnums_partial_except, flatten_axes, flatten_fun, flatten_fun_nokwargs,
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								    donation_vector, shaped_abstractify, check_callable, resolve_argnums,
-												Remove `experimental_cpp_jit` since that flag is unused and also remove `experimental_cpp_pjit`.

For dynamic shapes experimentation and normal debugging, `python_pjit` still exists so that problem doesn't exist which makes us free to remove these 2 flags.

I am leaving pmap's flag alone for now.

PiperOrigin-RevId: 522602754

											
										
										
											2023-04-07 08:28:46 -07:00
+								    argnames_partial_except, debug_info, result_paths, jaxpr_debug_info)
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.errors import JAXTypeError
 								from jax._src.interpreters import partial_eval as pe
-												Move PartitionSpec into its own file (jax/_src/partition_spec.py).

No functional changes intended.

A subsequent change will move ParsedPartitionSpec and array mapping utilities here also.

PiperOrigin-RevId: 522393166

											
										
										
											2023-04-06 11:42:45 -07:00
+								from jax._src.partition_spec import PartitionSpec
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.interpreters import xla
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src.config import config
 								from jax._src.interpreters import ad
-												migrate internal dependencies from `jax.interpreters.batching` to `jax._src.interpreters.batching`

... in preparation for paring down `jax.interpreters.batching`'s exported symbols.

PiperOrigin-RevId: 508487887

											
										
										
											2023-02-09 15:11:20 -08:00
+								from jax._src.interpreters import batching
 								from jax._src.interpreters import mlir
-												Prune accidental exports from jax.interpreters.pxla.

These imports do not appear to have users outside JAX itself.

PiperOrigin-RevId: 507835295

											
										
										
											2023-02-07 11:16:01 -08:00
+								from jax._src.interpreters import pxla
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src.lib.mlir import ir
 								from jax._src.lib.mlir.dialects import func as func_dialect
 								from jax._src.lib import xla_client as xc
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.sharding_impls import (
 								    NamedSharding, XLACompatibleSharding, GSPMDSharding,
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    XLADeviceAssignment, SingleDeviceSharding, PmapSharding,
-												Allow pjit.AUTO to be used with jax.jit. This introduces an API change which requires a mesh to be provided to pjit.AUTO(mesh).

`with mesh:` is no longer required with pjit to use the auto spmd pass of GSPMD.

PiperOrigin-RevId: 533801596

											
										
										
											2023-05-20 22:59:52 -07:00
+								    AUTO, UNSPECIFIED, UnspecifiedValue,
-												Move parse_flatten_op_sharding to sharding_impls.py to remove local import of pjit using that function from pxla.py

PiperOrigin-RevId: 523573375

											
										
										
											2023-04-11 19:25:56 -07:00
+								    ParsedPartitionSpec, SpecSync, get_single_pspec, is_auto, is_unspecified,
 								    is_unspecified_or_auto, prepare_axis_resources, parse_flatten_op_sharding)
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src.traceback_util import api_boundary
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.tree_util import (
 								    tree_map, tree_flatten, tree_unflatten, treedef_is_leaf, tree_structure,
 								    treedef_tuple, broadcast_prefix, all_leaves,
 								    prefix_errors, generate_key_paths)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src.util import (
-												Fix `name_stack` usage of pjit. Now all the metadata of transformations in hlo are correct.

PiperOrigin-RevId: 501918212

											
										
										
											2023-01-13 12:53:42 -08:00
+								    HashableFunction, safe_map, safe_zip, wraps,
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    distributed_debug_log, split_list, weakref_lru_cache,
-												For nested pjits, cache the generation of StableHLO if it satisfies the key. This should help improve tracing time.

PiperOrigin-RevId: 532155068

											
										
										
											2023-05-15 10:31:38 -07:00
+								    merge_lists, flatten, unflatten)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								map, unsafe_map = safe_map, map
 								zip, unsafe_zip = safe_zip, zip
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								traceback_util.register_exclusion(__file__)
-												Allow pjit.AUTO to be used with jax.jit. This introduces an API change which requires a mesh to be provided to pjit.AUTO(mesh).

`with mesh:` is no longer required with pjit to use the auto spmd pass of GSPMD.

PiperOrigin-RevId: 533801596

											
										
										
											2023-05-20 22:59:52 -07:00
+								PjitSharding = Union[GSPMDSharding, UnspecifiedValue, AUTO]
 								PjitShardingMinusUnspecified = Union[GSPMDSharding, AUTO]
 								MeshSharding = Union[NamedSharding, UnspecifiedValue, AUTO]
 								MeshShardingMinusUnspecified = Union[NamedSharding, AUTO]
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
-												Add logging if we get a C++ cache miss

PiperOrigin-RevId: 531555996

											
										
										
											2023-05-12 11:14:53 -07:00
+								logger = logging.getLogger(__name__)
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								def _try_infer_args(f, tree):
 								  """Bind placeholder arguments shaped like `tree` to the signature of `f`.
 								  Every leaf of `tree` is filled with `False`, and the unflattened result is
 								  splatted as positional arguments into `inspect.signature(f).bind`.
 								  Returns the bound arguments, or None when inspecting or binding raises
 								  TypeError or ValueError.
 								  """
 								  dummy_args = tree_unflatten(tree, [False] * tree.num_leaves)
 								  try:
 								    return inspect.signature(f).bind(*dummy_args)
 								  except (TypeError, ValueError):
 								    return None
 								def _find_arg_mismatch(arg_list, fails, fun_name):
 								  first_err, second_err = fails
 								  mismatched_args_msg = []
 								  for name, inp_da, aval in arg_list:
 								    if first_err.m_type == pxla.MismatchType.ARG_SHARDING:
 								      if first_err.da == inp_da:
 								        mismatched_args_msg.append(
-												Point to the exact primitive name nested under jit/pjit instead of mentioning all possible ones.

PiperOrigin-RevId: 508770290

											
										
										
											2023-02-10 15:36:04 -08:00
+								            (f"argument {name} of {fun_name} with shape {aval.str_short()} and "
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								             f"{first_err._dev_ids_plat_str}"))
 								        break
 								  for name, inp_da, aval in arg_list:
 								    if second_err.m_type == pxla.MismatchType.ARG_SHARDING:
 								      if second_err.da == inp_da:
 								        mismatched_args_msg.append(
-												Point to the exact primitive name nested under jit/pjit instead of mentioning all possible ones.

PiperOrigin-RevId: 508770290

											
										
										
											2023-02-10 15:36:04 -08:00
+								            (f"argument {name} of {fun_name} with shape {aval.str_short()} and "
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								             f"{second_err._dev_ids_plat_str}"))
 								        break
 								  return mismatched_args_msg
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								# TODO(yashkatariya): Try to use debug_info that is populated in
 								# common_infer_params.
 								def _get_arg_names(fun, in_tree, args_flat):
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								  sig = _try_infer_args(fun, in_tree)
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								  args_aug = generate_key_paths(tree_unflatten(in_tree, args_flat))
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								  arg_names = []
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								  for arg_key, val in args_aug:
-												Internal change

PiperOrigin-RevId: 513953876

											
										
										
											2023-03-04 00:48:29 +00:00
+								    ak, *rem_keys = arg_key
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								    if sig is not None:
-												Internal change

PiperOrigin-RevId: 513953876

											
										
										
											2023-03-04 00:48:29 +00:00
+								      loc = ''.join(str(k) for k in rem_keys)
 								      arg_name = f'{list(sig.arguments.keys())[ak.idx]}{loc}'
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								    else:
 								      arg_name = ''
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								    arg_names.append(arg_name)
 								  return arg_names
 								def _device_assignment_mismatch_error(fun_name, fails, args_flat, api_name,
 								                                      arg_names):
 								  arg_list = []
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  for a, n in zip(args_flat, arg_names):
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								    da = a.sharding._device_assignment if hasattr(a, 'sharding') else None
 								    arg_list.append((n, da, shaped_abstractify(a)))
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
 								  mismatched_args_msg = _find_arg_mismatch(arg_list, fails, fun_name)
 								  if len(mismatched_args_msg) == 2:
 								    first, second = mismatched_args_msg  # pylint: disable=unbalanced-tuple-unpacking
 								    extra_msg = f" Got {first} and {second}"
 								  elif len(mismatched_args_msg) == 1:
 								    first, second  = fails
 								    # Choose the failure left which is not already covered by ARG_SHARDING.
 								    left = second if first.m_type == pxla.MismatchType.ARG_SHARDING else first
 								    extra_msg = f" Got {mismatched_args_msg[0]} and{left._str(api_name)}"
 								  else:
 								    first, second = fails
 								    extra_msg = f" Got{first._str(api_name)} and{second._str(api_name)}"
 								  msg = (f"Received incompatible devices for {api_name}ted computation.{extra_msg}")
 								  return msg
 								def _python_pjit_helper(fun, infer_params_fn, *args, **kwargs):
 								  args_flat, _, params, in_tree, out_tree, _ = infer_params_fn(
 								      *args, **kwargs)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  for arg in args_flat:
-												Move functions into `api_util.py` and `dispatch.py` to remove circular import error when pjit is imported in `api.py` for merging the `jit` and `pjit` frontend API.

PiperOrigin-RevId: 497172760

											
										
										
											2022-12-22 08:40:36 -08:00
+								    dispatch.check_arg(arg)
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								  try:
 								    out_flat = pjit_p.bind(*args_flat, **params)
 								  except pxla.DeviceAssignmentMismatchError as e:
 								    fails, = e.args
 								    api_name = 'jit' if params['resource_env'] is None else 'pjit'
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								    arg_names = _get_arg_names(fun, in_tree, args_flat)
 								    fun_name = getattr(fun, '__qualname__', getattr(fun, '__name__', str(fun)))
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								    msg = _device_assignment_mismatch_error(
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								        fun_name, fails, args_flat, api_name, arg_names)
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								    raise ValueError(msg) from None
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  outs = tree_unflatten(out_tree, out_flat)
 								  return outs, out_flat, out_tree, args_flat
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
+								def _python_pjit(fun: Callable, infer_params_fn):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  @wraps(fun)
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								  @api_boundary
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  def wrapped(*args, **kwargs):
-												Respect jax_disable_jit in pjit

PiperOrigin-RevId: 503297194

											
										
										
											2023-01-19 16:35:23 -08:00
+								    if config.jax_disable_jit:
 								      return fun(*args, **kwargs)
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								    return _python_pjit_helper(fun, infer_params_fn, *args, **kwargs)[0]
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add clear_cache endpoint to python pjit and cpp pjit functions.

PiperOrigin-RevId: 509696516

											
										
										
											2023-02-14 18:45:31 -08:00
+								  def _python_pjit_evict_fn():
-												Make the _pjit_jaxpr cache hit more often by not depending on the out_shardings. So if the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation is not dependent on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
+								    _create_pjit_jaxpr.evict_function(fun)  # type: ignore
-												Add clear_cache endpoint to python pjit and cpp pjit functions.

PiperOrigin-RevId: 509696516

											
										
										
											2023-02-14 18:45:31 -08:00
+								  wrapped.clear_cache = _python_pjit_evict_fn
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  return wrapped
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
 								def _get_fastpath_data(executable, out_tree, args_flat, out_flat):
 								  use_fastpath = (
 								      executable is not None and
 								      isinstance(executable, pxla.MeshExecutable) and
 								      isinstance(executable.unsafe_call, pxla.ExecuteReplicated) and
 								      # No effects in computation
 								      not executable.unsafe_call.ordered_effects and
 								      not executable.unsafe_call.has_unordered_effects and
 								      not executable.unsafe_call.has_host_callbacks and
 								      all(isinstance(x, xc.ArrayImpl) for x in out_flat)
 								  )
 								  if use_fastpath:
 								    out_avals = [o.aval for o in out_flat]
 								    out_committed = [o._committed for o in out_flat]
 								    kept_var_bitvec = [i in executable._kept_var_idx
 								                       for i in range(len(args_flat))]
 								    fastpath_data = pxla.MeshExecutableFastpathData(
 								        executable.xla_executable, out_tree, executable._in_shardings,
 								        executable._out_shardings, out_avals, out_committed, kept_var_bitvec)
 								  else:
 								    fastpath_data = None
 								  return fastpath_data
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								class _MostRecentPjitCallExecutable(threading.local):
 								  # Holder with a single attribute, `value`, initialised to None. Because
 								  # the class derives from threading.local, each thread gets its own
 								  # `value`.
 								  def __init__(self):
 								    self.value = None
 								# Module-level instance; _read_most_recent_pjit_call_executable reads and
 								# resets its `value`.
 								_most_recent_pjit_call_executable = _MostRecentPjitCallExecutable()
-												Add static_argnames to the _cpp_pjit path.

PiperOrigin-RevId: 499311688

											
										
										
											2023-01-03 14:05:17 -08:00
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
+								def _read_most_recent_pjit_call_executable():
 								  # Return whatever is currently stored in the thread-local slot and reset
 								  # the slot to None, so a second read returns None until a new value is
 								  # stored.
 								  executable = _most_recent_pjit_call_executable.value
 								  _most_recent_pjit_call_executable.value = None
 								  return executable
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add clear_cache endpoint to python pjit and cpp pjit functions.

PiperOrigin-RevId: 509696516

											
										
										
											2023-02-14 18:45:31 -08:00
+								def _cpp_pjit_evict_fn(self):
 								  self._clear_cache()
-												Make the _pjit_jaxpr cache hit more often by not depending on the out_shardings. So if the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation is not dependent on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
+								  _create_pjit_jaxpr.evict_function(self._fun)  # type: ignore
-												Add clear_cache endpoint to python pjit and cpp pjit functions.

PiperOrigin-RevId: 509696516

											
										
										
											2023-02-14 18:45:31 -08:00
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								# The entries are doubled here from the default 4096 because _pjit_call_impl
 								# also has a cpp dispatch path and that would double the number of entries in
 								# the global shared cache.
 								_cpp_pjit_cache = xc._xla.PjitFunctionCache(capacity=8192)
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
 								def _cpp_pjit(fun: Callable, infer_params_fn, static_argnums, static_argnames,
 								              donate_argnums, pjit_has_explicit_sharding):
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								  @api_boundary
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
+								  def cache_miss(*args, **kwargs):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    outs, out_flat, out_tree, args_flat = _python_pjit_helper(
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								        fun, infer_params_fn, *args, **kwargs)
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
+								    executable = _read_most_recent_pjit_call_executable()
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								    fastpath_data = _get_fastpath_data(executable, out_tree, args_flat, out_flat)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return outs, fastpath_data
-												Bump minimum jaxlib version to 0.4.4 which means xla_extension_version >= 127

PiperOrigin-RevId: 512173011

											
										
										
											2023-02-24 15:05:12 -08:00
+								  if pjit_has_explicit_sharding:
 								    global_cache = xc._xla.PjitFunctionCache()
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
+								  else:
-												Bump minimum jaxlib version to 0.4.4 which means xla_extension_version >= 127

PiperOrigin-RevId: 512173011

											
										
										
											2023-02-24 15:05:12 -08:00
+								    global_cache = _cpp_pjit_cache
 								  cpp_pjit_f = xc._xla.pjit(  # type: ignore
 								      getattr(fun, "__name__", "<unnamed function>"),  # type: ignore
 								      fun, cache_miss, static_argnums, static_argnames,  # type: ignore
 								      donate_argnums, global_cache)  # type: ignore
-												Add clear_cache endpoint to python pjit and cpp pjit functions.

PiperOrigin-RevId: 509696516

											
										
										
											2023-02-14 18:45:31 -08:00
 								  cpp_pjitted_f = wraps(fun)(cpp_pjit_f)
 								  cpp_pjitted_f._fun = fun
 								  type(cpp_pjitted_f).clear_cache = _cpp_pjit_evict_fn
 								  return cpp_pjitted_f
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								def _resolve_axis_resources_and_shardings_arg(
 								    in_shardings, out_shardings, in_axis_resources, out_axis_resources):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if not is_unspecified(in_shardings) and not is_unspecified(in_axis_resources):
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    raise ValueError(
 								        'Setting both in_shardings and in_axis_resources is not '
 								        'allowed. in_axis_resources is deprecated. Please use in_shardings.')
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if not is_unspecified(out_shardings) and not is_unspecified(out_axis_resources):
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    raise ValueError(
 								        'Setting both out_shardings and out_axis_resources is not '
 								        'allowed. out_axis_resources is deprecated. Please use out_shardings.')
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if (not is_unspecified(in_axis_resources) or
 								      not is_unspecified(out_axis_resources)):
-												Raise deprecation warnings for `{in|out}_axis_resources` for pjit and `axis_resources` for with_sharding_constraint

PiperOrigin-RevId: 520748845

											
										
										
											2023-03-30 14:50:07 -07:00
+								    warnings.warn(
 								        'in_axis_resources and out_axis_resources are deprecated. Please use '
 								        'in_shardings and out_shardings as their replacement.',
 								        DeprecationWarning)
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if not is_unspecified(in_axis_resources):
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    final_in_shardings = in_axis_resources
 								  else:
 								    final_in_shardings = in_shardings
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if not is_unspecified(out_axis_resources):
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    final_out_shardings = out_axis_resources
 								  else:
 								    final_out_shardings = out_shardings
 								  return final_in_shardings, final_out_shardings
 								def pre_infer_params(fun, in_shardings, out_shardings,
-												Add abstracted axes to pjit to make jax2tf tests pass. abstracted_axes and dynamic_shapes are not supported by pjit yet.

PiperOrigin-RevId: 502138836

											
										
										
											2023-01-14 20:16:57 -08:00
+								                     donate_argnums, static_argnums, static_argnames, device,
 								                     backend, abstracted_axes):
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								  if abstracted_axes and not config.jax_dynamic_shapes:
-												Add abstracted axes to pjit to make jax2tf tests pass. abstracted_axes and dynamic_shapes are not supported by pjit yet.

PiperOrigin-RevId: 502138836

											
										
										
											2023-01-14 20:16:57 -08:00
+								    raise ValueError("abstracted_axes must be used with --jax_dynamic_shapes")
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  check_callable(fun)
 								  if backend is not None or device is not None:
 								    warnings.warn(
 								        'backend and device argument on jit is deprecated. You can use a '
 								        '`jax.sharding.Mesh` context manager or device_put the arguments '
 								        'before passing them to `jit`. Please see '
 								        'https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html '
 								        'for more information.', DeprecationWarning)
 								    if device is not None and backend is not None:
 								      raise ValueError("can't specify both a device and a backend for jit, "
 								                       f"got {device=} and {backend=}")
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    if not is_unspecified(in_shardings):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								      raise ValueError('If backend or device is specified on jit, then '
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								                       'in_shardings should not be specified.')
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    if not is_unspecified(out_shardings):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								      raise ValueError('If backend or device is specified on jit, then '
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								                       'out_shardings should not be specified.')
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  if isinstance(in_shardings, list):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    # To be a tree prefix of the positional args tuple, in_shardings can never
 								    # be a list: if in_shardings is not a leaf, it must be a tuple of trees.
 								    # However, in cases like these users expect tuples and lists to be treated
 								    # essentially interchangeably, so we canonicalize lists to tuples here
 								    # rather than raising an error. https://github.com/google/jax/issues/2367
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    in_shardings = tuple(in_shardings)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  in_shardings, _, _ = prepare_axis_resources(in_shardings, 'in_shardings')
 								  out_shardings, _, _ = prepare_axis_resources(out_shardings, 'out_shardings')
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								  donate_argnums, static_argnums, static_argnames = resolve_argnums(
 								      fun, donate_argnums, static_argnums, static_argnames)
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  return (in_shardings, out_shardings, donate_argnums, static_argnums,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								          static_argnames)
-												Add abstracted axes to pjit to make jax2tf tests pass. abstracted_axes and dynamic_shapes are not supported by pjit yet.

PiperOrigin-RevId: 502138836

											
										
										
											2023-01-14 20:16:57 -08:00
+								def post_infer_params(fun, infer_params_fn, static_argnums, static_argnames,
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
+								                      donate_argnums, abstracted_axes,
 								                      pjit_has_explicit_sharding):
-												Remove `experimental_cpp_jit` since that flag is unused and also remove `experimental_cpp_pjit`.

For dynamic shapes experimentation and normal debugging, `python_pjit` still exists so that problem doesn't exist which makes us free to remove these 2 flags.

I am leaving pmap's flag alone for now.

PiperOrigin-RevId: 522602754

											
										
										
											2023-04-07 08:28:46 -07:00
+								  if abstracted_axes is None:
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
+								    wrapped = _cpp_pjit(fun, infer_params_fn, static_argnums, static_argnames,
 								                        donate_argnums, pjit_has_explicit_sharding)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  else:
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
+								    wrapped = _python_pjit(fun, infer_params_fn)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								  @api_boundary
-												Smuggle _experimental_lowering_platform via kwargs to keep it hidden and extremely private, temporarily.

PiperOrigin-RevId: 532644979

											
										
										
											2023-05-16 19:47:19 -07:00
+								  def lower(*args, **kwargs):
 								    _experimental_lowering_platform = kwargs.pop(
 								        '_experimental_lowering_platform', None)
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								    (args_flat, flat_global_in_avals, params, in_tree, out_tree,
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
+								     donate_argnums) = infer_params_fn(*args, **kwargs)
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								    resource_env = params['resource_env']
 								    mesh = None if resource_env is None else resource_env.physical_mesh
-												Allow pjit.AUTO to be used with jax.jit. This introduces an API change which requires a mesh to be provided to pjit.AUTO(mesh).

`with mesh:` is no longer required with pjit to use the auto spmd pass of GSPMD.

PiperOrigin-RevId: 533801596

											
										
										
											2023-05-20 22:59:52 -07:00
+								    try:
 								      in_shardings = _resolve_in_shardings(
 								          args_flat, params['in_shardings'], params['out_shardings'], mesh)
 								      lowering = _pjit_lower(
 								          params['jaxpr'], in_shardings, params['out_shardings'],
 								          params['resource_env'], params['donated_invars'], params['name'],
 								          params['keep_unused'], params['inline'], always_lower=True,
 								          lowering_platform=_experimental_lowering_platform)
 								    except pxla.DeviceAssignmentMismatchError as e:
 								      fails, = e.args
 								      api_name = 'jit' if params['resource_env'] is None else 'pjit'
 								      arg_names = _get_arg_names(fun, in_tree, args_flat)
 								      fun_name = getattr(fun, '__qualname__', getattr(fun, '__name__', str(fun)))
 								      msg = _device_assignment_mismatch_error(
 								          fun_name, fails, args_flat, api_name, arg_names)
 								      raise ValueError(msg) from None
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								    if kwargs:
 								      args_kwargs_in_tree = in_tree
 								    else:
 								      args_kwargs_in_tree = treedef_tuple([in_tree, tree_flatten({})[1]])
 								    return stages.Lowered.from_flat_info(
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								        lowering, args_kwargs_in_tree, flat_global_in_avals, donate_argnums,
-												Skip unnecessary unflattening of avals in pjit lowering path.

The avals get flattened again when calling `from_flat_info` (here:
https://github.com/google/jax/blob/1641c8f1415a837f6f6c2537110f4be698621055/jax/_src/stages.py#L347),
so skip unflattening here.

PiperOrigin-RevId: 504260643

											
										
										
											2023-01-24 06:44:51 -08:00
+								        out_tree)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								  wrapped.lower = lower
 								  return wrapped
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								def _pjit_explicit_sharding(in_shardings, out_shardings, device,
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
+								                            backend) -> bool:
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  in_shardings_flat, _ = tree_flatten(in_shardings)
 								  out_shardings_flat, _ = tree_flatten(out_shardings)
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
+								  return (device is not None or
 								          backend is not None or
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								          any(not is_unspecified(i) for i in in_shardings_flat) or
 								          any(not is_unspecified(i) for i in out_shardings_flat))
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								class PjitInfo(NamedTuple):
 								  fun: Callable
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  in_shardings: Any
 								  out_shardings: Any
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  static_argnums: Tuple[int, ...]
 								  static_argnames: Tuple[str, ...]
 								  donate_argnums: Tuple[int, ...]
 								  device: Optional[xc.Device]
 								  backend: Optional[str]
 								  keep_unused: bool
 								  inline: bool
 								  resource_env: Any
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  abstracted_axes: Optional[Any]
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								def common_infer_params(pjit_info_args, *args, **kwargs):
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  (fun, user_in_shardings, user_out_shardings, static_argnums, static_argnames,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								   donate_argnums, device, backend, keep_unused, inline,
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								   resource_env, abstracted_axes) = pjit_info_args
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if kwargs and not is_unspecified(user_in_shardings):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    raise ValueError(
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								        "pjit does not support kwargs when in_shardings is specified.")
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								  if resource_env is not None:
 								    pjit_mesh = resource_env.physical_mesh
 								  else:
 								    pjit_mesh = None
 								  if (backend or device) and pjit_mesh is not None and not pjit_mesh.empty:
 								    raise ValueError(
 								        "Mesh context manager should not be used with jit when backend or "
 								        "device is also specified as an argument to jit.")
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  axes_specs = _flat_axes_specs(abstracted_axes, *args, **kwargs)
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								  jit_name = 'jit' if resource_env is None else 'pjit'
 								  dbg = debug_info(jit_name, fun, args, kwargs, static_argnums, static_argnames)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  f = lu.wrap_init(fun)
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								  f, res_paths = result_paths(f)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  f, dyn_args = argnums_partial_except(f, static_argnums, args,
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								                                       allow_invalid=True)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  del args
 								  # TODO(yashkatariya): Merge the nokwargs and kwargs path. One blocker is
 								  # flatten_axes which if kwargs are present in the treedef (even empty {}),
 								  # leads to wrong expansion.
 								  if kwargs:
 								    f, dyn_kwargs = argnames_partial_except(f, static_argnames, kwargs)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    explicit_args, in_tree = tree_flatten((dyn_args, dyn_kwargs))
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    flat_fun, out_tree = flatten_fun(f, in_tree)
 								  else:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    explicit_args, in_tree = tree_flatten(dyn_args)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    flat_fun, out_tree = flatten_fun_nokwargs(f, in_tree)
 								    dyn_kwargs = ()
 								  del kwargs
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								  if donate_argnums and not config.jax_debug_nans:
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    donated_invars = donation_vector(donate_argnums, dyn_args, dyn_kwargs)
 								  else:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    donated_invars = (False,) * len(explicit_args)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  # If backend or device is set as an arg on jit, then resolve them to
 								  # in_shardings and out_shardings as if user passed in in_shardings
 								  # and out_shardings.
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Previously, if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding, which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								  device_or_backend_set = False
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  if backend or device:
 								    in_shardings = out_shardings = _create_sharding_with_device_backend(
 								        device, backend)
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Previously, if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding, which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								    device_or_backend_set = True
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  else:
 								    in_shardings = tree_map(
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								        lambda x: _create_sharding_for_array(pjit_mesh, x, 'in_shardings',
 								                                             jit_name),
-												Improve the error message raised from jax.jit if Pspec or None is passed

PiperOrigin-RevId: 522377813

											
										
										
											2023-04-06 10:49:57 -07:00
+								        user_in_shardings)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    out_shardings = tree_map(
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								        lambda x: _create_sharding_for_array(pjit_mesh, x, 'out_shardings',
 								                                             jit_name),
-												Improve the error message raised from jax.jit if Pspec or None is passed

PiperOrigin-RevId: 522377813

											
										
										
											2023-04-06 10:49:57 -07:00
+								        user_out_shardings)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  del user_in_shardings, user_out_shardings
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  if config.jax_dynamic_shapes:
 								    in_type = pe.infer_lambda_input_type(axes_specs, explicit_args)
-												Deprecate FROM_GDA and remove its support from pjit's code since jax.Array inside pjit has sharding inference capabilities by default.

PiperOrigin-RevId: 520067392

											
										
										
											2023-03-28 10:29:01 -07:00
+								    in_avals = tuple(a for a, e in in_type if e)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  else:
-												Report the argument path when encountering an overflow error for a Python value.

PiperOrigin-RevId: 522106244

											
										
										
											2023-04-05 11:23:02 -07:00
+								    avals = []
 								    for i, a in enumerate(explicit_args):
 								      try:
 								        avals.append(shaped_abstractify(a))
 								      except OverflowError as e:
 								        arg_path = (f"argument path is {dbg.arg_names[i]}" if dbg
 								                    else f"flattened argument number is {i}")
 								        raise OverflowError(
 								          "An overflow was encountered while parsing an argument to a jitted "
 								          f"computation, whose {arg_path}."
 								        ) from e
 								    in_type = in_avals = tuple(avals)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								  canonicalized_in_shardings_flat = _process_in_axis_resources(
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Previously, if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) also went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding, which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								      hashable_pytree(in_shardings), in_avals, in_tree, resource_env, dbg,
 								      device_or_backend_set)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								  jaxpr, consts, canonicalized_out_shardings_flat = _pjit_jaxpr(
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								      flat_fun, hashable_pytree(out_shardings), in_type, dbg,
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Previously, if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) also went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding, which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								      device_or_backend_set, HashableFunction(out_tree, closure=()),
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in a follow up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								      HashableFunction(res_paths, closure=()))
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  assert len(explicit_args) == len(canonicalized_in_shardings_flat)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  if config.jax_dynamic_shapes:
 								    implicit_args = _extract_implicit_args(in_type, explicit_args)
 								  else:
 								    implicit_args = []
 								  args_flat = [*implicit_args, *explicit_args]
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  num_extra_args = len(implicit_args) + len(consts)
 								  canonicalized_in_shardings_flat = \
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								      (UNSPECIFIED,) * num_extra_args + canonicalized_in_shardings_flat
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  donated_invars = (False,) * num_extra_args + donated_invars
 								  assert (len(canonicalized_in_shardings_flat) == len(donated_invars) ==
 								          len(consts) + len(args_flat))
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Rename `jax.sharding.OpShardingSharding` to `jax.sharding.GSPMDSharding`. `jax.sharding.OpShardingSharding` will be removed in 3 months from Feb 17, 2023.

PiperOrigin-RevId: 510556189

											
										
										
											2023-02-17 17:10:27 -08:00
+								  # in_shardings and out_shardings here are all GSPMDSharding.
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  params = dict(
 								      jaxpr=jaxpr,
 								      in_shardings=canonicalized_in_shardings_flat,
 								      out_shardings=canonicalized_out_shardings_flat,
 								      resource_env=resource_env,
 								      donated_invars=donated_invars,
 								      name=getattr(flat_fun, '__name__', '<unnamed function>'),
 								      keep_unused=keep_unused,
 								      inline=inline,
 								  )
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  return (consts + args_flat, in_type, params, in_tree, out_tree(),
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								          donate_argnums)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								def _extract_implicit_args(
 								  in_type: Sequence[Tuple[core.AbstractValue, bool]],
 								  explicit_args: Sequence[Any]
 								) -> Sequence[core.Tracer]:
 								  """
 								  Given an input type and explicitly-passed arguments (per the user-facing API
 								  calling convention), extract implicit axis size arguments from shapes of
 								  explicit arguments (for the trace-time / jaxpr-level calling convention).
 								  """
 								  # First, using `in_type` construct a list to represent the full argument list,
 								  # leaving the implicit arguments as None placeholders for now.
 								  explicit_args_ = iter(explicit_args)
 								  args = [next(explicit_args_) if expl else None for _, expl in in_type]
 								  assert next(explicit_args_, None) is None
 								  del explicit_args, explicit_args_
 								  # Next, populate the implicit arguments using the DBIdxs in `in_type`.
 								  for i, (aval, explicit) in enumerate(in_type):
 								    if not explicit or not isinstance(aval, core.DShapedArray):
 								      continue  # can't populate an implicit argument
 								    arg = args[i]
 								    assert arg is not None
 								    for d1, d2 in zip(aval.shape, arg.aval.shape):
 								      if isinstance(d1, core.DBIdx):
 								        if args[d1.val] is None:
 								          args[d1.val] = d2
 								        assert core.same_referent(args[d1.val], d2)
 								  assert all(x is not None for x in args)
 								  return [x for x, (_, e) in zip(args, in_type) if not e]  # type: ignore
 								def _flat_axes_specs(abstracted_axes, *args, **kwargs
 								                     ) -> Optional[List[pe.AbstractedAxesSpec]]:
 								  if abstracted_axes is None: return None
 								  if kwargs: raise NotImplementedError
 								  def ax_leaf(l):
 								    return (isinstance(l, dict) and all_leaves(l.values()) or
 								            isinstance(l, tuple) and all_leaves(l, lambda x: x is None))
 								  return broadcast_prefix(abstracted_axes, args, ax_leaf)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								# in_shardings and out_shardings can't be None as the default value
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								# because `None` means that the input is fully replicated.
 								def pjit(
 								    fun: Callable,
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    in_shardings=UNSPECIFIED,
 								    out_shardings=UNSPECIFIED,
 								    in_axis_resources=UNSPECIFIED,
 								    out_axis_resources=UNSPECIFIED,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    static_argnums: Union[int, Sequence[int], None] = None,
 								    static_argnames: Union[str, Iterable[str], None] = None,
 								    donate_argnums: Union[int, Sequence[int]] = (),
 								    keep_unused: bool = False,
 								    device: Optional[xc.Device] = None,
 								    backend: Optional[str] = None,
 								    inline: bool = False,
-												Add abstracted axes to pjit to make jax2tf tests pass. abstracted_axes and dynamic_shapes is not supported by pjit yet.

PiperOrigin-RevId: 502138836

											
										
										
											2023-01-14 20:16:57 -08:00
+								    abstracted_axes: Optional[Any] = None,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								) -> stages.Wrapped:
 								  """Makes ``fun`` compiled and automatically partitioned across multiple devices.
-												Adds a note that pjit is equivalent to jit.

PiperOrigin-RevId: 535296532

											
										
										
											2023-05-25 10:13:50 -07:00
+								  NOTE: This function is now equivalent to jax.jit please use that instead.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  The returned function has semantics equivalent to those of ``fun``, but is
 								  compiled to an XLA computation that runs across multiple devices
 								  (e.g. multiple GPUs or multiple TPU cores). This can be useful if the jitted
 								  version of ``fun`` would not fit in a single device's memory, or to speed up
 								  ``fun`` by running each operation in parallel across multiple devices.
 								  The partitioning over devices happens automatically based on the
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  propagation of the input partitioning specified in ``in_shardings`` and
 								  the output partitioning specified in ``out_shardings``. The resources
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  specified in those two arguments must refer to mesh axes, as defined by
-												Replace uses of deprecated JAX sharding APIs with their new names in jax.sharding.

This change updates:
* {jax.experimental.maps.Mesh, jax.interpreters.pxla.Mesh} to jax.sharding.Mesh
* {jax.experimental.PartitionSpec, jax.experimental.pjit.PartitionSpec, jax.interpreters.pxla.PartitionSpec, jax.pxla.PartitionSpec} to jax.sharding.PartitionSpec
* jax.experimental.maps.NamedSharding to jax.sharding.NamedSharding.

PiperOrigin-RevId: 506994892

											
										
										
											2023-02-03 14:28:07 -08:00
+								  the :py:func:`jax.sharding.Mesh` context manager. Note that the mesh
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  definition at :func:`~pjit` application time is ignored, and the returned function
 								  will use the mesh definition available at each call site.
 								  Inputs to a :func:`~pjit`'d function will be automatically partitioned across devices
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  if they're not already correctly partitioned based on ``in_shardings``.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  In some scenarios, ensuring that the inputs are already correctly pre-partitioned
 								  can increase performance. For example, if passing the output of one
 								  :func:`~pjit`'d function to another :func:`~pjit`’d function (or the same
 								  :func:`~pjit`’d function in a loop), make sure the relevant
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  ``out_shardings`` match the corresponding ``in_shardings``.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  .. note::
 								    **Multi-process platforms:** On multi-process platforms such as TPU pods,
 								    :func:`~pjit` can be used to run computations across all available devices across
 								    processes. To achieve this, :func:`~pjit` is designed to be used in SPMD Python
 								    programs, where every process is running the same Python code such that all
 								    processes run the same :func:`~pjit`'d function in the same order.
 								    When running in this configuration, the mesh should contain devices across
 								    all processes. However, any input argument dimensions partitioned over
 								    multi-process mesh axes should be of size equal to the corresponding *local*
 								    mesh axis size, and outputs will be similarly sized according to the local
 								    mesh. ``fun`` will still be executed across *all* devices in the mesh,
 								    including those from other processes, and will be given a global view of the
 								    data spread across multiple processes as a single array. However, outside
 								    of :func:`~pjit` every process only "sees" its local piece of the input and output,
 								    corresponding to its local sub-mesh.
 								    This means that each process's participating local devices must form a
 								    _contiguous_ local sub-mesh within the full global mesh. A contiguous
 								    sub-mesh is one where all of its devices are adjacent within the global
 								    mesh, and form a rectangular prism.
 								    The SPMD model also requires that the same multi-process :func:`~pjit`'d
 								    functions must be run in the same order on all processes, but they can be
 								    interspersed with arbitrary operations running in a single process.
 								  Args:
 								    fun: Function to be compiled. Should be a pure function, as side-effects may
 								      only be executed once. Its arguments and return value should be arrays,
 								      scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
 								      Positional arguments indicated by ``static_argnums`` can be anything at
 								      all, provided they are hashable and have an equality operation defined.
 								      Static arguments are included as part of a compilation cache key, which is
 								      why hash and equality operators must be defined.
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    in_shardings: Pytree of structure matching that of arguments to ``fun``,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      with all actual arguments replaced by resource assignment specifications.
 								      It is also valid to specify a pytree prefix (e.g. one value in place of a
 								      whole subtree), in which case the leaves get broadcast to all values in
 								      that subtree.
-												Improve the error message raised from jax.jit if Pspec or None is passed

PiperOrigin-RevId: 522377813

											
										
										
											2023-04-06 10:49:57 -07:00
+								      The ``in_shardings`` argument is optional. JAX will infer the shardings
 								      from the input :py:class:`jax.Array`'s, and defaults to replicating the input
 								      if the sharding cannot be inferred.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      The valid resource assignment specifications are:
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
 								      - :py:class:`XLACompatibleSharding`, which will decide how the value
 								        will be partitioned. With this, using a mesh context manager is not
 								        required.
 								      - For backwards compatibility, in_shardings still supports ingesting
 								        :py:class:`PartitionSpec` and :py:obj:`None`. These 2 options can
 								        *only* be used with the mesh context manager.
 								        - :py:obj:`None`, in which case the value will be replicated on all devices
 								        - :py:class:`PartitionSpec`, a tuple of length at most equal to the rank
 								          of the partitioned value. Each element can be a :py:obj:`None`, a mesh
 								          axis or a tuple of mesh axes, and specifies the set of resources assigned
 								          to partition the value's dimension matching its position in the spec.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								      The size of every dimension has to be a multiple of the total number of
 								      resources assigned to it.
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    out_shardings: Like ``in_shardings``, but specifies resource
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      assignment for function outputs.
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
+								      The ``out_shardings`` argument is optional. If not specified, :py:func:`jax.jit`
 								      will use GSPMD's sharding propagation to determine how to shard the outputs.
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    in_axis_resources: (Deprecated) Please use in_shardings.
 								    out_axis_resources: (Deprecated) Please use out_shardings.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    static_argnums: An optional int or collection of ints that specify which
 								      positional arguments to treat as static (compile-time constant).
 								      Operations that only depend on static arguments will be constant-folded in
 								      Python (during tracing), and so the corresponding argument values can be
 								      any Python object.
 								      Static arguments should be hashable, meaning both ``__hash__`` and
 								      ``__eq__`` are implemented, and immutable. Calling the jitted function
 								      with different values for these constants will trigger recompilation.
 								      Arguments that are not arrays or containers thereof must be marked as
 								      static.
 								      If ``static_argnums`` is not provided, no arguments are treated as static.
 								    static_argnames: An optional string or collection of strings specifying
 								      which named arguments to treat as static (compile-time constant). See the
 								      comment on ``static_argnums`` for details. If not
 								      provided but ``static_argnums`` is set, the default is based on calling
 								      ``inspect.signature(fun)`` to find corresponding named arguments.
 								    donate_argnums: Specify which argument buffers are "donated" to the computation.
 								      It is safe to donate argument buffers if you no longer need them once the
 								      computation has finished. In some cases XLA can make use of donated
 								      buffers to reduce the amount of memory needed to perform a computation,
 								      for example recycling one of your input buffers to store a result. You
 								      should not reuse buffers that you donate to a computation, JAX will raise
 								      an error if you try to.
-												[docs] donate_argnums FAQ link to rst format

											
										
										
											2023-01-10 18:11:08 +09:00
+								      For more details on buffer donation see the `FAQ <https://jax.readthedocs.io/en/latest/faq.html#buffer-donation>`_.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    keep_unused: If `False` (the default), arguments that JAX determines to be
 								      unused by `fun` *may* be dropped from resulting compiled XLA executables.
 								      Such arguments will not be transferred to the device nor provided to the
 								      underlying executable. If `True`, unused arguments will not be pruned.
 								    device: This argument is deprecated. Please put your arguments on the
 								      device you want before passing them to jit.
 								      Optional, the Device the jitted function will run on. (Available devices
 								      can be retrieved via :py:func:`jax.devices`.) The default is inherited
 								      from XLA's DeviceAssignment logic and is usually to use
 								      ``jax.devices()[0]``.
 								    backend: This argument is deprecated. Please put your arguments on the
 								      backend you want before passing them to jit.
 								      Optional, a string representing the XLA backend: ``'cpu'``, ``'gpu'``, or
 								      ``'tpu'``.
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  Returns:
 								    A wrapped version of ``fun``, set up for just-in-time compilation and
 								    automatically partitioned by the mesh available at each call site.
 								  For example, a convolution operator can be automatically partitioned over
 								  an arbitrary set of devices by a single :func:`~pjit` application:
 								  >>> import jax
 								  >>> import jax.numpy as jnp
 								  >>> import numpy as np
-												Move functions into `api_util.py` and `dispatch.py` to remove circular import error when pjit is imported in `api.py` for merging the `jit` and `pjit` frontend API.

PiperOrigin-RevId: 497172760

											
										
										
											2022-12-22 08:40:36 -08:00
+								  >>> from jax.sharding import Mesh, PartitionSpec
 								  >>> from jax.experimental.pjit import pjit
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  >>>
 								  >>> x = jnp.arange(8, dtype=jnp.float32)
 								  >>> f = pjit(lambda x: jax.numpy.convolve(x, jnp.asarray([0.5, 1.0, 0.5]), 'same'),
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  ...         in_shardings=None, out_shardings=PartitionSpec('devices'))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  >>> with Mesh(np.array(jax.devices()), ('devices',)):
 								  ...   print(f(x))  # doctest: +SKIP
 								  [ 0.5  2.   4.   6.   8.  10.  12.  10. ]
 								  """
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  in_shardings, out_shardings = _resolve_axis_resources_and_shardings_arg(
 								      in_shardings, out_shardings, in_axis_resources, out_axis_resources)
 								  (in_shardings, out_shardings, donate_argnums, static_argnums,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								   static_argnames) = pre_infer_params(
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								       fun, in_shardings, out_shardings, donate_argnums,
-												Add abstracted axes to pjit to make jax2tf tests pass. abstracted_axes and dynamic_shapes is not supported by pjit yet.

PiperOrigin-RevId: 502138836

											
										
										
											2023-01-14 20:16:57 -08:00
+								       static_argnums, static_argnames, device, backend, abstracted_axes)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Remove _global_avals from infer_params because everything is global in pjit after jax.Array was enabled.

PiperOrigin-RevId: 500012042

											
										
										
											2023-01-05 14:38:58 -08:00
+								  def infer_params(*args, **kwargs):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    # Putting this outside of wrapped would make resources lexically scoped
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								    resource_env = mesh_lib.thread_resources.env
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    pjit_info_args = PjitInfo(
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								          fun=fun, in_shardings=in_shardings,
 								          out_shardings=out_shardings, static_argnums=static_argnums,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								          static_argnames=static_argnames, donate_argnums=donate_argnums,
 								          device=device, backend=backend, keep_unused=keep_unused,
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								          inline=inline, resource_env=resource_env,
 								          abstracted_axes=abstracted_axes)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    return common_infer_params(pjit_info_args, *args, **kwargs)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
+								  has_explicit_sharding = _pjit_explicit_sharding(
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								      in_shardings, out_shardings, device, backend)
-												Add abstracted axes to pjit to make jax2tf tests pass. abstracted_axes and dynamic_shapes is not supported by pjit yet.

PiperOrigin-RevId: 502138836

											
										
										
											2023-01-14 20:16:57 -08:00
+								  return post_infer_params(fun, infer_params, static_argnums, static_argnames,
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
+								                           donate_argnums, abstracted_axes,
 								                           has_explicit_sharding)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def hashable_pytree(pytree):
 								  """Return a ``HashableFunction`` thunk that rebuilds ``pytree`` when called.
 								  The pytree is flattened once; its leaves (frozen into a tuple) and its
 								  treedef are handed to ``HashableFunction`` as the ``closure``, and calling
 								  the returned object unflattens them again.
 								  """
 								  leaves, structure = tree_flatten(pytree)
 								  frozen_leaves = tuple(leaves)
 								  rebuild = lambda: tree_unflatten(structure, frozen_leaves)
 								  return HashableFunction(rebuild, closure=(structure, frozen_leaves))
 								@lru_cache(maxsize=4096)
 								def _create_mesh_pspec_sharding_from_parsed_pspec(mesh, x):
 								  """Memoized conversion of the parsed spec ``x`` into a sharding on ``mesh``.
 								  Values for which ``is_unspecified_or_auto`` holds are passed through
 								  untouched; anything else is handed to ``pxla.create_mesh_pspec_sharding``
 								  together with ``x.user_spec``.
 								  """
 								  if not is_unspecified_or_auto(x):
 								    return pxla.create_mesh_pspec_sharding(mesh, x.user_spec, x)
 								  return x
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								def _create_sharding_for_array(mesh, x, name, api_name):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if isinstance(x, XLACompatibleSharding) or is_unspecified_or_auto(x):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return x
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  if mesh is None:
-												Improve the error message raised from jax.jit if Pspec or None is passed

PiperOrigin-RevId: 522377813

											
										
										
											2023-04-06 10:49:57 -07:00
+								    msg = ('jax.jit only supports `XLACompatibleSharding`s being passed to'
 								           f' {name}. Looks like you are passing either `PartitionSpec` or `None`'
 								           f' which is not allowed in jax.jit.\n')
 								    if name == 'in_shardings':
 								      msg += (f'Note that {name} argument is optional. JAX will infer the shardings'
 								              " from the input jax.Array's and will default to replicating the"
 								              ' input if the sharding cannot be inferred.')
 								    elif name == 'out_shardings':
 								      msg += (f'Note that {name} is optional. If not specified, jax.jit will'
 								              " use GSPMD's sharding propagation to figure out what the sharding"
 								              ' of the output(s) should be.')
 								    raise RuntimeError(msg)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  if mesh.empty:
-												Improve the empty mesh error message raised in pjit if mesh is not used and Pspec is passed to in|out_shardings

PiperOrigin-RevId: 517495400

											
										
										
											2023-03-17 13:33:45 -07:00
+								    raise RuntimeError(
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								        f'{api_name} requires a non-empty mesh if you are passing'
 								        f' `PartitionSpec`s or `None` to {name}! Is a mesh defined at the call'
 								        f' site? Alternatively, provide `XLACompatibleSharding`s to {name} and'
 								        ' then the mesh context manager is not required.')
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  # A nice user error is raised in prepare_axis_resources.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  assert isinstance(x, ParsedPartitionSpec), x
 								  return _create_mesh_pspec_sharding_from_parsed_pspec(mesh, x)
 								def _create_sharding_with_device_backend(device, backend):
 								  if device is not None:
 								    assert backend is None
 								    out = SingleDeviceSharding(device)
 								  elif backend is not None:
 								    assert device is None
 								    out = SingleDeviceSharding(
 								        xb.get_backend(backend).get_default_device_assignment(1)[0])
 								  return out
 								def flatten_axis_resources(what, tree, shardings, tupled_args):
 								  """Flatten ``shardings`` against ``tree``, with descriptive errors on mismatch.
 								  Args:
 								    what: Description of the value being flattened; used in error messages.
 								    tree: Treedef that ``shardings`` is matched against.
 								    shardings: Sharding specification pytree to flatten.
 								    tupled_args: Forwarded to ``flatten_axes``; when true, ``shardings`` is
 								      additionally checked to be a tuple with one entry per child of ``tree``.
 								  Returns:
 								    A tuple of the values produced by ``flatten_axes``.
 								  Raises:
 								    ValueError: If ``tupled_args`` is true and ``shardings`` is not a tuple of
 								      the right length.
 								    AssertionError: If ``flatten_axes`` failed but no tree prefix error could
 								      be found. Otherwise the first error from ``prefix_errors`` is raised.
 								  """
 								  try:
 								    return tuple(flatten_axes(what, tree, shardings, tupled_args=tupled_args))
 								  except ValueError:
 								    pass  # Raise a tree prefix error below
 								  # Tree leaves are always valid prefixes, so if there was a prefix error as
 								  # assumed here, shardings must not be a leaf.
 								  assert not treedef_is_leaf(tree_structure(shardings))
 								  # Check the type directly rather than using isinstance because of namedtuples.
 								  if tupled_args and (type(shardings) is not tuple or
 								                      len(shardings) != len(tree.children())):
 								    num_args = len(tree.children())
 								    # We know shardings is meant to be a tuple corresponding to the args
 								    # tuple, but while it is a non-leaf pytree, either it wasn't a tuple or it
 								    # wasn't the right length.
 								    msg = (f"{what} specification must be a tree prefix of the positional "
 								           f"arguments tuple passed to the `pjit`-decorated function. In "
 								           f"particular, {what} must either be a None, a PartitionSpec, or "
 								           f"a tuple of length equal to the number of positional arguments.")
 								    # If `tree` represents an args tuple, then `shardings` must be a tuple.
 								    # TODO(mattjj,apaszke): disable implicit list casts, remove 'or list' below
 								    if type(shardings) is not tuple:
 								      msg += f" But {what} is not a tuple: got {type(shardings)} instead."
 								    elif len(shardings) != num_args:
 								      msg += (f" But {what} is the wrong length: got a tuple or list of length "
 								              f"{len(shardings)} for an args tuple of length "
 								              f"{num_args}.")
 								    # As an extra hint, let's check if the user just forgot to wrap
 								    # shardings in a singleton tuple.
 								    if num_args == 1:
 								      try:
 								        flatten_axes(what, tree, (shardings,))
 								      except ValueError:
 								        pass  # That's not the issue.
 								      else:
 								        msg += (f" Given the corresponding argument being "
 								                f"passed, it looks like {what} might need to be wrapped in "
 								                f"a singleton tuple.")
 								    raise ValueError(msg)
 								  # Because we only have the `tree` treedef and not the full pytree here,
 								  # we construct a dummy tree to compare against. Revise this in callers?
 								  dummy_tree = tree_unflatten(tree, [PytreeLeaf()] * tree.num_leaves)
 								  errors = prefix_errors(shardings, dummy_tree)
 								  if errors:
 								    e = errors[0]  # Only show information about the first disagreement found.
 								    raise e(what)
 								  # At this point we've failed to find a tree prefix error. This should be
 								  # unreachable; raise explicitly so the check is not stripped under `-O`.
 								  raise AssertionError("Please open a bug report!")
 								class PytreeLeaf:
 								  """Placeholder object whose repr is the text ``pytree leaf``."""
 								  def __repr__(self):
 								    return "pytree leaf"
 								@lru_cache(maxsize=4096)
 								def _process_in_axis_resources(in_shardings_thunk, in_avals, in_tree,
 								                               resource_env, debug_info, device_or_backend_set):
 								  """Flatten, check and canonicalize the input shardings (memoized).
 								  Args:
 								    in_shardings_thunk: Zero-argument callable returning the input shardings.
 								    in_avals: Abstract values of the flattened inputs.
 								    in_tree: Treedef of the inputs, used to flatten the shardings.
 								    resource_env: Not read in the body; it still takes part in the
 								      ``lru_cache`` key like every other argument.
 								    debug_info: Optional object whose ``arg_names`` are forwarded to
 								      ``pjit_check_aval_sharding``.
 								    device_or_backend_set: Forwarded to ``to_gspmd_sharding``.
 								  Returns:
 								    A tuple with one entry per element of ``in_avals``: unspecified/auto
 								    values are kept as they are, everything else goes through
 								    ``to_gspmd_sharding``.
 								  """
 								  requested = in_shardings_thunk()
 								  if not is_unspecified(requested):
 								    # Anything but a fully unspecified value (AUTO included) goes via
 								    # flatten_axis_resources.
 								    flat_shardings = flatten_axis_resources(
 								        "pjit in_shardings", in_tree, requested, tupled_args=True)
 								  else:
 								    flat_shardings = (requested,) * len(in_avals)
 								  if not config.jax_dynamic_shapes:
 								    arg_names = debug_info.arg_names if debug_info is not None else None
 								    pjit_check_aval_sharding(flat_shardings, in_avals, arg_names,
 								                             "pjit arguments", allow_uneven_sharding=False)
 								  def canonicalize(sharding, aval):
 								    if is_unspecified_or_auto(sharding):
 								      return sharding
 								    return to_gspmd_sharding(sharding, aval.ndim, device_or_backend_set)
 								  return tuple(canonicalize(s, a) for s, a in zip(flat_shardings, in_avals))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								@lu.cache
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								def _create_pjit_jaxpr(fun, in_type, debug_info, out_paths):
-												Remove the f-string evaluation during logging the elapsed time by passing in fun_name to log_elapsed_time

PiperOrigin-RevId: 532132574

											
										
										
											2023-05-15 09:15:22 -07:00
+								  with dispatch.log_elapsed_time(
 								      "Finished tracing + transforming {fun_name} for pjit in {elapsed_time} sec",
 								      fun_name=fun.__name__, event=dispatch.JAXPR_TRACE_EVENT):
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								    pe_debug = debug_info and pe.debug_info_final(fun, debug_info.traced_for)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    if config.jax_dynamic_shapes:
 								      jaxpr, global_out_avals, consts = pe.trace_to_jaxpr_dynamic2(
 								          lu.annotate(fun, in_type), debug_info=pe_debug)
 								    else:
 								      jaxpr, global_out_avals, consts = pe.trace_to_jaxpr_dynamic(
 								          fun, in_type, debug_info=pe_debug)
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  if not config.jax_dynamic_shapes:
 								    jaxpr = jaxpr_debug_info(jaxpr, debug_info, out_paths())
-												Fix pjit's initial style usage of consts.

Instead of smuggling them via the jaxpr, pull them out and pass them with args. This is because consts can be tracers and that fails down the stack when lowering to mlir.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 500544141

											
										
										
											2023-01-08 10:37:40 -08:00
 								  if any(isinstance(c, core.Tracer) for c in consts):
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								    closed_jaxpr = pe.close_jaxpr(pe.convert_constvars_jaxpr(jaxpr))
-												Fix pjit's initial style usage of consts.

Instead of smuggling them via the jaxpr, pull them out and pass them with args. This is because consts can be tracers and that fails down the stack when lowering to mlir.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 500544141

											
										
										
											2023-01-08 10:37:40 -08:00
+								    final_consts = consts
 								  else:
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in a follow up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								    closed_jaxpr = core.ClosedJaxpr(jaxpr, consts)
-												Fix pjit's initial style usage of consts.

Instead of smuggling them via the jaxpr, pull them out and pass them with args. This is because consts can be tracers and that fails down the stack when lowering to mlir.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 500544141

											
										
										
											2023-01-08 10:37:40 -08:00
+								    final_consts = []
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in a follow up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								  return closed_jaxpr, final_consts, global_out_avals
-												Make the _pjit_jaxpr cache hit more often by not depending on the out_shardings. So if the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation is not dependent on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Make the _pjit_jaxpr cache hit more often by not depending on the out_shardings. So if the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation is not dependent on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
+								@lru_cache(maxsize=4096)
 								def _check_and_canonicalize_out_shardings(
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Previously, if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding, which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								    out_shardings_thunk, out_tree, out_type, debug_info, device_or_backend_set):
-												Don't depend on `flatten_axis_resources` which will error because `flatten_axes` passes a dummy `object()` which doesn't work with checks in user pytrees.

Only do this if the original {in|out}_shardings are _UNSPECIFIED.

PiperOrigin-RevId: 502792305

											
										
										
											2023-01-18 00:12:25 -08:00
+								  orig_out_shardings = out_shardings_thunk()
-												Merge pull request #14248 from jakevdp:dead-code

PiperOrigin-RevId: 506405131

											
										
										
											2023-02-01 21:25:46 +00:00
+								  # TODO(yashkatariya): Remove the if branch and fix flatten_axis_resources
 								  # instead. This condition exists because flatten_axis_resources passes in an
 								  # `object()` while unflattening which breaks assertions in user-defined
 								  # pytrees (which shouldn't exist but they do).
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if (is_unspecified(orig_out_shardings) or
-												Merge pull request #14248 from jakevdp:dead-code

PiperOrigin-RevId: 506405131

											
										
										
											2023-02-01 21:25:46 +00:00
+								      isinstance(orig_out_shardings, XLACompatibleSharding)):
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    out_shardings_flat = (orig_out_shardings,) * len(out_type)
-												Don't depend on `flatten_axis_resources` which will error because `flatten_axes` passes a dummy `object()` which doesn't work with checks in user pytrees.

Only do this if the original {in|out}_shardings are _UNSPECIFIED.

PiperOrigin-RevId: 502792305

											
										
										
											2023-01-18 00:12:25 -08:00
+								  else:
 								    out_shardings_flat = flatten_axis_resources(
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								        "pjit out_shardings", out_tree(), orig_out_shardings,
-												Don't depend on `flatten_axis_resources` which will error because `flatten_axes` passes a dummy `object()` which doesn't work with checks in user pytrees.

Only do this if the original {in|out}_shardings are _UNSPECIFIED.

PiperOrigin-RevId: 502792305

											
										
										
											2023-01-18 00:12:25 -08:00
+								        tupled_args=False)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  if not config.jax_dynamic_shapes:
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								    pjit_check_aval_sharding(
 								        out_shardings_flat, out_type,
 								        None if debug_info is None else debug_info.result_paths,
 								        "pjit outputs", allow_uneven_sharding=False)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  canonicalized_out_shardings_flat = tuple(
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Before if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								      o if is_unspecified(o) or is_auto(o) else
 								      to_gspmd_sharding(o, aval.ndim, device_or_backend_set)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								      for o, aval in zip(out_shardings_flat, out_type)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  )
-												Make the _pjit_jaxpr cache hit more often by not depending on the out_shardings. If the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation does not depend on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
+								  return canonicalized_out_shardings_flat
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Make the _pjit_jaxpr cache hit more often by not depending on the out_shardings. If the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation does not depend on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Before if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
def _pjit_jaxpr(fun, out_shardings_thunk, in_type, debug_info,
                device_or_backend_set, out_tree, result_paths):
  """Stage `fun` to a jaxpr and canonicalize its flat output shardings.

  The jaxpr is produced by `_create_pjit_jaxpr`, which does not receive the
  output shardings; they are checked and canonicalized afterwards by
  `_check_and_canonicalize_out_shardings` against the staged output type.

  Args:
    fun: the function to stage, forwarded to `_create_pjit_jaxpr`.
    out_shardings_thunk: forwarded to `_check_and_canonicalize_out_shardings`.
    in_type: input type, forwarded to `_create_pjit_jaxpr`.
    debug_info: debug info, forwarded to `_create_pjit_jaxpr`.
    device_or_backend_set: forwarded to the out-sharding canonicalization.
    out_tree: forwarded to the out-sharding canonicalization.
    result_paths: forwarded to `_create_pjit_jaxpr`.

  Returns:
    A 3-tuple `(jaxpr, consts, canonicalized_out_shardings_flat)`.
  """
  staged_jaxpr, consts, out_avals = _create_pjit_jaxpr(
      fun, in_type, debug_info, result_paths)
  out_shardings_flat = _check_and_canonicalize_out_shardings(
      out_shardings_thunk, out_tree, tuple(out_avals),
      staged_jaxpr.jaxpr.debug_info, device_or_backend_set)
  # NOTE(review): the previous comment here said "lu.cache needs to be able to
  # create weakrefs to outputs, so we can't return a plain tuple", yet a plain
  # tuple is what gets returned. Confirm whether that remark is stale.
  return staged_jaxpr, consts, out_shardings_flat
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def pjit_check_aval_sharding(
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								    shardings, flat_avals, names: Optional[Tuple[str, ...]],
 								    what_aval: str, allow_uneven_sharding: bool):
 								  new_names = [''] * len(shardings) if names is None else names
 								  for aval, s, name in zip(flat_avals, shardings, new_names):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    if is_unspecified_or_auto(s):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      continue
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								    name_str = f' with pytree key path {name}' if name else ''
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    shape = aval.shape
 								    try:
 								      # Sharding interfaces can implement `is_compatible_aval` as an optional
 								      # method to raise a more meaningful error.
 								      if hasattr(s, 'is_compatible_aval'):
 								        s.is_compatible_aval(shape)
 								      else:
 								        s._to_xla_op_sharding(len(shape))
 								    except ValueError as e:
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								      raise ValueError(
 								          f'One of {what_aval}{name_str} is incompatible with its sharding '
 								          f'annotation {s}: {str(e)}')
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    # Use the `OpSharding` proto to find out how many ways each dimension of
 								    # the aval is sharded. This approach will work across all
 								    # XLACompatibleSharding.
 								    op_sharding = s._to_xla_op_sharding(len(shape))
 								    assert op_sharding is not None
-												Rename jax._src.sharding_utils to jax._src.op_shardings.

Move some more op_sharding related helpers to that module.

PiperOrigin-RevId: 522343010

											
										
										
											2023-04-06 08:31:47 -07:00
+								    num_ways_dim_sharded, _ = op_shardings.get_num_ways_dim_sharded(
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								        cast(xc.OpSharding, op_sharding))
 								    for i, size in enumerate(num_ways_dim_sharded):
 								      if not allow_uneven_sharding and shape[i] % size != 0:
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								        raise ValueError(f"One of {what_aval}{name_str} was given the sharding "
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                         f"of {s}, which implies that "
-												Remove global_str since all avals in pjit are global

PiperOrigin-RevId: 522443476

											
										
										
											2023-04-06 14:51:30 -07:00
+								                         f"the global size of its dimension {i} should be "
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                         f"divisible by {size}, but it is equal to {shape[i]} "
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								                         f"(full shape: {shape})")
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								# -------------------- pjit rules --------------------
-												Make `pjit` an AxisPrimitive so that it can run the batching rules even if the argument is not batched but there is an axis_index/named shapes inside the pjitted function.

PiperOrigin-RevId: 502955369

											
										
										
											2023-01-18 12:55:31 -08:00
# The `pjit` primitive, created as a `core.AxisPrimitive` and flagged as
# having multiple results.
pjit_p = core.AxisPrimitive("pjit")
pjit_p.multiple_results = True
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
def _resolve_in_shardings(
    args, pjit_in_shardings: Sequence[PjitSharding],
    out_shardings: Sequence[PjitSharding],
    pjit_mesh: Optional[pxla.Mesh]) -> Sequence[PjitSharding]:
  """Reconcile the shardings given to pjit with those carried by `args`.

  Args:
    args: the call arguments; an argument may expose `.sharding`, `.ndim`,
      `.shape` and an optional `_committed` flag (treated as True if absent).
    pjit_in_shardings: one sharding per argument, as specified on pjit.
    out_shardings: the output shardings; only used for the device check.
    pjit_mesh: mesh whose devices are passed to the device-assignment check,
      or None. An empty mesh is treated like None.

  Returns:
    `pjit_in_shardings` unchanged when
    `pxla.check_device_backend_on_shardings` is true for them; otherwise a
    tuple with one resolved sharding per `(arg, pjit_in_sharding)` pair.

  Raises:
    ValueError: if an argument's sharding is not an `XLACompatibleSharding`,
      if a numpy input gets a non-fully-replicated sharding while
      `xb.process_count() > 1`, or if a committed argument's sharding differs
      from the one specified on pjit.
    NotImplementedError: if an uncommitted argument is not single-device
      sharded and pjit did not specify a sharding for it.
  """
  # Device or backend was set by the user on pjit, which has device_put
  # semantics: whatever device the arg lives on, it is resharded to the one
  # requested. Skip every check and hand back the shardings as given;
  # `shard_args` will handle the resharding.
  if pxla.check_device_backend_on_shardings(pjit_in_shardings):
    return pjit_in_shardings

  committed_arg_shardings = []
  for a in args:
    if not hasattr(a, 'sharding'):
      continue
    a_sharding = a.sharding
    if not isinstance(a_sharding, XLACompatibleSharding):
      raise ValueError(f'One of the argument to pjit got sharding {a_sharding} '
                       'which is not a subclass of XLACompatibleSharding.')
    # PmapSharding inputs are never treated as committed: they get resharded
    # unconditionally.
    if isinstance(a_sharding, PmapSharding):
      continue
    if getattr(a, '_committed', True):
      committed_arg_shardings.append(
          (a_sharding, pxla.MismatchType.ARG_SHARDING, None))

  # Arguments, in_shardings and out_shardings must all agree on the device
  # assignment.
  shardings_to_check = it.chain(
      committed_arg_shardings,
      [(i, pxla.MismatchType.IN_SHARDING, None) for i in pjit_in_shardings],
      [(o, pxla.MismatchType.OUT_SHARDING, None) for o in out_shardings])
  if pjit_mesh is None or pjit_mesh.empty:
    mesh_devices = None
  else:
    mesh_devices = list(pjit_mesh.devices.flat)
  pxla._get_and_check_device_assignment(shardings_to_check, mesh_devices)

  resolved = []
  for arg, pjit_in_s in zip(args, pjit_in_shardings):
    if hasattr(arg, 'sharding'):
      arg_s, committed = arg.sharding, getattr(arg, '_committed', True)
    else:
      arg_s, committed = UNSPECIFIED, False

    if not is_unspecified(pjit_in_s):
      # pjit specified a sharding for this argument: validate the argument
      # against it, then keep the pjit sharding.
      if (isinstance(arg, np.ndarray) and
          not pjit_in_s.is_fully_replicated and  # type: ignore
          xb.process_count() > 1):
        raise ValueError(
            'Passing non-trivial shardings for numpy '
            'inputs is not allowed. To fix this error, either specify a '
            'replicated sharding explicitly or use '
            '`jax.experimental.multihost_utils.host_local_array_to_global_array(...)` '
            'to convert your host local numpy inputs to a jax.Array which you '
            'can pass to pjit. '
            'If the numpy input is the same on each process, then you can use '
            '`jax.make_array_from_callback(...) to create a `jax.Array` which '
            'you can pass to pjit. '
            'Please see the jax.Array migration guide for more information '
            'https://jax.readthedocs.io/en/latest/jax_array_migration.html#handling-of-host-local-inputs-to-pjit-like-batch-etc. '
            f'Got arg shape: {arg.shape}, arg value: {arg}')
      if (not is_unspecified(arg_s) and
          committed and
          not isinstance(arg_s, PmapSharding) and
          not op_shardings.are_op_shardings_equal(
              pjit_in_s._to_xla_op_sharding(arg.ndim),  # type: ignore
              arg_s._to_xla_op_sharding(arg.ndim))):
        shown_sharding = getattr(pjit_in_s, '_original_sharding', pjit_in_s)
        raise ValueError('Sharding passed to pjit does not match the sharding '
                         'on the respective arg. '
                         f'Got pjit sharding: {shown_sharding},\n'
                         f'arg sharding: {arg_s} for arg shape: {arg.shape}, '
                         f'arg value: {arg}')
      resolved.append(pjit_in_s)
    elif is_unspecified(arg_s):
      resolved.append(arg_s)
    elif committed:
      # A committed PmapSharding argument is resharded unconditionally, so it
      # stays unspecified; any other committed sharding is adopted.
      if isinstance(arg_s, PmapSharding):
        resolved.append(UNSPECIFIED)
      else:
        resolved.append(to_gspmd_sharding(
            cast(XLACompatibleSharding, arg_s), arg.ndim))
    elif dispatch.is_single_device_sharding(arg_s):
      resolved.append(UNSPECIFIED)
    else:
      raise NotImplementedError('Having uncommitted Array sharded on '
                                'multiple devices is not supported.')

  return tuple(resolved)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
def _pjit_call_impl_python(
    *args, jaxpr, in_shardings, out_shardings, resource_env, donated_invars,
    name, keep_unused, inline):
  """Resolve shardings, lower and compile `jaxpr`, then run it on `args`.

  The compiled executable is also stored in
  `_most_recent_pjit_call_executable.value`.

  Args:
    *args: the runtime arguments for the computation.
    jaxpr: the jaxpr to lower, compile and execute.
    in_shardings: input shardings, resolved against `args` before lowering.
    out_shardings: output shardings passed to lowering.
    resource_env: resource environment, or None; when given, its
      `physical_mesh` is used while resolving the input shardings.
    donated_invars, name, keep_unused, inline: forwarded to `_pjit_lower`.

  Returns:
    A pair `(outputs, compiled)`, where `outputs` is the result of
    `compiled.unsafe_call(*args)` and `compiled` is the compiled executable.

  Raises:
    FloatingPointError: if the compiled call raises it. The jaxpr is then
      re-run through `core.jaxpr_as_fun`, which may raise its own error; if
      it does not, an explanatory FloatingPointError is raised instead.
  """
  mesh = None if resource_env is None else resource_env.physical_mesh
  in_shardings = _resolve_in_shardings(
      args, in_shardings, out_shardings, mesh)

  lowering = _pjit_lower(
      jaxpr, in_shardings, out_shardings, resource_env,
      donated_invars, name, keep_unused, inline,
      always_lower=False, lowering_platform=None)
  executable = lowering.compile()
  _most_recent_pjit_call_executable.value = executable

  # The sharding-match check is expensive, so it only runs when enable_checks
  # is on.
  if executable._auto_spmd_lowering and config.jax_enable_checks:
    pxla.check_gda_or_array_xla_sharding_match(
        args, executable._in_shardings, jaxpr.jaxpr.debug_info)

  if config.jax_distributed_debug:
    # Defensively only perform fingerprint logic if debug logging is enabled
    # NOTE(skyewm): I didn't benchmark this
    raw_fingerprint = None
    if hasattr(executable.runtime_executable(), "fingerprint"):
      raw_fingerprint = executable.runtime_executable().fingerprint
    fingerprint = None if raw_fingerprint is None else raw_fingerprint.hex()
    distributed_debug_log(("Running pjit'd function", name),
                          ("in_shardings", in_shardings),
                          ("out_shardings", out_shardings),
                          ("abstract args", map(xla.abstractify, args)),
                          ("fingerprint", fingerprint))

  try:
    return executable.unsafe_call(*args), executable
  except FloatingPointError:
    # The compiled call can only raise in this case.
    assert config.jax_debug_nans or config.jax_debug_infs

    # Re-run without compilation to get a more precise error; this may raise
    # instead of returning.
    _ = core.jaxpr_as_fun(jaxpr)(*args)

    # Reaching this point means the compiled executable produced an invalid
    # value but the re-run on the same arguments did not. Tell the user.
    explanation = (
        "An invalid value was encountered in the output of the "
        f"`jit`-decorated function {name}. Because "
        "config.jax_debug_nans and/or config.jax_debug_infs is set, the "
        "de-optimized function (i.e., the function as if the `jit` "
        "decorator were removed) was called in an attempt to get a more "
        "precise error message. However, the de-optimized function did not "
        "produce invalid values during its execution. This behavior can "
        "result from `jit` optimizations causing the invalid value to be "
        "produced. It may also arise from having nan/inf constants as "
        "outputs, like `jax.jit(lambda ...: jax.numpy.nan)(...)`. "
        "\n\n"
        "It may be possible to avoid the invalid value by removing the "
        "`jit` decorator, at the cost of losing optimizations. "
        "\n\n"
        "If you see this error, consider opening a bug report at "
        "https://github.com/google/jax.")
    raise FloatingPointError(explanation)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
 								@weakref_lru_cache
 								def _get_jaxpr_as_fun(jaxpr, in_shardings, out_shardings, resource_env,
 								                      donated_invars, name, keep_unused, inline):
 								  """Build a callable that evaluates `jaxpr` while holding it only weakly.
 								  Only `jaxpr` is read by the body; the remaining parameters are not used
 								  here (presumably they only take part in the `weakref_lru_cache` key --
 								  TODO confirm against the cache implementation).
 								  Returning `core.jaxpr_as_fun(jaxpr)` directly would make the cached result
 								  hold a strong reference to the jaxpr, which defeats the purpose of caching
 								  under `weakref_lru_cache`. The returned function therefore closes over a
 								  `weakref.ref` and dereferences it on every call.
 								  """
 								  jaxpr_ref = weakref.ref(jaxpr)
 								  def eval_weakly_held_jaxpr(*args):
 								    # Dereference at call time so the closure never owns the jaxpr.
 								    return core.jaxpr_as_fun(jaxpr_ref())(*args)
 								  return eval_weakly_held_jaxpr
 								def _pjit_call_impl(*args, jaxpr,
 								                    in_shardings, out_shardings, resource_env,
 								                    donated_invars, name, keep_unused, inline):
 								  """Impl rule for `pjit_p`: runs the call through `xc._xla.pjit`.
 								  A callable is built with `xc._xla.pjit` from the weakly-held jaxpr
 								  evaluator, a cache-miss callback and the donated argument indices, and is
 								  then invoked immediately on `args`.
 								  """
 								  def call_impl_cache_miss(*args_, **kwargs_):
 								    # NOTE: `args_`/`kwargs_` are deliberately ignored; the enclosing call's
 								    # `args` and parameters are forwarded instead.
 								    results, executable = _pjit_call_impl_python(
 								        *args, jaxpr=jaxpr, in_shardings=in_shardings,
 								        out_shardings=out_shardings, resource_env=resource_env,
 								        donated_invars=donated_invars, name=name, keep_unused=keep_unused,
 								        inline=inline)
 								    return results, _get_fastpath_data(
 								        executable, tree_structure(results), args, results)
 								  def strip_wrappers(shardings):
 								    # Use the `_original_sharding` attribute when a sharding carries one.
 								    return tuple(getattr(s, '_original_sharding', s) for s in shardings)
 								  jaxpr_fun = _get_jaxpr_as_fun(
 								      jaxpr, strip_wrappers(in_shardings), strip_wrappers(out_shardings),
 								      resource_env, donated_invars, name, keep_unused, inline)
 								  donated_argnums = [
 								      idx for idx, is_donated in enumerate(donated_invars) if is_donated]
 								  cpp_call = xc._xla.pjit(name, jaxpr_fun, call_impl_cache_miss, [], [],
 								                          donated_argnums, _cpp_pjit_cache)
 								  return cpp_call(*args)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								pjit_p.def_impl(_pjit_call_impl)
 								@dataclasses.dataclass(frozen=True)
 								class SameDeviceAssignmentTuple:
 								  shardings: Tuple[PjitSharding, ...]
 								  # device_assignment is Optional because shardings can contain `AUTO` and in
 								  # that case `mesh` is compulsory to be used. So in that case
 								  # `_pjit_lower_cached` cache, resource_env will check against the devices.
 								  device_assignment: Optional[XLADeviceAssignment]
 								  def __hash__(self):
-												Preserve shardings on the output of pjit that were provided on the arguments.

Following are the changes:

* Make _pjit_lower_cached depend on exact sharding equality if `_original_sharding` exists. This top level cache should fill up eventually if users are passing different shardings into the pjit function.
* Split lower_sharding_computation into 3 caches:
  * _trace_to_jaxpr_and_dce cache -- This will return a closed jaxpr which is DCE'd
  * _cached_lowering_to_hlo cache -- This will cache the generation of MHLO. This cache is dependent on the semantic equality of shardings i.e. if 2 shardings lower to the same OpSharding, then there will be a cache hit
  * _cached_compilation cache -- This caches the compilation so that we don't recompile if the shardings are semantically equal.

The way this works is the out_handlers are created again if we pass in different shardings to pjit (but there is no recompilation). This allows us to maintain the shardings passed by the user.

For ops like `jnp.squeeze` where we infer the sharding from the executable, we try to recreate a NamedSharding (right now, more support will be added in following CLs) from the GSPMDSharding since it will be available on the input.

PiperOrigin-RevId: 522991145

											
										
										
											2023-04-09 15:41:32 -07:00
+								    shardings_hash = tuple(
 								        s._op_sharding_hash if isinstance(s, GSPMDSharding) else s  # type: ignore
 								        for s in self.shardings)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    if self.device_assignment is None:
 								      return hash(shardings_hash)
 								    else:
 								      return hash((shardings_hash, *self.device_assignment))
 								  def __eq__(self, other):
 								    if not isinstance(other, SameDeviceAssignmentTuple):
 								      return False
-												Preserve shardings on the output of pjit that were provided on the arguments.

Following are the changes:

* Make _pjit_lower_cached depend on exact sharding equality if `_original_sharding` exists. This top level cache should fill up eventually if users are passing different shardings into the pjit function.
* Split lower_sharding_computation into 3 caches:
  * _trace_to_jaxpr_and_dce cache -- This will return a closed jaxpr which is DCE'd
  * _cached_lowering_to_hlo cache -- This will cache the generation of MHLO. This cache is dependent on the semantic equality of shardings i.e. if 2 shardings lower to the same OpSharding, then there will be a cache hit
  * _cached_compilation cache -- This caches the compilation so that we don't recompile if the shardings are semantically equal.

The way this works is the out_handlers are created again if we pass in different shardings to pjit (but there is no recompilation). This allows us to maintain the shardings passed by the user.

For ops like `jnp.squeeze` where we infer the sharding from the executable, we try to recreate a NamedSharding (right now, more support will be added in following CLs) from the GSPMDSharding since it will be available on the input.

PiperOrigin-RevId: 522991145

											
										
										
											2023-04-09 15:41:32 -07:00
+								    eq = []
 								    for s, o in zip(self.shardings, other.shardings):
 								      s = getattr(s, "_original_sharding", s)
 								      o = getattr(o, "_original_sharding", o)
 								      if isinstance(s, GSPMDSharding) and isinstance(o, GSPMDSharding):
 								        eq.append(op_shardings.are_op_shardings_equal(
 								            s._op_sharding, o._op_sharding))
 								      else:
 								        eq.append(s == o)
 								    return all(eq) and self.device_assignment == other.device_assignment
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def _pjit_lower(
 								    jaxpr: core.ClosedJaxpr,
 								    in_shardings,
 								    out_shardings,
 								    *args, **kwargs):
 								  da = _fast_path_get_device_assignment(it.chain(in_shardings, out_shardings))
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  in_shardings = SameDeviceAssignmentTuple(tuple(in_shardings), da)
 								  out_shardings = SameDeviceAssignmentTuple(tuple(out_shardings), da)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  return _pjit_lower_cached(jaxpr, in_shardings, out_shardings, *args, **kwargs)
 								@weakref_lru_cache
 								def _pjit_lower_cached(
 								    jaxpr: core.ClosedJaxpr,
 								    sdat_in_shardings: SameDeviceAssignmentTuple,
 								    sdat_out_shardings: SameDeviceAssignmentTuple,
 								    resource_env,
 								    donated_invars,
 								    name: str,
 								    keep_unused: bool,
-												Default jax_spmd_mode to allow_jit which will allow explicit jax.jit to not raise the multihost error (since jit and pjit have been merged).

Implicit jit and apply_primitive will still raise an error though (which is recognized via inline parameter). Majority of jnp operations in JAX should be inlined.

PiperOrigin-RevId: 527398394

											
										
										
											2023-04-26 15:54:50 -07:00
+								    inline: bool,
-												[jax2tf] Clean up the support for cross-lowering.

In a previous CL we introduced cross-lowering support without any
changes in JAX core, but at the expense of some overly complex code
in jax2tf, along with overriding a JAX core function. Plus, those
changes were not enough to handle some xmap and pmap cases.

Here we introduce a `_experimental_lowering_platform: Optional[str]` parameter
to the `.lower()` methods and then we thread the `lowering_platform`
all the way to the calls to `mlir.lower_jaxpr_to_module2`. That's it.

Note that this parameter to `.lower()` is experimental and not supposed
to be used outside jax2tf. It may also gobble user kwargs.

											
										
										
											2023-02-28 11:30:23 +01:00
+								    always_lower: bool,
 								    *,
 								    lowering_platform: Optional[str]):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  in_shardings: Tuple[PjitShardingMinusUnspecified, ...] = cast(
 								      Tuple[PjitShardingMinusUnspecified, ...], sdat_in_shardings.shardings)
 								  out_shardings: Tuple[PjitSharding, ...] = sdat_out_shardings.shardings
-												Make `pjit` an AxisPrimitive so that it can run the batching rules even if the argument is not batched but there is a axis_index/named shapes inside the pjitted function.

PiperOrigin-RevId: 502955369

											
										
										
											2023-01-18 12:55:31 -08:00
+								  if resource_env is not None:
 								    pxla.resource_typecheck(jaxpr, resource_env, {}, lambda: "pjit")
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								  if resource_env is not None:
 								    mesh = resource_env.physical_mesh
-												Fix `name_stack` usage of pjit. Now all the metadata of transformations in hlo are correct.

PiperOrigin-RevId: 501918212

											
										
										
											2023-01-13 12:53:42 -08:00
+								    api_name = 'pjit'
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  else:
-												Fix `name_stack` usage of pjit. Now all the metadata of transformations in hlo are correct.

PiperOrigin-RevId: 501918212

											
										
										
											2023-01-13 12:53:42 -08:00
+								    # resource_env is `None` in the jit wrapper around pjit.
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    mesh = None
-												Fix `name_stack` usage of pjit. Now all the metadata of transformations in hlo are correct.

PiperOrigin-RevId: 501918212

											
										
										
											2023-01-13 12:53:42 -08:00
+								    api_name = 'jit'
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  # For `pjit(xmap)` cases, it needs to take the `lower_mesh_computation` path
 								  # because `xmap` only supports SPMDAxisContext right now.
-												Allow pjit.AUTO to be used with jax.jit. This introduces an API change which requires a mesh to be provided to pjit.AUTO(mesh).

`with mesh:` is no longer required with pjit to use the auto spmd pass of GSPMD.

PiperOrigin-RevId: 533801596

											
										
										
											2023-05-20 22:59:52 -07:00
+								  if dispatch.jaxpr_has_primitive(jaxpr.jaxpr, 'xmap'):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return pxla.lower_mesh_computation(
-												Pass the `jaxpr` from `pjit` since there is no need to trace it again in lower_sharding_computation. It also helps in preserving debug_info that already exists on the jaxpr to surface it in MHLO eventually.

PiperOrigin-RevId: 513268085

											
										
										
											2023-03-01 10:04:59 -08:00
+								      jaxpr, api_name, name, mesh,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      in_shardings, out_shardings, donated_invars,
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								      True, jaxpr.in_avals, tiling_method=None,
-												[jax2tf] Clean up the support for cross-lowering.

In a previous CL we introduced cross-lowering support without any
changes in JAX core, but at the expense of some overly complex code
in jax2tf, along with overriding a JAX core function. Plus, those
changes were not enough to handle some xmap and pmap cases.

Here we introduce a `_experimental_lowering_platform: Optional[str]` parameter
to the `.lower()` methods and then we thread the `lowering_platform`
all the way to the calls to `mlir.lower_jaxpr_to_module2`. That's it.

Note that this parameter to `.lower()` is experimental and not supposed
to be used outside jax2tf. It may also gobble user kwargs.

											
										
										
											2023-02-28 11:30:23 +01:00
+								      lowering_platform=lowering_platform)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  else:
 								    return pxla.lower_sharding_computation(
-												Preserve shardings on the output of pjit that were provided on the arguments.

Following are the changes:

* Make _pjit_lower_cached depend on exact sharding equality if `_original_sharding` exists. This top level cache should fill up eventually if users are passing different shardings into the pjit function.
* Split lower_sharding_computation into 3 caches:
  * _trace_to_jaxpr_and_dce cache -- This will return a closed jaxpr which is DCE'd
  * _cached_lowering_to_hlo cache -- This will cache the generation of MHLO. This cache is dependent on the semantic equality of shardings i.e. if 2 shardings lower to the same OpSharding, then there will be a cache hit
  * _cached_compilation cache -- This caches the compilation so that we don't recompile if the shardings are semantically equal.

The way this works is the out_handlers are created again if we pass in different shardings to pjit (but there is no recompilation). This allows us to maintain the shardings passed by the user.

For ops like `jnp.squeeze` where we infer the sharding from the executable, we try to recreate a NamedSharding (right now, more support will be added in following CLs) from the GSPMDSharding since it will be available on the input.

PiperOrigin-RevId: 522991145

											
										
										
											2023-04-09 15:41:32 -07:00
+								        jaxpr, api_name, name, in_shardings, out_shardings,
-												Default jax_spmd_mode to allow_jit which will allow explicit jax.jit to not raise the multihost error (since jit and pjit have been merged).

Implicit jit and apply_primitive will still raise an error though (which is recognized via inline parameter). Majority of jnp operations in JAX should be inlined.

PiperOrigin-RevId: 527398394

											
										
										
											2023-04-26 15:54:50 -07:00
+								        tuple(donated_invars), tuple(jaxpr.in_avals),
 								        keep_unused=keep_unused, inline=inline, always_lower=always_lower,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								        devices_from_context=(
-												[jax2tf] Clean up the support for cross-lowering.

In a previous CL we introduced cross-lowering support without any
changes in JAX core, but at the expense of some overly complex code
in jax2tf, along with overriding a JAX core function. Plus, those
changes were not enough to handle some xmap and pmap cases.

Here we introduce a `_experimental_lowering_platform: Optional[str]` parameter
to the `.lower()` methods and then we thread the `lowering_platform`
all the way to the calls to `mlir.lower_jaxpr_to_module2`. That's it.

Note that this parameter to `.lower()` is experimental and not supposed
to be used outside jax2tf. It may also gobble user kwargs.

											
										
										
											2023-02-28 11:30:23 +01:00
+								            None if mesh is None or mesh.empty else list(mesh.devices.flat)),
 								        lowering_platform=lowering_platform)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def pjit_staging_rule(trace, *args, **params):
 								  if (params["inline"] and
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								      all(is_unspecified(i) for i in params["in_shardings"]) and
 								      all(is_unspecified(o) for o in params["out_shardings"])):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    jaxpr = params['jaxpr']
-												Use the traceback of the call site when assigning a source location to an inlined function.

Improves but does not completely fix https://github.com/google/jax/issues/15663 . The non-inlined case still has similar problems.

											
										
										
											2023-04-19 13:46:33 -04:00
+								    return core.eval_jaxpr(jaxpr.jaxpr, jaxpr.consts, *args,
 								                           propagate_source_info=False)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  elif config.jax_dynamic_shapes:
 								    source_info = source_info_util.current()
 								    out_tracers = []
 								    for aval in _out_type(params['jaxpr']):
 								      if type(aval) is core.DShapedArray:
 								        shape = [args[d.val] if type(d) is core.InDBIdx else
 								                 out_tracers[d.val] if type(d) is core.OutDBIdx else
 								                 d for d in aval.shape]
 								        aval = aval.update(shape=tuple(core.get_referent(d) for d in shape))
 								      out_tracers.append(pe.DynamicJaxprTracer(trace, aval, source_info))
 								    eqn = core.new_jaxpr_eqn(
 								      map(trace.getvar, args), map(trace.makevar, out_tracers), pjit_p, params,
 								      params['jaxpr'].effects, source_info)
 								    trace.frame.add_eqn(eqn)
 								    return out_tracers
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  else:
 								    return trace.default_process_primitive(pjit_p, args, params)
 								pe.custom_staging_rules[pjit_p] = pjit_staging_rule
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								# TODO(mattjj): remove/trivialize this when jaxprs have type annotation on them,
 								# since it's actually not possible in general to infer the type from the term
 								def _out_type(jaxpr: core.ClosedJaxpr) -> List[core.AbstractValue]:
 								  """Compute the output avals of `jaxpr`, encoding dynamic dimensions by index.
 								  For every output whose aval is exactly a `core.DShapedArray`, each shape
 								  entry that is one of the jaxpr's input variables is replaced by
 								  `core.InDBIdx(position in invars)`, and each entry that is one of the
 								  jaxpr's output variables is replaced by `core.OutDBIdx(position in
 								  outvars)`. Other entries and all other avals are passed through unchanged.
 								  Args:
 								    jaxpr: the closed jaxpr whose `jaxpr.invars` / `jaxpr.outvars` are read.
 								  Returns:
 								    A list with one aval per entry of `jaxpr.jaxpr.outvars`, in order.
 								  """
 								  in_idx = {v: i for i, v in enumerate(jaxpr.jaxpr.invars)}
 								  # Index the *outputs* here. Building this map from `invars` (as before) made
 								  # it a copy of `in_idx`, so the `OutDBIdx` branch below was unreachable and
 								  # dimensions referring to other outputs leaked through as raw variables.
 								  # The `core.Var` filter skips outputs that are not variables.
 								  out_idx = {x: i for i, x in enumerate(jaxpr.jaxpr.outvars)
 								             if type(x) is core.Var}
 								  out = []
 								  for x in jaxpr.jaxpr.outvars:
 								    aval = x.aval
 								    if type(aval) is core.DShapedArray:
 								      # Inputs take precedence over outputs when a variable is both.
 								      shape = tuple(
 								          core.InDBIdx(in_idx[d]) if d in in_idx else
 								          core.OutDBIdx(out_idx[d]) if d in out_idx else
 								          d for d in aval.shape)
 								      aval = aval.update(shape=shape)
 								    out.append(aval)
 								  return out
-												enable pjit recursive typechecking

Give pjit_p a custom typecheck rule, which basically just calls the
core._check_call utility (which was made for xla_call_p and core.call_p).

This revealed the need for a slight generalization of the custom_typecheck rule
signature, for better "context-aware" printing of jaxpr type errors: the rules
should have a `ctx_factory` first argument. **The reason this PR touches so
many files is just that it makes the trivial tweaks to all existing typecheck
rules to accommodate that new signature.** I didn't adapt any other higher-order
primitives' rules to actually use the context, but presumably errors for HOPs
like scan would be improved by using it. Follow-up work!

It's key that core._check_call works with dynamic shapes; this PR is soon to be
followed by some djax+pjit PRs!

											
										
										
											2023-03-21 21:43:20 -07:00
+								def _pjit_typecheck(ctx_factory, *in_atoms, jaxpr, **params):
 								  return core._check_call(ctx_factory, pjit_p, in_atoms,
 								                          dict(params, call_jaxpr=jaxpr.jaxpr))
 								core.custom_typechecks[pjit_p] = _pjit_typecheck
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								def _pjit_abstract_eval(*args, jaxpr, out_shardings, resource_env, **_):
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  return jaxpr.out_avals, jaxpr.effects
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								pjit_p.def_effectful_abstract_eval(_pjit_abstract_eval)
 								def _pjit_lowering(ctx, *args, name, jaxpr, in_shardings,
 								                   out_shardings, resource_env, donated_invars,
 								                   keep_unused, inline):
-												Fix nondeterminism issue with ordered effects

											
										
										
											2023-02-23 16:03:00 -08:00
+								  effects = list(ctx.tokens_in.effects())
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  output_types = map(mlir.aval_to_ir_types, ctx.avals_out)
-												Fix debugging primitives for pjit. This came up during jit/pjit merge

PiperOrigin-RevId: 501710198

											
										
										
											2023-01-12 17:40:06 -08:00
+								  output_types = [mlir.token_type()] * len(effects) + output_types
-												For nested pjit's cache the generation of StableHLO if it satifies the key. This should help in improving the tracing time.

PiperOrigin-RevId: 532155068

											
										
										
											2023-05-15 10:31:38 -07:00
+								  flat_output_types = flatten(output_types)
-												For nested pjit's cache the generation of StableHLO if it satifies the key. This should help in improving the tracing time.

PiperOrigin-RevId: 533263584

											
										
										
											2023-05-18 15:09:00 -07:00
+								  arg_shardings = [None if is_unspecified(i) else i._to_xla_op_sharding(aval.ndim)
 								                   for aval, i in zip(ctx.avals_in, in_shardings)]
 								  result_shardings = [None if is_unspecified(o) else o._to_xla_op_sharding(aval.ndim)
 								                      for aval, o in zip(ctx.avals_out, out_shardings)]
 								  # TODO(b/228598865): inlined calls cannot have shardings set directly on the
 								  # inputs or outputs because they are lost during MLIR->HLO conversion.
 								  # using_sharding_annotation=False means we add an identity operation instead.
 								  func = mlir.lower_jaxpr_to_fun(
 								      ctx.module_context, name, jaxpr, effects, arg_shardings=arg_shardings,
 								      result_shardings=result_shardings, use_sharding_annotations=False,
 								      api_name=('jit' if resource_env is None else 'pjit'))
-												Fix nondeterminism issue with ordered effects

											
										
										
											2023-02-23 16:03:00 -08:00
+								  tokens_in = [ctx.tokens_in.get(eff) for eff in effects]
 								  args = (*ctx.dim_var_values, *tokens_in, *args)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  call = func_dialect.CallOp(flat_output_types,
 								                             ir.FlatSymbolRefAttr.get(func.name.value),
 								                             mlir.flatten_lowering_ir_args(args))
-												For nested pjit's cache the generation of StableHLO if it satifies the key. This should help in improving the tracing time.

PiperOrigin-RevId: 532155068

											
										
										
											2023-05-15 10:31:38 -07:00
+								  out_nodes = unflatten(call.results, map(len, output_types))
-												Fix debugging primitives for pjit. This came up during jit/pjit merge

PiperOrigin-RevId: 501710198

											
										
										
											2023-01-12 17:40:06 -08:00
+								  tokens, out_nodes = split_list(out_nodes, [len(effects)])
 								  tokens_out = ctx.tokens_in.update_tokens(mlir.TokenSet(zip(effects, tokens)))
 								  ctx.set_tokens_out(tokens_out)
 								  return out_nodes
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								mlir.register_lowering(pjit_p, _pjit_lowering)
 								def _pjit_batcher(insert_axis, spmd_axis_name,
 								                  axis_size, axis_name, main_type,
 								                  vals_in, dims_in,
 								                  jaxpr, in_shardings, out_shardings,
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								                  resource_env, donated_invars, name, keep_unused, inline):
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								  new_jaxpr, axes_out = batching.batch_jaxpr2(
-												Plumb spmd_axis_name through batch_jaxpr2 and batch_jaxpr

PiperOrigin-RevId: 509341618

											
										
										
											2023-02-13 14:57:50 -08:00
+								      jaxpr, axis_size, dims_in, axis_name=axis_name,
 								      spmd_axis_name=spmd_axis_name, main_type=main_type)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  # `insert_axis` is set to True only for some `xmap` uses.
 								  new_parts = (axis_name,) if insert_axis else (
-												generalize vmap spmd_axis_name to accept tuples of axis names

This brings the argument more in line with what can appear as positional
arguments to the PartitionSpec constructor.

											
										
										
											2023-02-10 14:43:54 -08:00
+								      () if spmd_axis_name is None else spmd_axis_name)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								  if resource_env is not None:
 								    mesh = resource_env.physical_mesh
 								  else:
 								    mesh = None
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  in_shardings = tuple(
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								      _pjit_batcher_for_sharding(i, axis_in, new_parts, mesh, aval.ndim)
 								      if axis_in is not None else i
 								      for axis_in, i, aval in zip(dims_in, in_shardings, new_jaxpr.in_avals))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  out_shardings = tuple(
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								      _pjit_batcher_for_sharding(o, axis_out, new_parts, mesh, aval.ndim)
 								      if axis_out is not None else o
 								      for axis_out, o, aval in zip(axes_out, out_shardings, new_jaxpr.out_avals))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  vals_out = pjit_p.bind(
 								    *vals_in,
 								    jaxpr=new_jaxpr,
 								    in_shardings=in_shardings,
 								    out_shardings=out_shardings,
 								    resource_env=resource_env,
 								    donated_invars=donated_invars,
 								    name=name,
 								    keep_unused=keep_unused,
 								    inline=inline)
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								  return vals_out, axes_out
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								batching.spmd_axis_primitive_batchers[pjit_p] = partial(_pjit_batcher, False)
 								batching.axis_primitive_batchers[pjit_p] = partial(_pjit_batcher, False, None)
 								pxla.spmd_primitive_batchers[pjit_p] = partial(_pjit_batcher, True, None)
 								def _pjit_batcher_for_sharding(
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    s: Union[GSPMDSharding, UnspecifiedValue],
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								    dim: int, val: Tuple[str, ...], mesh, ndim: int):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if is_unspecified(s):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return s
 								  if not val:
-												Fix pjit + vmap when `device` is passed as an argument to pjit/jit

PiperOrigin-RevId: 529155035

											
										
										
											2023-05-03 11:54:46 -07:00
+								    if sharding_impls.is_op_sharding_replicated(s._op_sharding):  # type: ignore
 								      return s
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    new_op = s._op_sharding.clone()  # type: ignore
 								    tad = list(new_op.tile_assignment_dimensions)
 								    tad.insert(dim, 1)
 								    new_op.tile_assignment_dimensions = tad
-												Try to preserve shardings with vmap(pjit) by converting the GSPMDShardings to original sharding type via the pxla.py helper

PiperOrigin-RevId: 524966654

											
										
										
											2023-04-17 15:32:21 -07:00
+								    new_gs = GSPMDSharding(s._device_assignment, new_op)  # type: ignore
 								    if hasattr(s, '_original_sharding'):
 								      vmapped_s, _ = pxla._get_out_sharding_from_orig_sharding(
-												Only return the same input Sharding object is the original aval's ndim and out_aval's ndim are the same.

This is because if both the OpShardings are replicated then the ndim is not encoded in the OpSharding and it will return True even if the Sharding is incompatible with the output's ndim. Concretely `NamedSharding({'x': 1, y: '2'}, P('x'))` is not compatible with a input with `ndim == 0`.

PiperOrigin-RevId: 528621971

											
										
										
											2023-05-01 17:39:16 -07:00
+								          [new_gs], [None], s._original_sharding, None, [False])[0]  # type: ignore
-												Try to preserve shardings with vmap(pjit) by converting the GSPMDShardings to original sharding type via the pxla.py helper

PiperOrigin-RevId: 524966654

											
										
										
											2023-04-17 15:32:21 -07:00
+								      new_gs = to_gspmd_sharding(vmapped_s, ndim)
 								    return new_gs
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  else:
-												Rename `jax.sharding.OpShardingSharding` to `jax.sharding.GSPMDSharding`. `jax.sharding.OpShardingSharding` will be removed in 3 months from Feb 17, 2023.

PiperOrigin-RevId: 510556189

											
										
										
											2023-02-17 17:10:27 -08:00
+								    assert isinstance(s, GSPMDSharding)
-												Make `jax.jit` work with vmap(..., spmd_axis_name) when there is no mesh context manager.

This will only work if the input Array's sharding is a NamedSharding

Fixes https://github.com/google/jax/issues/15886

PiperOrigin-RevId: 529758233

											
										
										
											2023-05-05 10:47:53 -07:00
+								    if isinstance(getattr(s, '_original_sharding', None), NamedSharding):
 								      mesh = s._original_sharding.mesh  # type: ignore
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    assert mesh is not None and not mesh.empty
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    parsed_pspec = parse_flatten_op_sharding(s._op_sharding, mesh)[0]  # type: ignore
 								    parsed_pspec = parsed_pspec.insert_axis_partitions(dim, val)
 								    mps = NamedSharding._from_parsed_pspec(mesh, parsed_pspec)
-												Rename `jax.sharding.OpShardingSharding` to `jax.sharding.GSPMDSharding`. `jax.sharding.OpShardingSharding` will be removed in 3 months from Feb 17, 2023.

PiperOrigin-RevId: 510556189

											
										
										
											2023-02-17 17:10:27 -08:00
+								    return GSPMDSharding(mps._device_assignment, mps._to_xla_op_sharding(ndim))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def _pjit_jvp(primals_in, tangents_in,
 								              jaxpr, in_shardings, out_shardings,
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								              resource_env, donated_invars, name, keep_unused, inline):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  is_nz_tangents_in = [type(t) is not ad.Zero for t in tangents_in]
 								  jaxpr_jvp, is_nz_tangents_out = ad.jvp_jaxpr(
 								      jaxpr, is_nz_tangents_in, instantiate=False)
 								  def _filter_zeros(is_nz_l, l):
 								    return (x for nz, x in zip(is_nz_l, l) if nz)
 								  _filter_zeros_in = partial(_filter_zeros, is_nz_tangents_in)
 								  _filter_zeros_out = partial(_filter_zeros, is_nz_tangents_out)
 								  outputs = pjit_p.bind(
 								      *primals_in, *_filter_zeros_in(tangents_in),
 								      jaxpr=jaxpr_jvp,
 								      in_shardings=(*in_shardings, *_filter_zeros_in(in_shardings)),
 								      out_shardings=(*out_shardings, *_filter_zeros_out(out_shardings)),
 								      resource_env=resource_env,
 								      donated_invars=(*donated_invars, *_filter_zeros_in(donated_invars)),
-												Fix `name_stack` usage of pjit. Now all the metadata of transformations in hlo are correct.

PiperOrigin-RevId: 501918212

											
										
										
											2023-01-13 12:53:42 -08:00
+								      name=name,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      keep_unused=keep_unused,
 								      inline=inline)
 								  primals_out, tangents_out = split_list(outputs, [len(jaxpr.jaxpr.outvars)])
 								  assert len(primals_out) == len(jaxpr.jaxpr.outvars)
 								  tangents_out_it = iter(tangents_out)
 								  return primals_out, [next(tangents_out_it) if nz else ad.Zero(aval)
 								                       for nz, aval in zip(is_nz_tangents_out, jaxpr.out_avals)]
 								ad.primitive_jvps[pjit_p] = _pjit_jvp
-												Add forwarding support to pjit which was introduced as an optimization. The inputs that are forwarded to outputs are pruned from the outputs of a known_jaxpr.

PiperOrigin-RevId: 503559787

											
										
										
											2023-01-20 18:03:24 -08:00
@weakref_lru_cache
def _known_jaxpr_fwd(known_jaxpr: core.ClosedJaxpr,
                     fwds_known: Tuple[Optional[int], ...]) -> core.ClosedJaxpr:
  """Return `known_jaxpr` without the outvars marked as forwarded.

  Args:
    known_jaxpr: the closed jaxpr whose outputs are pruned.
    fwds_known: one entry per outvar of `known_jaxpr`; an outvar is kept only
      when its entry is None. Passed as a tuple because the result is cached
      by `weakref_lru_cache`.

  Returns:
    A copy of `known_jaxpr` whose inner jaxpr keeps only the outvars with a
    None entry in `fwds_known`.
  """
  kept_outvars = [
      outvar
      for outvar, fwd in zip(known_jaxpr.jaxpr.outvars, fwds_known)
      if fwd is None]
  pruned_jaxpr = known_jaxpr.jaxpr.replace(outvars=kept_outvars)
  return known_jaxpr.replace(jaxpr=pruned_jaxpr)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
def _pjit_partial_eval(trace, *in_tracers,
                       jaxpr, in_shardings, out_shardings,
                       resource_env, donated_invars, name, keep_unused, inline):
  """Partial-evaluation rule for `pjit_p`.

  Splits `jaxpr` into a known and an unknown part with
  `pe.partial_eval_jaxpr_nounits`, binds `pjit_p` immediately on the known
  inputs, and stages the unknown part as a new `pjit_p` equation whose
  trailing inputs are the residuals produced by the known call.

  Args:
    trace: the trace used to create residual and output tracers.
    *in_tracers: input tracers; each exposes its partial value as `.pval`.
    jaxpr, in_shardings, out_shardings, resource_env, donated_invars, name,
      keep_unused, inline: the parameters of the `pjit_p` equation.

  Returns:
    The outputs in original order (merged with `merge_lists`): known output
    values where the output is known and unknown output tracers elsewhere.
  """
  pvals = [tracer.pval for tracer in in_tracers]
  known_ins = tuple(pval.is_known() for pval in pvals)
  unknown_ins = tuple(not known for known in known_ins)

  known_jaxpr, unknown_jaxpr, unknown_outs, res_avals = (
      pe.partial_eval_jaxpr_nounits(jaxpr, unknown_ins, instantiate=False))
  unknown_outs = tuple(unknown_outs)
  known_outs = tuple(not unknown for unknown in unknown_outs)
  num_residuals = len(res_avals)
  residual_shardings = (UNSPECIFIED,) * num_residuals

  def keep_where(items, mask):
    return tuple(item for item, keep in unsafe_zip(items, mask) if keep)

  # Compute the known outputs
  known_params = dict(
      jaxpr=known_jaxpr,
      in_shardings=keep_where(in_shardings, known_ins),
      out_shardings=(
          keep_where(out_shardings, known_outs) + residual_shardings),
      resource_env=resource_env,
      donated_invars=keep_where(donated_invars, known_ins),
      name=name,
      keep_unused=keep_unused,
      inline=inline)

  fwds_known = pe._jaxpr_forwarding(known_jaxpr.jaxpr)

  # Only forward the outvars where the out_sharding is UNSPECIFIED.
  # NOTE(review): `known_params['out_shardings']` was already filtered by
  # `known_outs` above (and extended with the residual shardings), so applying
  # the same mask a second time looks suspicious -- confirm it is intended.
  known_user_out_shardings = keep_where(
      known_params['out_shardings'], known_outs)
  num_user_outs = len(known_user_out_shardings)
  fwds_known_user = [
      fwd if is_unspecified(out_sharding) else None
      for out_sharding, fwd in zip(known_user_out_shardings,
                                   fwds_known[:num_user_outs])]
  fwds_known = fwds_known_user + fwds_known[num_user_outs:]
  del fwds_known_user

  # Remove forwarded outvars and out_shardings
  known_params['jaxpr'] = _known_jaxpr_fwd(known_jaxpr, tuple(fwds_known))
  known_params['out_shardings'] = tuple(
      out_sharding
      for out_sharding, fwd in zip(known_params['out_shardings'], fwds_known)
      if fwd is None)
  assert len(known_params['out_shardings']) == len(known_params['jaxpr'].out_avals)

  # Bind known things to pjit_p.
  known_inputs = [pval.get_known() for pval in pvals if pval.is_known()]
  bound_outs = iter(pjit_p.bind(*known_inputs, **known_params))
  # Re-insert forwarded outputs: a non-None entry is an index into the known
  # inputs, a None entry consumes the next output of the bound call.
  all_known_outs = []
  for fwd_idx in fwds_known:
    if fwd_idx is None:
      all_known_outs.append(next(bound_outs))
    else:
      all_known_outs.append(known_inputs[fwd_idx])
  assert next(bound_outs, None) is None
  del bound_outs, known_inputs

  if num_residuals:
    known_out_vals, residual_vals = split_list(
        all_known_outs, [len(all_known_outs) - num_residuals])
  else:
    known_out_vals, residual_vals = all_known_outs, ()
  residual_tracers = [
      trace.new_instantiated_const(residual) for residual in residual_vals]

  # The convention of partial_eval_jaxpr_nounits is to place residual binders
  # at the front of the jaxpr produced, so we move them to the back since both
  # the jaxpr equation built below and the pjit transpose rule assume a
  # residual-inputs-last convention.
  unknown_jaxpr = pe.move_binders_to_back(
      unknown_jaxpr, [True] * num_residuals + [False] * sum(unknown_ins))

  # Prepare unknown tracers
  unknown_params = dict(
      jaxpr=unknown_jaxpr,
      in_shardings=(keep_where(in_shardings, unknown_ins) + residual_shardings),
      out_shardings=keep_where(out_shardings, unknown_outs),
      resource_env=resource_env,
      donated_invars=(keep_where(donated_invars, unknown_ins) +
                      (False,) * num_residuals),
      name=name,
      keep_unused=keep_unused,
      inline=inline)
  unknown_tracers_in = [
      tracer for tracer in in_tracers if not tracer.pval.is_known()]
  unknown_tracers_out = [
      pe.JaxprTracer(trace, pe.PartialVal.unknown(aval), None)
      for aval in unknown_jaxpr.out_avals]
  eqn = pe.new_eqn_recipe((*unknown_tracers_in, *residual_tracers),
                          unknown_tracers_out,
                          pjit_p,
                          unknown_params,
                          unknown_jaxpr.effects,
                          source_info_util.current())
  for out_tracer in unknown_tracers_out:
    out_tracer.recipe = eqn
  return merge_lists(unknown_outs, known_out_vals, unknown_tracers_out)

pe.custom_partial_eval_rules[pjit_p] = _pjit_partial_eval
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
def _pjit_partial_eval_custom_params_updater(
    unks_in: Sequence[bool], inst_in: Sequence[bool],
    kept_outs_known: Sequence[bool], kept_outs_staged: Sequence[bool],
    num_res: int, params_known: dict, params_staged: dict
  ) -> Tuple[dict, dict]:
  """Update `pjit_p` params for the known and staged halves of a split.

  Used together with `pe.closed_call_partial_eval_custom_rule` (see the
  registration below). The per-input and per-output parameters
  (`in_shardings`, `out_shardings`, `donated_invars`) are pruned with
  `pe.partition_list` and extended with `num_res` residual entries: residuals
  are appended to the known outputs and prepended to the staged inputs.

  Args:
    unks_in: mask used to prune the known jaxpr's inputs.
    inst_in: mask used to prune the staged jaxpr's inputs.
    kept_outs_known: mask used to prune the known jaxpr's outputs.
    kept_outs_staged: mask used to prune the staged jaxpr's outputs.
    num_res: number of residuals passed from the known to the staged jaxpr.
    params_known: params of the known equation; its 'jaxpr' is only used for
      the length checks.
    params_staged: params of the staged equation; its 'jaxpr' is only used
      for the length checks.

  Returns:
    A pair `(new_params_known, new_params_staged)` of updated copies.
  """
  # Residuals carry no user-specified sharding.
  residual_shardings = [UNSPECIFIED] * num_res

  # prune inputs to jaxpr_known according to unks_in
  known_donated, _ = pe.partition_list(unks_in, params_known['donated_invars'])
  known_in_shardings, _ = pe.partition_list(
      unks_in, params_known['in_shardings'])
  _, known_out_shardings = pe.partition_list(
      kept_outs_known, params_known['out_shardings'])
  new_params_known = dict(
      params_known,
      in_shardings=tuple(known_in_shardings),
      out_shardings=(*known_out_shardings, *residual_shardings),
      donated_invars=tuple(known_donated))
  assert len(new_params_known['in_shardings']) == len(params_known['jaxpr'].in_avals)
  assert len(new_params_known['out_shardings']) == len(params_known['jaxpr'].out_avals)

  # added num_res new inputs to jaxpr_staged, and pruning according to inst_in
  _, staged_donated = pe.partition_list(inst_in, params_staged['donated_invars'])
  staged_donated = [False] * num_res + staged_donated
  _, staged_in_shardings = pe.partition_list(
      inst_in, params_staged['in_shardings'])
  staged_in_shardings = [*residual_shardings, *staged_in_shardings]
  _, staged_out_shardings = pe.partition_list(
      kept_outs_staged, params_staged['out_shardings'])
  new_params_staged = dict(
      params_staged,
      in_shardings=tuple(staged_in_shardings),
      out_shardings=tuple(staged_out_shardings),
      donated_invars=tuple(staged_donated))
  assert len(new_params_staged['in_shardings']) == len(params_staged['jaxpr'].in_avals)
  assert len(new_params_staged['out_shardings']) == len(params_staged['jaxpr'].out_avals)
  return new_params_known, new_params_staged

pe.partial_eval_jaxpr_custom_rules[pjit_p] = \
    partial(pe.closed_call_partial_eval_custom_rule, 'jaxpr',
            _pjit_partial_eval_custom_params_updater)
-												Cache the creation of ClosedJaxpr in pjit_transpose which if not cached breaks the compilation cache.

PiperOrigin-RevId: 504304311

											
										
										
											2023-01-24 09:57:55 -08:00
+								@lu.cache
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								def _pjit_transpose_trace(fun, in_avals):
 								  transpose_jaxpr, _, consts = pe.trace_to_jaxpr_dynamic(fun, in_avals)
-												Cache the creation of ClosedJaxpr in pjit_transpose which if not cached breaks the compilation cache.

PiperOrigin-RevId: 504304311

											
										
										
											2023-01-24 09:57:55 -08:00
+								  transpose_jaxpr = core.ClosedJaxpr(transpose_jaxpr, consts)
 								  return transpose_jaxpr
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								def _pjit_transpose(reduce_axes, cts_in, *primals_in,
 								                    jaxpr, in_shardings, out_shardings,
-												Clean up pjit after jax.Array

* Remove {in|out}_positional_semantics from pjit_p.bind
* Remove `in_is_global` from lower_sharding_computation
* Remove local_to_global and global_to_local
* Clean up some arguments of sharded_lowering since they are not needed

PiperOrigin-RevId: 517469390

											
										
										
											2023-03-17 11:50:59 -07:00
+								                    resource_env, donated_invars, name, keep_unused, inline):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  def prune_type(ty, xs, maybe_zeros):
 								    return tuple(x for x, mz in zip(xs, maybe_zeros) if type(mz) is not ty)
 								  body = lu.wrap_init(ad.closed_backward_pass)
 								  body = lu.hashable_partial(body, jaxpr, reduce_axes, False)
 								  primals_and_nz_cts_in, in_treedef = tree_flatten((primals_in, cts_in))
 								  body, cts_out_treedef_thunk = flatten_fun_nokwargs(body, in_treedef)
 								  transpose_in_shardings = (
 								    *prune_type(ad.UndefinedPrimal, in_shardings, primals_in),
 								    *prune_type(ad.Zero, out_shardings, cts_in)
 								  )
-												Cache the creation of ClosedJaxpr in pjit_transpose which if not cached breaks the compilation cache.

PiperOrigin-RevId: 504304311

											
										
										
											2023-01-24 09:57:55 -08:00
+								  global_cts_in_avals = tuple(core.raise_to_shaped(core.get_aval(ct))
 								                              for ct in primals_and_nz_cts_in)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								  transpose_jaxpr = _pjit_transpose_trace(body, global_cts_in_avals)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  cts_out_treedef = cts_out_treedef_thunk()
 								  transpose_out_shardings = prune_type(
 								      ad.Zero,
 								      in_shardings,
 								      tree_unflatten(cts_out_treedef, [object()] * cts_out_treedef.num_leaves))
 								  nz_cts_out = pjit_p.bind(
 								      *primals_and_nz_cts_in,
 								      jaxpr=transpose_jaxpr,
 								      in_shardings=transpose_in_shardings,
 								      out_shardings=transpose_out_shardings,
 								      resource_env=resource_env,
 								      donated_invars=(False,) * len(primals_and_nz_cts_in),
 								      name=name,
 								      keep_unused=keep_unused,
 								      inline=inline)
 								  return tree_unflatten(cts_out_treedef, nz_cts_out)
 								ad.reducing_transposes[pjit_p] = _pjit_transpose
-												Add dce_rules for pjit primitive so that remat can DCE through the pjit primitive and remove unused residuals

PiperOrigin-RevId: 504123801

											
										
										
											2023-01-23 17:31:33 -08:00
+								@weakref_lru_cache
 								def _dce_jaxpr_pjit(
 								    jaxpr: core.ClosedJaxpr, used_outputs: Tuple[bool]
 								) -> Tuple[core.ClosedJaxpr, List[bool]]:
 								  new_jaxpr, used_inputs = pe.dce_jaxpr(jaxpr.jaxpr, used_outputs)
 								  return core.ClosedJaxpr(new_jaxpr, jaxpr.consts), used_inputs
 								def dce_jaxpr_pjit_rule(used_outputs: List[bool], eqn: core.JaxprEqn
 								                        ) -> Tuple[List[bool], Optional[core.JaxprEqn]]:
 								  dced_jaxpr, used_inputs = _dce_jaxpr_pjit(
 								      eqn.params['jaxpr'], tuple(used_outputs))
 								  def keep_where(xs, keeps):
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    return tuple(x for x, keep in zip(xs, keeps) if keep)
-												Add dce_rules for pjit primitive so that remat can DCE through the pjit primitive and remove unused residuals

PiperOrigin-RevId: 504123801

											
										
										
											2023-01-23 17:31:33 -08:00
 								  eqn_params = eqn.params
 								  new_params = dict(
 								      eqn_params,
 								      jaxpr=dced_jaxpr,
 								      in_shardings=keep_where(eqn_params["in_shardings"], used_inputs),
 								      out_shardings=keep_where(eqn_params["out_shardings"], used_outputs),
 								      donated_invars=keep_where(eqn_params["donated_invars"], used_inputs),
 								  )
 								  if not any(used_inputs) and not any(used_outputs) and not dced_jaxpr.effects:
 								    return used_inputs, None
 								  else:
 								    new_eqn = core.new_jaxpr_eqn(
 								        [v for v, used in zip(eqn.invars, used_inputs) if used],
 								        [v for v, used in zip(eqn.outvars, used_outputs) if used],
 								        eqn.primitive, new_params, dced_jaxpr.effects, eqn.source_info)
 								    return used_inputs, new_eqn
 								pe.dce_rules[pjit_p] = dce_jaxpr_pjit_rule
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								def _check_resources_against_named_axes(what, aval, pos_axis_resources, named_axis_resources):
 								  pjit_resources = set(
 								      it.chain.from_iterable([d for d in pos_axis_resources if d is not None]))
 								  aval_resources = set(it.chain.from_iterable(
 								    named_axis_resources[a] for a in aval.named_shape))
 								  overlap = pjit_resources & aval_resources
 								  if overlap:
 								    raise JAXTypeError(
 								        f"{what} has an axis resources specification of "
 								        f"{pos_axis_resources.unsynced_user_spec(SpecSync.DIM_PERMUTE)} "
 								        f"that uses one or more mesh axes already used by xmap to partition "
 								        f"a named axis appearing in its named_shape (both use mesh axes "
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								        f"{mesh_lib.show_axes(overlap)})")
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def _resource_typing_pjit(avals, params, source_info, resource_env, named_axis_resources):
 								  jaxpr = params["jaxpr"]
 								  what = "pjit input"
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh, since `jit` will set the resource env to an empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  if (resource_env is not None and params['resource_env'] is not None and
 								      resource_env.physical_mesh != params['resource_env'].physical_mesh):
 								      raise RuntimeError("Changing the physical mesh is not allowed inside pjit.")
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  for aval, s in zip(jaxpr.in_avals, params['in_shardings']):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    if is_unspecified(s) or is_auto(s):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      continue
 								    elif hasattr(s, '_original_sharding') and hasattr(
 								        s._original_sharding, '_parsed_pspec'):
 								      parsed_pspec = s._original_sharding._parsed_pspec
 								    else:
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh, since `jit` will set the resource env to an empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								      if resource_env is not None:
 								        parsed_pspec = parse_flatten_op_sharding(
 								            s._op_sharding, resource_env.physical_mesh)[0]
 								      else:
 								        parsed_pspec = None
 								    if parsed_pspec is not None:
 								      _check_resources_against_named_axes(what, aval, parsed_pspec,
 								                                          named_axis_resources)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  pxla.resource_typecheck(
 								      jaxpr.jaxpr, resource_env, named_axis_resources,
 								      lambda: (f"a pjit'ed function {params['name']} "
 								               f"(pjit called at {source_info_util.summarize(source_info)})"))
 								  what = "pjit output"
 								  for aval, s in zip(jaxpr.out_avals, params['out_shardings']):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    if is_unspecified(s) or is_auto(s):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      continue
 								    elif hasattr(s, '_original_sharding') and hasattr(
 								        s._original_sharding, '_parsed_pspec'):
 								      parsed_pspec = s._original_sharding._parsed_pspec
 								    else:
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh, since `jit` will set the resource env to an empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								      if resource_env is not None:
 								        parsed_pspec = parse_flatten_op_sharding(
 								            s._op_sharding, resource_env.physical_mesh)[0]
 								      else:
 								        parsed_pspec = None
 								    if parsed_pspec is not None:
 								      _check_resources_against_named_axes(what, aval, parsed_pspec,
 								                                          named_axis_resources)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								pxla.custom_resource_typing_rules[pjit_p] = _resource_typing_pjit
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								def _pjit_pp_rule(eqn, context, settings):
 								  params = dict(eqn.params)
 								  del params['inline']
 								  if not any(params['donated_invars']):
 								    del params['donated_invars']
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if all(is_unspecified(s) for s in params['in_shardings']):
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								    del params['in_shardings']
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if all(is_unspecified(s) for s in params['out_shardings']):
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								    del params['out_shardings']
 								  if not params['keep_unused']:
 								    del params['keep_unused']
 								  if (params['resource_env'] is None or
 								      params['resource_env'].physical_mesh.empty):
 								    del params['resource_env']
 								  return core._pp_eqn(eqn.replace(params=params), context, settings)
 								core.pp_eqn_rules[pjit_p] = _pjit_pp_rule
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								# -------------------- with_sharding_constraint --------------------
-												Remove axis_resources from with_sharding_constraint since it has been 3 months since the deprecation as per the API deprecation policy.

PiperOrigin-RevId: 535687618

											
										
										
											2023-05-26 12:34:32 -07:00
+								def with_sharding_constraint(x, shardings):
-												Document jax.lax.with_sharding_constraint

											
										
										
											2023-04-26 10:19:04 -07:00
+								  """Mechanism to constrain the sharding of an Array inside a jitted computation
 								  This is a strict constraint for the GSPMD partitioner and not a hint. For examples
 								  of how to use this function, see `Distributed arrays and automatic parallelization`_.
 								  Args:
 								    x: PyTree of jax.Arrays which will have their shardings constrained
 								    shardings: PyTree of sharding specifications. Valid values are the same as for
 								      the ``in_shardings`` argument of :func:`jax.experimental.pjit`.
 								  Returns:
 								    x_with_shardings: PyTree of jax.Arrays with specified sharding constraints.
 								  .. _Distributed arrays and automatic parallelization: https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html
 								  """
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  x_flat, tree = tree_flatten(x)
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  user_shardings, _, _ = prepare_axis_resources(
-												Remove axis_resources from with_sharding_constraint since it has been 3 months since the deprecation as per the API deprecation policy.

PiperOrigin-RevId: 535687618

											
										
										
											2023-05-26 12:34:32 -07:00
+								      shardings, "shardings", allow_unconstrained_dims=True)
 								  del shardings
-												Change the `axis_resources` argument of `with_sharding_constraint` to `shardings` to match `pjit` and `jit`.

PiperOrigin-RevId: 509275107

											
										
										
											2023-02-13 10:53:21 -08:00
 								  user_shardings_flat = tuple(
 								      flatten_axes("with_sharding_constraint shardings", tree, user_shardings))
 								  del user_shardings
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								  resource_env = mesh_lib.thread_resources.env
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  mesh = resource_env.physical_mesh
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								  shardings_flat = [_create_sharding_for_array(mesh, a, 'shardings',
 								                                               'with_sharding_constraint')
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								                    for a in user_shardings_flat]
 								  unconstrained_dims = [get_unconstrained_dims(s)
 								                        if isinstance(s, NamedSharding) else {}
 								                        for s in shardings_flat]
-												Change the `axis_resources` argument of `with_sharding_constraint` to `shardings` to match `pjit` and `jit`.

PiperOrigin-RevId: 509275107

											
										
										
											2023-02-13 10:53:21 -08:00
+								  del user_shardings_flat
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								  pjit_check_aval_sharding(
 								      shardings_flat, x_flat, None, "with_sharding_constraint arguments",
 								      allow_uneven_sharding=True)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Replace `op_sharding_sharding` with `gspmd_sharding`. This is purely an internal change.

PiperOrigin-RevId: 510562354

											
										
										
											2023-02-17 17:52:37 -08:00
+								  outs = [sharding_constraint_p.bind(xf, sharding=to_gspmd_sharding(i, xf.ndim),
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                                     resource_env=resource_env,
 								                                     unconstrained_dims=ud)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								          for xf, i, ud in zip(x_flat, shardings_flat, unconstrained_dims)]
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  return tree_unflatten(tree, outs)
-												Add impl rule for with_sharding_constraint so that users can use their functions with and without a jit.

The semantics of eager wsc is the same as within a jit i.e. it will reshard to the given sharding only if the devices are the same and in the same order.

Eager wsc won't work as expected with AD transpose because there is no `src` argument to reverse the shardings when transposing; it was decided that this is fine for now. jax.device_put should be the API to use for that.

PiperOrigin-RevId: 532858670

											
										
										
											2023-05-17 11:49:31 -07:00
+								def _identity_fn(x): return x
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								def _sharding_constraint_impl(x, sharding, resource_env, unconstrained_dims):
-												Add impl rule for with_sharding_constraint so that users can use their functions with and without a jit.

The semantics of eager wsc is the same as within a jit i.e. it will reshard to the given sharding only if the devices are the same and in the same order.

Eager wsc won't work as expected with AD transpose because there is no `src` argument to reverse the shardings when transposing; it was decided that this is fine for now. jax.device_put should be the API to use for that.

PiperOrigin-RevId: 532858670

											
										
										
											2023-05-17 11:49:31 -07:00
+								  if hasattr(x, 'sharding') and x.sharding.is_equivalent_to(sharding, x.ndim):
 								    return x
 								  # Run a jit here to raise good errors when device assignments don't match.
 								  return api.jit(_identity_fn, out_shardings=sharding)(x)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								sharding_constraint_p = core.Primitive("sharding_constraint")
 								sharding_constraint_p.def_impl(_sharding_constraint_impl)
 								sharding_constraint_p.def_abstract_eval(lambda x, **_: x)
 								ad.deflinear2(sharding_constraint_p,
 								              lambda ct, _, **params: (sharding_constraint_p.bind(ct, **params),))
 								def _sharding_constraint_hlo_lowering(ctx, x_node, *, sharding,
 								                                      resource_env, unconstrained_dims):
 								  """Lowering rule for `sharding_constraint_p`.

 								  Wraps `x_node` in a sharding op built by `mlir.wrap_with_sharding_op`,
 								  using the op sharding derived from `sharding` and passing
 								  `unconstrained_dims` as the unspecified dimensions.

 								  Returns:
 								    A one-element list holding the wrapped value.
 								  """
 								  (operand_aval,) = ctx.avals_in
 								  (result_aval,) = ctx.avals_out
 								  axis_context = ctx.module_context.axis_context
 								  # The axis context (and its manual axes) is only used with xmap, and xmap
 								  # only works with NamedSharding. So rebuild the GSPMDSharding through a
 								  # NamedSharding on the physical mesh, which converts it back with the
 								  # added special axes.
 								  if isinstance(axis_context, sharding_impls.SPMDAxisContext):
 								    physical_mesh = resource_env.physical_mesh
 								    flat_pspecs = parse_flatten_op_sharding(
 								        sharding._op_sharding, physical_mesh)
 								    named = NamedSharding._from_parsed_pspec(physical_mesh, flat_pspecs[0])
 								    device_assignment = named._device_assignment
 								    op_sharding = named._to_xla_op_sharding(
 								        operand_aval.ndim, axis_ctx=axis_context)
 								    sharding = GSPMDSharding(device_assignment, op_sharding)
 								  constrained = mlir.wrap_with_sharding_op(
 								      ctx, x_node, result_aval,
 								      sharding._to_xla_op_sharding(operand_aval.ndim),
 								      unspecified_dims=unconstrained_dims)
 								  return [constrained]
 								# Register the MLIR lowering rule for the sharding_constraint primitive.
 								mlir.register_lowering(sharding_constraint_p,
 								                       _sharding_constraint_hlo_lowering)
 								def _sharding_constraint_batcher(insert_axis, spmd_axis_name, axis_size,
 								                                 axis_name, main_type, vals_in, dims_in,
 								                                 sharding, resource_env, unconstrained_dims):
 								  """Batching rule for `sharding_constraint_p`.

 								  Re-binds the primitive on the batched operand with a sharding extended
 								  for the batch dimension by `_pjit_batcher_for_sharding`.

 								  Args:
 								    insert_axis: if true, the batch dimension is partitioned over
 								      `(axis_name,)`; otherwise `spmd_axis_name` is used as-is.
 								    spmd_axis_name: partitioning for the batch dimension when `insert_axis`
 								      is false; `None` leaves that dimension unconstrained.
 								    axis_size: unused here.
 								    axis_name: name used when `insert_axis` is true.
 								    main_type: unused here.
 								    vals_in: single-element sequence holding the batched operand.
 								    dims_in: single-element sequence holding the operand's batch dimension.
 								    sharding: sharding param of the unbatched primitive.
 								    resource_env: resource environment; its `physical_mesh` is forwarded.
 								    unconstrained_dims: unconstrained dimensions of the unbatched operand.

 								  Returns:
 								    A pair of the constrained batched value and its batch dimension.
 								  """
 								  (operand,) = vals_in
 								  (batch_dim,) = dims_in
 								  # None means unconstrained in ParsedPartitionSpec
 								  if insert_axis:
 								    new_parts = (axis_name,)
 								  else:
 								    new_parts = spmd_axis_name
 								  # Dimensions at or after the batch dimension move up by one.
 								  shifted_dims = set()
 								  for ud in unconstrained_dims:
 								    shifted_dims.add(ud + 1 if batch_dim <= ud else ud)
 								  if new_parts is None:
 								    shifted_dims.add(batch_dim)
 								  batched_sharding = _pjit_batcher_for_sharding(
 								      sharding, batch_dim, new_parts, resource_env.physical_mesh,
 								      operand.ndim)
 								  result = sharding_constraint_p.bind(
 								      operand,
 								      sharding=batched_sharding,
 								      resource_env=resource_env,
 								      unconstrained_dims=shifted_dims)
 								  return result, batch_dim
 								# Batcher registrations. Each `partial` pre-binds the leading positional
 								# arguments of `_sharding_constraint_batcher`.
 								# Here only insert_axis=False is bound; spmd_axis_name and the rest stay
 								# unbound.
 								batching.spmd_axis_primitive_batchers[sharding_constraint_p] = partial(
 								    _sharding_constraint_batcher, False)
 								# insert_axis=False and spmd_axis_name=None are bound.
 								batching.axis_primitive_batchers[sharding_constraint_p] = partial(
 								    _sharding_constraint_batcher, False, None)
 								# insert_axis=True and spmd_axis_name=None are bound.
 								pxla.spmd_primitive_batchers[sharding_constraint_p] = partial(
 								    _sharding_constraint_batcher, True, None)
 								def _resource_typing_sharding_constraint(avals, params, source_info,
 								                                         resource_env, named_axis_resources):
 								  """Resource typing rule for `sharding_constraint_p`.

 								  Obtains the parsed partition spec of the constraint and validates it
 								  with `_check_resources_against_named_axes`. The spec is taken from the
 								  sharding's `_original_sharding` when that attribute exists; otherwise it
 								  is parsed from the op sharding on `resource_env.physical_mesh`.
 								  `source_info` is not used.
 								  """
 								  (operand_aval,) = avals
 								  constraint = params['sharding']
 								  if not hasattr(constraint, '_original_sharding'):
 								    flat_pspecs = parse_flatten_op_sharding(
 								        constraint._op_sharding, resource_env.physical_mesh)
 								    parsed_pspec = flat_pspecs[0]
 								  else:
 								    parsed_pspec = constraint._original_sharding._parsed_pspec
 								  _check_resources_against_named_axes(
 								      "with_sharding_constraint input", operand_aval, parsed_pspec,
 								      named_axis_resources)
 								# Register the resource typing rule for the sharding_constraint primitive.
 								pxla.custom_resource_typing_rules[sharding_constraint_p] = \
 								    _resource_typing_sharding_constraint
 								# -------------------- helpers --------------------
-												Create same Sharding objects wherever possible to get maximum cache hits

PiperOrigin-RevId: 524116574

											
										
										
											2023-04-13 15:18:56 -07:00
+								@lru_cache(maxsize=2048)
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Before if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								def to_gspmd_sharding(s: XLACompatibleSharding, ndim: int,
 								                      device_or_backend_set: bool = False) -> GSPMDSharding:
-												Rename `jax.sharding.OpShardingSharding` to `jax.sharding.GSPMDSharding`. `jax.sharding.OpShardingSharding` will be removed in 3 months from Feb 17, 2023.

PiperOrigin-RevId: 510556189

											
										
										
											2023-02-17 17:10:27 -08:00
+								  if isinstance(s, GSPMDSharding):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return s
-												Cache the entire to_gspmd_sharding function to maximize cache hits even for GSPMDShardings

PiperOrigin-RevId: 524951951

											
										
										
											2023-04-17 14:32:41 -07:00
+								  gs = GSPMDSharding(s._device_assignment, s._to_xla_op_sharding(ndim))
 								  gs._original_sharding = s
-												Fix the cache on `to_gspmd_sharding` to depend on if device/backend is set on pjit/jit.

Before if a SingleDeviceSharding went via `to_gspmd_sharding` and then the same SingleDeviceSharding (created when device/backend is set) went via `to_gspmd_sharding`, we would hit the cache and return the first SingleDeviceSharding which didn't have the dynamic attribute on it.

This would eventually cause errors down the stack. The fix is to explicitly thread this argument through all the caches so we miss them and create the correct sharding.

PiperOrigin-RevId: 530712918

											
										
										
											2023-05-09 14:23:49 -07:00
+								  if device_or_backend_set:
 								    gs._original_sharding._device_backend = device_or_backend_set
-												Cache the entire to_gspmd_sharding function to maximize cache hits even for GSPMDShardings

PiperOrigin-RevId: 524951951

											
										
										
											2023-04-17 14:32:41 -07:00
+								  return gs
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def get_unconstrained_dims(sharding: NamedSharding):
 								  """Returns the set of dimension indices whose parsed spec entry is None."""
 								  parsed_pspec = sharding._parsed_pspec
 								  assert parsed_pspec is not None
 								  unconstrained = set()
 								  for dim, axes in enumerate(parsed_pspec):
 								    if axes is None:
 								      unconstrained.add(dim)
 								  return unconstrained
 								def _fast_path_get_device_assignment(
 								    shardings: Iterable[PjitSharding]) -> Optional[XLADeviceAssignment]:
 								  """Returns the device assignment of the first specified sharding.

 								  Unspecified shardings are skipped. For an auto sharding the mesh's flat
 								  device tuple is returned; otherwise the sharding's `_device_assignment`.
 								  Returns None when every sharding is unspecified (or there are none).
 								  """
 								  for candidate in shardings:
 								    if not is_unspecified(candidate):
 								      if is_auto(candidate):
 								        return candidate.mesh._flat_devices_tuple  # type: ignore
 								      return candidate._device_assignment  # type: ignore
 								  return None
 								def _get_partition_spec(ppspec: Sequence[ParsedPartitionSpec]) -> Sequence[PartitionSpec]:
 								  """Maps each parsed spec in `ppspec` through `get_single_pspec`."""
 								  return list(map(get_single_pspec, ppspec))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def _get_op_sharding_from_executable(
 								    executable) -> Tuple[Sequence[xc.OpSharding], Sequence[xc.OpSharding]]:
 								  """Returns the (parameter, output) op shardings reported by `executable`.

 								  Either element is an empty list when the executable reports None for it.
 								  """
 								  in_op_shardings = executable.get_parameter_shardings()
 								  if in_op_shardings is None:
 								    in_op_shardings = []
 								  out_op_shardings = executable.get_output_shardings()
 								  if out_op_shardings is None:
 								    out_op_shardings = []
 								  return in_op_shardings, out_op_shardings
 								def _get_ppspec_from_executable(executable, mesh) -> Tuple[Sequence[ParsedPartitionSpec], Sequence[ParsedPartitionSpec]]:
 								  """Reads parsed partition specs from the executable's first HLO module.

 								  Args:
 								    executable: object exposing `hlo_modules()`; only the first module is
 								      consulted.
 								    mesh: mesh passed to `parse_flatten_op_sharding` for every op sharding.

 								  Returns:
 								    A pair `(in_ppspec, out_ppspec)`: the flattened parsed specs of all
 								    parameter shardings (in order), and the parsed specs of the output
 								    sharding.
 								  """
 								  # Call `hlo_modules()` once and reuse the first module for both reads,
 								  # instead of calling it again for the output sharding.
 								  hlo_module = executable.hlo_modules()[0]
 								  input_op_shardings: Sequence[xc.OpSharding] = hlo_module.spmd_parameters_shardings
 								  output_op_sharding: xc.OpSharding = hlo_module.spmd_output_sharding
 								  in_ppspec: List[ParsedPartitionSpec] = [
 								      parsed
 								      for s in input_op_shardings
 								      for parsed in parse_flatten_op_sharding(s, mesh)]
 								  out_ppspec = parse_flatten_op_sharding(output_op_sharding, mesh)
 								  return in_ppspec, out_ppspec
 								def _get_pspec_from_executable(
 								    executable, mesh: pxla.Mesh
 								) -> Tuple[Tuple[PartitionSpec, ...], Tuple[PartitionSpec, ...]]:
 								  """Returns `(input_specs, output_specs)` tuples for `executable`.

 								  The parsed specs come from `_get_ppspec_from_executable` and are converted
 								  with `_get_partition_spec`.
 								  """
 								  parsed_in, parsed_out = _get_ppspec_from_executable(executable, mesh)
 								  out_specs = tuple(_get_partition_spec(parsed_out))
 								  in_specs = tuple(_get_partition_spec(parsed_in))
 								  return in_specs, out_specs