rocm_jax/jax/_src/pjit.py

# Copyright 2021 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from collections import defaultdict
from collections.abc import Callable, Sequence, Iterable
import dataclasses
from functools import partial
import inspect
import logging
import operator as op
import weakref
from typing import NamedTuple, Any, Union, cast
import threading
import warnings

import numpy as np

from jax._src import api
from jax._src import ad_util
from jax._src import api_util
from jax._src import config
from jax._src import core
from jax._src import dispatch
from jax._src import dtypes
from jax._src import linear_util as lu
from jax._src import mesh as mesh_lib
from jax._src import op_shardings
from jax._src import profiler
from jax._src import sharding_impls
from jax._src import source_info_util
from jax._src import stages
from jax._src import traceback_util
from jax._src import tree_util
from jax._src import util
from jax._src import xla_bridge as xb
from jax._src.api_util import (
    argnums_partial_except, flatten_axes, flatten_fun, flatten_fun_nokwargs,
    donation_vector, shaped_abstractify, check_callable, resolve_argnums,
    argnames_partial_except, debug_info, result_paths, jaxpr_debug_info,
    hoist_obj_attrs)
from jax._src.interpreters import partial_eval as pe
from jax._src.partition_spec import PartitionSpec
from jax._src.interpreters import xla
from jax._src.interpreters import ad
from jax._src.interpreters import batching
from jax._src.interpreters import mlir
from jax._src.interpreters import pxla
from jax._src.lib.mlir import ir
from jax._src.lib.mlir.dialects import func as func_dialect
from jax._src.lib import jax_jit
from jax._src.lib import xla_client as xc
from jax._src.lib import xla_extension_version
from jax._src import sharding
from jax._src.mesh import AbstractMesh
from jax._src.sharding_impls import (
    NamedSharding, GSPMDSharding,
    SingleDeviceSharding, PmapSharding, AUTO, UNSPECIFIED, UnspecifiedValue,
    ParsedPartitionSpec, get_single_pspec, is_unspecified,
    is_unspecified_or_auto, prepare_axis_resources, parse_flatten_op_sharding)
from jax._src.layout import Layout, DeviceLocalLayout, AutoLayout
from jax._src.state import discharge as state_discharge, RefEffect, AbstractRef
from jax._src.traceback_util import api_boundary
from jax._src.tree_util import (
    tree_flatten, tree_unflatten, treedef_is_leaf, tree_structure, tree_leaves,
    treedef_children, broadcast_prefix, all_leaves, prefix_errors, keystr,
    PyTreeDef, none_leaf_registry as none_lr)
from jax._src.util import (
    HashableFunction, safe_map, safe_zip, wraps,
    distributed_debug_log, split_list, weakref_lru_cache,
    merge_lists, subs_list, fun_name, fun_qual_name)

# Shadow the builtin `map`/`zip` with `safe_map`/`safe_zip` (imported from
# jax._src.util above) for the rest of this module. The builtins remain
# reachable under the names `unsafe_map`/`unsafe_zip`.
map, unsafe_map = safe_map, map
zip, unsafe_zip = safe_zip, zip

# Register this file with traceback_util's exclusion list.
# NOTE(review): presumably this hides frames from this file in user-facing
# tracebacks -- confirm against traceback_util.
traceback_util.register_exclusion(__file__)

# Type aliases for the sharding values handled in this module. The "Minus
# Unspecified" variants are the same unions without `UnspecifiedValue`.
PjitSharding = Union[GSPMDSharding, UnspecifiedValue, AUTO]
PjitShardingMinusUnspecified = Union[GSPMDSharding, AUTO]
MeshSharding = Union[NamedSharding, UnspecifiedValue, AUTO]
MeshShardingMinusUnspecified = Union[NamedSharding, AUTO]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def _find_arg_mismatch(arg_list, fails, fun_name):
  mismatched_args_msg = []
  def mismatch(err):
    for name, inp_da, aval in arg_list:
      if err.m_type == pxla.MismatchType.ARG_SHARDING and err.da == inp_da:
        mismatched_args_msg.append(
            f"argument {name} of {fun_name} with shape {aval.str_short()} and "
            f"{err._dev_ids_plat_str}")
        break
  first_err, second_err = fails
  mismatch(first_err)
  mismatch(second_err)
  return mismatched_args_msg


def _device_assignment_mismatch_error(fun_name, fails, args_flat, api_name,
                                      arg_names):
  """Builds the error text for a device-assignment mismatch.

  Args:
    fun_name: name of the jitted function, used in the message.
    fails: a pair of mismatch records (each exposing ``m_type`` and ``_str``).
    args_flat: the flat arguments of the call.
    api_name: ``'jit'`` or ``'pjit'``; interpolated into the message and passed
      to each record's ``_str``.
    arg_names: one name per entry of ``args_flat``, or ``None`` to use empty
      names.

  Returns:
    The message string. It names the offending arguments when they can be
    identified and otherwise falls back to the records' own descriptions.
  """
  names = [''] * len(args_flat) if arg_names is None else arg_names

  def device_assignment_of(arg):
    # Arguments without a (non-None) `sharding` have no device assignment.
    sharding = getattr(arg, 'sharding', None)
    return None if sharding is None else sharding._device_assignment

  described_args = [
      (name, device_assignment_of(arg), shaped_abstractify(arg))
      for arg, name in zip(args_flat, names)]
  arg_msgs = _find_arg_mismatch(described_args, fails, fun_name)

  if len(arg_msgs) == 2:
    first_msg, second_msg = arg_msgs  # pytype: disable=bad-unpacking
    extra_msg = f" Got {first_msg} and {second_msg}"
  elif len(arg_msgs) == 1:
    first, second = fails
    # Describe whichever record is not already covered by the ARG_SHARDING
    # message found above.
    other = second if first.m_type == pxla.MismatchType.ARG_SHARDING else first
    extra_msg = f" Got {arg_msgs[0]} and{other._str(api_name)}"
  else:
    first, second = fails
    extra_msg = f" Got{first._str(api_name)} and{second._str(api_name)}"
  return f"Received incompatible devices for {api_name}ted computation.{extra_msg}"


class PjitInfo(NamedTuple):
  """Things that we know about a jit instance before it is called.

  In other words, this structure contains arguments to jit()/pjit(),
  preprocessed and validated.

  Instances hash and compare (``==`` and ``!=``) by identity rather than by
  field contents, so two separately built ``PjitInfo`` objects are always
  distinct cache keys even if every field is equal.
  """
  # Source description and signature of the jitted function, if available.
  fun_sourceinfo: str | None
  fun_signature: inspect.Signature | None
  # Shardings, as specified by the user. These can either be UNSPECIFIED or they
  # can be a tree (prefix) of shardings or None.
  user_specified_in_shardings: bool
  in_shardings_treedef: PyTreeDef
  in_shardings_leaves: tuple[Any, ...]
  out_shardings_treedef: PyTreeDef
  out_shardings_leaves: tuple[Any, ...]
  # Layouts, stored flattened in the same treedef/leaves form as the shardings.
  in_layouts_treedef: PyTreeDef
  in_layouts_leaves: tuple[Any, ...]
  out_layouts_treedef: PyTreeDef
  out_layouts_leaves: tuple[Any, ...]
  static_argnums: tuple[int, ...]
  static_argnames: tuple[str, ...]
  donate_argnums: tuple[int, ...]
  donate_argnames: tuple[str, ...]
  device: xc.Device | None
  backend: str | None
  keep_unused: bool
  inline: bool
  abstracted_axes: Any | None
  use_resource_env: bool  # False for jit, True for pjit

  # Hash and compare PjitInfo by identity when used as a cache key.
  def __hash__(self):
    return id(self)

  def __eq__(self, other):
    return self is other

  def __ne__(self, other):
    # tuple provides its own content-based __ne__, which Python does not derive
    # from the overridden __eq__. Without this override `a != b` would compare
    # fields and could disagree with the identity-based `a == b`.
    return self is not other


def _python_pjit_helper(fun, jit_info, *args, **kwargs):
  """Runs one call of a jitted function through ``pjit_p.bind``.

  Args:
    fun: the user function being jitted.
    jit_info: the ``PjitInfo`` describing how ``fun`` was jitted.
    *args: positional arguments of this call.
    **kwargs: keyword arguments of this call.

  Returns:
    A tuple ``(outs, out_flat, out_tree, args_flat, jaxpr, attrs_tracked)``:
    the unflattened outputs, the flat outputs (with any attr state stripped),
    the output tree, the flat arguments that were bound (with any attr state
    prepended), the jaxpr from the inferred params, and the tracked attrs.

  Raises:
    ValueError: if binding raises ``pxla.DeviceAssignmentMismatchError``.
    TypeError: if binding raises ``xla.InvalidInputException``.
  """
  p, args_flat = _infer_params(fun, jit_info, args, kwargs)

  for arg in args_flat:
    dispatch.check_arg(arg)

  # Current values of tracked attributes are passed as leading arguments.
  if p.attrs_tracked:
    init_states = _get_states(p.attrs_tracked)
    args_flat = [*init_states, *args_flat]

  try:
    out_flat = pjit_p.bind(*args_flat, **p.params)
  except pxla.DeviceAssignmentMismatchError as e:
    fails, = e.args
    # A missing resource_env means the call came through jit rather than pjit.
    api_name = 'jit' if p.params['resource_env'] is None else 'pjit'
    fun_name = getattr(fun, '__qualname__', getattr(fun, '__name__', str(fun)))
    msg = _device_assignment_mismatch_error(
        fun_name, fails, args_flat, api_name, p.arg_names)
    # `from None` suppresses the original exception from the traceback chain.
    raise ValueError(msg) from None
  except xla.InvalidInputException as e:
    arg_names = [''] * len(args_flat) if p.arg_names is None else p.arg_names
    # Run canonicalization again to figure out which arg failed.
    if p.params['jaxpr'].consts:
      # With jaxpr constants present no per-argument search is attempted; the
      # original message is re-raised as a TypeError.
      raise TypeError(e.args[0]) from e
    else:
      for arg, name, aval in zip(args_flat, arg_names, p.in_avals):
        try:
          xla.canonicalize_dtype(arg)
        except xla.InvalidInputException as _:
          # Reraise as TypeError with the new message.
          raise TypeError(
              f"Argument '{name}' of shape {aval.str_short()} of type"
              f' {type(arg)} is not a valid JAX type.') from e
      # Some argument must have failed canonicalization above.
      raise AssertionError("Unreachable") from e

  # The leading outputs are the final values of the tracked attributes; write
  # them back and keep only the user-visible outputs.
  if p.attrs_tracked:
    num_states_out = sum(end_tree.num_leaves for _, end_tree, _ in p.attrs_tracked)
    final_states, out_flat = split_list(out_flat, [num_states_out])
    _set_states(p.attrs_tracked, final_states)

  outs = tree_unflatten(p.out_tree, out_flat)
  return outs, out_flat, p.out_tree, args_flat, p.params['jaxpr'], p.attrs_tracked


def _set_states(attrs_tracked, vals):
  """Writes flat state values back onto the tracked object attributes.

  Args:
    attrs_tracked: sequence of ``(_, treedef, (obj, attr))`` entries; the
      middle treedef gives the structure of the value to store on
      ``obj.attr``.
    vals: flat list of leaves for all tracked attributes, concatenated in the
      order of ``attrs_tracked``.
  """
  from jax.experimental.attrs import jax_setattr
  # Split points are the leaf counts of every entry except the last one.
  leaf_counts = [tree.num_leaves for _, tree, _ in attrs_tracked[:-1]]
  per_attr_leaves = split_list(vals, leaf_counts)
  for (_, out_tree, (obj, attr)), leaves in zip(attrs_tracked, per_attr_leaves):
    jax_setattr(obj, attr, tree_unflatten(out_tree, leaves))

def _get_states(attrs_tracked):
  """Reads the tracked object attributes and returns their leaves, flattened.

  Args:
    attrs_tracked: sequence of ``(treedef, _, (obj, attr))`` entries; the
      first treedef is the structure ``obj.attr`` is expected to have.

  Returns:
    A flat list with the leaves of every tracked attribute, concatenated in
    the order of ``attrs_tracked``.
  """
  from jax.experimental.attrs import jax_getattr
  flat_states = []
  for expected_tree, _, (obj, attr) in attrs_tracked:
    leaves, actual_tree = tree_flatten(jax_getattr(obj, attr))
    # The attribute's current structure must match the recorded one.
    assert expected_tree == actual_tree
    flat_states += leaves
  return flat_states

def _need_to_rebuild_with_fdo(pgle_profiler):
  return (pgle_profiler is not None and pgle_profiler.is_enabled()
          and not pgle_profiler.is_fdo_consumed())

def _get_fastpath_data(
    executable, out_tree, args_flat, out_flat, attrs_tracked, effects,
    consts, abstracted_axes, pgle_profiler
) -> pxla.MeshExecutableFastpathData | None:
  """Builds the fast-path dispatch data for a call, if it is eligible.

  Args:
    executable: the executable recorded for the call, or None.
    out_tree: output tree of the call.
    args_flat: flat arguments that were passed to the computation.
    out_flat: flat outputs of the call.
    attrs_tracked: tracked attribute state; must be empty for the fast path.
    effects: effects of the jaxpr; any ``RefEffect`` disables the fast path.
    consts: jaxpr constants (only inspected for PRNG keys when key-reuse
      checking is enabled).
    abstracted_axes: must be None for the fast path.
    pgle_profiler: profiler for the call, or None.

  Returns:
    A ``pxla.MeshExecutableFastpathData`` when every eligibility condition
    below holds, otherwise None.
  """
  out_reflattened, out_tree = pxla.reflatten_outputs_for_dispatch(out_tree, out_flat)

  # The conditions are evaluated in order and short-circuit, so the attribute
  # accesses on `executable` only happen once it is known to be a
  # MeshExecutable whose `unsafe_call` is an ExecuteReplicated.
  use_fastpath = (
      executable is not None
      and isinstance(executable, pxla.MeshExecutable)
      and isinstance(executable.unsafe_call, pxla.ExecuteReplicated)
      # No effects in computation
      and not executable.unsafe_call.ordered_effects
      and not executable.unsafe_call.has_unordered_effects
      and not executable.unsafe_call.has_host_callbacks
      and all(isinstance(x, xc.ArrayImpl) for x in out_reflattened)
      and abstracted_axes is None
      # no attr state effects
      and not attrs_tracked
      # no ref state effects
      and not any(isinstance(e, RefEffect) for e in effects)
      # no prng reuse checking
      and not (config.debug_key_reuse.value and any(
        hasattr(arg, 'dtype') and dtypes.issubdtype(arg.dtype, dtypes.prng_key)
        for arg in (*args_flat, *out_flat, *consts)))
      and not _need_to_rebuild_with_fdo(pgle_profiler)
      )

  if use_fastpath:
    out_avals = [o.aval for o in out_reflattened]
    out_committed = [o._committed for o in out_reflattened]
    # One flag per flat argument: True when its index is in the executable's
    # set of kept variables.
    kept_var_bitvec = [i in executable._kept_var_idx
                       for i in range(len(args_flat))]
    # Inputs with an extended dtype use their physical sharding; tokens and
    # all other inputs keep the executable's sharding unchanged.
    in_shardings = [
        sharding_impls.physical_sharding(a, s)
        if a is not core.abstract_token and dtypes.issubdtype(a.dtype, dtypes.extended)
        else s
        for s, a in zip(executable._in_shardings, executable.in_avals)
    ]
    fastpath_data = pxla.MeshExecutableFastpathData(
        executable.xla_executable, out_tree, in_shardings,
        executable._out_shardings, out_avals, out_committed, kept_var_bitvec,
        executable._dispatch_in_layouts)
  else:
    fastpath_data = None
  return fastpath_data


class _MostRecentPjitCallExecutable(threading.local):
  def __init__(self):
    self.weak_key_dict = weakref.WeakKeyDictionary()
    self.weak_pgle_profiler_dict = weakref.WeakKeyDictionary()

_most_recent_pjit_call_executable = _MostRecentPjitCallExecutable()


def _read_most_recent_pjit_call_executable(jaxpr):
  return _most_recent_pjit_call_executable.weak_key_dict.get(jaxpr, None)


def _read_pgle_profiler(jaxpr):
  return _most_recent_pjit_call_executable.weak_pgle_profiler_dict.get(jaxpr, None)

def _cpp_pjit_evict_fn(self):
  """Clears the caches associated with a C++ jitted function object.

  Installed as ``clear_cache`` on the type of the object returned by
  ``_cpp_pjit``. It clears that object's own cache, evicts the wrapped
  function from the ``_create_pjit_jaxpr`` cache, and clears the whole
  ``_infer_params_cached`` cache (for every function, not just this one).
  """
  self._clear_cache()
  _create_pjit_jaxpr.evict_function(self._fun)  # pytype: disable=attribute-error
  _infer_params_cached.cache_clear()


# The capacity is doubled here from the default of 4096 because
# _pjit_call_impl also has a C++ dispatch path, which would double the number
# of entries in the global shared cache.
# This cache is only used for jits that specify nothing but `fun`, for
# example: jax.jit(f)
_cpp_pjit_cache_fun_only = xc._xla.PjitFunctionCache(capacity=8192)

# This cache is used for jits where arguments other than `fun` are given, for
# example: jax.jit(f, donate_argnums=...) or jax.jit(f, out_shardings=...).
# A separate cache is used because the many jitted functions inside JAX itself
# could fill a shared cache very quickly and evict user functions such as
# train_step.
_cpp_pjit_cache_explicit_attributes = xc._xla.PjitFunctionCache(capacity=8192)


# Two generations of the C++ pjit binding are supported; which helpers get
# defined depends on the installed xla_extension_version.
if xla_extension_version < 286:
  def _get_cpp_global_cache(pjit_has_explicit_sharding):
    """Returns the C++ cache to use for a jitted function.

    Functions with explicit shardings/layouts/device/backend get a fresh,
    private ``PjitFunctionCache``; all others share the fun-only cache.
    """
    return (xc._xla.PjitFunctionCache() if pjit_has_explicit_sharding
            else _cpp_pjit_cache_fun_only)

  def _pjit_explicit_sharding_and_layout(
    in_shardings_flat, out_shardings_flat, in_layouts_flat, out_layouts_flat,
    device, backend) -> bool:
    """Returns True if any device, backend, sharding or layout was specified."""
    if device is not None or backend is not None:
      return True
    for shardings in (in_shardings_flat, out_shardings_flat):
      if any(not is_unspecified(s) for s in shardings):
        return True
    for layouts in (in_layouts_flat, out_layouts_flat):
      if any(l is not None for l in layouts):
        return True
    return False
else:
  def _get_cpp_global_cache(contains_explicit_attributes: bool):  # type: ignore
    """Returns the shared C++ cache matching the kind of jitted function.

    Functions with explicit attributes use the explicit-attributes cache; all
    others share the fun-only cache.
    """
    return (_cpp_pjit_cache_explicit_attributes if contains_explicit_attributes
            else _cpp_pjit_cache_fun_only)


def _cpp_pjit(fun: Callable, jit_info: PjitInfo):
  """Builds the C++-dispatched jitted callable for ``fun``.

  Args:
    fun: the user function to jit.
    jit_info: the preprocessed jit arguments for ``fun``.

  Returns:
    The object produced by ``xc._xla.pjit``, wrapped with ``wraps(fun)``, with
    ``_fun`` set to ``fun`` and ``clear_cache`` installed on its type.
  """

  @api_boundary
  def cache_miss(*args, **kwargs):
    # Python fallback handed to the C++ binding. Returns the outputs, optional
    # fast-path data for later calls, and whether a rebuild with FDO is still
    # pending.
    if config.no_tracing.value:
      raise RuntimeError(f"re-tracing function {jit_info.fun_sourceinfo} for "
                         "`jit`, but 'no_tracing' is set")
    outs, out_flat, out_tree, args_flat, jaxpr, attrs_tracked = _python_pjit_helper(
        fun, jit_info, *args, **kwargs)
    executable = _read_most_recent_pjit_call_executable(jaxpr)
    pgle_profiler = _read_pgle_profiler(jaxpr)
    maybe_fastpath_data = _get_fastpath_data(
        executable, out_tree, args_flat, out_flat, attrs_tracked, jaxpr.effects,
        jaxpr.consts, jit_info.abstracted_axes,
        pgle_profiler)

    return outs, maybe_fastpath_data, _need_to_rebuild_with_fdo(pgle_profiler)

  # The signature of xc._xla.pjit differs between extension versions: from 286
  # on it takes a JitGlobalCppCacheKeys object, before that the donate_argnums.
  if xla_extension_version >= 286:
    cache_key = pxla.JitGlobalCppCacheKeys(
        donate_argnums=jit_info.donate_argnums,
        donate_argnames=jit_info.donate_argnames,
        device=jit_info.device, backend=jit_info.backend,
        in_shardings_treedef=jit_info.in_shardings_treedef,
        in_shardings_leaves=jit_info.in_shardings_leaves,
        out_shardings_treedef=jit_info.out_shardings_treedef,
        out_shardings_leaves=jit_info.out_shardings_leaves,
        in_layouts_treedef=jit_info.in_layouts_treedef,
        in_layouts_leaves=jit_info.in_layouts_leaves,
        out_layouts_treedef=jit_info.out_layouts_treedef,
        out_layouts_leaves=jit_info.out_layouts_leaves,
        use_resource_env=jit_info.use_resource_env)
    cpp_pjit_f = xc._xla.pjit(
        fun_name(fun), fun, cache_miss, jit_info.static_argnums,
        jit_info.static_argnames, cache_key, tree_util.dispatch_registry,  # type: ignore
        pxla.cc_shard_arg,
        _get_cpp_global_cache(cache_key.contains_explicit_attributes))
  else:
    has_explicit_sharding = _pjit_explicit_sharding_and_layout(
        jit_info.in_shardings_leaves, jit_info.out_shardings_leaves,
        jit_info.in_layouts_leaves, jit_info.out_layouts_leaves,
        jit_info.device, jit_info.backend)
    cpp_pjit_f = xc._xla.pjit(
        fun_name(fun), fun, cache_miss, jit_info.static_argnums,
        jit_info.static_argnames, jit_info.donate_argnums,
        tree_util.dispatch_registry, pxla.cc_shard_arg,
        _get_cpp_global_cache(has_explicit_sharding))

  cpp_pjitted_f = wraps(fun)(cpp_pjit_f)
  # `_fun` is read back by `_cpp_pjit_evict_fn`.
  cpp_pjitted_f._fun = fun
  # NOTE: this assigns on the *type*, so it applies to every object of that
  # type, not only to this instance.
  type(cpp_pjitted_f).clear_cache = _cpp_pjit_evict_fn
  return cpp_pjitted_f


def _split_layout_and_sharding(entries):
  """Separates a tree of layout/sharding entries into two parallel trees.

  Args:
    entries: a pytree whose leaves are ``Layout`` objects, other sharding-like
      values, or None (None is kept as a leaf).

  Returns:
    A pair ``(layouts, shardings)`` of trees with the same structure as
    ``entries``. For a ``Layout`` leaf the two trees hold its
    ``device_local_layout`` and ``sharding``; for any other leaf the layout is
    None and the sharding is the leaf itself.

  Raises:
    ValueError: if a leaf is a bare ``DeviceLocalLayout`` or ``AutoLayout``.
  """
  leaves, structure = tree_flatten(entries, is_leaf=lambda x: x is None)

  layouts = []
  shardings = []
  for entry in leaves:
    if isinstance(entry, Layout):
      layout, sharding = entry.device_local_layout, entry.sharding
    elif isinstance(entry, (DeviceLocalLayout, AutoLayout)):
      raise ValueError(
          '`jax.jit` does not accept device-local layouts directly. Create '
          'a `Layout` instance wrapping this device-local layout and pass '
          f'that to `jit` instead. Got {entry}')
    else:
      layout, sharding = None, entry
    # Both lists grow together here, so they always have equal length.
    layouts.append(layout)
    shardings.append(sharding)

  return tree_unflatten(structure, layouts), tree_unflatten(structure, shardings)


def _parse_jit_arguments(fun: Callable, in_shardings: Any, out_shardings: Any,
                         donate_argnums: int | Sequence[int] | None,
                         donate_argnames: str | Iterable[str] | None,
                         static_argnums: int | Sequence[int] | None,
                         static_argnames: str | Iterable[str] | None,
                         device: xc.Device | None, backend: str | None,
                         abstracted_axes: Any | None, keep_unused: bool,
                         inline: bool, use_resource_env: bool) -> PjitInfo:
  """Parses the arguments to jit/pjit.

  Performs any preprocessing and validation of the arguments that we can do
  ahead of time before the jit()-ed function is invoked.

  Returns:
    A ``PjitInfo`` holding the flattened shardings and layouts, the resolved
    static/donated argument specifications, and the remaining options.

  Raises:
    ValueError: if ``abstracted_axes`` is given without dynamic shapes
      enabled, if both ``device`` and ``backend`` are given, or if either of
      them is combined with ``in_shardings``/``out_shardings``.
  """
  if abstracted_axes and not config.dynamic_shapes.value:
    raise ValueError("abstracted_axes must be used with --jax_dynamic_shapes")

  check_callable(fun)

  # `backend`/`device` are deprecated and mutually exclusive with each other
  # and with explicitly specified shardings.
  if backend is not None or device is not None:
    warnings.warn(
        'backend and device argument on jit is deprecated. You can use'
        ' `jax.device_put(..., jax.local_devices("cpu")[0])` on the inputs to'
        ' the jitted function to get the same behavior.', DeprecationWarning)
    if device is not None and backend is not None:
      raise ValueError("can't specify both a device and a backend for jit, "
                       f"got {device=} and {backend=}")
    if in_shardings is not None and not is_unspecified(in_shardings):
      raise ValueError('If backend or device is specified on jit, then '
                       'in_shardings should not be specified.')
    if out_shardings is not None and not is_unspecified(out_shardings):
      raise ValueError('If backend or device is specified on jit, then '
                       'out_shardings should not be specified.')

  if isinstance(in_shardings, list):
    # To be a tree prefix of the positional args tuple, in_shardings can never
    # be a list: if in_shardings is not a leaf, it must be a tuple of trees.
    # However, in cases like these users expect tuples and lists to be treated
    # essentially interchangeably, so we canonicalize lists to tuples here
    # rather than raising an error. https://github.com/jax-ml/jax/issues/2367
    in_shardings = tuple(in_shardings)

  # Entries may be `Layout` objects carrying both a layout and a sharding;
  # separate them into parallel layout and sharding trees.
  in_layouts, in_shardings = _split_layout_and_sharding(in_shardings)
  out_layouts, out_shardings = _split_layout_and_sharding(out_shardings)

  in_shardings = prepare_axis_resources(in_shardings, 'in_shardings')
  out_shardings = prepare_axis_resources(out_shardings, 'out_shardings')

  user_specified_in_shardings = (in_shardings is not None and
                                 not is_unspecified(in_shardings))

  # Flatten with the registry that treats None as a leaf, so None entries are
  # preserved in the leaves.
  in_shardings_leaves, in_shardings_treedef = none_lr.flatten(in_shardings)
  out_shardings_leaves, out_shardings_treedef = none_lr.flatten(out_shardings)
  in_layouts_leaves, in_layouts_treedef = none_lr.flatten(in_layouts)
  out_layouts_leaves, out_layouts_treedef = none_lr.flatten(out_layouts)

  fun_sourceinfo = api_util.fun_sourceinfo(fun)
  fun_signature = api_util.fun_signature(fun)

  donate_argnums, donate_argnames, static_argnums, static_argnames = resolve_argnums(
      fun, fun_signature, donate_argnums, donate_argnames, static_argnums,
      static_argnames)

  return PjitInfo(
        fun_sourceinfo=fun_sourceinfo,
        fun_signature=fun_signature,
        user_specified_in_shardings=user_specified_in_shardings,
        in_shardings_treedef=in_shardings_treedef,
        in_shardings_leaves=tuple(in_shardings_leaves),
        out_shardings_treedef=out_shardings_treedef,
        out_shardings_leaves=tuple(out_shardings_leaves),
        in_layouts_treedef=in_layouts_treedef,
        in_layouts_leaves=tuple(in_layouts_leaves),
        out_layouts_treedef=out_layouts_treedef,
        out_layouts_leaves=tuple(out_layouts_leaves),
        static_argnums=static_argnums,
        static_argnames=static_argnames, donate_argnums=donate_argnums,
        donate_argnames=donate_argnames, device=device, backend=backend,
        keep_unused=keep_unused, inline=inline,
        abstracted_axes=abstracted_axes,
        use_resource_env=use_resource_env)


def _make_jit_wrapper(fun: Callable, jit_info: PjitInfo):
  """Builds the jitted callable for ``fun`` and attaches its helper methods.

  Returns:
    The callable from ``_cpp_pjit`` with ``lower``, ``eval_shape`` and
    ``trace`` set as attributes.
  """

  @api_boundary
  def lower(*args, **kwargs):
    # Lowering is tracing followed by `.lower()` on the traced result.
    return trace(*args, **kwargs).lower()

  @api_boundary
  def eval_shape(*args, **kwargs):
    # Infer the output avals without running the computation and return them
    # as ShapeDtypeStructs arranged in the function's output tree.
    p, _ = _infer_params(fun, jit_info, args, kwargs)
    # Unspecified output shardings are reported as None.
    out_s = [None if is_unspecified(s) else s for s in p.params['out_shardings']]
    # TODO(yashkatariya): Add `Layout` to SDS.
    out = [api.ShapeDtypeStruct(x.shape, x.dtype, sharding=s,
                                weak_type=x.weak_type)
           for x, s in zip(p.params['jaxpr'].out_avals, out_s)]
    return tree_unflatten(p.out_tree, out)

  @api_boundary
  def trace(*args, **kwargs) -> stages.Traced:
    # Infer the pjit params for these arguments and package them, together
    # with a deferred lowering callable, into a `stages.Traced`.
    p, args_flat = _infer_params(fun, jit_info, args, kwargs)
    donate_argnums = tuple(i for i, d in enumerate(p.donated_invars) if d)
    args_info = stages.make_args_info(p.in_tree, p.in_avals, donate_argnums)
    lower_callable = partial(_resolve_and_lower, args_flat, **p.params,
                             pgle_profiler=None)
    return stages.Traced(
        p.params['jaxpr'], args_info, p.params["name"], p.out_tree,
        lower_callable, args_flat, p.arg_names, p.num_consts)

  wrapped = _cpp_pjit(fun, jit_info)
  wrapped.lower = lower
  wrapped.eval_shape = eval_shape
  wrapped.trace = trace
  return wrapped


def make_jit(fun: Callable, in_shardings: Any, out_shardings: Any,
             donate_argnums: int | Sequence[int] | None,
             donate_argnames: str | Iterable[str] | None,
             static_argnums: int | Sequence[int] | None,
             static_argnames: str | Iterable[str] | None,
             device: xc.Device | None, backend: str | None,
             abstracted_axes: Any | None, keep_unused: bool,
             inline: bool, use_resource_env: bool) -> Any:
  """jit() and pjit() are thin wrappers around this function.

  The arguments are validated and preprocessed into a ``PjitInfo`` first; the
  jitted callable is then built from ``fun`` and that info.
  """
  parsed = _parse_jit_arguments(
      fun=fun,
      in_shardings=in_shardings,
      out_shardings=out_shardings,
      donate_argnums=donate_argnums,
      donate_argnames=donate_argnames,
      static_argnums=static_argnums,
      static_argnames=static_argnames,
      device=device,
      backend=backend,
      abstracted_axes=abstracted_axes,
      keep_unused=keep_unused,
      inline=inline,
      use_resource_env=use_resource_env,
  )
  return _make_jit_wrapper(fun, parsed)


class PjitParams(NamedTuple):
  """Result of inferring the parameters of one jit call signature.

  Built by ``_infer_params_impl``; ``params`` holds the keyword arguments
  passed to ``pjit_p.bind``.
  """
  consts: list[Any]  # Only jaxpr constants, we can't keep other arguments alive
  # Keyword arguments for the pjit primitive (jaxpr, shardings, layouts, ...).
  params: dict[str, Any]
  in_avals: tuple[core.AbstractValue, ...]
  in_tree: PyTreeDef
  out_tree: PyTreeDef
  # Donation flags, including leading False entries for the extra (implicit,
  # attr-state and const) arguments.
  donated_invars: tuple[bool, ...]
  # Argument names taken from the debug info; None when there is none.
  arg_names: tuple[str, ...] | None
  # Number of jaxpr constants, i.e. len(consts).
  num_consts: int
  attrs_tracked: list[tuple[PyTreeDef, PyTreeDef, tuple[Any, str]]]


def _infer_params_impl(
    fun: Callable,
    ji: PjitInfo,
    pjit_mesh: mesh_lib.Mesh | None,
    resource_env: mesh_lib.ResourceEnv | None,
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
    in_avals: tuple[core.AbstractValue, ...] | None,
) -> tuple[PjitParams, list[Any]]:
  """Traces ``fun`` for the given call and assembles the pjit parameters.

  Args:
    fun: the user function.
    ji: the preprocessed jit arguments.
    pjit_mesh: the physical mesh for pjit, or None when called through jit.
    resource_env: the resource environment for pjit, or None for jit.
    args: positional arguments of the call.
    kwargs: keyword arguments of the call.
    in_avals: abstract values of the dynamic arguments if the caller already
      computed them, else None. Ignored (recomputed) when dynamic shapes are
      enabled.

  Returns:
    A pair ``(params, args_flat)`` where ``params`` is the ``PjitParams`` for
    the call and ``args_flat`` is the list of implicit arguments followed by
    the explicit flat arguments (constants are not included).

  Raises:
    ValueError: if kwargs are used together with user-specified in_shardings,
      or a non-empty mesh is combined with ``backend``/``device``.
    OverflowError: if abstractifying an argument overflows.
  """
  have_kwargs = bool(kwargs)
  if have_kwargs and ji.user_specified_in_shardings:
    raise ValueError(
        "pjit does not support kwargs when in_shardings is specified.")

  # The presence of a mesh distinguishes pjit from jit for naming purposes.
  if pjit_mesh is not None:
    jit_name = 'pjit'
    if (ji.backend or ji.device) and not pjit_mesh.empty:
      raise ValueError(
          "Mesh context manager should not be used with jit when backend or "
          "device is also specified as an argument to jit.")
  else:
    jit_name = 'jit'

  axes_specs = _flat_axes_specs(ji.abstracted_axes, *args, **kwargs)

  dbg = debug_info(jit_name, ji.fun_sourceinfo, ji.fun_signature, args, kwargs,
                   ji.static_argnums, ji.static_argnames)
  # Wrap the function, strip the static arguments and flatten the remaining
  # (dynamic) positional and keyword arguments.
  f = lu.wrap_init(fun)
  f, res_paths = result_paths(f)
  f, dyn_args = argnums_partial_except(f, ji.static_argnums, args, allow_invalid=True)
  del args

  f, dyn_kwargs = argnames_partial_except(f, ji.static_argnames, kwargs)
  explicit_args, in_tree = tree_flatten((dyn_args, dyn_kwargs))
  flat_fun, out_tree = flatten_fun(f, in_tree)
  flat_fun, explicit_args = hoist_obj_attrs(flat_fun, explicit_args)

  # Donation is disabled entirely when debug_nans is on.
  if (ji.donate_argnums or ji.donate_argnames) and not config.debug_nans.value:
    donated_invars = donation_vector(ji.donate_argnums, ji.donate_argnames, in_tree)
  else:
    donated_invars = (False,) * len(explicit_args)

  # If backend or device is set as an arg on jit, then resolve them to
  # in_shardings and out_shardings as if user passed in in_shardings
  # and out_shardings.
  device_or_backend_set = bool(ji.backend or ji.device)
  if device_or_backend_set:
    sharding = _create_sharding_with_device_backend(ji.device, ji.backend)
    leaves, treedef = tree_flatten(sharding)
    in_shardings_leaves = out_shardings_leaves = tuple(leaves)
    in_shardings_treedef = out_shardings_treedef = treedef
  else:
    in_shardings_leaves = tuple(
        _create_sharding_for_array(pjit_mesh, x, 'in_shardings', jit_name)
        for x in ji.in_shardings_leaves)
    in_shardings_treedef = ji.in_shardings_treedef
    out_shardings_leaves = tuple(
        _create_sharding_for_array(pjit_mesh, x, 'out_shardings', jit_name)
        for x in ji.out_shardings_leaves)
    out_shardings_treedef = ji.out_shardings_treedef

  assert None not in in_shardings_leaves
  assert None not in out_shardings_leaves

  # Determine the input type: inferred from the axes specs under dynamic
  # shapes, computed from the arguments if no avals were supplied, or taken
  # directly from the caller-provided avals.
  in_type: core.InputType | tuple[core.AbstractValue, ...]
  if config.dynamic_shapes.value:
    in_type = pe.infer_lambda_input_type(axes_specs, explicit_args)
    in_avals = tuple(a for a, e in in_type if e)
  elif in_avals is None:
    avals = []
    for i, a in enumerate(explicit_args):
      try:
        avals.append(shaped_abstractify(a))
      except OverflowError as e:
        # Re-raise with a pointer to the offending argument.
        arg_path = (f"argument path is {dbg.arg_names[i]}" if dbg
                    else f"flattened argument number is {i}")
        raise OverflowError(
          "An overflow was encountered while parsing an argument to a jitted "
          f"computation, whose {arg_path}."
        ) from e
    in_type = in_avals = tuple(avals)
  else:
    in_type = in_avals

  in_shardings_flat, in_layouts_flat = _process_in_axis_resources(
      in_shardings_treedef, in_shardings_leaves,
      ji.in_layouts_treedef, ji.in_layouts_leaves,
      in_avals, in_tree, dbg, device_or_backend_set, have_kwargs)

  # Trace the flat function to a jaxpr.
  attr_token = _attr_token(flat_fun, in_type)
  jaxpr, consts, out_avals, attrs_tracked = _create_pjit_jaxpr(
      flat_fun, in_type, attr_token, dbg,
      HashableFunction(res_paths, closure=()),
      IgnoreKey(ji.inline))
  _attr_update(flat_fun, in_type, attr_token, attrs_tracked)

  out_shardings_flat, out_layouts_flat = _check_and_canonicalize_out_shardings(
      out_shardings_treedef, out_shardings_leaves, ji.out_layouts_treedef,
      ji.out_layouts_leaves, HashableFunction(out_tree, closure=()),
      tuple(out_avals), jaxpr.jaxpr.debug_info, device_or_backend_set)

  assert len(explicit_args) == len(in_shardings_flat) == len(in_layouts_flat)

  if config.dynamic_shapes.value:
    implicit_args = _extract_implicit_args(
        cast(core.InputType, in_type), explicit_args)
  else:
    implicit_args = []
  args_flat = [*implicit_args, *explicit_args]

  # Implicit arguments, attr-state inputs and constants are extra leading
  # inputs: pad the per-input tuples with defaults (unspecified sharding, no
  # layout, not donated) for them.
  num_states_in = sum(init_tree.num_leaves for init_tree, _, _ in attrs_tracked)
  num_extra_args = len(implicit_args) + num_states_in + len(consts)
  in_shardings_flat = (UNSPECIFIED,) * num_extra_args + in_shardings_flat
  in_layouts_flat = (None,) * num_extra_args + in_layouts_flat
  donated_invars = (False,) * num_extra_args + donated_invars
  assert (len(in_shardings_flat) == len(in_layouts_flat) ==
          len(donated_invars) == num_states_in + len(consts) + len(args_flat))

  params = dict(
      jaxpr=jaxpr,
      in_shardings=in_shardings_flat,
      out_shardings=out_shardings_flat,
      in_layouts=in_layouts_flat,
      out_layouts=out_layouts_flat,
      resource_env=resource_env,
      donated_invars=donated_invars,
      name=fun_qual_name(flat_fun),
      keep_unused=ji.keep_unused,
      inline=ji.inline,
  )
  return PjitParams(consts, params, in_avals, in_tree, out_tree(),
                    donated_invars, dbg.arg_names if dbg else None, len(consts),
                    attrs_tracked), args_flat


class InferParamsCacheEntry:
  """Mutable value object for _infer_params_cached.

  A new entry starts with ``pjit_params`` set to None; ``_infer_params`` fills
  it in after computing the parameters for the entry's cache key.
  """
  __slots__ = ['pjit_params']

  # The cached parameters, or None while the entry has not been populated.
  pjit_params: PjitParams | None

  def __init__(self):
    self.pjit_params = None


# We use an outer cache that is keyed on the signature of the arguments, but
# when populating a cache entry using _infer_params_impl, we need to provide
# actual arguments. In principle we could refactor _infer_params_impl to look
# only at an argument signature instead of args/kwargs in those cases that we
# cache, but this was a more minimal change.
@util.weakref_lru_cache
def _infer_params_cached(
    fun: Callable,
    jit_info: PjitInfo,
    signature: jax_jit.ArgumentSignature,
    in_avals: tuple[core.AbstractValue, ...],
    pjit_mesh: mesh_lib.Mesh | None,
    resource_env: mesh_lib.ResourceEnv | None,
) -> InferParamsCacheEntry:
  """Returns the cache entry for this combination of arguments.

  The body never reads its arguments: they serve only as the cache key of the
  ``weakref_lru_cache`` decorator. A fresh, empty entry is created on a miss
  and the caller is responsible for populating it.
  """
  return InferParamsCacheEntry()


def _infer_params(
    fun: Callable, ji: PjitInfo, args: tuple[Any, ...], kwargs: dict[str, Any]
) -> tuple[PjitParams, list[Any]]:
  """Returns the pjit parameters and flat arguments for one call.

  Results are cached per argument signature and abstract values, except when
  dynamic shapes are enabled, when an argument cannot be abstractified, or
  when the traced function tracks attributes.

  Args:
    fun: the user function.
    ji: the preprocessed jit arguments.
    args: positional arguments of the call.
    kwargs: keyword arguments of the call.

  Returns:
    A pair ``(params, args_flat)`` where ``args_flat`` is the jaxpr constants
    followed by the flat dynamic arguments.
  """
  if ji.use_resource_env:
    # We need to fetch the mesh from inside the wrapped function, because
    # meshes are dynamically scoped (i.e., with a context manager).
    resource_env = mesh_lib.thread_resources.env
    pjit_mesh = resource_env.physical_mesh
  else:
    resource_env = None
    pjit_mesh = None

  # `signature`, `dynargs` and `avals` are only bound when the cache is
  # usable; every use below is guarded by `skip_cache` being False.
  skip_cache = config.dynamic_shapes.value
  if not skip_cache:
    signature, dynargs = jax_jit.parse_arguments(
        args, tuple(kwargs.values()), tuple(kwargs.keys()), ji.static_argnums,
        ji.static_argnames, tree_util.default_registry)
    try:
      avals = tuple(shaped_abstractify(a) for a in dynargs)
    except (OverflowError, TypeError):
      # If we see something we don't understand, use the slow path.
      skip_cache = True

  if skip_cache:
    # Slow path: let _infer_params_impl compute the avals (and raise any
    # detailed error) itself.
    p, args_flat = _infer_params_impl(fun, ji, pjit_mesh, resource_env, args,
                                      kwargs, in_avals=None)
    return p, p.consts + args_flat

  entry = _infer_params_cached(
      fun, ji, signature, avals, pjit_mesh, resource_env)
  if entry.pjit_params is None:
    # Cache miss: compute the parameters and, if allowed, store them.
    p, args_flat = _infer_params_impl(
        fun, ji, pjit_mesh, resource_env, args, kwargs, in_avals=avals)
    if p.attrs_tracked:
      # If there are attrs_tracked, don't use the cache.
      return p, p.consts + args_flat
    else:
      entry.pjit_params = p
  # Cached (or just-populated) entry: pair it with this call's dynamic args.
  return entry.pjit_params, entry.pjit_params.consts + dynargs


def _extract_implicit_args(
  in_type: Sequence[tuple[core.AbstractValue, bool]],
  explicit_args: Sequence[Any]
) -> Sequence[core.Tracer]:
  """Recovers the implicit axis-size arguments from the explicit ones.

  ``in_type`` describes the full (jaxpr-level) argument list as
  ``(aval, is_explicit)`` pairs, while ``explicit_args`` holds only the
  arguments the user actually passed. Each implicit argument is read off the
  shape of an explicit argument whose aval refers to it through a
  ``core.DBIdx`` dimension.

  Returns:
    The implicit arguments, in the order they appear in ``in_type``.
  """
  # Lay out the full argument list, with None standing in for every implicit
  # argument; all explicit arguments must be consumed in the process.
  remaining = iter(explicit_args)
  full_args = [next(remaining) if is_explicit else None
               for _, is_explicit in in_type]
  assert next(remaining, None) is None
  del explicit_args, remaining

  # Fill in the placeholders from the shapes of the explicit arguments.
  for idx, (aval, is_explicit) in enumerate(in_type):
    if is_explicit and isinstance(aval, core.DShapedArray):
      source = full_args[idx]
      assert source is not None
      for spec_dim, actual_dim in zip(aval.shape, source.aval.shape):
        if not isinstance(spec_dim, core.DBIdx):
          continue
        if full_args[spec_dim.val] is None:
          full_args[spec_dim.val] = actual_dim
        # Every reference to the same implicit argument must agree.
        assert core.same_referent(full_args[spec_dim.val], actual_dim)
  assert all(x is not None for x in full_args)
  return [x for x, (_, is_explicit) in zip(full_args, in_type)
          if not is_explicit]  # pytype: disable=bad-return-type

def _flat_axes_specs(abstracted_axes, *args, **kwargs
                     ) -> list[pe.AbstractedAxesSpec] | None:
  if abstracted_axes is None: return None
  if kwargs: raise NotImplementedError
  def ax_leaf(l):
    return (isinstance(l, dict) and all_leaves(l.values()) or
            isinstance(l, tuple) and all_leaves(l, lambda x: x is None))
  return broadcast_prefix(abstracted_axes, args, ax_leaf)


class JitWrapped(stages.Wrapped):
  """``stages.Wrapped`` extended with ``eval_shape`` and ``trace`` methods.

  Both methods raise ``NotImplementedError`` here; presumably the concrete
  object returned by ``jit``/``pjit`` supplies the real implementations --
  TODO confirm against the implementing class.
  """

  def eval_shape(self, *args, **kwargs):
    """See ``jax.eval_shape``."""
    raise NotImplementedError

  def trace(self, *args, **kwargs) -> stages.Traced:
    """Trace the wrapped function for the given arguments.

    Annotated as returning a ``stages.Traced``; this stub always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError


# in_shardings and out_shardings can't be None as the default value
# because `None` means that the input is fully replicated.
def pjit(
    fun: Callable,
    in_shardings=UNSPECIFIED,
    out_shardings=UNSPECIFIED,
    static_argnums: int | Sequence[int] | None = None,
    static_argnames: str | Iterable[str] | None = None,
    donate_argnums: int | Sequence[int] | None = None,
    donate_argnames: str | Iterable[str] | None = None,
    keep_unused: bool = False,
    device: xc.Device | None = None,
    backend: str | None = None,
    inline: bool = False,
    abstracted_axes: Any | None = None,
) -> JitWrapped:
  """Makes ``fun`` compiled and automatically partitioned across multiple devices.

  NOTE: This function is now equivalent to ``jax.jit``; please use that
  instead.

  The returned function has semantics equivalent to those of ``fun``, but is
  compiled to an XLA computation that runs across multiple devices
  (e.g. multiple GPUs or multiple TPU cores). This can be useful if the jitted
  version of ``fun`` would not fit in a single device's memory, or to speed up
  ``fun`` by running each operation in parallel across multiple devices.

  The partitioning over devices happens automatically based on the
  propagation of the input partitioning specified in ``in_shardings`` and
  the output partitioning specified in ``out_shardings``. The resources
  specified in those two arguments must refer to mesh axes, as defined by
  the :py:func:`jax.sharding.Mesh` context manager. Note that the mesh
  definition at :func:`~pjit` application time is ignored, and the returned function
  will use the mesh definition available at each call site.

  Inputs to a :func:`~pjit`'d function will be automatically partitioned across devices
  if they're not already correctly partitioned based on ``in_shardings``.
  In some scenarios, ensuring that the inputs are already correctly pre-partitioned
  can increase performance. For example, if passing the output of one
  :func:`~pjit`'d function to another :func:`~pjit`'d function (or the same
  :func:`~pjit`'d function in a loop), make sure the relevant
  ``out_shardings`` match the corresponding ``in_shardings``.

  .. note::
    **Multi-process platforms:** On multi-process platforms such as TPU pods,
    :func:`~pjit` can be used to run computations across all available devices across
    processes. To achieve this, :func:`~pjit` is designed to be used in SPMD Python
    programs, where every process is running the same Python code such that all
    processes run the same :func:`~pjit`'d function in the same order.

    When running in this configuration, the mesh should contain devices across
    all processes. All input arguments must be globally shaped.
    ``fun`` will still be executed across *all* devices in the mesh,
    including those from other processes, and will be given a global view of the
    data spread across multiple processes as a single array.

    The SPMD model also requires that the same multi-process :func:`~pjit`'d
    functions must be run in the same order on all processes, but they can be
    interspersed with arbitrary operations running in a single process.

  Args:
    fun: Function to be compiled. Should be a pure function, as side-effects may
      only be executed once. Its arguments and return value should be arrays,
      scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
      Positional arguments indicated by ``static_argnums`` can be anything at
      all, provided they are hashable and have an equality operation defined.
      Static arguments are included as part of a compilation cache key, which is
      why hash and equality operators must be defined.
    in_shardings: Pytree of structure matching that of arguments to ``fun``,
      with all actual arguments replaced by resource assignment specifications.
      It is also valid to specify a pytree prefix (e.g. one value in place of a
      whole subtree), in which case the leaves get broadcast to all values in
      that subtree.

      The ``in_shardings`` argument is optional. JAX will infer the shardings
      from the input :py:class:`jax.Array`'s, and defaults to replicating the input
      if the sharding cannot be inferred.

      The valid resource assignment specifications are:

      - :py:class:`Sharding`, which will decide how the value
        will be partitioned. With this, using a mesh context manager is not
        required.
      - :py:obj:`None` is a special case whose semantics are:
          - If the mesh context manager is *not* provided, JAX has the freedom to
            choose whatever sharding it wants.
            For in_shardings, JAX will mark it as replicated but this behavior
            can change in the future.
            For out_shardings, we will rely on the XLA GSPMD partitioner to
            determine the output shardings.
          - If the mesh context manager is provided, None will imply that the
            value will be replicated on all devices of the mesh.
      - For backwards compatibility, in_shardings still supports ingesting
        :py:class:`PartitionSpec`. This option can *only* be used with the
        mesh context manager.

        - :py:class:`PartitionSpec`, a tuple of length at most equal to the rank
          of the partitioned value. Each element can be a :py:obj:`None`, a mesh
          axis or a tuple of mesh axes, and specifies the set of resources assigned
          to partition the value's dimension matching its position in the spec.

      The size of every dimension has to be a multiple of the total number of
      resources assigned to it.
    out_shardings: Like ``in_shardings``, but specifies resource
      assignment for function outputs.
      The ``out_shardings`` argument is optional. If not specified, :py:func:`jax.jit`
      will use GSPMD's sharding propagation to determine how to shard the outputs.
    static_argnums: An optional int or collection of ints that specify which
      positional arguments to treat as static (compile-time constant).
      Operations that only depend on static arguments will be constant-folded in
      Python (during tracing), and so the corresponding argument values can be
      any Python object.

      Static arguments should be hashable, meaning both ``__hash__`` and
      ``__eq__`` are implemented, and immutable. Calling the jitted function
      with different values for these constants will trigger recompilation.
      Arguments that are not arrays or containers thereof must be marked as
      static.

      If ``static_argnums`` is not provided, no arguments are treated as static.
    static_argnames: An optional string or collection of strings specifying
      which named arguments to treat as static (compile-time constant). See the
      comment on ``static_argnums`` for details. If not
      provided but ``static_argnums`` is set, the default is based on calling
      ``inspect.signature(fun)`` to find corresponding named arguments.
    donate_argnums: Specify which positional argument buffers are "donated" to
      the computation. It is safe to donate argument buffers if you no longer
      need them once the computation has finished. In some cases XLA can make
      use of donated buffers to reduce the amount of memory needed to perform a
      computation, for example recycling one of your input buffers to store a
      result. You should not reuse buffers that you donate to a computation, JAX
      will raise an error if you try to. By default, no argument buffers are
      donated.

      If neither ``donate_argnums`` nor ``donate_argnames`` is provided, no
      arguments are donated. If ``donate_argnums`` is not provided but
      ``donate_argnames`` is, or vice versa, JAX uses
      :code:`inspect.signature(fun)` to find any positional arguments that
      correspond to ``donate_argnames``
      (or vice versa). If both ``donate_argnums`` and ``donate_argnames`` are
      provided, ``inspect.signature`` is not used, and only actual
      parameters listed in either ``donate_argnums`` or ``donate_argnames`` will
      be donated.

      For more details on buffer donation see the
      `FAQ <https://jax.readthedocs.io/en/latest/faq.html#buffer-donation>`_.
    donate_argnames: An optional string or collection of strings specifying
      which named arguments are donated to the computation. See the
      comment on ``donate_argnums`` for details. If not
      provided but ``donate_argnums`` is set, the default is based on calling
      ``inspect.signature(fun)`` to find corresponding named arguments.
    keep_unused: If `False` (the default), arguments that JAX determines to be
      unused by `fun` *may* be dropped from resulting compiled XLA executables.
      Such arguments will not be transferred to the device nor provided to the
      underlying executable. If `True`, unused arguments will not be pruned.
    device: This argument is deprecated. Please put your arguments on the
      device you want before passing them to jit.
      Optional, the Device the jitted function will run on. (Available devices
      can be retrieved via :py:func:`jax.devices`.) The default is inherited
      from XLA's DeviceAssignment logic and is usually to use
      ``jax.devices()[0]``.
    backend: This argument is deprecated. Please put your arguments on the
      backend you want before passing them to jit.
      Optional, a string representing the XLA backend: ``'cpu'``, ``'gpu'``, or
      ``'tpu'``.
    inline: Optional boolean (default ``False``), passed through unchanged to
      the underlying jit construction. Presumably the same ``inline`` option
      as :py:func:`jax.jit` -- confirm before relying on details.
    abstracted_axes: Optional (default ``None``), passed through unchanged to
      the underlying jit construction. Presumably the dynamic-shapes axis
      specification also accepted by :py:func:`jax.jit` -- confirm.

  Returns:
    A wrapped version of ``fun``, set up for just-in-time compilation and
    automatically partitioned by the mesh available at each call site.

  For example, a convolution operator can be automatically partitioned over
  an arbitrary set of devices by a single :func:`~pjit` application:

  >>> import jax
  >>> import jax.numpy as jnp
  >>> import numpy as np
  >>> from jax.sharding import Mesh, PartitionSpec
  >>> from jax.experimental.pjit import pjit
  >>>
  >>> x = jnp.arange(8, dtype=jnp.float32)
  >>> f = pjit(lambda x: jax.numpy.convolve(x, jnp.asarray([0.5, 1.0, 0.5]), 'same'),
  ...         in_shardings=None, out_shardings=PartitionSpec('devices'))
  >>> with Mesh(np.array(jax.devices()), ('devices',)):
  ...   print(f(x))  # doctest: +SKIP
  [ 0.5  2.   4.   6.   8.  10.  12.  10. ]
  """
  # `use_resource_env=True` is the only argument not taken from the caller.
  return make_jit(
       fun, in_shardings, out_shardings, donate_argnums, donate_argnames,
       static_argnums, static_argnames, device, backend, abstracted_axes,
       keep_unused, inline, use_resource_env=True)


def hashable_pytree(pytree):
  """Wrap ``pytree`` in a ``HashableFunction`` that rebuilds it when called.

  The pytree is flattened once; the structure and the tuple of its leaves are
  used as the ``closure`` of the returned ``HashableFunction``.
  """
  leaves, structure = tree_flatten(pytree)
  frozen_leaves = tuple(leaves)  # a tuple, so the closure can be hashed
  return HashableFunction(
      lambda: tree_unflatten(structure, frozen_leaves),
      closure=(structure, frozen_leaves))


def _create_sharding_for_array(mesh, x, name, api_name):
  """Turn one user-supplied sharding spec ``x`` into a sharding object.

  Args:
    mesh: the mesh to resolve partition specs against; may be ``None``.
    x: ``None``, a ``Sharding``, an unspecified/auto marker, or a
      ``ParsedPartitionSpec``.
    name: the argument being processed (``'in_shardings'`` or
      ``'out_shardings'``); only used in error messages.
    api_name: name of the calling API; only used in error messages.

  Returns:
    ``UNSPECIFIED`` when ``x`` is ``None`` and there is no usable mesh, ``x``
    itself when it is already a ``Sharding`` or unspecified/auto, and otherwise
    the result of ``pxla.create_mesh_pspec_sharding`` for ``mesh`` and ``x``.

  Raises:
    RuntimeError: if a partition spec (or ``None``) needs a mesh but ``mesh``
      is ``None`` or empty.
  """
  mesh_missing = mesh is None
  if x is None and (mesh_missing or mesh.empty):
    return UNSPECIFIED
  if isinstance(x, sharding.Sharding) or is_unspecified_or_auto(x):
    return x

  if mesh_missing:
    parts = [
        'jax.jit only supports `Sharding`s being passed to',
        f' {name}. Looks like you are passing either `PartitionSpec` or `None`',
        ' which is not allowed in jax.jit.\n',
    ]
    if name == 'in_shardings':
      parts.append(
          f'Note that {name} argument is optional. JAX will infer the shardings'
          " from the input jax.Array's and will default to replicating the"
          ' input if the sharding cannot be inferred.')
    elif name == 'out_shardings':
      parts.append(
          f'Note that {name} is optional. If not specified, jax.jit will'
          " use GSPMD's sharding propagation to figure out what the sharding"
          ' of the output(s) should be.')
    raise RuntimeError(''.join(parts))

  if mesh.empty:
    raise RuntimeError(
        f'{api_name} requires a non-empty mesh if you are passing'
        f' `PartitionSpec`s or `None` to {name}! Is a mesh defined at the call'
        f' site? Alternatively, provide `Sharding`s to {name} and'
        ' then the mesh context manager is not required.')

  # A nice user error is raised in prepare_axis_resources.
  assert x is None or isinstance(x, ParsedPartitionSpec), x
  if x is None:
    return pxla.create_mesh_pspec_sharding(mesh, None)
  return pxla.create_mesh_pspec_sharding(mesh, x.get_partition_spec(), x)


def _create_sharding_with_device_backend(device, backend):
  if device is not None:
    assert backend is None
    out = SingleDeviceSharding(device)
  elif backend is not None:
    assert device is None
    out = SingleDeviceSharding(xb.get_backend(backend).local_devices()[0])
  else:
    raise AssertionError('Unreachable!')
  out._device_backend = True
  return out


def flatten_axis_resources(what, tree, shardings, tupled_args):
  """Flatten ``shardings`` against ``tree``, with detailed prefix errors.

  Args:
    what: description of the thing being flattened (e.g. ``"pjit
      in_shardings"``); used in error messages.
    tree: the treedef that ``shardings`` must be a prefix of.
    shardings: the user-provided pytree (prefix) of shardings or layouts.
    tupled_args: whether ``tree`` represents a tuple of positional arguments;
      forwarded to ``flatten_axes`` and used to pick the error message.

  Returns:
    A tuple with one entry of ``shardings`` per leaf of ``tree``.

  Raises:
    ValueError: if ``tupled_args`` is set and ``shardings`` is not a tuple of
      the right length.
    Exception: otherwise, the first error reported by ``prefix_errors`` when
      ``shardings`` is not a tree prefix of ``tree``.
    AssertionError: if flattening failed but no prefix error could be found.
  """
  try:
    return tuple(flatten_axes(what, tree, shardings, tupled_args=tupled_args))
  except ValueError:
    pass  # Raise a tree prefix error below

  # Tree leaves are always valid prefixes, so if there was a prefix error as
  # assumed here, `shardings` must not be a leaf.
  assert not treedef_is_leaf(tree_structure(shardings))

  # Check the type directly rather than using isinstance because of namedtuples.
  if tupled_args and (type(shardings) is not tuple or
                      len(shardings) != len(tree.children())):
    # We know `shardings` is meant to be a tuple corresponding to the args
    # tuple, but while it is a non-leaf pytree, either it wasn't a tuple or it
    # wasn't the right length.
    msg = (f"{what} specification must be a tree prefix of the positional "
           f"arguments tuple passed to the `pjit`-decorated function. In "
           f"particular, {what} must either be a None, a PartitionSpec, or "
           f"a tuple of length equal to the number of positional arguments.")
    # If `tree` represents an args tuple, then `shardings` must be a tuple.
    # TODO(mattjj,apaszke): disable implicit list casts, remove 'or list' below
    if type(shardings) is not tuple:
      msg += f" But {what} is not a tuple: got {type(shardings)} instead."
    elif len(shardings) != len(tree.children()):
      msg += (f" But {what} is the wrong length: got a tuple or list of length "
              f"{len(shardings)} for an args tuple of length "
              f"{len(tree.children())}.")

    # As an extra hint, let's check if the user just forgot to wrap
    # shardings in a singleton tuple.
    if len(tree.children()) == 1:
      try: flatten_axes(what, tree, (shardings,))
      except ValueError: pass  # That's not the issue.
      else:
        msg += (f" Given the corresponding argument being "
                f"passed, it looks like {what} might need to be wrapped in "
                f"a singleton tuple.")

    raise ValueError(msg)

  # Because we only have the `tree` treedef and not the full pytree here,
  # we construct a dummy tree to compare against. Revise this in callers?
  dummy_tree = tree_unflatten(tree, [PytreeLeaf()] * tree.num_leaves)
  errors = prefix_errors(shardings, dummy_tree)
  if errors:
    e = errors[0]  # Only show information about the first disagreement found.
    raise e(what)

  # At this point we've failed to find a tree prefix error. This should be
  # unreachable; raise explicitly (rather than `assert False`) so the failure
  # is not silently turned into a `None` return when Python runs with -O.
  raise AssertionError("Please open a bug report!")

class PytreeLeaf:
  """Placeholder object whose repr is the literal text ``pytree leaf``."""

  def __repr__(self):
    return "pytree leaf"


@util.cache(max_size=4096, trace_context_in_key=False)
def _process_in_axis_resources(in_shardings_treedef, in_shardings_leaves,
                               in_layouts_treedef, in_layouts_leaves,
                               in_avals, in_tree, debug_info,
                               device_or_backend_set, kws):
  """Flatten and validate the input shardings and layouts of a jitted call.

  The shardings and layouts arrive as (treedef, leaves) pairs and are rebuilt
  here before being flattened to one entry per element of ``in_avals``.
  ``device_or_backend_set`` is not read in the body.

  Returns:
    A pair ``(in_shardings_flat, in_layouts_flat)``.
  """
  if not kws:
    # Without keyword arguments, only the positional-args subtree is relevant.
    in_tree, _ = treedef_children(in_tree)
  num_inputs = len(in_avals)

  # A bare unspecified sharding is simply replicated per input. Anything else
  # (including AUTO) goes through flatten_axis_resources.
  shardings = tree_unflatten(in_shardings_treedef, in_shardings_leaves)
  in_shardings_flat = (
      (shardings,) * num_inputs if is_unspecified(shardings)
      else flatten_axis_resources(
          "pjit in_shardings", in_tree, shardings, tupled_args=True))

  layouts = tree_unflatten(in_layouts_treedef, in_layouts_leaves)
  in_layouts_flat = (
      (layouts,) * num_inputs if layouts is None
      else flatten_axis_resources(
          "pjit in_layouts", in_tree, layouts, tupled_args=True))

  # TODO(dougalm,mattjj): enable debug info with attrs_tracked
  attrs_tracked = debug_info and len(debug_info.arg_names) != len(in_avals)
  if config.dynamic_shapes.value or attrs_tracked:
    return in_shardings_flat, in_layouts_flat

  arg_names = None if debug_info is None else debug_info.arg_names
  pjit_check_aval_sharding(in_shardings_flat, in_avals, arg_names,
                           "pjit arguments", allow_uneven_sharding=False)
  check_aval_layout_compatibility(
      in_layouts_flat, in_avals, arg_names, "jit arguments")
  return in_shardings_flat, in_layouts_flat

# Call sites at which a never-before-seen function has already been reported.
callsites: set[str] = set()

def explain_tracing_cache_miss(
    f: Callable, unseen_f: bool, cache: dict, key: tuple):
  """Log a human-readable explanation of why tracing ``f`` missed the cache.

  The new ``key`` is compared against the keys already in ``cache`` and the
  first applicable reason is reported: the function was never seen, the
  positional/keyword split is new, the tracing context differs, the input
  pytree is new, or the input types are new. The explanation is emitted as one
  WARNING-level record on ``logger``.

  Nothing is logged when ``config.check_tracer_leaks`` is enabled or when the
  key's ``inline`` value is truthy.

  Args:
    f: the function whose tracing missed the cache.
    unseen_f: whether ``f`` has no cache entries at all.
    cache: the tracing cache for ``f``; its keys have the same layout as
      ``key``.
    key: the cache key that was just missed.
  """
  if config.check_tracer_leaks.value: return

  def unpack(key):
    # Pull out the pieces of a cache key that the explanation looks at.
    transforms, (), _, (in_type, _, debug_info, _, inline), *_, ctx = key
    # TODO(dougalm,mattjj): enable cache miss explanation with attrs
    _, (_, (in_tree,)), *_ = transforms
    return in_tree, in_type, debug_info, inline.val, ctx
  in_tree, in_type, debug_info, inline, ctx = unpack(key)
  if inline: return

  msg: list[str] = []
  p = msg.append
  done = lambda: logger.log(logging.WARNING, '\n'.join(msg))

  callsite = source_info_util.summarize(source_info_util.current())
  p(f"TRACING CACHE MISS at {callsite} because:")

  # have we seen this function before at all?
  fun_name = getattr(f, '__qualname__', f)
  if debug_info is not None and debug_info.func_src_info:
    _, _, *rest = debug_info.func_src_info.split(' ')
    src_info = " defined at "  + ' '.join(rest)
  else:
    src_info = ''
  if unseen_f:
    p(f"  never seen function:\n    {fun_name} id={id(f)}{src_info}")
    if callsite in callsites:
      p("  but seen another function defined on the same line; maybe the function is\n"
        "  being re-defined repeatedly, preventing caching?")
    callsites.add(callsite)
    return done()
  else:
    p(f"  for {fun_name}{src_info}")

  # NOTE(review): `seen_keys` is iterated several times below, so `map` here
  # has to return a re-iterable sequence -- presumably this module rebinds
  # `map` to a list-returning variant; confirm before touching this line.
  seen_keys = map(unpack, cache.keys())

  # have we maybe switched some args to be kwargs or visa-versa?
  args_tree, kwargs_tree = treedef_children(in_tree)
  args_kwargs_trees = [treedef_children(k) for k, *_ in seen_keys]
  args_kwargs_match = [t for t in args_kwargs_trees
                       if t == [args_tree, kwargs_tree]]
  if not args_kwargs_match:
    num_args = len(treedef_children(args_tree))
    _, kwarg_keys = kwargs_tree.node_data()  # type: ignore
    p(f"  never seen passing {num_args} positional args and {len(kwarg_keys)} "
      "keyword args with keys:\n"
      f"    {', '.join(map(repr, kwarg_keys))}")
    dont_match = [set(t[1].node_data()[1]) for t in args_kwargs_trees  # type: ignore
                  if t != [args_tree, kwargs_tree]]
    # Rank candidates by how many keys differ. Using the symmetric-difference
    # *set* itself as the key would compare sets by the subset relation, which
    # is only a partial order and so does not select the closest candidate.
    current_keys = set(kwarg_keys)
    close_kwargs = min(
        dont_match,
        key=lambda seen: len(current_keys.symmetric_difference(seen)),
        default=None
    )
    if not close_kwargs:
      p("  closest seen is passing no keyword args")
    else:
      p(f"  closest seen passes {len(close_kwargs)} keyword args with keys:\n"
        f"    {', '.join(map(repr, close_kwargs))}")
    return done()

  # have we never seen this tracing context before?
  ctxs_match = [c for *_, c in seen_keys if c == ctx]
  if not ctxs_match:
    p("  tracing context doesn't match, e.g. due to config or context manager")
    dont_match = [c for *_, c in seen_keys if c != ctx]
    closest_ctx = min(dont_match, key=lambda c: sum(map(op.ne, c, ctx)))
    idxs = [i for i, (c1, c2) in enumerate(zip(ctx, closest_ctx)) if c1 != c2]
    p("  closest seen context tuple differs at positions:\n"
      f"    {', '.join(map(str, idxs))}\n"
      "  compare to tuple returned by config._trace_context() in jax/_src/config.py.")
    return done()

  # have we never seen this input pytree before?
  trees_match = [k for k in seen_keys if k[0] == in_tree]
  if not trees_match:
    in_tree_str = f':\n    {in_tree}' if len(str(in_tree)) < 76 else ''
    p(f"  never seen input pytree{in_tree_str}")
    dont_match = [t for t, *_ in seen_keys if t != in_tree]
    closest_tree = min(dont_match, key=lambda t: abs(t.num_leaves - in_tree.num_leaves))
    errs = list(tree_util.equality_errors_pytreedef(in_tree, closest_tree))  # type: ignore[arg-type]
    p(f"  closest seen input pytree has {len(errs)} mismatches, including:")
    for path, thing1, thing2, explanation in errs:
      fst, *path = path  # type: ignore
      base = ['args', 'kwargs'][fst.idx]
      p(f"    * at {base}{keystr(tuple(path))}, seen {thing2} but now given {thing1},"
        f"      so {explanation}")
    return done()

  # have we never seen these input types (eg shapes, dtypes) before?
  types_match = [k for k in trees_match if k[1] == in_type]
  if not types_match:
    if len(in_type) < 5:
      in_type_str = ':\n    {}'.format(',  '.join(
          f'{n}: {ty.str_short(short_dtypes=True)}'
          for n, ty in zip(debug_info.arg_names, in_type)))
    else:
      in_type_str = ''
    p(f"  never seen input type signature{in_type_str}")
    dont_match = [t for _, t, *_ in trees_match if t != in_type]
    closest_ty = min(dont_match, key=lambda t: sum(map(op.ne, t, in_type)))
    num_mismatch = sum(map(op.ne, closest_ty, in_type))
    p(f"  closest seen input type signature has {num_mismatch} mismatches, including:")
    add_weak_type_hint = False
    for name, ty1, ty2 in zip(debug_info.arg_names, closest_ty, in_type):
      if ty1 != ty2:
        if type(ty1) == type(ty2) == core.ShapedArray:
          s1, s2 = ty1.str_short(True), ty2.str_short(True)
          if s1 == s2:  # weak types don't show up in str_short()
            assert ty1.weak_type ^ ty2.weak_type
            s1 += f'{{weak_type={ty1.weak_type}}}'
            s2 += f'{{weak_type={ty2.weak_type}}}'
            add_weak_type_hint = True
        else:
          s1, s2 = str(ty1), str(ty2)
        p(f"    * at {name}, seen {s1}, but now given {s2}")
    if add_weak_type_hint:
      p('where weak_type=True often means a Python builtin numeric value, and ')
      p('weak_type=False means a jax.Array.')
      p('See https://jax.readthedocs.io/en/latest/type_promotion.html#weak-types')
    return done()

  # we think this is unreachable...
  p("explanation unavailable! please open an issue at https://github.com/jax-ml/jax")
  return done()

@partial(lu.cache, explain=explain_tracing_cache_miss)
def _create_pjit_jaxpr(
    fun: lu.WrappedFun,
    in_type: core.InputType | Sequence[core.AbstractValue],
    attr_data: int,
    debug_info: lu.TracingDebugInfo,
    out_paths: Callable,
    ignored_inline: IgnoreKey
) -> tuple[core.ClosedJaxpr, list[Any], list[core.AbstractValue],
           list[tuple[PyTreeDef, PyTreeDef, tuple[Any, str]]]]:
  """Trace ``fun`` to a closed jaxpr for the given input types.

  ``attr_data`` is not read in the body.

  Returns:
    A tuple ``(closed_jaxpr, final_consts, global_out_avals, attrs_tracked)``.
    ``final_consts`` is the list of traced constants when at least one of them
    is a ``core.Tracer`` (the jaxpr's constvars are then converted to inputs),
    and an empty list otherwise (the constants are then stored in the
    ``ClosedJaxpr``).
  """
  del ignored_inline  # just for explain_cache_miss
  with dispatch.log_elapsed_time(
      "Finished tracing + transforming {fun_name} for pjit in {elapsed_time:.9f} sec",
      fun_name=fun.__name__, event=dispatch.JAXPR_TRACE_EVENT):
    trace_dbg = debug_info and pe.debug_info_final(fun, debug_info.traced_for)
    if not config.dynamic_shapes.value:
      jaxpr, global_out_avals, consts, attrs_tracked = pe.trace_to_jaxpr_dynamic(
          fun, in_type, debug_info=trace_dbg)
      # assert attr_data is sentinel or attr_data matches attrs_tracked
    else:
      annotated_fun = lu.annotate(fun, cast(core.InputType, in_type))
      jaxpr, global_out_avals, consts = pe.trace_to_jaxpr_dynamic2(
          annotated_fun, debug_info=trace_dbg)
      attrs_tracked = []

  # TODO(dougalm,mattjj): enable debug info with attrs_tracked
  if not (config.dynamic_shapes.value or attrs_tracked):
    jaxpr = jaxpr_debug_info(jaxpr, debug_info, out_paths())

  if config.debug_key_reuse.value:
    # Import here to avoid circular imports
    from jax.experimental.key_reuse._core import check_key_reuse_jaxpr
    check_key_reuse_jaxpr(jaxpr)

  if not any(isinstance(c, core.Tracer) for c in consts):
    # Only concrete constants: keep them inside the closed jaxpr.
    return core.ClosedJaxpr(jaxpr, consts), [], global_out_avals, attrs_tracked

  # Some constants are tracers: make them explicit inputs and hand them back.
  closed_jaxpr = pe.close_jaxpr(pe.convert_constvars_jaxpr(jaxpr))
  return closed_jaxpr, consts, global_out_avals, attrs_tracked


@util.cache(max_size=4096, trace_context_in_key=False)
def _check_and_canonicalize_out_shardings(
    out_shardings_treedef, out_shardings_leaves, out_layouts_treedef,
    out_layouts_leaves, out_tree, out_avals, debug_info, device_or_backend_set):
  """Flatten and validate the output shardings and layouts of a jitted call.

  ``out_tree`` is a callable returning the output treedef; it is only invoked
  when a sharding or layout tree actually has to be flattened.
  ``device_or_backend_set`` is not read in the body.

  Returns:
    A pair ``(out_shardings_flat, out_layouts_flat)`` with one entry per
    element of ``out_avals``.
  """
  num_outputs = len(out_avals)

  shardings = tree_unflatten(out_shardings_treedef, out_shardings_leaves)
  # An unspecified marker or a single Sharding applies to every output as-is.
  applies_to_all = (is_unspecified(shardings) or
                    isinstance(shardings, sharding.Sharding))
  if not applies_to_all:
    out_shardings_flat = flatten_axis_resources(
        "pjit out_shardings", out_tree(), shardings,
        tupled_args=False)
  else:
    out_shardings_flat = (shardings,) * num_outputs

  layouts = tree_unflatten(out_layouts_treedef, out_layouts_leaves)
  if layouts is not None:
    out_layouts_flat = flatten_axis_resources(
        "pjit out_layouts", out_tree(), layouts, tupled_args=False)
  else:
    out_layouts_flat = (layouts,) * num_outputs

  if config.dynamic_shapes.value:
    return out_shardings_flat, out_layouts_flat

  result_names = None if debug_info is None else debug_info.result_paths
  pjit_check_aval_sharding(
      out_shardings_flat, out_avals, result_names,
      "pjit outputs", allow_uneven_sharding=False)
  check_aval_layout_compatibility(
      out_layouts_flat, out_avals, result_names, "jit outputs")
  return out_shardings_flat, out_layouts_flat


# One tracked-attribute record: (object, attribute name, pytree structure of
# the attribute's value, abstract values of the value's leaves).
AttrRecord = tuple[object, str, PyTreeDef, list[core.AbstractValue]]
# Weak-keyed map from a function object (`fun.f`) to the per-function
# defaultdict used by `seen_attrs_get`; entries do not keep the key alive.
_seen_attrs = weakref.WeakKeyDictionary()  # type: ignore

def seen_attrs_get(
    fun: lu.WrappedFun,
    in_type: core.InputType | tuple[core.AbstractValue, ...]
) -> list:
  """Return the (mutable) list of attribute cases recorded for ``fun``.

  Cases are stored per underlying function ``fun.f`` and, within that, per
  ``(fun.transforms, fun.params, in_type)``. Missing entries are created
  empty, and the returned list is the stored object itself, so appending to
  it updates the record.
  """
  per_function = _seen_attrs.setdefault(fun.f, defaultdict(list))
  assert fun.in_type is None or fun.in_type == in_type
  case_key = (fun.transforms, fun.params, in_type)
  return per_function[case_key]

def _attr_token(
    fun: lu.WrappedFun,
    in_type: core.InputType | tuple[core.AbstractValue, ...]
) -> int:
  """Return the index of the recorded attribute case matching current state.

  A case matches when, for every record in it, the attribute's current value
  has the recorded pytree structure and the recorded abstract leaf values.
  If no recorded case matches, the number of recorded cases is returned.
  """
  from jax.experimental.attrs import jax_getattr

  def still_matches(records):
    for obj, attr, treedef, avals in records:
      current_leaves, current_treedef = tree_flatten(jax_getattr(obj, attr))
      if treedef != current_treedef:
        return False
      if avals != map(shaped_abstractify, current_leaves):
        return False
    return True

  cases = seen_attrs_get(fun, in_type)
  for index, records in enumerate(cases):
    if still_matches(records):
      return index
  return len(cases)

def _attr_update(fun, in_type, i, attrs_tracked):
  """Record (or re-check) attribute case ``i`` for ``fun`` and ``in_type``.

  A record is built for every entry of ``attrs_tracked`` from the attribute's
  current leaves. If ``i`` is one past the last stored case the records are
  appended; otherwise they must equal the case already stored at ``i``.
  """
  from jax.experimental.attrs import jax_getattr
  records = []
  for init_tree, _, (obj, attr) in attrs_tracked:
    current_leaves = tree_leaves(jax_getattr(obj, attr))
    records.append(
        (obj, attr, init_tree, map(shaped_abstractify, current_leaves)))
  cases = seen_attrs_get(fun, in_type)
  if i != len(cases):
    assert i < len(cases) and cases[i] == records
  else:
    cases.append(records)


@dataclasses.dataclass(frozen=True)
class IgnoreKey:
  """Wrapper whose payload takes no part in hashing or equality.

  Any two ``IgnoreKey`` instances compare equal and hash alike regardless of
  ``val``, so a value can ride along in a cache key without affecting lookups.
  """
  val: Any

  def __eq__(self, other):
    # Deliberately ignores self.val.
    return isinstance(other, IgnoreKey)

  def __hash__(self):
    return hash(type(self))


def pjit_check_aval_sharding(
    shardings, flat_avals, names: tuple[str, ...] | None,
    what_aval: str, allow_uneven_sharding: bool):
  """Check that each sharding is compatible with its abstract value's shape.

  Unspecified and auto shardings are skipped. For every other sharding the
  shape is validated through ``check_compatible_aval`` when the sharding
  provides it (otherwise through ``_to_xla_hlo_sharding``). Unless
  ``allow_uneven_sharding`` is set, every dimension size must also be
  divisible by the number of ways that dimension is sharded.

  Args:
    shardings: one sharding per entry of ``flat_avals``.
    flat_avals: abstract values; only ``.shape`` is read.
    names: optional per-value names used in error messages.
    what_aval: description of the values (e.g. ``"pjit arguments"``) used in
      error messages.
    allow_uneven_sharding: when true, the divisibility check is skipped.

  Raises:
    ValueError: if a sharding rejects the shape (chained to the underlying
      error) or a dimension is not evenly divisible.
  """
  new_names = [''] * len(shardings) if names is None else names
  for aval, s, name in zip(flat_avals, shardings, new_names):
    if is_unspecified_or_auto(s):
      continue
    name_str = f' with pytree key path {name}' if name else ''
    shape = aval.shape
    try:
      # Sharding interfaces can implement `check_compatible_aval` as an optional
      # method to raise a more meaningful error.
      if hasattr(s, 'check_compatible_aval'):
        s.check_compatible_aval(shape)
      else:
        s._to_xla_hlo_sharding(len(shape))
    except ValueError as e:
      # Chain explicitly so the traceback shows the underlying error as the
      # direct cause rather than as an incidental "during handling" context.
      raise ValueError(
          f'One of {what_aval}{name_str} is incompatible with its sharding '
          f'annotation {s}: {e}') from e
    # Use the `OpSharding` proto to find out how many ways each dimension of
    # the aval is sharded. This approach will work across all
    # Sharding.
    hlo_sharding = s._to_xla_hlo_sharding(len(shape))
    assert hlo_sharding is not None
    num_ways_dim_sharded, _ = op_shardings.get_num_ways_dim_sharded(hlo_sharding)
    for i, size in enumerate(num_ways_dim_sharded):
      if not allow_uneven_sharding and shape[i] % size != 0:
        raise ValueError(f"One of {what_aval}{name_str} was given the sharding "
                         f"of {s}, which implies that "
                         f"the global size of its dimension {i} should be "
                         f"divisible by {size}, but it is equal to {shape[i]} "
                         f"(full shape: {shape})")


def check_aval_layout_compatibility(
    layouts, flat_avals, names: tuple[str, ...] | None, what_aval: str):
  """Check that each concrete layout is compatible with its value's shape.

  Entries whose layout is ``None`` or an ``AutoLayout`` are skipped; every
  other layout is validated with ``layout.check_compatible_aval(shape)``.

  Args:
    layouts: one layout (or ``None``) per entry of ``flat_avals``.
    flat_avals: abstract values; only ``.shape`` is read.
    names: optional per-value names used in error messages.
    what_aval: description of the values (e.g. ``"jit arguments"``) used in
      error messages.

  Raises:
    ValueError: if a layout rejects the shape; chained to the underlying
      error.
  """
  new_names = [''] * len(layouts) if names is None else names
  for aval, l, name in zip(flat_avals, layouts, new_names):
    if l is None or isinstance(l, AutoLayout):
      continue
    name_str = f' with pytree key path {name}' if name else ''
    shape = aval.shape
    try:
      l.check_compatible_aval(shape)
    except ValueError as e:
      # Chain explicitly so the underlying error is reported as the cause.
      raise ValueError(
          f'One of {what_aval}{name_str} is incompatible with its layout '
          f'annotation {l}: {e}') from e


# -------------------- pjit rules --------------------

# The primitive named "pjit". It is flagged as producing multiple results, so
# binding it yields a sequence of outputs rather than a single value.
pjit_p = core.AxisPrimitive("pjit")
pjit_p.multiple_results = True


def _resolve_in_layouts(args, jit_in_layouts, resolved_in_shardings, in_avals):
  """Pick the layout to use for every input of a jitted call.

  Args:
    args: the flat runtime arguments.
    jit_in_layouts: per-argument layouts given to jit (``None`` when absent).
    resolved_in_shardings: the already-resolved per-argument shardings.
    in_avals: per-argument abstract values.

  Returns:
    A tuple with one resolved layout (or ``None``) per argument.

  Raises:
    ValueError: if a committed argument carries a layout that differs from
      the layout requested through jit.
  """
  # If device or backend is set, return the default layout. This is because you
  # can pass arrays on cpu (with untiled layouts) to jit with backend='tpu'
  # which causes error checks to fail. Returning the default layout allows
  # this to exist. It's the same for handling shardings.
  if pxla.check_device_backend_on_shardings(resolved_in_shardings):
    return (None,) * len(jit_in_layouts)

  layouts_out = []
  for arg, requested, in_sharding, aval in safe_zip(
      args, jit_in_layouts, resolved_in_shardings, in_avals):
    committed = getattr(arg, '_committed', True)

    # `arg_layout` is only used for the mismatch check further down, so the
    # default layout is kept as-is there to give nicer errors.
    # `dispatch_arg_layout` is the value handed on: default layouts become
    # None to simplify dispatch and lowering logic downstream.
    arg_layout = dispatch_arg_layout = None
    if hasattr(arg, 'layout'):
      arg_layout = arg.layout.device_local_layout
      if not pxla.is_default_layout(arg_layout, in_sharding, aval):
        dispatch_arg_layout = arg_layout

    # Sharding can be unspecified when array is committed if it's a PmapSharding.
    is_pmap_sharding = (
        is_unspecified(in_sharding) or
        isinstance(getattr(arg, 'sharding', None), PmapSharding))
    layout_is_binding = committed and not is_pmap_sharding

    if requested is None:
      # Nothing requested: only a committed, non-pmap argument keeps its own
      # (non-default) layout.
      layouts_out.append(dispatch_arg_layout if layout_is_binding else None)
      continue

    # arg_layout can be None because some backends don't implement the
    # required layout methods. Hence `arr.layout` can return
    # `Layout(None, sharding)`
    if (layout_is_binding
        and arg_layout is not None
        and not pxla.is_user_xla_layout_equal(requested, arg_layout)):
      extra_msg = (
          ' The layout given to `jax.jit` is `DeviceLocalLayout.AUTO` but'
          ' the corresponding argument passed is a `jax.Array` with a'
          ' concrete layout. Consider passing a `jax.ShapeDtypeStruct`'
          ' instead of `jax.Array` as an argument to the jitted function '
          ' when using `DeviceLocalLayout.AUTO`.'
      ) if isinstance(requested, AutoLayout) else ''
      raise ValueError('Layout passed to jit does not match the layout '
                        'on the respective arg. '
                        f'Got pjit layout: {requested},\n'
                        f'arg layout: {arg_layout} for '
                        f'arg shape: {shaped_abstractify(arg).str_short()}.'
                        f'{extra_msg}')
    layouts_out.append(requested)
  return tuple(layouts_out)


def _resolve_in_shardings(args, pjit_in_shardings: Sequence[PjitSharding]
                          ) -> Sequence[PjitSharding]:
  """Resolves the shardings given to jit against the concrete call arguments.

  For every (argument, jit sharding) pair:

  * jit sharding unspecified: a committed argument contributes its own
    sharding (a `PmapSharding` resolves to `UNSPECIFIED` instead), an
    uncommitted single-device argument resolves to `UNSPECIFIED`, and an
    argument without a sharding stays unspecified.
  * jit sharding specified: it is validated against the argument and then
    used as is.

  Args:
    args: flat call arguments. An argument with no `sharding` attribute (or
      whose `sharding` is None) is treated as unspecified and uncommitted; an
      argument without a `_committed` attribute is treated as committed.
    pjit_in_shardings: one sharding (or `UNSPECIFIED`) per argument.

  Returns:
    A tuple with one resolved sharding per argument. If
    `pxla.check_device_backend_on_shardings(pjit_in_shardings)` is true,
    `pjit_in_shardings` is returned unchanged.

  Raises:
    NotImplementedError: an uncommitted argument is not single-device sharded
      and jit did not specify a sharding for it.
    ValueError: a `np.ndarray` argument is given a sharding that is not fully
      replicated while `xb.process_count() > 1`; or the memory kind of the jit
      sharding differs from the argument's; or a committed, non-`PmapSharding`
      argument has a sharding that is not equivalent to the jit sharding.
  """
  # If True, means that device or backend is set by the user on pjit and it
  # has the same semantics as device_put i.e. doesn't matter which device the
  # arg is on, reshard it to the device mentioned. So don't do any of the
  # checks and just return the pjit_in_shardings directly. `shard_args` will
  # handle the resharding.
  if pxla.check_device_backend_on_shardings(pjit_in_shardings):
    return pjit_in_shardings

  resolved_in_shardings = []
  for arg, pjit_in_s in zip(args, pjit_in_shardings):
    # arg sharding can be None in case of ShapeDtypeStruct. jax.Array does
    # not allow None as the sharding.
    arg_s, committed = ((arg.sharding, getattr(arg, '_committed', True))
                        if hasattr(arg, 'sharding') and arg.sharding is not None
                        else (UNSPECIFIED, False))
    if is_unspecified(pjit_in_s):
      if is_unspecified(arg_s):
        resolved_in_shardings.append(arg_s)
      elif committed:
        # If the arg has a PmapSharding, then reshard it unconditionally.
        if isinstance(arg_s, PmapSharding):
          resolved_in_shardings.append(UNSPECIFIED)
        else:
          resolved_in_shardings.append(arg_s)
      elif dispatch.is_single_device_sharding(arg_s):
        resolved_in_shardings.append(UNSPECIFIED)
      else:
        raise NotImplementedError('Having uncommitted Array sharded on '
                                  'multiple devices is not supported.')
    else:
      if (isinstance(arg, np.ndarray) and
          not pjit_in_s.is_fully_replicated and  # type: ignore
          xb.process_count() > 1):
        raise ValueError(
            'Passing non-trivial shardings for numpy '
            'inputs is not allowed. To fix this error, either specify a '
            'replicated sharding explicitly or use '
            '`jax.experimental.multihost_utils.host_local_array_to_global_array(...)` '
            'to convert your host local numpy inputs to a jax.Array which you '
            'can pass to pjit. '
            'If the numpy input is the same on each process, then you can use '
            '`jax.make_array_from_callback(...) to create a `jax.Array` which '
            'you can pass to pjit. '
            'Please see the jax.Array migration guide for more information '
            'https://jax.readthedocs.io/en/latest/jax_array_migration.html#handling-of-host-local-inputs-to-pjit-like-batch-etc. '
            f'Got arg shape: {arg.shape}, arg value: {arg}')
      if not is_unspecified(arg_s):
        # jax.jit does not allow resharding across different memory kinds even
        # if the argument is uncommitted. Use jax.device_put for those cases,
        # either outside or inside jax.jit.
        if pjit_in_s.memory_kind != arg_s.memory_kind:  # type: ignore
          raise ValueError(
              'Memory kinds passed to jax.jit does not match memory kind on the'
              f' respective arg. Got pjit memory kind: {pjit_in_s.memory_kind}, '  # type: ignore
              f'arg memory kind: {arg_s.memory_kind} for '  # pytype: disable=attribute-error
              f'arg shape: {shaped_abstractify(arg).str_short()}')
        # Only committed arguments are required to already match the requested
        # sharding; PmapSharding arguments are exempt from this check.
        if (committed and
            not isinstance(arg_s, PmapSharding) and
            not op_shardings.are_op_shardings_equal(
                pjit_in_s._to_xla_hlo_sharding(arg.ndim),  # type: ignore
                arg_s._to_xla_hlo_sharding(arg.ndim))):
          raise ValueError('Sharding passed to pjit does not match the sharding '
                           'on the respective arg. '
                           f'Got pjit sharding: {pjit_in_s},\n'
                           f'arg sharding: {arg_s} for '
                           f'arg shape: {shaped_abstractify(arg).str_short()}')
      resolved_in_shardings.append(pjit_in_s)

  return tuple(resolved_in_shardings)


def _resolve_and_lower(
    args, jaxpr, in_shardings, out_shardings, in_layouts,
    out_layouts, resource_env, donated_invars, name, keep_unused, inline,
    lowering_platforms, lowering_parameters, pgle_profiler):
  """Resolves input shardings and layouts against `args`, then lowers `jaxpr`.

  Shardings are resolved first because the layout resolution consumes the
  resolved shardings. Returns whatever `_pjit_lower` returns.
  """
  resolved_shardings = _resolve_in_shardings(args, in_shardings)
  resolved_layouts = _resolve_in_layouts(
      args, in_layouts, resolved_shardings, jaxpr.in_avals)
  return _pjit_lower(
      jaxpr, resolved_shardings, out_shardings, resolved_layouts, out_layouts,
      resource_env, donated_invars, name, keep_unused, inline,
      lowering_platforms=lowering_platforms,
      lowering_parameters=lowering_parameters,
      pgle_profiler=pgle_profiler)

def _pjit_call_impl_python(
    *args, jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
    resource_env, donated_invars, name, keep_unused, inline):
  """Lowers, compiles and runs `jaxpr` on `args`.

  When `config.enable_pgle` is on and `config.pgle_profiling_runs` is
  positive, a `profiler.PGLEProfiler` is looked up (or created) for `jaxpr`
  and handed to the lowering; if it already yields an FDO profile, that
  profile is passed to `compile` as the `'fdo_profile'` compile option.

  The compiled object is recorded in
  `_most_recent_pjit_call_executable.weak_key_dict[jaxpr]`.

  Returns:
    A pair `(outputs, compiled)`, where `outputs` is the result of
    `compiled.unsafe_call(*args)`.

  Raises:
    FloatingPointError: if running the compiled computation raised one. When
      the jaxpr has more than one equation it is first re-evaluated with
      `core.jaxpr_as_fun`, which may raise its own error; otherwise a
      FloatingPointError with an explanatory message is raised.
  """
  compile_options = None
  pgle_profiler = None
  pgle_profiler_dict = _most_recent_pjit_call_executable.weak_pgle_profiler_dict
  if config.enable_pgle.value and config.pgle_profiling_runs.value > 0:
    if jaxpr not in pgle_profiler_dict:
      pgle_profiler_dict[jaxpr] = profiler.PGLEProfiler(
          config.pgle_profiling_runs.value,
          config.pgle_aggregation_percentile.value)

    pgle_profiler = pgle_profiler_dict[jaxpr]
    # The method below will return FDO profile when module was profiled
    # config.jax_pgle_profiling_runs amount of times, otherwise the result will
    # be None.
    fdo_profile = pgle_profiler.consume_fdo_profile()
    if fdo_profile is not None:
      compile_options = {'fdo_profile': fdo_profile}

  # TODO(patrios): Do not pass mutable profile session through cached lowering
  # chain. Instead we need to move profilers dictionary to pxla module and use
  # module as key. Right now we can't do that since there is no way to evict
  # the _pjit_lower_cached cache in PGLE mode.
  compiled = _resolve_and_lower(
      args, jaxpr=jaxpr, in_shardings=in_shardings,
      out_shardings=out_shardings, in_layouts=in_layouts,
      out_layouts=out_layouts, resource_env=resource_env,
      donated_invars=donated_invars, name=name, keep_unused=keep_unused,
      inline=inline, lowering_platforms=None,
      lowering_parameters=mlir.LoweringParameters(),
      pgle_profiler=pgle_profiler
  ).compile(compile_options)

  _most_recent_pjit_call_executable.weak_key_dict[jaxpr] = compiled
  # This check is expensive so only do it if enable_checks is on.
  if compiled._auto_spmd_lowering and config.enable_checks.value:
    pxla.check_array_xla_sharding_layout_match(
        args, compiled._in_shardings, compiled._in_layouts,
        jaxpr.jaxpr.debug_info, compiled._kept_var_idx)
  if config.distributed_debug.value:
    # Defensively only perform fingerprint logic if debug logging is enabled
    # NOTE(skyewm): I didn't benchmark this
    fingerprint = None
    if hasattr(compiled.runtime_executable(), "fingerprint"):
      fingerprint = compiled.runtime_executable().fingerprint
    if fingerprint is not None:
      fingerprint = fingerprint.hex()
    distributed_debug_log(("Running pjit'd function", name),
                          ("in_shardings", in_shardings),
                          ("out_shardings", out_shardings),
                          ("in_layouts", in_layouts),
                          ("out_layouts", out_layouts),
                          ("abstract args", map(xla.abstractify, args)),
                          ("fingerprint", fingerprint))
  try:
    return compiled.unsafe_call(*args), compiled
  except FloatingPointError as e:
    assert config.debug_nans.value or config.debug_infs.value  # compiled_fun can only raise in this case

    # Re-run equation by equation to try to get a more precise error. Skipped
    # for jaxprs with at most one equation.
    if len(jaxpr.eqns) > 1:
      _ = core.jaxpr_as_fun(jaxpr)(*args)  # may raise, not return

    # If control reaches this line, we got a NaN on the output of `compiled`
    # but not from evaluating the jaxpr on the same arguments. Let's tell the
    # user.
    msg = (f"{str(e)}. Because "
           "config.jax_debug_nans and/or config.jax_debug_infs is set, the "
           "de-optimized function (i.e., the function as if the `jit` "
           "decorator were removed) was called in an attempt to get a more "
           "precise error message. However, the de-optimized function did not "
           "produce invalid values during its execution. This behavior can "
           "result from `jit` optimizations causing the invalid value to be "
           "produced. It may also arise from having nan/inf constants as "
           "outputs, like `jax.jit(lambda ...: jax.numpy.nan)(...)`. "
           "\n\n"
           "It may be possible to avoid the invalid value by removing the "
           "`jit` decorator, at the cost of losing optimizations. "
           "\n\n"
           "If you see this error, consider opening a bug report at "
           "https://github.com/jax-ml/jax.")
    raise FloatingPointError(msg)


@weakref_lru_cache
def _get_jaxpr_as_fun(jaxpr, in_shardings, out_shardings, in_layouts,
                      out_layouts, resource_env, donated_invars, name,
                      keep_unused, inline):
  """Returns a callable that evaluates `jaxpr` on its positional arguments.

  The result of this function is stored by `weakref_lru_cache`. Returning
  `core.jaxpr_as_fun(jaxpr)` directly would make the stored value hold a strong
  reference to the jaxpr, defeating the purpose of the weakref cache. The
  returned callable therefore only closes over a weak reference and
  dereferences it when called.
  """
  jaxpr_ref = weakref.ref(jaxpr)

  def call_jaxpr(*args):
    return core.jaxpr_as_fun(jaxpr_ref())(*args)

  return call_jaxpr


def _pjit_call_impl(*args, jaxpr,
                    in_shardings, out_shardings, in_layouts, out_layouts,
                    resource_env,
                    donated_invars, name, keep_unused, inline):
  """Impl rule for `pjit_p`: builds an `xc._xla.pjit` callable and calls it.

  The callable is given `_get_jaxpr_as_fun(...)` as the function and a
  cache-miss handler that runs `_pjit_call_impl_python` and returns the
  outputs together with fastpath data and the "rebuild with FDO" flag.
  """
  def call_impl_cache_miss(*args_, **kwargs_):
    # Runs with the `args` of the enclosing call; `args_`/`kwargs_` are unused.
    out_flat, compiled = _pjit_call_impl_python(
        *args, jaxpr=jaxpr, in_shardings=in_shardings,
        out_shardings=out_shardings, in_layouts=in_layouts,
        out_layouts=out_layouts, resource_env=resource_env,
        donated_invars=donated_invars, name=name, keep_unused=keep_unused,
        inline=inline)
    pgle_profiler = _read_pgle_profiler(jaxpr)
    fastpath_data = _get_fastpath_data(
        compiled, tree_structure(out_flat), args, out_flat, [], jaxpr.effects,
        jaxpr.consts, None, pgle_profiler)
    rebuild_with_fdo = _need_to_rebuild_with_fdo(pgle_profiler)
    return out_flat, fastpath_data, rebuild_with_fdo

  f = _get_jaxpr_as_fun(
      jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
      resource_env, donated_invars, name, keep_unused, inline)
  donated_argnums = tuple(
      idx for idx, is_donated in enumerate(donated_invars) if is_donated)

  # The two supported extension versions differ only in the sixth positional
  # argument (cache key object vs. donated argnums) and in how "explicit
  # sharding/layout" is determined for picking the global cache.
  if xla_extension_version >= 286:
    cache_key = pxla.JitGlobalCppCacheKeys(
        donate_argnums=donated_argnums, donate_argnames=None,
        device=None, backend=None,
        in_shardings_treedef=None, in_shardings_leaves=in_shardings,
        out_shardings_treedef=None, out_shardings_leaves=out_shardings,
        in_layouts_treedef=None, in_layouts_leaves=in_layouts,
        out_layouts_treedef=None, out_layouts_leaves=out_layouts,
        use_resource_env=resource_env is not None)
    key_or_donations = cache_key
    is_explicit = cache_key.contains_explicit_attributes
  else:
    is_explicit = _pjit_explicit_sharding_and_layout(
        in_shardings, out_shardings, in_layouts, out_layouts, None, None)
    key_or_donations = donated_argnums

  cpp_pjit = xc._xla.pjit(
      name, f, call_impl_cache_miss, [], [], key_or_donations,
      tree_util.dispatch_registry, pxla.cc_shard_arg,
      _get_cpp_global_cache(is_explicit))
  return cpp_pjit(*args)

pjit_p.def_impl(_pjit_call_impl)


def _pjit_lower(*args, **kwargs):
  """Forwards every argument to `_pjit_lower_cached` and returns its result."""
  lowering = _pjit_lower_cached(*args, **kwargs)
  return lowering


@weakref_lru_cache
def _pjit_lower_cached(
    jaxpr: core.ClosedJaxpr,
    in_shardings,
    out_shardings,
    in_layouts: pxla.MaybeLayout,
    out_layouts: pxla.MaybeLayout,
    resource_env,
    donated_invars,
    name: str,
    keep_unused: bool,
    inline: bool,
    *,
    lowering_platforms: tuple[str, ...] | None,
    lowering_parameters: mlir.LoweringParameters,
    pgle_profiler: profiler.PGLEProfiler | None):
  """Lowers `jaxpr` via `pxla.lower_sharding_computation`.

  With a `resource_env`, its physical mesh is used as the context mesh and the
  API name is 'pjit'; without one the context mesh is None and the API name is
  'jit'. `inline` and `resource_env` itself are not forwarded to the lowering.
  """
  if resource_env is None:
    mesh, api_name = None, 'jit'
  else:
    mesh, api_name = resource_env.physical_mesh, 'pjit'
  return pxla.lower_sharding_computation(
      jaxpr, api_name, name, in_shardings, out_shardings,
      in_layouts, out_layouts, tuple(donated_invars),
      keep_unused=keep_unused, context_mesh=mesh,
      lowering_platforms=lowering_platforms,
      lowering_parameters=lowering_parameters,
      pgle_profiler=pgle_profiler)


def pjit_staging_rule(trace, *args, **params):
  """Staging rule for `pjit_p`.

  Outputs that merely forward an input are first pruned from the jaxpr (see
  `_pjit_forwarding`). The remaining computation is then either inlined into
  `trace`, staged as an explicit equation (dynamic shapes), or bound through
  `trace.default_process_primitive`. Finally the forwarded inputs are spliced
  back into the output list at their original positions.
  """
  jaxpr, in_fwd, out_shardings, out_layouts = _pjit_forwarding(
      params['jaxpr'], params['out_shardings'], params['out_layouts'])
  params = dict(params, jaxpr=jaxpr, out_shardings=out_shardings,
                out_layouts=out_layouts)

  # Inlining is only done when requested and when no sharding or layout was
  # specified on any input or output.
  should_inline = (
      params["inline"] and
      all(is_unspecified(i) for i in params["in_shardings"]) and
      all(is_unspecified(o) for o in params["out_shardings"]) and
      all(i is None for i in params["in_layouts"]) and
      all(o is None for o in params["out_layouts"]))

  if should_inline:
    if config.dynamic_shapes.value:
      # Inline jaxpr doesn't handle dynamic shapes when inlining. If dynamic
      # shapes are enabled, use eval_jaxpr, which uses the tracing machinery,
      # but redundantly performs abstract evaluation again.
      out_tracers = core.eval_jaxpr(jaxpr.jaxpr, jaxpr.consts, *args,
                                    propagate_source_info=False)
    else:
      out_tracers = pe.inline_jaxpr_into_trace(
          trace, jaxpr.jaxpr, jaxpr.consts, *args)
  elif config.dynamic_shapes.value:
    source_info = source_info_util.current()
    out_tracers = []
    for aval in _out_type(jaxpr):
      if type(aval) is core.DShapedArray:
        # Replace positional shape references by the referenced input argument
        # or by an output tracer created earlier in this loop.
        dims = []
        for d in aval.shape:
          if type(d) is core.InDBIdx:
            dims.append(args[d.val])
          elif type(d) is core.OutDBIdx:
            dims.append(out_tracers[d.val])
          else:
            dims.append(d)
        aval = aval.update(shape=tuple(core.get_referent(d) for d in dims))
      out_tracers.append(pe.DynamicJaxprTracer(trace, aval, source_info))
    eqn_invars = map(trace.getvar, args)
    eqn_outvars = map(trace.makevar, out_tracers)
    eqn = core.new_jaxpr_eqn(
        eqn_invars, eqn_outvars, pjit_p, params, jaxpr.effects, source_info)
    trace.frame.add_eqn(eqn)
  elif any(isinstance(c, core.MutableArray) for c in jaxpr.consts):
    # Mutable-array constants become extra trailing inputs with unspecified
    # sharding, no layout and no donation.
    jaxpr, consts = pxla._move_mutable_consts(jaxpr)
    consts = map(trace.instantiate_const, consts)
    num_consts = len(consts)
    new_params = dict(
        params, jaxpr=jaxpr,
        in_shardings=(*params['in_shardings'], *((UNSPECIFIED,) * num_consts)),
        in_layouts=(*params['in_layouts'], *((None,) * num_consts)),
        donated_invars=(*params['donated_invars'], *((False,) * num_consts)))
    out_tracers = trace.default_process_primitive(
        pjit_p, (*args, *consts), new_params)
  else:
    out_tracers = trace.default_process_primitive(pjit_p, args, params)

  # Re-insert forwarded inputs: an int entry in `in_fwd` selects `args[f]`,
  # anything else consumes the next computed output.
  computed = iter(out_tracers)
  merged = []
  for f in in_fwd:
    merged.append(args[f] if type(f) is int else next(computed))
  assert next(computed, None) is None
  return merged
pe.custom_staging_rules[pjit_p] = pjit_staging_rule


def _pjit_forwarding(jaxpr, out_shardings, out_layouts):
  """Prunes jaxpr outputs that are plain forwards of inputs.

  An output is only treated as forwarded when its out sharding is unspecified
  and its out layout is None.

  Returns:
    `(jaxpr, in_fwd, out_shardings, out_layouts)`: the pruned jaxpr, a list
    with one entry per original output (the forwarding entry reported by
    `pe._jaxpr_forwarding`, or None if the output is kept), and the
    shardings/layouts of the kept outputs as lists.
  """
  candidates: list[int | None] = pe._jaxpr_forwarding(jaxpr.jaxpr)
  in_fwd = []
  for fwd, o_sharding, o_layout in zip(candidates, out_shardings, out_layouts):
    may_forward = is_unspecified(o_sharding) and o_layout is None
    in_fwd.append(fwd if may_forward else None)

  keep = [f is None for f in in_fwd]
  pruned_jaxpr = pe.prune_closed_jaxpr_outputs(jaxpr, keep)
  kept_shardings = [o for o, k in zip(out_shardings, keep) if k]
  kept_layouts = [o for o, k in zip(out_layouts, keep) if k]
  return pruned_jaxpr, in_fwd, kept_shardings, kept_layouts

def pjit_forwarding_rule(eqn):
  """Forwarding rule for `pjit_p` equations.

  Returns:
    `(fwd_vars, new_eqn)`: `fwd_vars` has one entry per original output, the
    forwarded input variable or None; `new_eqn` is `eqn` with the forwarded
    outputs removed from its jaxpr, outvars, out shardings and out layouts.
  """
  old_params = eqn.params
  jaxpr, in_fwd, out_shardings, out_layouts = _pjit_forwarding(
      old_params['jaxpr'], old_params['out_shardings'],
      old_params['out_layouts'])
  kept_outvars = [v for v, f in zip(eqn.outvars, in_fwd) if f is None]
  new_eqn = eqn.replace(
      params=dict(eqn.params, jaxpr=jaxpr,
                  out_shardings=tuple(out_shardings),
                  out_layouts=tuple(out_layouts)),
      outvars=kept_outvars)
  fwd_vars = [None if f is None else eqn.invars[f] for f in in_fwd]
  return fwd_vars, new_eqn
pe.forwarding_rules[pjit_p] = pjit_forwarding_rule


# TODO(mattjj): remove/trivialize this when jaxprs have type annotation on them,
# since it's actually not possible in general to infer the type from the term
def _out_type(jaxpr: core.ClosedJaxpr) -> list[core.AbstractValue]:
  out = []
  in_idx = {v: i for i, v in enumerate(jaxpr.jaxpr.invars)}
  out_idx = {x: i for i, x in enumerate(jaxpr.jaxpr.invars)
             if type(x) is core.Var}
  for x in jaxpr.jaxpr.outvars:
    aval = x.aval
    if type(aval) is core.DShapedArray:
      shape = [core.InDBIdx(in_idx[d]) if d in in_idx else
               core.OutDBIdx(out_idx[d]) if d in out_idx else
               d for d in x.aval.shape]
      aval = aval.update(shape=tuple(shape))
    out.append(aval)
  return out


def _pjit_typecheck(ctx_factory, *in_atoms, jaxpr, **params):
  """Typechecks a `pjit_p` application by delegating to `core._check_call`.

  The remaining params are passed through with the unwrapped `jaxpr.jaxpr`
  added under the `call_jaxpr` key.
  """
  call_params = {**params, 'call_jaxpr': jaxpr.jaxpr}
  return core._check_call(ctx_factory, pjit_p, in_atoms, call_params)
core.custom_typechecks[pjit_p] = _pjit_typecheck


def _pjit_abstract_eval(*args, jaxpr, **_):
  return jaxpr.out_avals, jaxpr.effects
# Registered as an effectful abstract eval rule: the rule returns the jaxpr's
# effects alongside its output avals.
pjit_p.def_effectful_abstract_eval(_pjit_abstract_eval)


def _pjit_cached_lower_jaxpr_to_fun(ctx, name, jaxpr, effects, in_shardings,
                                    out_shardings, in_layouts, out_layouts,
                                    api_name):
  """Lowers `jaxpr` to an MLIR function, memoized on the module context.

  The cache lives in `ctx.module_context.cached_primitive_lowerings`. Its key
  combines the primitive, name, jaxpr, effects, the device count read from the
  axis context (None for other axis context types), the shardings wrapped in
  `pxla.SemanticallyEqualShardings`, the layouts and the API name.
  """
  mod_ctx = ctx.module_context
  axis_ctx = mod_ctx.axis_context
  if isinstance(axis_ctx, sharding_impls.ShardingContext):
    num_devices = axis_ctx.num_devices
  elif isinstance(axis_ctx, sharding_impls.SPMDAxisContext):
    num_devices = axis_ctx.mesh.size
  else:
    num_devices = None
  key = (pjit_p, name, jaxpr, effects, num_devices,
         pxla.SemanticallyEqualShardings(in_shardings, jaxpr.in_avals),
         pxla.SemanticallyEqualShardings(out_shardings, jaxpr.out_avals),
         in_layouts, out_layouts, api_name)

  lowerings = mod_ctx.cached_primitive_lowerings
  cached = lowerings.get(key, None)
  if cached is not None:
    return cached

  # Unspecified shardings are passed to the lowering as None.
  arg_shardings = [None if is_unspecified(i) else i for i in in_shardings]
  result_shardings = [None if is_unspecified(o) else o for o in out_shardings]
  # TODO(b/228598865): inlined calls cannot have shardings set directly on the
  # inputs or outputs because they are lost during MLIR->HLO conversion.
  # use_sharding_annotations=False means we add an identity operation instead.
  func = mlir.lower_jaxpr_to_fun(
      mod_ctx, name, jaxpr, effects, ctx.name_stack,
      arg_shardings=arg_shardings, result_shardings=result_shardings,
      use_sharding_annotations=False, api_name=api_name,
      arg_layouts=in_layouts, result_layouts=out_layouts)
  lowerings[key] = func
  return func


def _pjit_lowering(ctx, *args, name, jaxpr, in_shardings,
                   out_shardings, in_layouts, out_layouts, resource_env,
                   donated_invars, keep_unused, inline):
  """MLIR lowering rule for `pjit_p`: emits a call to the lowered jaxpr.

  The callee comes from `_pjit_cached_lower_jaxpr_to_fun`. The call takes the
  dimension variable values, one token per effect, then `args`; its results
  are one token per effect followed by the outputs. The output tokens are
  recorded on `ctx` and only the non-token results are returned.
  """
  effects = list(ctx.tokens_in.effects())
  num_tokens = len(effects)
  result_types = map(mlir.aval_to_ir_type, ctx.avals_out)
  output_types = [mlir.token_type()] * num_tokens + result_types
  flat_output_types = mlir.flatten_ir_types(output_types)

  if resource_env is None:
    api_name = 'jit'
  else:
    api_name = 'pjit'
  func = _pjit_cached_lower_jaxpr_to_fun(
      ctx, name, jaxpr, tuple(effects), in_shardings,
      out_shardings, in_layouts, out_layouts, api_name=api_name)

  tokens_in = [ctx.tokens_in.get(eff) for eff in effects]
  call_operands = (*ctx.dim_var_values, *tokens_in, *args)
  call = func_dialect.CallOp(flat_output_types,
                             ir.FlatSymbolRefAttr.get(func.name.value),
                             mlir.flatten_ir_values(call_operands))
  mlir.wrap_compute_type_in_place(ctx, call)
  out_nodes = mlir.unflatten_ir_values_like_types(call.results, output_types)
  tokens, out_nodes = split_list(out_nodes, [num_tokens])
  ctx.set_tokens_out(
      ctx.tokens_in.update_tokens(mlir.TokenSet(zip(effects, tokens))))
  return out_nodes

mlir.register_lowering(pjit_p, _pjit_lowering)


def _pjit_batcher(spmd_axis_name, axis_size, axis_name, main_type,
                  vals_in, dims_in, jaxpr, in_shardings, out_shardings,
                  in_layouts, out_layouts, resource_env, donated_invars, name,
                  keep_unused, inline):
  """Batching rule for `pjit_p`.

  Batches the jaxpr, adjusts the sharding of every batched input and output
  for the new axis, and re-binds `pjit_p` on the batched jaxpr.

  Returns:
    `(vals_out, axes_out)`: the batched outputs and their resolved batch axes.

  Raises:
    NotImplementedError: if any input or output layout is not None.
  """
  segment_lens, dims_in = batching.indirectify_ragged_axes(dims_in)
  new_jaxpr, axes_out = batching.batch_jaxpr2(
      jaxpr, axis_size, dims_in, axis_name=axis_name,
      spmd_axis_name=spmd_axis_name, main_type=main_type)

  mesh = None if resource_env is None else resource_env.physical_mesh

  def batch_sharding(s, axis, aval):
    # Shardings of unbatched values are left untouched.
    if axis is None:
      return s
    return _pjit_batcher_for_sharding(s, axis, spmd_axis_name, mesh, aval.ndim)

  # TODO(axch): prepend with Nones (?) to account for new segment_lens inputs
  in_shardings = tuple(
      batch_sharding(s, axis, aval)
      for axis, s, aval in zip(dims_in, in_shardings, new_jaxpr.in_avals))
  out_shardings = tuple(
      batch_sharding(s, axis, aval)
      for axis, s, aval in zip(axes_out, out_shardings, new_jaxpr.out_avals))
  # TODO(yashkatariya): Figure out layouts should change under vmap.
  if (any(l is not None for l in in_layouts) or
      any(l is not None for l in out_layouts)):
    raise NotImplementedError(
        'Concrete layouts are not supported for vmap(jit).')

  vals_out = pjit_p.bind(
      *vals_in,
      jaxpr=new_jaxpr,
      in_shardings=in_shardings,
      out_shardings=out_shardings,
      in_layouts=in_layouts,
      out_layouts=out_layouts,
      resource_env=resource_env,
      donated_invars=donated_invars,
      name=name,
      keep_unused=keep_unused,
      inline=inline)

  resolved_axes_out = batching.resolve_ragged_axes_against_inputs_outputs(
      vals_in, vals_out, axes_out)
  return vals_out, resolved_axes_out

batching.spmd_axis_primitive_batchers[pjit_p] = _pjit_batcher
batching.axis_primitive_batchers[pjit_p] = partial(_pjit_batcher, None)

def _pjit_batcher_for_sharding(
    s: sharding.Sharding | UnspecifiedValue,
    dim: int, spmd_axis_name: tuple[str, ...] | None, mesh, ndim: int):
  """Returns sharding `s` adjusted for a batch axis inserted at `dim`.

  Unspecified shardings are returned as is. With an `spmd_axis_name`, the new
  axis is partitioned over that name and a `NamedSharding` is returned.
  Without one, the new axis is left unpartitioned; a replicated sharding is
  returned unchanged.

  Raises:
    ValueError: if `spmd_axis_name` is given, `s` is not a `NamedSharding`,
      and `mesh` is None or empty.
  """
  if is_unspecified(s):
    return s
  hlo_s = s._to_xla_hlo_sharding(ndim)  # type: ignore

  if spmd_axis_name is not None:
    if isinstance(s, NamedSharding) and isinstance(s.mesh, AbstractMesh):
      parsed_pspec = s._parsed_pspec.insert_axis_partitions(dim, spmd_axis_name)
      return NamedSharding._from_parsed_pspec(s.mesh, parsed_pspec)
    # A NamedSharding carries its own mesh, which takes precedence over the
    # `mesh` argument.
    if isinstance(s, NamedSharding):
      mesh = s.mesh
    if mesh is None or mesh.empty:
      raise ValueError(
          'If you are using spmd_axis_name parameter of jax.vmap,'
          ' please make sure to run your jitted function inside the mesh'
          ' context manager. Only `jax.lax.with_sharding_constraint` with'
          ' `jax.sharding.NamedSharding` as an input can be transformed with'
          ' spmd_axis_name batching rules outside of an explicit mesh context'
          f' manager scope{s!r}')
    parsed_pspec = parse_flatten_op_sharding(hlo_s, mesh)[0]
    parsed_pspec = parsed_pspec.insert_axis_partitions(dim, spmd_axis_name)
    return NamedSharding._from_parsed_pspec(mesh, parsed_pspec)

  # No spmd axis name from here on.
  if sharding_impls.is_op_sharding_replicated(hlo_s):
    return s
  if isinstance(s, NamedSharding) and isinstance(s.mesh, AbstractMesh):
    parsed_pspec = s._parsed_pspec.insert_axis_partitions(dim, None)
    return NamedSharding._from_parsed_pspec(s.mesh, parsed_pspec)
  # Insert a tile dimension of size 1 at `dim` in a copy of the op sharding.
  new_op = hlo_s.to_proto().clone()
  tile_dims = list(new_op.tile_assignment_dimensions)
  tile_dims.insert(dim, 1)
  new_op.tile_assignment_dimensions = tile_dims
  new_gs = GSPMDSharding(
      s._device_assignment, new_op,  # type: ignore
      _device_list=getattr(s, '_internal_device_list', None))
  return pxla._get_out_sharding_from_orig_sharding([new_gs], [None], s, None)[0]


def _pjit_jvp(primals_in, tangents_in,
              jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
              resource_env, donated_invars, name, keep_unused, inline):
  """JVP rule for `pjit_p`.

  Binds `pjit_p` on the JVP of `jaxpr`. Only non-zero tangents are passed in
  and computed; each tangent input/output reuses the sharding, layout and
  donation flag of the corresponding primal.

  Returns:
    `(primals_out, tangents_out)`, where tangents known to be zero are
    `ad.Zero` instances.
  """
  if any(isinstance(c, core.MutableArray) for c in jaxpr.consts):
    # Mutable-array constants become extra trailing primal inputs with
    # unspecified sharding, no layout and no donation.
    jaxpr, mut_primals = pxla._move_mutable_consts(jaxpr)
    mut_tangents = map(ad_util.zeros_like_jaxval, mut_primals)
    num_mut = len(mut_primals)
    primals_in = [*primals_in, *mut_primals]
    tangents_in = [*tangents_in, *mut_tangents]
    in_shardings = (*in_shardings, *((UNSPECIFIED,) * num_mut))
    in_layouts = (*in_layouts, *((None,) * num_mut))
    donated_invars = (*donated_invars, *((False,) * num_mut))

  # Tangents of inputs whose aval is an AbstractRef are replaced by
  # `ad_util.zeros_like_aval` of that aval.
  fixed_tangents = []
  for tangent, in_aval in zip(tangents_in, jaxpr.in_avals):
    if isinstance(in_aval, AbstractRef):
      fixed_tangents.append(ad_util.zeros_like_aval(in_aval))
    else:
      fixed_tangents.append(tangent)
  tangents_in = fixed_tangents

  is_nz_tangents_in = [type(t) is not ad.Zero for t in tangents_in]
  jaxpr_jvp, is_nz_tangents_out = ad.jvp_jaxpr(
      jaxpr, is_nz_tangents_in, instantiate=False)

  def nz_in(xs):
    return tuple(x for nz, x in zip(is_nz_tangents_in, xs) if nz)

  def nz_out(xs):
    return tuple(x for nz, x in zip(is_nz_tangents_out, xs) if nz)

  outputs = pjit_p.bind(
      *primals_in, *nz_in(tangents_in),
      jaxpr=jaxpr_jvp,
      in_shardings=(*in_shardings, *nz_in(in_shardings)),
      out_shardings=(*out_shardings, *nz_out(out_shardings)),
      in_layouts=(*in_layouts, *nz_in(in_layouts)),
      out_layouts=(*out_layouts, *nz_out(out_layouts)),
      resource_env=resource_env,
      donated_invars=(*donated_invars, *nz_in(donated_invars)),
      name=name,
      keep_unused=keep_unused,
      inline=inline)

  num_primals_out = len(jaxpr.jaxpr.outvars)
  primals_out, tangents_out = split_list(outputs, [num_primals_out])
  assert len(primals_out) == len(jaxpr.jaxpr.outvars)
  # Re-expand the computed tangents, filling in symbolic zeros.
  computed_tangents = iter(tangents_out)
  full_tangents_out = []
  for nz, out_aval in zip(is_nz_tangents_out, jaxpr.out_avals):
    full_tangents_out.append(
        next(computed_tangents) if nz else ad.Zero(out_aval))
  return primals_out, full_tangents_out
ad.primitive_jvps[pjit_p] = _pjit_jvp


@weakref_lru_cache
def _known_jaxpr_fwd(known_jaxpr: core.ClosedJaxpr,
                     in_fwd: tuple[int | None, ...]) -> core.ClosedJaxpr:
  """Returns `known_jaxpr` restricted to outputs whose `in_fwd` entry is None."""
  inner = known_jaxpr.jaxpr
  kept_outvars = []
  for outvar, fwd in zip(inner.outvars, in_fwd):
    if fwd is None:
      kept_outvars.append(outvar)
  return known_jaxpr.replace(jaxpr=inner.replace(outvars=kept_outvars))


def _pjit_partial_eval(trace, *in_tracers,
                       jaxpr, in_shardings, out_shardings,
                       in_layouts, out_layouts, resource_env, donated_invars,
                       name, keep_unused, inline):
  """Partial evaluation rule for `pjit_p`.

  Splits `jaxpr` into a known and an unknown part based on which input tracers
  have known partial values. The known part is bound right away with
  `pjit_p.bind`; the unknown part is staged as a `pjit_p` equation recipe
  attached to fresh unknown output tracers. Residuals flow from the known part
  to the unknown part as trailing inputs with `UNSPECIFIED` sharding, `None`
  layout and no donation.

  Before binding, outputs of the known jaxpr that are plain forwards of known
  inputs, and residuals that duplicate a known primal output, are pruned from
  the known jaxpr and substituted back after the bind.

  Returns:
    The outputs in original order: known values and unknown tracers merged
    according to `unknown_outs`.
  """
  in_pvals = [t.pval for t in in_tracers]

  known_ins = tuple(pv.is_known() for pv in in_pvals)
  unknown_ins = tuple(not k for k in known_ins)
  known_jaxpr, unknown_jaxpr, unknown_outs, res_avals = \
      pe.partial_eval_jaxpr_nounits(jaxpr, unknown_ins, instantiate=False)
  unknown_outs = tuple(unknown_outs)
  known_outs = tuple(not uk for uk in unknown_outs)
  num_residuals = len(res_avals)
  # Residuals carry no user-specified sharding or layout.
  res_shardings = (UNSPECIFIED,) * num_residuals
  res_layouts = (None,) * num_residuals

  # Selects the elements of `l` whose flag in `should_keep` is truthy.
  def keep_where(l, should_keep):
    return tuple(x for x, keep in zip(l, should_keep) if keep)

  # Outputs of the known jaxpr are the known primal outputs followed by the
  # residuals.
  known_out_shardings = keep_where(out_shardings, known_outs) + res_shardings
  known_out_layouts = keep_where(out_layouts, known_outs) + res_layouts

  # Input-to-output forwarding: compute which outputs are just forwarded inputs.
  num_out_primals = len(known_jaxpr.out_avals) - num_residuals
  in_fwd: list[int | None] = pe._jaxpr_forwarding(known_jaxpr.jaxpr)
  # Only forward primal outputs when corresponding out_sharding is UNSPECIFIED
  # and the corresponding out_layout is None. Residual forwards are kept as is.
  in_fwd_primal, in_fwd_res = split_list(in_fwd, [num_out_primals])
  in_fwd = [
      fwd if is_unspecified(os) and ol is None else None
      for os, ol, fwd in zip(
          keep_where(out_shardings, known_outs),
          keep_where(out_layouts, known_outs), in_fwd_primal)
  ] + in_fwd_res
  del in_fwd_primal, in_fwd_res
  # Prune jaxpr outputs and out_shardings by removing the input-forwards.
  keep = [f is None for f in in_fwd]
  known_jaxpr = pe.prune_closed_jaxpr_outputs(known_jaxpr, keep)
  known_out_shardings = keep_where(known_out_shardings, keep)
  known_out_layouts = keep_where(known_out_layouts, keep)
  # Update num_out_primals to reflect pruning.
  kept_primals, kept_res = split_list(keep, [num_out_primals])
  num_out_primals = sum(kept_primals)
  del keep, kept_primals, kept_res

  # Output-to-output forwarding: compute which residuals are just primal outputs
  out_vars, res_vars = split_list(known_jaxpr.jaxpr.outvars, [num_out_primals])
  # Matching is by object identity of the output variables.
  idx_map = {id(v): i for i, v in enumerate(out_vars)}
  out_fwd = [None] * num_out_primals + [idx_map.get(id(v)) for v in res_vars]
  # Prune jaxpr outputs and out_shardings by removing forwarded residuals.
  keep = [f is None for f in out_fwd]
  known_jaxpr = pe.prune_closed_jaxpr_outputs(known_jaxpr, keep)
  known_out_shardings = keep_where(known_out_shardings, keep)
  known_out_layouts = keep_where(known_out_layouts, keep)
  del keep

  known_params = dict(
      jaxpr=known_jaxpr, in_shardings=keep_where(in_shardings, known_ins),
      out_shardings=known_out_shardings,
      in_layouts=keep_where(in_layouts, known_ins),
      out_layouts=known_out_layouts, resource_env=resource_env,
      donated_invars=keep_where(donated_invars, known_ins),
      name=name, keep_unused=keep_unused, inline=inline)
  # Sanity check: one sharding and one layout per remaining known output.
  assert len(known_params['out_shardings']) == len(known_params['jaxpr'].out_avals)
  assert len(known_params['out_layouts']) == len(known_params['jaxpr'].out_avals)

  # Bind known things to pjit_p.
  known_inputs = [pv.get_known() for pv in in_pvals if pv.is_known()]
  all_known_outs = pjit_p.bind(*known_inputs, **known_params)
  # Add back in the output fwds.
  all_known_outs = subs_list(out_fwd, all_known_outs, all_known_outs)
  # Add back in the input fwds.
  all_known_outs = subs_list(in_fwd, known_inputs, all_known_outs)

  # The trailing `num_residuals` values are the residuals.
  known_out_vals, residual_vals = \
      split_list(all_known_outs, [len(all_known_outs) - num_residuals])
  residual_tracers = map(trace.new_instantiated_const, residual_vals)

  # The convention of partial_eval_jaxpr_nounits is to place residual binders at
  # the front of the jaxpr produced, so we move them to the back since both the
  # jaxpr equation built below and the pjit transpose rule assume a
  # residual-inputs-last convention.
  unknown_jaxpr = pe.move_binders_to_back(
      unknown_jaxpr, [True] * num_residuals + [False] * sum(unknown_ins))
  # Prepare unknown tracers
  unknown_params = dict(
      jaxpr=unknown_jaxpr,
      in_shardings=(keep_where(in_shardings, unknown_ins) + res_shardings),
      out_shardings=keep_where(out_shardings, unknown_outs),
      in_layouts=(keep_where(in_layouts, unknown_ins) + res_layouts),
      out_layouts=keep_where(out_layouts, unknown_outs),
      resource_env=resource_env,
      donated_invars=(keep_where(donated_invars, unknown_ins) +
                      (False,) * num_residuals),
      name=name,
      keep_unused=keep_unused,
      inline=inline)
  unknown_tracers_in = [t for t in in_tracers if not t.pval.is_known()]
  unknown_out_avals = unknown_jaxpr.out_avals
  unknown_tracers_out = [
      pe.JaxprTracer(trace, pe.PartialVal.unknown(aval), None)
      for aval in unknown_out_avals
  ]
  # The staged equation takes the unknown inputs followed by the residuals.
  eqn = pe.new_eqn_recipe((*unknown_tracers_in, *residual_tracers),
                          unknown_tracers_out,
                          pjit_p,
                          unknown_params,
                          unknown_jaxpr.effects,
                          source_info_util.current())
  for t in unknown_tracers_out: t.recipe = eqn
  return merge_lists(unknown_outs, known_out_vals, unknown_tracers_out)

pe.custom_partial_eval_rules[pjit_p] = _pjit_partial_eval


def _pjit_partial_eval_custom_params_updater(
    unks_in: Sequence[bool], inst_in: Sequence[bool],
    kept_outs_known: Sequence[bool], kept_outs_staged: Sequence[bool],
    num_res_out: int, num_res_in: int, params_known: dict, params_staged: dict
  ) -> tuple[dict, dict]:
  """Rebuilds the per-argument pjit params of the known and staged equations.

  Both incoming param dicts still carry shardings, layouts and donation flags
  with their pre-split arity. This function filters those sequences with the
  given masks and pads them for the residuals so that their lengths agree with
  the ``in_avals``/``out_avals`` of the jaxpr stored in each dict.

  Args:
    unks_in: mask over the original inputs; used to filter the known side's
      inputs (the first component of ``pe.partition_list`` is kept).
    inst_in: mask over the original inputs; used to filter the staged side's
      inputs (the second component of ``pe.partition_list`` is kept).
    kept_outs_known: mask selecting which original outputs the known jaxpr
      keeps.
    kept_outs_staged: mask selecting which original outputs the staged jaxpr
      keeps.
    num_res_out: number of residual outputs appended to the known jaxpr.
    num_res_in: number of residual inputs prepended to the staged jaxpr.
    params_known: params of the known equation; its ``'jaxpr'`` entry is used
      for the arity checks.
    params_staged: params of the staged equation; its ``'jaxpr'`` entry is
      used for the arity checks.

  Returns:
    A pair ``(new_params_known, new_params_staged)`` of fresh dicts; the
    argument dicts are not mutated.
  """
  def _first_part(mask, values):
    # Entries that `pe.partition_list` puts in its first result.
    first, _ = pe.partition_list(mask, values)
    return tuple(first)

  def _second_part(mask, values):
    # Entries that `pe.partition_list` puts in its second result.
    _, second = pe.partition_list(mask, values)
    return tuple(second)

  def _assert_arity(params):
    # Every per-argument sequence must line up with the jaxpr's signature.
    closed_jaxpr = params['jaxpr']
    assert len(params['in_shardings']) == len(closed_jaxpr.in_avals)
    assert len(params['out_shardings']) == len(closed_jaxpr.out_avals)
    assert len(params['in_layouts']) == len(closed_jaxpr.in_avals)
    assert len(params['out_layouts']) == len(closed_jaxpr.out_avals)

  # Known side: filter inputs by unks_in; residual outputs go at the end and
  # get no sharding/layout requirement.
  known_donated = _first_part(unks_in, params_known['donated_invars'])
  known_in_shardings = _first_part(unks_in, params_known['in_shardings'])
  known_out_shardings = _second_part(
      kept_outs_known, params_known['out_shardings'])
  known_in_layouts = _first_part(unks_in, params_known['in_layouts'])
  known_out_layouts = _second_part(
      kept_outs_known, params_known['out_layouts'])

  new_params_known = dict(
      params_known,
      in_shardings=known_in_shardings,
      out_shardings=known_out_shardings + (UNSPECIFIED,) * num_res_out,
      in_layouts=known_in_layouts,
      out_layouts=known_out_layouts + (None,) * num_res_out,
      donated_invars=known_donated)
  _assert_arity(new_params_known)

  # Staged side: residual inputs come first (never donated, no sharding/layout
  # requirement), followed by the inputs selected by inst_in.
  staged_donated = _second_part(inst_in, params_staged['donated_invars'])
  staged_in_shardings = _second_part(inst_in, params_staged['in_shardings'])
  staged_out_shardings = _second_part(
      kept_outs_staged, params_staged['out_shardings'])
  staged_in_layouts = _second_part(inst_in, params_staged['in_layouts'])
  staged_out_layouts = _second_part(
      kept_outs_staged, params_staged['out_layouts'])

  new_params_staged = dict(
      params_staged,
      in_shardings=(UNSPECIFIED,) * num_res_in + staged_in_shardings,
      out_shardings=staged_out_shardings,
      in_layouts=(None,) * num_res_in + staged_in_layouts,
      out_layouts=staged_out_layouts,
      donated_invars=(False,) * num_res_in + staged_donated)
  _assert_arity(new_params_staged)

  return new_params_known, new_params_staged

# Register the `pjit_p` entry in `pe.partial_eval_jaxpr_custom_rules`: the
# closed-call rule from `pe`, partially applied with the name of the param
# holding the jaxpr ('jaxpr') and with
# `_pjit_partial_eval_custom_params_updater` as the params updater.
pe.partial_eval_jaxpr_custom_rules[pjit_p] = \
    partial(pe.closed_call_partial_eval_custom_rule, 'jaxpr',
            _pjit_partial_eval_custom_params_updater)


@lu.cache
def _pjit_transpose_trace(fun, in_avals):
  """Traces ``fun`` at ``in_avals`` and wraps the result in a ``ClosedJaxpr``.

  Args:
    fun: the wrapped function to trace with ``pe.trace_to_jaxpr_dynamic``.
    in_avals: abstract values to trace ``fun`` with.

  Returns:
    ``(closed_jaxpr, attrs_tracked)``: the traced jaxpr closed over the
    constants produced by tracing, plus the ``attrs_tracked`` value reported
    by the tracer.
  """
  traced = pe.trace_to_jaxpr_dynamic(fun, in_avals)
  # The second element of the tracing result is not needed here.
  open_jaxpr, _, closure_consts, attrs_tracked = traced
  return core.ClosedJaxpr(open_jaxpr, closure_consts), attrs_tracked


def _pjit_transpose(cts_in, *primals_in,
                    jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
                    resource_env, donated_invars, name, keep_unused, inline):
  """Transpose rule for ``pjit_p``.

  Traces ``ad.closed_backward_pass`` applied to ``jaxpr`` into a new closed
  jaxpr and binds it with ``pjit_p``. The shardings and layouts of the new
  equation are derived from those of the original equation by dropping the
  entries whose corresponding primal is an ``ad.UndefinedPrimal`` or whose
  corresponding cotangent is an ``ad.Zero``.

  Args:
    cts_in: cotangents for the outputs of the original equation; entries may
      be ``ad.Zero``.
    *primals_in: inputs of the original equation; entries may be
      ``ad.UndefinedPrimal``.
    jaxpr, in_shardings, out_shardings, in_layouts, out_layouts, resource_env,
      donated_invars, name, keep_unused, inline: params of the original
      ``pjit_p`` equation. ``donated_invars`` is not forwarded: the transposed
      equation donates nothing.

  Returns:
    The output cotangents, unflattened with the tree structure reported by the
    flattened backward-pass function.
  """
  def prune_type(ty, xs, maybe_zeros):
    # Keep x only where the paired entry of `maybe_zeros` is not exactly `ty`.
    return tuple(x for x, mz in zip(xs, maybe_zeros) if type(mz) is not ty)

  # The function to trace: the backward pass of `jaxpr`, with `jaxpr` and the
  # literal False bound as leading hashable arguments.
  body = lu.wrap_init(ad.closed_backward_pass)
  body = lu.hashable_partial(body, jaxpr, False)
  # NOTE(review): the flattened leaves are presumably only the defined primals
  # and the non-zero cotangents (matching the pruning below) -- confirm against
  # the pytree registrations of ad.UndefinedPrimal / ad.Zero.
  primals_and_nz_cts_in, in_treedef = tree_flatten((primals_in, cts_in))
  body, cts_out_treedef_thunk = flatten_fun_nokwargs(body, in_treedef)

  # Inputs of the transposed computation: shardings/layouts of the primals
  # that are not UndefinedPrimal, then those of the cotangents that are not
  # Zero.
  transpose_in_shardings = (
    *prune_type(ad.UndefinedPrimal, in_shardings, primals_in),
    *prune_type(ad.Zero, out_shardings, cts_in)
  )
  transpose_in_layouts = (
    *prune_type(ad.UndefinedPrimal, in_layouts, primals_in),
    *prune_type(ad.Zero, out_layouts, cts_in)
  )
  global_cts_in_avals = tuple(core.raise_to_shaped(core.get_aval(ct))
                              for ct in primals_and_nz_cts_in)

  transpose_jaxpr, attrs_tracked = _pjit_transpose_trace(
      body, global_cts_in_avals)
  # The thunk is called only after tracing has run `body`.
  cts_out_treedef = cts_out_treedef_thunk()
  # Rebuild the output structure with placeholder leaves so that `prune_type`
  # can drop the original input shardings/layouts at positions that come back
  # as ad.Zero.
  transpose_out_shardings = prune_type(
      ad.Zero,
      in_shardings,
      tree_unflatten(cts_out_treedef, [object()] * cts_out_treedef.num_leaves))
  transpose_out_layouts = prune_type(
      ad.Zero,
      in_layouts,
      tree_unflatten(cts_out_treedef, [object()] * cts_out_treedef.num_leaves))

  if attrs_tracked:
    # Tracked attribute states are threaded through as extra leading inputs
    # and outputs, with unspecified shardings and no layout.
    init_states =  _get_states(attrs_tracked)
    primals_and_nz_cts_in = [*init_states, *primals_and_nz_cts_in]
    transpose_in_shardings = (UNSPECIFIED,) * len(attrs_tracked) + transpose_in_shardings
    transpose_out_shardings = (UNSPECIFIED,) * len(attrs_tracked) + transpose_out_shardings
    transpose_in_layouts = (None,) * len(attrs_tracked) + transpose_in_layouts
    transpose_out_layouts = (None,) * len(attrs_tracked) + transpose_out_layouts

  nz_cts_out = pjit_p.bind(
      *primals_and_nz_cts_in,
      jaxpr=transpose_jaxpr,
      in_shardings=transpose_in_shardings,
      out_shardings=transpose_out_shardings,
      in_layouts=transpose_in_layouts,
      out_layouts=transpose_out_layouts,
      resource_env=resource_env,
      # No input of the transposed computation is donated.
      donated_invars=(False,) * len(primals_and_nz_cts_in),
      name=name,
      keep_unused=keep_unused,
      inline=inline)

  if attrs_tracked:
    # Peel the updated states off the front of the outputs and write them back.
    final_states, nz_cts_out = split_list(nz_cts_out, [len(init_states)])
    _set_states(attrs_tracked, final_states)

  return tree_unflatten(cts_out_treedef, nz_cts_out)
# Register the rule in the `ad.reducing_transposes` table.
ad.reducing_transposes[pjit_p] = _pjit_transpose


@weakref_lru_cache
def _dce_jaxpr_pjit(
    jaxpr: core.ClosedJaxpr, used_outputs: tuple[bool, ...]
) -> tuple[core.ClosedJaxpr, list[bool]]:
  """Runs ``pe.dce_jaxpr`` on the jaxpr inside ``jaxpr`` and re-closes it.

  Args:
    jaxpr: the closed jaxpr to prune.
    used_outputs: one flag per output; passed through to ``pe.dce_jaxpr``.

  Returns:
    ``(closed_jaxpr, used_inputs)``: the pruned jaxpr closed over the original
    ``jaxpr.consts``, and the used-inputs value reported by ``pe.dce_jaxpr``.
  """
  dce_result = pe.dce_jaxpr(jaxpr.jaxpr, used_outputs)
  pruned_jaxpr, used_inputs = dce_result
  closed = core.ClosedJaxpr(pruned_jaxpr, jaxpr.consts)
  return closed, used_inputs


def dce_jaxpr_pjit_rule(used_outputs: list[bool], eqn: core.JaxprEqn
                        ) -> tuple[list[bool], core.JaxprEqn | None]:
  """Dead-code-elimination rule for a ``pjit_p`` equation.

  Args:
    used_outputs: one flag per equation output, set where the output is used.
    eqn: the ``pjit_p`` equation to prune.

  Returns:
    ``(used_inputs, new_eqn)``. ``new_eqn`` is ``None`` when no input is used,
    no output is used and the pruned jaxpr has no effects; otherwise it is a
    new equation over the pruned jaxpr with the per-argument params, invars
    and outvars filtered to match.
  """
  params = eqn.params
  dced_jaxpr, used_inputs = _dce_jaxpr_pjit(
      params['jaxpr'], tuple(used_outputs))

  def select(values, mask):
    return tuple(v for v, flag in zip(values, mask) if flag)

  # Each per-argument param is filtered with the mask of the side it describes.
  masks = (
      ("in_shardings", used_inputs),
      ("out_shardings", used_outputs),
      ("in_layouts", used_inputs),
      ("out_layouts", used_outputs),
      ("donated_invars", used_inputs),
  )
  overrides = {"jaxpr": dced_jaxpr}
  for key, mask in masks:
    overrides[key] = select(params[key], mask)
  new_params = dict(params, **overrides)

  if not (any(used_inputs) or any(used_outputs) or dced_jaxpr.effects):
    # Nothing consumed, nothing produced, no effects: drop the equation.
    return used_inputs, None

  kept_invars = [v for v, used in zip(eqn.invars, used_inputs) if used]
  kept_outvars = [v for v, used in zip(eqn.outvars, used_outputs) if used]
  new_eqn = core.new_jaxpr_eqn(
      kept_invars, kept_outvars, eqn.primitive, new_params,
      dced_jaxpr.effects, eqn.source_info, eqn.ctx)
  return used_inputs, new_eqn

# Register `dce_jaxpr_pjit_rule` as the `pjit_p` entry of `pe.dce_rules`.
pe.dce_rules[pjit_p] = dce_jaxpr_pjit_rule


def _pjit_pp_rule(eqn, context, settings):
  """Pretty-prints a ``pjit_p`` equation, hiding params at their defaults.

  ``inline`` is never printed. The donation flags, shardings, layouts,
  ``keep_unused`` and ``resource_env`` params are printed only when they carry
  a non-default value. ``name`` is printed first, followed by the remaining
  params in sorted order.
  """
  shown = dict(eqn.params)
  del shown['inline']

  def _all_none(values):
    return all(v is None for v in values)

  def _all_unspecified(values):
    return all(is_unspecified(v) for v in values)

  # (param name, predicate that is true when the value is not worth printing),
  # checked in this order.
  hide_if = (
      ('donated_invars', lambda flags: not any(flags)),
      ('in_shardings', _all_unspecified),
      ('out_shardings', _all_unspecified),
      ('in_layouts', _all_none),
      ('out_layouts', _all_none),
      ('keep_unused', lambda keep: not keep),
      ('resource_env',
       lambda env: env is None or env.physical_mesh.empty),
  )
  for key, is_default in hide_if:
    if is_default(shown[key]):
      del shown[key]

  # Move name= to the front to make the resulting equation easier to scan.
  del shown["name"]
  ordered = ["name"] + sorted(shown)
  return core._pp_eqn(eqn, context, settings, params=ordered)

# Register `_pjit_pp_rule` as the `pjit_p` entry of `core.pp_eqn_rules`.
core.pp_eqn_rules[pjit_p] = _pjit_pp_rule


def _pjit_state_discharge_rule(
    in_avals, out_avals, *args, jaxpr, in_shardings, out_shardings,
    in_layouts, out_layouts, **params):
  """State-discharge rule for ``pjit_p``.

  Discharges the state of the inner jaxpr, re-binds ``pjit_p`` on the
  discharged jaxpr, and routes the extra outputs back to the inputs whose
  aval is an ``AbstractRef``.

  Args:
    in_avals: avals of the equation inputs; used to find the ref inputs.
    out_avals: avals of the equation outputs (not used here).
    *args: input values, forwarded unchanged to the new ``pjit_p`` bind.
    jaxpr: the closed jaxpr of the equation.
    in_shardings, out_shardings: must all be unspecified.
    in_layouts, out_layouts: must all be ``None``.
    **params: remaining ``pjit_p`` params, forwarded unchanged.

  Returns:
    ``(new_invals, out_vals)``: ``new_invals`` has one entry per input, the
    corresponding extra output for ref inputs and ``None`` otherwise;
    ``out_vals`` are the first ``len(jaxpr.outvars)`` outputs of the new bind.

  Raises:
    NotImplementedError: if any sharding is specified or any layout is set.
  """
  for shardings in (in_shardings, out_shardings):
    if not all(map(is_unspecified, shardings)):
      raise NotImplementedError

  for layouts in (in_layouts, out_layouts):
    if any(l is not None for l in layouts):
      raise NotImplementedError

  inner_jaxpr, inner_consts = jaxpr.jaxpr, jaxpr.consts
  num_outs = len(inner_jaxpr.outvars)
  discharged_jaxpr, discharged_consts = state_discharge.discharge_state(
      inner_jaxpr, inner_consts)
  discharged_closed_jaxpr = core.ClosedJaxpr(discharged_jaxpr,
                                             discharged_consts)

  # The discharged jaxpr may have a different arity, so the (all-default)
  # shardings and layouts are rebuilt to match it.
  num_new_ins = len(discharged_jaxpr.invars)
  num_new_outs = len(discharged_jaxpr.outvars)
  out_and_ref_vals = pjit_p.bind(
      *args,
      jaxpr=discharged_closed_jaxpr,
      in_shardings=(UnspecifiedValue(),) * num_new_ins,
      out_shardings=(UnspecifiedValue(),) * num_new_outs,
      in_layouts=(None,) * num_new_ins,
      out_layouts=(None,) * num_new_outs,
      **params)

  out_vals, ref_vals = split_list(out_and_ref_vals, [num_outs])
  # Hand out the extra outputs, in order, to the inputs that are refs.
  pending_refs = iter(ref_vals)
  new_invals = tuple(
      next(pending_refs) if isinstance(in_aval, AbstractRef) else None
      for in_aval in in_avals)
  # Every extra output must have been consumed.
  exhausted = object()
  assert next(pending_refs, exhausted) is exhausted
  return new_invals, out_vals
state_discharge.register_discharge_rule(pjit_p)(_pjit_state_discharge_rule)


# -------------------- with_sharding_constraint --------------------

def with_sharding_constraint(x, shardings):
  """Mechanism to constrain the sharding of an Array inside a jitted computation

  This is a strict constraint for the GSPMD partitioner and not a hint. For examples
  of how to use this function, see `Distributed arrays and automatic parallelization`_.

  Args:
    x: PyTree of jax.Arrays which will have their shardings constrained
    shardings: PyTree of sharding specifications. Valid values are the same as for
      the ``in_shardings`` argument of :func:`jax.experimental.pjit`.
  Returns:
    x_with_shardings: PyTree of jax.Arrays with specified sharding constraints.

  .. _Distributed arrays and automatic parallelization: https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html
  """
  x_flat, tree = tree_flatten(x)

  # `shardings` may carry layouts as well; separate the two.
  layouts, shardings = _split_layout_and_sharding(shardings)

  user_shardings = prepare_axis_resources(
      shardings, "shardings", allow_unconstrained_dims=True)
  del shardings

  # Match the shardings and the layouts against the tree structure of `x` so
  # that there is exactly one entry per leaf.
  user_shardings_flat = tuple(
      flatten_axes("with_sharding_constraint shardings", tree, user_shardings))
  del user_shardings

  user_layouts_flat = tuple(
      flatten_axes("with_sharding_constraint layouts", tree, layouts))
  del layouts

  # The mesh comes from the thread-local resource environment.
  resource_env = mesh_lib.thread_resources.env
  mesh = resource_env.physical_mesh

  shardings_flat = [_create_sharding_for_array(mesh, a, 'shardings',
                                               'with_sharding_constraint')
                    for a in user_shardings_flat]
  # TODO(bartchr): remove `unconstrained_dims` after migrating to Shardy. It's
  # already part of the shardings.
  # Use an empty *set* (not `{}`, which is an empty dict) for shardings that
  # are not NamedShardings, so that every entry has the same container type as
  # the result of `get_unconstrained_dims`.
  unconstrained_dims = [get_unconstrained_dims(s)
                        if isinstance(s, NamedSharding) else set()
                        for s in shardings_flat]
  del user_shardings_flat

  pjit_check_aval_sharding(
      shardings_flat, x_flat, None, "with_sharding_constraint arguments",
      allow_uneven_sharding=True)

  check_aval_layout_compatibility(user_layouts_flat, x_flat, None,
                                  "with_sharding_constraint arguments")

  # One `sharding_constraint` primitive per leaf.
  outs = [sharding_constraint_p.bind(xf, sharding=s, layout=l,
                                     resource_env=resource_env,
                                     unconstrained_dims=ud)
          for xf, s, l, ud in zip(x_flat, shardings_flat, user_layouts_flat,
                                  unconstrained_dims)]
  return tree_unflatten(tree, outs)

def _identity_fn(x): return x

def _sharding_constraint_impl(x, sharding, layout, resource_env,
                              unconstrained_dims):
  """Eager implementation of the ``sharding_constraint`` primitive.

  If the target is a ``NamedSharding`` over an ``AbstractMesh``, the target is
  first re-expressed on the mesh of ``x``'s own sharding (after validating
  that ``x`` has a ``NamedSharding`` with the same mesh shape). Then ``x`` is
  returned as-is when it already has an equivalent sharding (and, if a layout
  is requested, that layout); otherwise it is passed through a jitted identity
  function with the target as ``out_shardings``.

  Raises:
    ValueError: for an ``AbstractMesh`` target, if ``x`` has no ``sharding``
      attribute or its mesh shape differs from the target's.
    TypeError: for an ``AbstractMesh`` target, if ``x.sharding`` is not a
      ``NamedSharding``.
  """
  targets_abstract_mesh = (isinstance(sharding, NamedSharding) and
                           isinstance(sharding.mesh, AbstractMesh))
  if targets_abstract_mesh:
    aval = shaped_abstractify(x)
    if not hasattr(x, 'sharding'):
      raise ValueError(
          'Target sharding contains a `jax.sharding.AbstractMesh` which'
          ' requires the input passed should be a `jax.Array`. Got'
          f' {type(x)} with shape {aval.str_short()}')
    if not isinstance(x.sharding, NamedSharding):
      raise TypeError(
          'The sharding on the input must be a `NamedSharding` since the target'
          ' sharding has an `AbstractMesh` in it. Got sharding type'
          f' {type(x.sharding)} for shape {aval.str_short()}')
    input_mesh_shape = x.sharding.mesh.shape_tuple
    target_mesh_shape = sharding.mesh.shape_tuple
    if input_mesh_shape != target_mesh_shape:
      raise ValueError(
          f'Mesh shape of the input {x.sharding.mesh.shape_tuple} does not'
          ' match the mesh shape of the target sharding'
          f' {sharding.mesh.shape_tuple} for shape {aval.str_short()}')
    # Swap the abstract mesh for the concrete mesh of the input.
    sharding = NamedSharding._from_parsed_pspec(
        x.sharding.mesh, sharding._parsed_pspec)

  if layout is not None:
    if (hasattr(x, 'layout') and x.layout.device_local_layout == layout and
        x.sharding.is_equivalent_to(sharding, x.ndim)):
      return x
    target = Layout(layout, sharding)
  else:
    if hasattr(x, 'sharding') and x.sharding.is_equivalent_to(sharding, x.ndim):
      return x
    target = sharding
  # Run a jit here to raise good errors when device assignment don't match.
  return api.jit(_identity_fn, out_shardings=target)(x)


# The primitive bound by `with_sharding_constraint`, one per array leaf.
sharding_constraint_p = core.Primitive("sharding_constraint")
sharding_constraint_p.def_impl(_sharding_constraint_impl)
# Abstract evaluation passes the input aval through unchanged.
sharding_constraint_p.def_abstract_eval(lambda x, **_: x)
# Transpose rule: bind the same primitive, with the same params, on the
# cotangent.
ad.deflinear2(sharding_constraint_p,
              lambda ct, _, **params: (sharding_constraint_p.bind(ct, **params),))

def _sharding_constraint_hlo_lowering(ctx, x_node, *, sharding, layout,
                                      resource_env, unconstrained_dims):
  """Lowers ``sharding_constraint`` to a sharding op and, optionally, a layout op.

  Under an ``SPMDAxisContext`` with manual axes, the manual axes are first
  added to ``sharding``. The sharding is then converted to the Shardy form or
  to an XLA ``OpSharding`` proto depending on
  ``config.use_shardy_partitioner``, and ``x_node`` is wrapped with it. When
  ``layout`` is not ``None`` the result is additionally wrapped with a layout
  op.

  Returns:
    A one-element list holding the lowered value.
  """
  (in_aval,) = ctx.avals_in
  (result_aval,) = ctx.avals_out
  rank = in_aval.ndim

  axis_context = ctx.module_context.axis_context
  in_manual_region = (
      isinstance(axis_context, sharding_impls.SPMDAxisContext) and
      axis_context.manual_axes)
  if in_manual_region:
    sharding = mlir.add_manual_axes(axis_context, sharding, rank)

  if not config.use_shardy_partitioner.value:
    lowered_sharding = sharding._to_xla_hlo_sharding(rank).to_proto()
  else:
    lowered_sharding = sharding._to_sdy_sharding(rank)

  constrained = mlir.wrap_with_sharding_op(
      ctx, x_node, result_aval, lowered_sharding,
      unspecified_dims=unconstrained_dims)
  if layout is None:
    return [constrained]
  return [mlir.wrap_with_layout_op(ctx, constrained, result_aval, layout,
                                   in_aval)]
mlir.register_lowering(sharding_constraint_p,
                       _sharding_constraint_hlo_lowering)


def _sharding_constraint_batcher(
    spmd_axis_name, axis_size, axis_name, main_type, vals_in,
    dims_in, sharding, layout, resource_env, unconstrained_dims):
  """Batching rule for the ``sharding_constraint`` primitive.

  Re-binds the primitive on the batched operand with a sharding and a set of
  unconstrained dimensions adjusted for the batch dimension.

  Args:
    spmd_axis_name: mesh axis name(s) given to vmap for the batch dimension,
      or ``None``.
    axis_size, axis_name, main_type: part of the batcher signature; not used
      by this rule.
    vals_in: one-element sequence holding the batched operand.
    dims_in: one-element sequence holding the operand's batch dimension.
    sharding, layout, resource_env, unconstrained_dims: params of the
      equation being batched.

  Returns:
    ``(y, d)``: the constrained batched value and its batch dimension, which
    is the same as the input's.

  Raises:
    ValueError: if ``spmd_axis_name`` overlaps with the axis names used in a
      ``NamedSharding``'s spec.
    NotImplementedError: if ``layout`` is not ``None``.
  """
  if spmd_axis_name is not None and isinstance(sharding, NamedSharding):
    # Collect every axis name mentioned in the spec; tuple entries are
    # flattened, other entries are taken as-is.
    used = {n for ns in sharding.spec
            for n in (ns if isinstance(ns, tuple) else (ns,))}
    if set(spmd_axis_name) & used:
      raise ValueError(f"vmap spmd_axis_name {spmd_axis_name} cannot appear in "
                       "with_sharding_constraint spec, but got spec "
                       f"{sharding.spec}")
  x, = vals_in
  d, = dims_in

  # `d <= ud` is a bool, so this adds 1 to every unconstrained index that sits
  # at or after the batch dimension and leaves the others unchanged.
  unconstrained_dims = {ud + (d <= ud) for ud in unconstrained_dims}
  if spmd_axis_name is None:
    # Without an spmd axis name the batch dimension itself is unconstrained.
    unconstrained_dims.add(d)

  vmapped_sharding = _pjit_batcher_for_sharding(
      sharding, d, spmd_axis_name, resource_env.physical_mesh, x.ndim)
  if unconstrained_dims and isinstance(vmapped_sharding, NamedSharding):
    # Pad the spec with None up to x.ndim entries, then mark the unconstrained
    # dimensions in it.
    new_spec = list(vmapped_sharding.spec) + [None] * (x.ndim - len(vmapped_sharding.spec))
    for u in unconstrained_dims:
      new_spec[u] = PartitionSpec.UNCONSTRAINED
    vmapped_sharding = NamedSharding(
        vmapped_sharding.mesh, PartitionSpec(*new_spec))

  # TODO(yashkatariya): Figure out how layouts should change under vmap.
  if layout is not None:
    raise NotImplementedError(
        'Concrete layout is not supported for vmap(with_sharding_constraint). '
        f'Got layout {layout}')

  y = sharding_constraint_p.bind(
      x,
      sharding=vmapped_sharding,
      layout=layout,
      resource_env=resource_env,
      unconstrained_dims=unconstrained_dims)
  return y, d
# Register the rule both for vmap with an spmd axis name and, with
# `spmd_axis_name` fixed to None, for the plain axis-primitive batcher table.
batching.spmd_axis_primitive_batchers[sharding_constraint_p] = _sharding_constraint_batcher
batching.axis_primitive_batchers[sharding_constraint_p] = partial(
    _sharding_constraint_batcher, None)

# -------------------- helpers --------------------

def get_unconstrained_dims(sharding: NamedSharding):
  """Returns the set of indices whose entry in the parsed spec is ``None``.

  Args:
    sharding: a sharding whose ``_parsed_pspec`` attribute is set.

  Returns:
    A ``set`` with the position of every ``None`` entry of
    ``sharding._parsed_pspec``.
  """
  parsed_pspec = sharding._parsed_pspec
  assert parsed_pspec is not None
  unconstrained = set()
  for dim, axes in enumerate(parsed_pspec):
    if axes is None:
      unconstrained.add(dim)
  return unconstrained


def _get_partition_spec(
    ppspec: Sequence[ParsedPartitionSpec]) -> Sequence[PartitionSpec]:
  """Applies ``get_single_pspec`` to each entry of ``ppspec``, in order.

  Returns:
    A list with one converted entry per element of ``ppspec``.
  """
  partition_specs = []
  for parsed in ppspec:
    partition_specs.append(get_single_pspec(parsed))
  return partition_specs


def get_op_sharding_from_executable(
    executable) -> tuple[Sequence[xc.OpSharding], Sequence[xc.OpSharding]]:
  """Reads the parameter and output shardings off ``executable``.

  Args:
    executable: an object exposing ``get_parameter_shardings()`` and
      ``get_output_shardings()``.

  Returns:
    ``(in_op_shardings, out_op_shardings)``. Each component is whatever the
    corresponding getter returned, or an empty list if it returned ``None``.
  """
  parameter_shardings = executable.get_parameter_shardings()
  in_op_shardings = [] if parameter_shardings is None else parameter_shardings

  output_shardings = executable.get_output_shardings()
  out_op_shardings = [] if output_shardings is None else output_shardings

  return in_op_shardings, out_op_shardings


def _get_ppspec_from_executable(
    executable, mesh
  ) -> tuple[Sequence[ParsedPartitionSpec], Sequence[ParsedPartitionSpec]]:
  """Parses the op shardings of ``executable`` into flat lists of parsed specs.

  Each input and output op sharding is passed, together with ``mesh``, to
  ``parse_flatten_op_sharding``; the results are concatenated in order.

  Returns:
    ``(in_ppspec, out_ppspec)``: the concatenated parse results for the
    inputs and for the outputs.
  """
  in_op_shardings, out_op_shardings = get_op_sharding_from_executable(
      executable)

  in_ppspec = [
      parsed
      for op_sharding in in_op_shardings
      for parsed in parse_flatten_op_sharding(op_sharding, mesh)
  ]
  out_ppspec = [
      parsed
      for op_sharding in out_op_shardings
      for parsed in parse_flatten_op_sharding(op_sharding, mesh)
  ]
  return in_ppspec, out_ppspec


def get_pspec_from_executable(
    executable, mesh: pxla.Mesh
) -> tuple[tuple[PartitionSpec, ...], tuple[PartitionSpec, ...]]:
  """Returns the input and output partition specs of ``executable`` as tuples.

  Args:
    executable: the executable whose shardings are read.
    mesh: the mesh used to parse the executable's op shardings.

  Returns:
    ``(in_partition_specs, out_partition_specs)``.
  """
  parsed_ins, parsed_outs = _get_ppspec_from_executable(executable, mesh)
  # Outputs are converted before inputs, as in the original ordering.
  out_specs = tuple(_get_partition_spec(parsed_outs))
  in_specs = tuple(_get_partition_spec(parsed_ins))
  return in_specs, out_specs
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								# Copyright 2021 The JAX Authors.
 								#
 								# Licensed under the Apache License, Version 2.0 (the "License");
 								# you may not use this file except in compliance with the License.
 								# You may obtain a copy of the License at
 								#
 								#     https://www.apache.org/licenses/LICENSE-2.0
 								#
 								# Unless required by applicable law or agreed to in writing, software
 								# distributed under the License is distributed on an "AS IS" BASIS,
 								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								# See the License for the specific language governing permissions and
 								# limitations under the License.
-												Upgrade remaining sources to Python 3.9

This PR is a follow up to #18881.

The changes were generated by adding

    from __future__ import annotations

to the files which did not already have them and running

    pyupgrade --py39-plus --keep-percent-format {jax,tests,jaxlib,examples,benchmarks}/**/*.py

											
										
										
											2023-12-11 13:59:29 +00:00
+								from __future__ import annotations
-												fix weak key cache stuff

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-05-29 04:03:02 +00:00
+								from collections import defaultdict
-												Run `pyupgrade --py310-plus`.

Also apply manual fixes to import sorting and unused imports.

											
										
										
											2024-06-26 14:44:52 -04:00
+								from collections.abc import Callable, Sequence, Iterable
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								import dataclasses
-												Add `util.cache` to `jax.clear_caches` and move pjit, sharding, array, etc uses of `functools.lru_cache` to `util.cache` so that those caches will be cleared if `jax.clear_caches` is called.

PiperOrigin-RevId: 642359226

											
										
										
											2024-06-11 12:46:11 -07:00
+								from functools import partial
-												Don't call inspect.signature() each time we trace a jit().

We can just call it once when jit itself is called.

While we're here, also don't recompute api_util.fun_sourceinfo.

PiperOrigin-RevId: 607443283

											
										
										
											2024-02-15 13:48:49 -08:00
+								import inspect
-												Add logging if we get a C++ cache miss

PiperOrigin-RevId: 531555996

											
										
										
											2023-05-12 11:14:53 -07:00
+								import logging
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information

The main idea here is that pointing out which parts of the cache key differs
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the context manager `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								import operator as op
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								import weakref
-												Run `pyupgrade --py310-plus`.

Also apply manual fixes to import sorting and unused imports.

											
										
										
											2024-06-26 14:44:52 -04:00
+								from typing import NamedTuple, Any, Union, cast
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								import threading
 								import warnings
-												Apply pyupgrade --py39-plus.

Notable changes:
* use PEP 585 type names
* use PEP 604 type union syntax where `from __future__ import annotations` is present.
* use f-strings in more places.
* remove redundant arguments to open().

											
										
										
											2023-07-21 14:20:39 -04:00
+								import numpy as np
-												Dedupe shardings before passing them to _get_and_check_device_assignment

In practice, the number of different shardings is usually much smaller than
the number of inputs/outputs.

PiperOrigin-RevId: 600558309

											
										
										
											2024-01-22 13:44:34 -08:00
+								from jax._src import api
-												Add a zeros rule for mutable arrays and test it using a custom vjp.

add jit compatibility (have pjit jvp instantiate all ref tangents)

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-04-04 14:33:06 -04:00
+								from jax._src import ad_util
-												Don't call inspect.signature() each time we trace a jit().

We can just call it once when jit itself is called.

While we're here, also don't recompute api_util.fun_sourceinfo.

PiperOrigin-RevId: 607443283

											
										
										
											2024-02-15 13:48:49 -08:00
+								from jax._src import api_util
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								from jax._src import config
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								from jax._src import core
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src import dispatch
-												[key reuse] add eager checks

											
										
										
											2024-02-29 15:30:19 -08:00
+								from jax._src import dtypes
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src import linear_util as lu
-												Dedupe shardings before passing them to _get_and_check_device_assignment

In practice, the number of different shardings is usually much smaller than
the number of inputs/outputs.

PiperOrigin-RevId: 600558309

											
										
										
											2024-01-22 13:44:34 -08:00
+								from jax._src import mesh as mesh_lib
-												Rename jax._src.sharding_utils to jax._src.op_shardings.

Move some more op_sharding related helpers to that module.

PiperOrigin-RevId: 522343010

											
										
										
											2023-04-06 08:31:47 -07:00
+								from jax._src import op_shardings
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured) and then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class at profile.py. The class can track how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter. To do this I added a dictionary to pjit.py which associates a Jaxpr with a profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE needs to be collected, but JAX also will not recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep compilation deterministic.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								from jax._src import profiler
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								from jax._src import sharding_impls
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src import source_info_util
-												Dedupe shardings before passing them to _get_and_check_device_assignment

In practice, the number of different shardings is usually much smaller than
the number of inputs/outputs.

PiperOrigin-RevId: 600558309

											
										
										
											2024-01-22 13:44:34 -08:00
+								from jax._src import stages
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								from jax._src import traceback_util
-												Dedupe shardings before passing them to _get_and_check_device_assignment

In practice, the number of different shardings is usually much smaller than
the number of inputs/outputs.

PiperOrigin-RevId: 600558309

											
										
										
											2024-01-22 13:44:34 -08:00
+								from jax._src import tree_util
 								from jax._src import util
-												[JAX] Move jax._src.lib.xla_bridge to jax._src.xla_bridge.

Limit jax._src.lib to shims around jaxlib and nothing else.

The goal of this change is to avoid a dependency cycle between the rest of jax and jax._src.lib in a Bazel build. This allows the types for jax._src.lib to be inferred by pytype in isolation without referring to the rest of JAX.

PiperOrigin-RevId: 512922397

											
										
										
											2023-02-28 07:01:14 -08:00
+								from jax._src import xla_bridge as xb
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src.api_util import (
 								    argnums_partial_except, flatten_axes, flatten_fun, flatten_fun_nokwargs,
-												Replace donation_vector's logic with `donation_vector_with_in_tree` which is now deleted

PiperOrigin-RevId: 627556267

											
										
										
											2024-04-23 17:37:52 -07:00
+								    donation_vector, shaped_abstractify, check_callable, resolve_argnums,
-												[attrs] allow passing a jax-attrs object to jit functions

currently we don't get any interesting cache hits; only on object identity
match

											
										
										
											2024-02-13 16:45:27 -08:00
+								    argnames_partial_except, debug_info, result_paths, jaxpr_debug_info,
-												Replace donation_vector's logic with `donation_vector_with_in_tree` which is now deleted

PiperOrigin-RevId: 627556267

											
										
										
											2024-04-23 17:37:52 -07:00
+								    hoist_obj_attrs)
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.interpreters import partial_eval as pe
-												Move PartitionSpec into its own file (jax/_src/partition_spec.py).

No functional changes intended.

A subsequent change will move ParsedPartitionSpec and array mapping utilities here also.

PiperOrigin-RevId: 522393166

											
										
										
											2023-04-06 11:42:45 -07:00
+								from jax._src.partition_spec import PartitionSpec
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.interpreters import xla
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src.interpreters import ad
-												migrate internal dependencies from `jax.interpreters.batching` to `jax._src.interpreters.batching`

... in preparation for paring down `jax.interpreters.batching`'s exported symbols.

PiperOrigin-RevId: 508487887

											
										
										
											2023-02-09 15:11:20 -08:00
+								from jax._src.interpreters import batching
 								from jax._src.interpreters import mlir
-												Prune accidental exports from jax.interpreters.pxla.

These imports do not appear to have users outside JAX itself.

PiperOrigin-RevId: 507835295

											
										
										
											2023-02-07 11:16:01 -08:00
+								from jax._src.interpreters import pxla
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src.lib.mlir import ir
 								from jax._src.lib.mlir.dialects import func as func_dialect
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								from jax._src.lib import jax_jit
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src.lib import xla_client as xc
-												[Take 2] Generalize global jit cpp cache keys so we can add more keys than the current donate_argnums.

This allows us to get more cache hits globally. For example:

Before:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache miss
After:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache hit

Reverts b615266175effe4aefeb903620a19f3719a604da

PiperOrigin-RevId: 675746175

											
										
										
											2024-09-17 16:10:41 -07:00
+								from jax._src.lib import xla_extension_version
-												Deprecate `XLACompatibleSharding` in favor of `jax.sharding.Sharding`.

PiperOrigin-RevId: 640544939

											
										
										
											2024-06-05 09:06:36 -07:00
+								from jax._src import sharding
-												Introduce `jax.sharding.AbstractMesh(shape_tuple: tuple[tuple[str, int], ...])` and allow `with_sharding_constraint` and `shard_map` to accept an abstract mesh as input (`with_sharding_constraint` is via `NamedSharding(abstract_mesh, pspec)`).

**Semantics**

Inside jit, we don't need to talk about concrete devices ever so the semantics stay the same as today i.e. we can lower a NamedSharding with abstract mesh with only mesh axis names and sizes and PartitionSpec. The only restriction is that the number of devices needs to be consistent throughout the program when we are tracing.
During compilation, the order of devices throughout the program needs to be consistent (same as before this change).

Outside jit i.e. eager mode, if a `shard_map` or `with_sharding_constraint` contains AbstractMesh, then the input to those primitives should contain a concrete Mesh with the same shape and names as the abstract mesh.

**Why do this?**

There are cases where you want to change the devices in the mesh but keep the mesh shape the same (axis names and axis sizes). But this leads to a device mismatch error if you have `with_sharding_constraint` or `shard_map` in your computation because they embed concrete devices in their signature.

So to fix the error, you need to change the mesh in `wsc` and `shmap` which will lead to a tracing cache miss (because function id is now different) and consequently a lowering to stableHLO cache miss. Explaining via an example:

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(mesh1, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # DEVICE MISMATCH ERROR!
```

The same problem exists for `shard_map` since it takes a mesh with concrete devices in its signature.

**Okay, so how do you fix this?**

As mentioned above, we need the above program to work and get tracing and lowering cache hits (**cache hits are the most important** part here)

The approach in this change allows `with_sharding_constraint` to accept a `NamedSharding(abstract_mesh, pspec)` as input. This leads to no errors downstream and we get tracing and lowering cache hits since we don't encode the concrete devices anymore, just the axis names and axis sizes of the mesh.

**The important part is that the concrete device information should only come from the arguments. Inside `jax.jit`, you should never reference concrete devices ever.**

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(abstract_mesh, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

**One caveat is that this only works with `jax.NamedSharding` but that's fine because `NamedSharding` is the most used `Sharding` in JAX.**

**What about `shard_map`?**

shard_map's signature will be: `shmap(f, mesh: Mesh | AbstractMesh, in_specs: Specs, out_specs: Specs)`.

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = shard_map(lambda x: x, mesh=abstract_mesh, in_specs=P('x'), out_specs=P('x'))(x)
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

This is a fully backwards-compatible change. So your current code will continue to work as is but you can opt into this new behavior and get all the benefits!

PiperOrigin-RevId: 662670932

											
										
										
											2024-08-13 15:17:30 -07:00
+								from jax._src.mesh import AbstractMesh
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.sharding_impls import (
-												Bump minimum jaxlib version to v0.4.30.

This corresponds to xla_extension_version 271 and mlir_api_version 57.

											
										
										
											2024-06-18 11:31:09 -04:00
+								    NamedSharding, GSPMDSharding,
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								    SingleDeviceSharding, PmapSharding, AUTO, UNSPECIFIED, UnspecifiedValue,
-												Remove dead code now that xmap is deleted

PiperOrigin-RevId: 655664512

											
										
										
											2024-07-24 12:39:42 -07:00
+								    ParsedPartitionSpec, get_single_pspec, is_unspecified,
-												Canonicalize to default memory in init of Shardings only on the backends that support memories right now.

PiperOrigin-RevId: 553942534

											
										
										
											2023-08-04 16:26:31 -07:00
+								    is_unspecified_or_auto, prepare_axis_resources, parse_flatten_op_sharding)
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								from jax._src.layout import Layout, DeviceLocalLayout, AutoLayout
-												Add a zeros rule for mutable arrays and test it using a custom vjp.

add jit compatibility (have pjit jvp instantiate all ref tangents)

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-04-04 14:33:06 -04:00
+								from jax._src.state import discharge as state_discharge, RefEffect, AbstractRef
-												migrate internal dependencies from `jax.interpreters.ad` to `jax._src.interpreters.ad`

... in preparation for paring down `jax.interpreters.ad`'s exported symbols.

Includes some import fixups along the way.

PiperOrigin-RevId: 507684262

											
										
										
											2023-02-06 22:51:50 -08:00
+								from jax._src.traceback_util import api_boundary
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								from jax._src.tree_util import (
-												Make attrs work with pytrees

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-05-22 23:30:55 -04:00
+								    tree_flatten, tree_unflatten, treedef_is_leaf, tree_structure, tree_leaves,
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								    treedef_children, broadcast_prefix, all_leaves, prefix_errors, keystr,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								    PyTreeDef, none_leaf_registry as none_lr)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								from jax._src.util import (
-												Fix `name_stack` usage of pjit. Now all the metadata of transformations in hlo are correct.

PiperOrigin-RevId: 501918212

											
										
										
											2023-01-13 12:53:42 -08:00
+								    HashableFunction, safe_map, safe_zip, wraps,
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    distributed_debug_log, split_list, weakref_lru_cache,
-												Don't wrap singleton ir.Types during HLO lowering.

This is similar to https://github.com/google/jax/pull/22211, but for MLIR types instead of MLIR values.

											
										
										
											2024-07-03 16:38:18 -04:00
+								    merge_lists, subs_list, fun_name, fun_qual_name)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								map, unsafe_map = safe_map, map
 								zip, unsafe_zip = safe_zip, zip
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								traceback_util.register_exclusion(__file__)
-												Allow pjit.AUTO to be used with jax.jit. This introduces an API change which requires a mesh to be provided to pjit.AUTO(mesh).

`with mesh:` is no longer required with pjit to use the auto spmd pass of GSPMD.

PiperOrigin-RevId: 533801596

											
										
										
											2023-05-20 22:59:52 -07:00
+								PjitSharding = Union[GSPMDSharding, UnspecifiedValue, AUTO]
 								PjitShardingMinusUnspecified = Union[GSPMDSharding, AUTO]
 								MeshSharding = Union[NamedSharding, UnspecifiedValue, AUTO]
 								MeshShardingMinusUnspecified = Union[NamedSharding, AUTO]
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
-												Add logging if we get a C++ cache miss

PiperOrigin-RevId: 531555996

											
										
										
											2023-05-12 11:14:53 -07:00
+								logger = logging.getLogger(__name__)
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								def _find_arg_mismatch(arg_list, fails, fun_name):
 								  mismatched_args_msg = []
-												Cleanup _find_arg_mismatch logic

PiperOrigin-RevId: 592697969

											
										
										
											2023-12-20 17:23:49 -08:00
+								  def mismatch(err):
 								    for name, inp_da, aval in arg_list:
 								      if err.m_type == pxla.MismatchType.ARG_SHARDING and err.da == inp_da:
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								        mismatched_args_msg.append(
-												Apply pyupgrade --py39-plus.

Notable changes:
* use PEP 585 type names
* use PEP 604 type union syntax where `from __future__ import annotations` is present.
* use f-strings in more places.
* remove redundant arguments to open().

											
										
										
											2023-07-21 14:20:39 -04:00
+								            f"argument {name} of {fun_name} with shape {aval.str_short()} and "
-												Cleanup _find_arg_mismatch logic

PiperOrigin-RevId: 592697969

											
										
										
											2023-12-20 17:23:49 -08:00
+								            f"{err._dev_ids_plat_str}")
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								        break
-												Cleanup _find_arg_mismatch logic

PiperOrigin-RevId: 592697969

											
										
										
											2023-12-20 17:23:49 -08:00
+								  first_err, second_err = fails
 								  mismatch(first_err)
 								  mismatch(second_err)
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								  return mismatched_args_msg
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
 								def _device_assignment_mismatch_error(fun_name, fails, args_flat, api_name,
 								                                      arg_names):
 								  arg_list = []
-												Always flatten args and kwargs together i.e. `tree_flatten((args, kwargs))` so that we have a uniform in_tree structure everywhere.

Leads to a code cleanup and more standardization in jit.

PiperOrigin-RevId: 592388438

											
										
										
											2023-12-19 17:31:25 -08:00
+								  if arg_names is None:
 								    arg_names = [''] * len(args_flat)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  for a, n in zip(args_flat, arg_names):
-												Fix a bug where exceptions were thrown in debug message formatting, when sharding was set to None on arrays.

PiperOrigin-RevId: 621193460

											
										
										
											2024-04-02 08:55:51 -07:00
+								    da = (a.sharding._device_assignment
 								          if getattr(a, 'sharding', None) is not None else None)
-												Raise a better error message when there is a device assignment mismatch via the apply_primitive route.

PiperOrigin-RevId: 518282464

											
										
										
											2023-03-21 08:39:46 -07:00
+								    arg_list.append((n, da, shaped_abstractify(a)))
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
 								  mismatched_args_msg = _find_arg_mismatch(arg_list, fails, fun_name)
 								  if len(mismatched_args_msg) == 2:
-												Merge pull request #21273 from superbobry:mypy-ruff

PiperOrigin-RevId: 636146344

											
										
										
											2024-05-22 06:35:38 -07:00
+								    first, second = mismatched_args_msg  # pytype: disable=bad-unpacking
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
+								    extra_msg = f" Got {first} and {second}"
 								  elif len(mismatched_args_msg) == 1:
 								    first, second  = fails
 								    # Choose the failure left which is not already covered by ARG_SHARDING.
 								    left = second if first.m_type == pxla.MismatchType.ARG_SHARDING else first
 								    extra_msg = f" Got {mismatched_args_msg[0]} and{left._str(api_name)}"
 								  else:
 								    first, second = fails
 								    extra_msg = f" Got{first._str(api_name)} and{second._str(api_name)}"
 								  msg = (f"Received incompatible devices for {api_name}ted computation.{extra_msg}")
 								  return msg
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								class PjitInfo(NamedTuple):
 								  """Things that we know about a jit instance before it is called.
 								  In other words, this structure contains arguments to jit()/pjit(),
 								  preprocessed and validated.
 								  """
 								  fun_sourceinfo: str | None
 								  fun_signature: inspect.Signature | None
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								  # Shardings, as specified by the user. These can either be UNSPECIFIED or they
 								  # can be a tree (prefix) of shardings or None.
 								  user_specified_in_shardings: bool
 								  in_shardings_treedef: PyTreeDef
 								  in_shardings_leaves: tuple[Any, ...]
 								  out_shardings_treedef: PyTreeDef
 								  out_shardings_leaves: tuple[Any, ...]
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  in_layouts_treedef: PyTreeDef
 								  in_layouts_leaves: tuple[Any, ...]
 								  out_layouts_treedef: PyTreeDef
 								  out_layouts_leaves: tuple[Any, ...]
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								  static_argnums: tuple[int, ...]
 								  static_argnames: tuple[str, ...]
 								  donate_argnums: tuple[int, ...]
 								  donate_argnames: tuple[str, ...]
 								  device: xc.Device | None
 								  backend: str | None
 								  keep_unused: bool
 								  inline: bool
 								  abstracted_axes: Any | None
 								  use_resource_env: bool  # False for jit, True for pjit
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  # Hash and compare PjitInfo by identity when used as a cache key.
 								  def __hash__(self):
 								    return id(self)
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  def __eq__(self, other):
 								    return self is other
 								def _python_pjit_helper(fun, jit_info, *args, **kwargs):
 								  """Infer jit parameters for ``fun``, bind ``pjit_p`` and rebuild the outputs.
 								
 								  Every flat argument is passed through ``dispatch.check_arg`` before
 								  binding. When ``attrs_tracked`` is non-empty, the values returned by
 								  ``_get_states`` are prepended to the flat arguments, and after binding the
 								  leading outputs are handed to ``_set_states``.
 								
 								  Returns:
 								    The 6-tuple ``(outs, out_flat, out_tree, args_flat, jaxpr,
 								    attrs_tracked)``.
 								
 								  Raises:
 								    ValueError: when binding raises ``pxla.DeviceAssignmentMismatchError``.
 								    TypeError: when binding raises ``xla.InvalidInputException``.
 								  """
 								  inferred, flat_args = _infer_params(fun, jit_info, args, kwargs)
 								  for flat_arg in flat_args:
 								    dispatch.check_arg(flat_arg)
 								  if inferred.attrs_tracked:
 								    flat_args = [*_get_states(inferred.attrs_tracked), *flat_args]
 								  try:
 								    out_flat = pjit_p.bind(*flat_args, **inferred.params)
 								  except pxla.DeviceAssignmentMismatchError as err:
 								    (fails,) = err.args
 								    if inferred.params['resource_env'] is None:
 								      api_name = 'jit'
 								    else:
 								      api_name = 'pjit'
 								    fallback_name = getattr(fun, '__name__', str(fun))
 								    fun_name = getattr(fun, '__qualname__', fallback_name)
 								    raise ValueError(_device_assignment_mismatch_error(
 								        fun_name, fails, flat_args, api_name, inferred.arg_names)) from None
 								  except xla.InvalidInputException as err:
 								    if inferred.arg_names is None:
 								      names = [''] * len(flat_args)
 								    else:
 								      names = inferred.arg_names
 								    if inferred.params['jaxpr'].consts:
 								      raise TypeError(err.args[0]) from err
 								    # Canonicalize each argument once more to find the one that was rejected.
 								    for bad_arg, bad_name, bad_aval in zip(flat_args, names, inferred.in_avals):
 								      try:
 								        xla.canonicalize_dtype(bad_arg)
 								      except xla.InvalidInputException:
 								        # Re-raise as a TypeError that names the offending argument.
 								        raise TypeError(
 								            f"Argument '{bad_name}' of shape {bad_aval.str_short()} of type"
 								            f' {type(bad_arg)} is not a valid JAX type.') from err
 								    raise AssertionError("Unreachable") from err
 								  if inferred.attrs_tracked:
 								    num_states_out = sum(
 								        end_tree.num_leaves for _, end_tree, _ in inferred.attrs_tracked)
 								    final_states, out_flat = split_list(out_flat, [num_states_out])
 								    _set_states(inferred.attrs_tracked, final_states)
 								  outs = tree_unflatten(inferred.out_tree, out_flat)
 								  return (outs, out_flat, inferred.out_tree, flat_args,
 								          inferred.params['jaxpr'], inferred.attrs_tracked)
-												integrate attrs in jax.jit

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-01-25 22:20:36 -08:00
-												Raise a better error message when an invalid input is passed to jit call.

Before:

```
TypeError: Argument 'ShapeDtypeStruct(shape=(4, 2), dtype=int32)' of type <class 'jax._src.api.ShapeDtypeStruct'> is not a valid JAX type.

```

After:

```
TypeError: Argument 'x['b']['c']' of shape int32[4,2] of type <class 'jax._src.api.ShapeDtypeStruct'> is not a valid JAX type.

```

The error is raised deep down the stack during `shard_arg`, so we raise an `InvalidInputException` and catch it in `_python_pjit_helper` where we have the `arg_names` information.

PiperOrigin-RevId: 618014044

											
										
										
											2024-03-21 17:45:44 -07:00
-												integrate attrs in jax.jit

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-01-25 22:20:36 -08:00
def _set_states(attrs_tracked, vals):
  """Write flat values back onto the tracked object attributes.

  ``vals`` is split into chunks sized by the leaf count of the second element
  of each ``attrs_tracked`` entry; each chunk is unflattened with that tree and
  stored on ``obj.attr`` through ``jax_setattr``.
  """
  from jax.experimental.attrs import jax_setattr
  # Only the sizes of all-but-the-last entry are passed on; presumably
  # split_list returns the remainder as the final chunk -- TODO confirm.
  chunk_sizes = []
  for _, end_tree, _ in attrs_tracked[:-1]:
    chunk_sizes.append(end_tree.num_leaves)
  chunks = split_list(vals, chunk_sizes)
  for entry, leaves in zip(attrs_tracked, chunks):
    _, end_tree, (obj, attr) = entry
    jax_setattr(obj, attr, tree_unflatten(end_tree, leaves))
 								def _get_states(attrs_tracked):
 								  """Collect the current values of the tracked attributes as one flat list.
 								
 								  Each attribute is read with ``jax_getattr`` and flattened; its tree
 								  structure must equal the first element of its ``attrs_tracked`` entry.
 								  """
 								  from jax.experimental.attrs import jax_getattr
 								  flat_state = []
 								  for expected_tree, _, (obj, attr) in attrs_tracked:
 								    leaves, actual_tree = tree_flatten(jax_getattr(obj, attr))
 								    # The attribute's structure has to match the tree recorded for it.
 								    assert expected_tree == actual_tree
 								    flat_state.extend(leaves)
 								  return flat_state
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured) and then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class in profile.py. The class tracks how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with its profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to deal correctly with the fast path at the interpreter level, so that JAX won't use the HLO directly if PGLE data needs to be collected, but also so that JAX will not recompile the module only for PGLE. See the changes in pjit.py and in lru_cache.h.

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								def _need_to_rebuild_with_fdo(pgle_profiler):
 								  return (pgle_profiler is not None and pgle_profiler.is_enabled()
 								          and not pgle_profiler.is_fdo_consumed())
-												Improve the error message which is raised from `_get_and_check_device_assignment`.

Before:

```
ValueError: Devices of all `Array` inputs and outputs should be the same. Got array device ids [0] on platform CPU and another array's device ids [0, 1, 2, 3] on platform CPU
```

After:

```
ValueError: Received incompatible devices for jitted computation. Got argument inp of ArrayPjitTest.test_jit_with_sharding_constraint_committed_inp_error.<locals>.sharded_inp with bfloat16[8,2] and device ids [0] on platform CPU and with_sharding_constraint or nested pjit or shard_map with device ids [0, 1, 2, 3] on platform CPU at jax/tests/pjit_test.py:2509 (sharded_inp)
```
PiperOrigin-RevId: 508746961

											
										
										
											2023-02-10 13:53:43 -08:00
-												[mutable-arrays] allow state effects in jit by building in run_state

with help from @sharadmv, @yashkatariya, @dougalm, and others

The basic strategy is to apply discharge_state when lowering a jaxpr with state
effects to HLO, and update the dispatch path accordingly. Specifically:
1. in tests only for now, introduce a MutableArray data type;
2. teach jit to abstract it to a Ref(ShapedArray) type, register an input
   handler, etc;
3. call discharge_state in `lower_sharding_computation` to lower a jaxpr with
   refs to a jaxpr (and then to an HLO) with extra outputs, and set up aliasing;
4. teach the output side of the dispatch path to drop those outputs.

As an alternative to (3), we could potentially lower away the effects at a
higher level, like in _pjit_lower_cached. They are similar because
_pjit_lower_cached is the only (non-xmap) caller of lower_sharding_computation.
I decided to do it in lower_sharding_computation mainly because that's closer
to where we set up aliases, and I wanted to make mutable arrays correspond to
aliased inputs/outputs on the XLA computation.

											
										
										
											2024-02-26 14:46:05 -08:00
+								def _get_fastpath_data(
-												Remove `_python_pjit` and make `_cpp_pjit` the only function wrapper.

PiperOrigin-RevId: 617846352

											
										
										
											2024-03-21 08:09:37 -07:00
+								    executable, out_tree, args_flat, out_flat, attrs_tracked, effects,
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured) and then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class in profile.py. The class tracks how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with its profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to deal correctly with the fast path at the interpreter level, so that JAX won't use the HLO directly if PGLE data needs to be collected, but also so that JAX will not recompile the module only for PGLE. See the changes in pjit.py and in lru_cache.h.

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								    consts, abstracted_axes, pgle_profiler
-												Run `pyupgrade --py310-plus`.

Also apply manual fixes to import sorting and unused imports.

											
										
										
											2024-06-26 14:44:52 -04:00
+								) -> pxla.MeshExecutableFastpathData | None:
-												[key reuse] add eager checks

											
										
										
											2024-02-29 15:30:19 -08:00
+								  out_reflattened, out_tree = pxla.reflatten_outputs_for_dispatch(out_tree, out_flat)
-												[mutable-arrays] allow state effects in jit by building in run_state

with help from @sharadmv, @yashkatariya, @dougalm, and others

The basic strategy is to apply discharge_state when lowering a jaxpr with state
effects to HLO, and update the dispatch path accordingly. Specifically:
1. in tests only for now, introduce a MutableArray data type;
2. teach jit to abstract it to a Ref(ShapedArray) type, register an input
   handler, etc;
3. call discharge_state in `lower_sharding_computation` to lower a jaxpr with
   refs to a jaxpr (and then to an HLO) with extra outputs, and set up aliasing;
4. teach the output side of the dispatch path to drop those outputs.

As an alternative to (3), we could potentially lower away the effects at a
higher level, like in _pjit_lower_cached. They are similar because
_pjit_lower_cached is the only (non-xmap) caller of lower_sharding_computation.
I decided to do it in lower_sharding_computation mainly because that's closer
to where we set up aliases, and I wanted to make mutable arrays correspond to
aliased inputs/outputs on the XLA computation.

											
										
										
											2024-02-26 14:46:05 -08:00
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								  use_fastpath = (
-												Convert in_shardings to physical shardings in cpp dispatch path because the same happens with prng arrays.

Also comment out key reuse check in cpp dispatch since it's True for jax tests which prevent prng keys from taking Cpp dispatch.

PiperOrigin-RevId: 613289252

											
										
										
											2024-03-06 11:41:34 -08:00
+								      executable is not None
 								      and isinstance(executable, pxla.MeshExecutable)
 								      and isinstance(executable.unsafe_call, pxla.ExecuteReplicated)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								      # No effects in computation
-												Convert in_shardings to physical shardings in cpp dispatch path because the same happens with prng arrays.

Also comment out key reuse check in cpp dispatch since it's True for jax tests which prevent prng keys from taking Cpp dispatch.

PiperOrigin-RevId: 613289252

											
										
										
											2024-03-06 11:41:34 -08:00
+								      and not executable.unsafe_call.ordered_effects
 								      and not executable.unsafe_call.has_unordered_effects
 								      and not executable.unsafe_call.has_host_callbacks
 								      and all(isinstance(x, xc.ArrayImpl) for x in out_reflattened)
-												Remove `_python_pjit` and make `_cpp_pjit` the only function wrapper.

PiperOrigin-RevId: 617846352

											
										
										
											2024-03-21 08:09:37 -07:00
+								      and abstracted_axes is None
-												integrate attrs in jax.jit

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-01-25 22:20:36 -08:00
+								      # no attr state effects
-												Convert in_shardings to physical shardings in cpp dispatch path because the same happens with prng arrays.

Also comment out key reuse check in cpp dispatch since it's True for jax tests which prevent prng keys from taking Cpp dispatch.

PiperOrigin-RevId: 613289252

											
										
										
											2024-03-06 11:41:34 -08:00
+								      and not attrs_tracked
-												[mutable-arrays] allow state effects in jit by building in run_state

with help from @sharadmv, @yashkatariya, @dougalm, and others

The basic strategy is to apply discharge_state when lowering a jaxpr with state
effects to HLO, and update the dispatch path accordingly. Specifically:
1. in tests only for now, introduce a MutableArray data type;
2. teach jit to abstract it to a Ref(ShapedArray) type, register an input
   handler, etc;
3. call discharge_state in `lower_sharding_computation` to lower a jaxpr with
   refs to a jaxpr (and then to an HLO) with extra outputs, and set up aliasing;
4. teach the output side of the dispatch path to drop those outputs.

As an alternative to (3), we could potentially lower away the effects at a
higher level, like in _pjit_lower_cached. They are similar because
_pjit_lower_cached is the only (non-xmap) caller of lower_sharding_computation.
I decided to do it in lower_sharding_computation mainly because that's closer
to where we set up aliases, and I wanted to make mutable arrays correspond to
aliased inputs/outputs on the XLA computation.

											
										
										
											2024-02-26 14:46:05 -08:00
+								      # no ref state effects
-												Convert in_shardings to physical shardings in cpp dispatch path because the same happens with prng arrays.

Also comment out key reuse check in cpp dispatch since it's True for jax tests which prevent prng keys from taking Cpp dispatch.

PiperOrigin-RevId: 613289252

											
										
										
											2024-03-06 11:41:34 -08:00
+								      and not any(isinstance(e, RefEffect) for e in effects)
-												[key reuse] add eager checks

											
										
										
											2024-02-29 15:30:19 -08:00
+								      # no prng reuse checking
-												[key reuse] rename flag to jax_debug_key_reuse

											
										
										
											2024-03-21 10:47:16 -07:00
+								      and not (config.debug_key_reuse.value and any(
-												[key reuse] add eager checks

											
										
										
											2024-02-29 15:30:19 -08:00
+								        hasattr(arg, 'dtype') and dtypes.issubdtype(arg.dtype, dtypes.prng_key)
-												[key reuse] handle reuse of closed-over constants

											
										
										
											2024-04-11 12:23:01 -07:00
+								        for arg in (*args_flat, *out_flat, *consts)))
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured) and then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class in profile.py. The class tracks how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with its profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also so that JAX will not recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h.

5. Once FDO data is collected, we need to share it between hosts to keep compilation deterministic.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								      and not _need_to_rebuild_with_fdo(pgle_profiler)
-												[mutable-arrays] allow state effects in jit by building in run_state

with help from @sharadmv, @yashkatariya, @dougalm, and others

The basic strategy is to apply discharge_state when lowering a jaxpr with state
effects to HLO, and update the dispatch path accordingly. Specifically:
1. in tests only for now, introduce a MutableArray data type;
2. teach jit to abstract it to a Ref(ShapedArray) type, register an input
   handler, etc;
3. call discharge_state in `lower_sharding_computation` to lower a jaxpr with
   refs to a jaxpr (and then to an HLO) with extra outputs, and set up aliasing;
4. teach the output side of the dispatch path to drop those outputs.

As an alternative to (3), we could potentially lower away the effects at a
higher level, like in _pjit_lower_cached. They are similar because
_pjit_lower_cached is the only (non-xmap) caller of lower_sharding_computation.
I decided to do it in lower_sharding_computation mainly because that's closer
to where we set up aliases, and I wanted to make mutable arrays correspond to
aliased inputs/outputs on the XLA computation.

											
										
										
											2024-02-26 14:46:05 -08:00
+								      )
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
 								  if use_fastpath:
-												[key reuse] add eager checks

											
										
										
											2024-02-29 15:30:19 -08:00
+								    out_avals = [o.aval for o in out_reflattened]
 								    out_committed = [o._committed for o in out_reflattened]
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								    kept_var_bitvec = [i in executable._kept_var_idx
 								                       for i in range(len(args_flat))]
-												Convert in_shardings to physical shardings in cpp dispatch path because the same happens with prng arrays.

Also comment out key reuse check in cpp dispatch since it's True for jax tests which prevent prng keys from taking Cpp dispatch.

PiperOrigin-RevId: 613289252

											
										
										
											2024-03-06 11:41:34 -08:00
+								    in_shardings = [
-												Simplify extended dtypes rules part 1. Start by removing sharding specific rules from EDtypes. This is because we always want to replicate the trailing dims introduced by Edtypes.

PiperOrigin-RevId: 639920049

											
										
										
											2024-06-03 14:52:08 -07:00
+								        sharding_impls.physical_sharding(a, s)
-												Convert in_shardings to physical shardings in cpp dispatch path because the same happens with prng arrays.

Also comment out key reuse check in cpp dispatch since it's True for jax tests which prevent prng keys from taking Cpp dispatch.

PiperOrigin-RevId: 613289252

											
										
										
											2024-03-06 11:41:34 -08:00
+								        if a is not core.abstract_token and dtypes.issubdtype(a.dtype, dtypes.extended)
 								        else s
 								        for s, a in zip(executable._in_shardings, executable.in_avals)
 								    ]
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								    fastpath_data = pxla.MeshExecutableFastpathData(
-												Convert in_shardings to physical shardings in cpp dispatch path because the same happens with prng arrays.

Also comment out key reuse check in cpp dispatch since it's True for jax tests which prevent prng keys from taking Cpp dispatch.

PiperOrigin-RevId: 613289252

											
										
										
											2024-03-06 11:41:34 -08:00
+								        executable.xla_executable, out_tree, in_shardings,
-												Call shard_arg fallback in pjit's cpp fast path instead of dropping out completely.

PiperOrigin-RevId: 592344105

											
										
										
											2023-12-19 14:25:25 -08:00
+								        executable._out_shardings, out_avals, out_committed, kept_var_bitvec,
-												Standardize default layout to `None` in internals (dispatch, lowering and compilation) and non-default layouts to concrete layouts.

This massively simplifies the amount of checks we need and improves dispatch time too. It also fixes a donation bug being hit in serving code related to layouts and non-standardization of default layout in JAX.

PiperOrigin-RevId: 668527139

											
										
										
											2024-08-28 11:05:45 -07:00
+								        executable._dispatch_in_layouts)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								  else:
 								    fastpath_data = None
 								  return fastpath_data
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
class _MostRecentPjitCallExecutable(threading.local):
  """Thread-local holder for two lookup tables keyed by jaxpr.

  Because this subclasses `threading.local`, every thread that touches an
  instance sees its own attribute values, and `__init__` runs separately in
  each such thread. Both tables are `weakref.WeakKeyDictionary` objects, so an
  entry never keeps its key alive on its own.
  """

  def __init__(self):
    # Table read by `_read_most_recent_pjit_call_executable`.
    self.weak_key_dict = weakref.WeakKeyDictionary()
    # Table read by `_read_pgle_profiler`.
    self.weak_pgle_profiler_dict = weakref.WeakKeyDictionary()
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								# Module-level instance of the `threading.local` subclass defined above; its
 								# per-thread tables are what the `_read_*` helpers below look things up in.
 								_most_recent_pjit_call_executable = _MostRecentPjitCallExecutable()
-												Add static_argnames to the _cpp_pjit path.

PiperOrigin-RevId: 499311688

											
										
										
											2023-01-03 14:05:17 -08:00
-												Change the _most_recent_executable logic to store a weakref dict of jaxpr -> executable so that with the inner cpp cache and outer cpp cache, we extract the correct executable.

PiperOrigin-RevId: 537908874

											
										
										
											2023-06-05 10:06:30 -07:00
def _read_most_recent_pjit_call_executable(jaxpr):
  """Returns the value stored for `jaxpr` in the calling thread's table.

  The lookup goes through `_most_recent_pjit_call_executable.weak_key_dict`;
  a missing key yields `None` instead of raising.
  """
  return _most_recent_pjit_call_executable.weak_key_dict.get(jaxpr, None)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured) and then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class in profile.py. The class tracks how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with its profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also so that JAX will not recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h.

5. Once FDO data is collected, we need to share it between hosts to keep compilation deterministic.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
def _read_pgle_profiler(jaxpr):
  """Returns the value stored for `jaxpr` in the calling thread's PGLE table.

  The lookup goes through
  `_most_recent_pjit_call_executable.weak_pgle_profiler_dict`; a missing key
  yields `None` instead of raising.
  """
  return _most_recent_pjit_call_executable.weak_pgle_profiler_dict.get(jaxpr, None)
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured) and then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class in profile.py. The class tracks how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with its profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also so that JAX will not recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h.

5. Once FDO data is collected, we need to share it between hosts to keep compilation deterministic.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
-												Add clear_cache endpoint to python pjit and cpp pjit functions.

PiperOrigin-RevId: 509696516

											
										
										
											2023-02-14 18:45:31 -08:00
def _cpp_pjit_evict_fn(self):
  """Clears cached state tied to a jitted-function wrapper.

  `_cpp_pjit` installs this function as `clear_cache` on the wrapper's type,
  so `self` is the wrapper object and `self._fun` is the Python callable it
  wraps.
  """
  # Ask the wrapper object to drop its own cache.
  self._clear_cache()
  # NOTE(review): presumably removes only the entries keyed on this function;
  # confirm against the cache that decorates `_create_pjit_jaxpr`.
  _create_pjit_jaxpr.evict_function(self._fun)  # pytype: disable=attribute-error
  # Called without arguments, so this is not scoped to `self._fun`.
  _infer_params_cached.cache_clear()
-												Add clear_cache endpoint to python pjit and cpp pjit functions.

PiperOrigin-RevId: 509696516

											
										
										
											2023-02-14 18:45:31 -08:00
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								# The entries are doubled here from the default 4096 because _pjit_call_impl
 								# also has a cpp dispatch path and that would double the number of entries in
 								# the global shared cache.
-												[Take 2] Generalize global jit cpp cache keys so we can add more keys than the current donate_argnums.

This allows us to get more cache hits globally. For example:

Before:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache miss
After:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache hit

Reverts b615266175effe4aefeb903620a19f3719a604da

PiperOrigin-RevId: 675746175

											
										
										
											2024-09-17 16:10:41 -07:00
+								# This cache is used only for jits that specify nothing but fun. For example: jax.jit(f)
 								_cpp_pjit_cache_fun_only = xc._xla.PjitFunctionCache(capacity=8192)
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
-												[Take 2] Generalize global jit cpp cache keys so we can add more keys than the current donate_argnums.

This allows us to get more cache hits globally. For example:

Before:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache miss
After:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache hit

Reverts b615266175effe4aefeb903620a19f3719a604da

PiperOrigin-RevId: 675746175

											
										
										
											2024-09-17 16:10:41 -07:00
+								# This cache is used for jit where extra arguments are defined other than the
 								# fun. For example: jax.jit(f, donate_argnums=...) OR
 								# jax.jit(f, out_shardings=...), etc. We don't use the same cache because its
 								# capacity could fill up very quickly with all the jitted functions in JAX,
 								# which might evict train_step, for example.
 								_cpp_pjit_cache_explicit_attributes = xc._xla.PjitFunctionCache(capacity=8192)
-												Make pjit's cache global just like `jit`'s cache. This will allow cache hits in C++ when `pjit(f)(jnp.arange(3.))` is executed twice.

Also includes Peter's change to fix the cache hit behavior which was broken at HEAD with jit.

PiperOrigin-RevId: 507662634

											
										
										
											2023-02-06 20:34:51 -08:00
-												[Take 2] Generalize global jit cpp cache keys so we can add more keys than the current donate_argnums.

This allows us to get more cache hits globally. For example:

Before:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache miss
After:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache hit

Reverts b615266175effe4aefeb903620a19f3719a604da

PiperOrigin-RevId: 675746175

											
										
										
											2024-09-17 16:10:41 -07:00
 								if xla_extension_version < 286:
 								  def _get_cpp_global_cache(pjit_has_explicit_sharding):
 								    """Picks the C++ cache object for a jitted function (older extension)."""
 								    if not pjit_has_explicit_sharding:
 								      return _cpp_pjit_cache_fun_only
 								    # A brand-new cache object is constructed on every such call.
 								    return xc._xla.PjitFunctionCache()
 								  def _pjit_explicit_sharding_and_layout(
 								      in_shardings_flat, out_shardings_flat, in_layouts_flat,
 								      out_layouts_flat, device, backend) -> bool:
 								    """Reports whether any device/backend/sharding/layout was given."""
 								    if device is not None or backend is not None:
 								      return True
 								    if not all(is_unspecified(s) for s in in_shardings_flat):
 								      return True
 								    if not all(is_unspecified(s) for s in out_shardings_flat):
 								      return True
 								    if not all(lay is None for lay in in_layouts_flat):
 								      return True
 								    return not all(lay is None for lay in out_layouts_flat)
 								else:
 								  def _get_cpp_global_cache(contains_explicit_attributes: bool):  # type: ignore
 								    """Picks one of the two module-level C++ caches."""
 								    if not contains_explicit_attributes:
 								      return _cpp_pjit_cache_fun_only
 								    return _cpp_pjit_cache_explicit_attributes
-												Fix the `test_sharding_on_output_with_vmap` failure in Pathways which was getting a cache miss in pjit_call_impl.

There was an inconsistency between how the global cache was used at the top level and in pjit_call_impl so standardize it via a helper function.

In the test, check for re-compilation which is what that test was doing before cl/535630905

PiperOrigin-RevId: 536575987

											
										
										
											2023-05-30 19:51:06 -07:00
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
def _cpp_pjit(fun: Callable, jit_info: PjitInfo):
  """Wraps `fun` in an `xc._xla.pjit` dispatch object.

  Args:
    fun: the Python callable being jitted.
    jit_info: the `PjitInfo` for this jit. This function reads its static and
      donated argument fields, `device`, `backend`, the sharding and layout
      treedef/leaves fields, `use_resource_env`, `abstracted_axes` and
      `fun_sourceinfo`.

  Returns:
    The `xc._xla.pjit` object after `wraps(fun)` has been applied to it, with
    `fun` stored on it as `_fun` and `_cpp_pjit_evict_fn` installed as
    `clear_cache` on its type.
  """

  @api_boundary
  def cache_miss(*args, **kwargs):
    # Callback handed to `xc._xla.pjit`. Returns a triple: the outputs, the
    # fastpath data (or None), and whether a rebuild with FDO is needed.
    if config.no_tracing.value:
      raise RuntimeError(f"re-tracing function {jit_info.fun_sourceinfo} for "
                         "`jit`, but 'no_tracing' is set")
    outs, out_flat, out_tree, args_flat, jaxpr, attrs_tracked = _python_pjit_helper(
        fun, jit_info, *args, **kwargs)
    # Both lookups are keyed by the jaxpr the helper just returned.
    executable = _read_most_recent_pjit_call_executable(jaxpr)
    pgle_profiler = _read_pgle_profiler(jaxpr)
    maybe_fastpath_data = _get_fastpath_data(
        executable, out_tree, args_flat, out_flat, attrs_tracked, jaxpr.effects,
        jaxpr.consts, jit_info.abstracted_axes,
        pgle_profiler)

    return outs, maybe_fastpath_data, _need_to_rebuild_with_fdo(pgle_profiler)

  # The arguments accepted by `xc._xla.pjit` depend on the extension version:
  # from 286 on it takes a `JitGlobalCppCacheKeys` object where older versions
  # take `donate_argnums` directly.
  if xla_extension_version >= 286:
    cache_key = pxla.JitGlobalCppCacheKeys(
        donate_argnums=jit_info.donate_argnums,
        donate_argnames=jit_info.donate_argnames,
        device=jit_info.device, backend=jit_info.backend,
        in_shardings_treedef=jit_info.in_shardings_treedef,
        in_shardings_leaves=jit_info.in_shardings_leaves,
        out_shardings_treedef=jit_info.out_shardings_treedef,
        out_shardings_leaves=jit_info.out_shardings_leaves,
        in_layouts_treedef=jit_info.in_layouts_treedef,
        in_layouts_leaves=jit_info.in_layouts_leaves,
        out_layouts_treedef=jit_info.out_layouts_treedef,
        out_layouts_leaves=jit_info.out_layouts_leaves,
        use_resource_env=jit_info.use_resource_env)
    cpp_pjit_f = xc._xla.pjit(
        fun_name(fun), fun, cache_miss, jit_info.static_argnums,
        jit_info.static_argnames, cache_key, tree_util.dispatch_registry,  # type: ignore
        pxla.cc_shard_arg,
        _get_cpp_global_cache(cache_key.contains_explicit_attributes))
  else:
    has_explicit_sharding = _pjit_explicit_sharding_and_layout(
        jit_info.in_shardings_leaves, jit_info.out_shardings_leaves,
        jit_info.in_layouts_leaves, jit_info.out_layouts_leaves,
        jit_info.device, jit_info.backend)
    cpp_pjit_f = xc._xla.pjit(
        fun_name(fun), fun, cache_miss, jit_info.static_argnums,
        jit_info.static_argnames, jit_info.donate_argnums,
        tree_util.dispatch_registry, pxla.cc_shard_arg,
        _get_cpp_global_cache(has_explicit_sharding))

  cpp_pjitted_f = wraps(fun)(cpp_pjit_f)
  # `_cpp_pjit_evict_fn` reads `_fun` to know which function to evict.
  cpp_pjitted_f._fun = fun
  # Assigned on the type rather than the instance, so every object of that
  # type shares this `clear_cache`.
  type(cpp_pjitted_f).clear_cache = _cpp_pjit_evict_fn
  return cpp_pjitted_f
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								def _split_layout_and_sharding(entries):
 								  entries_flat, treedef = tree_flatten(entries, is_leaf=lambda x: x is None)
 								  layouts, shardings = [], []
 								  for e in entries_flat:
-												Read the layout set by `with_sharding_constraint` and set the top module level `out_layout` to `AUTO` if wsc layout is not None.

This will allow XLA to override the entry_computation_layout with the layout set via custom call (i.e. via wsc).

PiperOrigin-RevId: 648911765

											
										
										
											2024-07-02 19:12:27 -07:00
+								    if isinstance(e, Layout):
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances for the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								      layouts.append(e.device_local_layout)
 								      shardings.append(e.sharding)
 								    elif isinstance(e, (DeviceLocalLayout, AutoLayout)):
 								      raise ValueError(
 								          '`jax.jit` does not accept device-local layouts directly. Create '
 								          'a `Layout` instance wrapping this device-local layout and pass '
 								          f'that to `jit` instead. Got {e}')
 								    else:
 								      layouts.append(None)
 								      shardings.append(e)
 								  assert len(layouts) == len(shardings)
 								  return tree_unflatten(treedef, layouts), tree_unflatten(treedef, shardings)
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								def _parse_jit_arguments(fun: Callable, in_shardings: Any, out_shardings: Any,
 								                         donate_argnums: int | Sequence[int] | None,
 								                         donate_argnames: str | Iterable[str] | None,
 								                         static_argnums: int | Sequence[int] | None,
 								                         static_argnames: str | Iterable[str] | None,
 								                         device: xc.Device | None, backend: str | None,
 								                         abstracted_axes: Any | None, keep_unused: bool,
 								                         inline: bool, use_resource_env: bool) -> PjitInfo:
 								  """Parses the arguments to jit/pjit.
 								  Performs any preprocessing and validation of the arguments that we can do
 								  ahead of time before the jit()-ed function is invoked.
 								  """
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								  if abstracted_axes and not config.dynamic_shapes.value:
-												Add abstracted axes to pjit to make jax2tf tests pass. abstracted_axes and dynamic_shapes is not supported by pjit yet.

PiperOrigin-RevId: 502138836

											
										
										
											2023-01-14 20:16:57 -08:00
+								    raise ValueError("abstracted_axes must be used with --jax_dynamic_shapes")
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  check_callable(fun)
 								  if backend is not None or device is not None:
 								    warnings.warn(
-												Update the deprecation message of `backend` and `device` argument of `jit` to be more actionable.

PiperOrigin-RevId: 637899890

											
										
										
											2024-05-28 07:59:31 -07:00
+								        'backend and device argument on jit is deprecated. You can use'
 								        ' `jax.device_put(..., jax.local_devices("cpu")[0])` on the inputs to'
 								        ' the jitted function to get the same behavior.', DeprecationWarning)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    if device is not None and backend is not None:
 								      raise ValueError("can't specify both a device and a backend for jit, "
 								                       f"got {device=} and {backend=}")
-												Allow None to be passed to in_shardings and out_shardings. The default is still UNSPECIFIED to handle edge cases around the old semantics where None is treated as fully replicated.

The semantics are as follows:

* if the mesh context manager is not provided, None will be treated as UNSPECIFIED for both in_shardings and out_shardings

* If the mesh context manager is provided, None will be treated as fully replicated as per the old semantics.

This will make sure that we don't break existing code depending on None meaning replicated but also start making the transition to None meaning UNSPECIFIED for jit and pjit.

PiperOrigin-RevId: 540705660

											
										
										
											2023-06-15 15:21:36 -07:00
+								    if in_shardings is not None and not is_unspecified(in_shardings):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								      raise ValueError('If backend or device is specified on jit, then '
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								                       'in_shardings should not be specified.')
-												Allow None to be passed to in_shardings and out_shardings. The default is still UNSPECIFIED to handle edge cases around the old semantics where None is treated as fully replicated.

The semantics are as follows:

* if the mesh context manager is not provided, None will be treated as UNSPECIFIED for both in_shardings and out_shardings

* If the mesh context manager is provided, None will be treated as fully replicated as per the old semantics.

This will make sure that we don't break existing code depending on None meaning replicated but also start making the transition to None meaning UNSPECIFIED for jit and pjit.

PiperOrigin-RevId: 540705660

											
										
										
											2023-06-15 15:21:36 -07:00
+								    if out_shardings is not None and not is_unspecified(out_shardings):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								      raise ValueError('If backend or device is specified on jit, then '
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								                       'out_shardings should not be specified.')
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  if isinstance(in_shardings, list):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    # To be a tree prefix of the positional args tuple, in_axes can never be a
 								    # list: if in_axes is not a leaf, it must be a tuple of trees. However,
 								    # in cases like these users expect tuples and lists to be treated
 								    # essentially interchangeably, so we canonicalize lists to tuples here
-												Update references to the GitHub url in JAX codebase to reflect move from google/jax to jax-ml/jax

PiperOrigin-RevId: 676843138

											
										
										
											2024-09-20 07:51:48 -07:00
+								    # rather than raising an error. https://github.com/jax-ml/jax/issues/2367
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    in_shardings = tuple(in_shardings)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances for the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  in_layouts, in_shardings = _split_layout_and_sharding(in_shardings)
 								  out_layouts, out_shardings = _split_layout_and_sharding(out_shardings)
-												Remove the unused return from prepare_axis_resources

PiperOrigin-RevId: 621738698

											
										
										
											2024-04-03 22:38:45 -07:00
+								  in_shardings = prepare_axis_resources(in_shardings, 'in_shardings')
 								  out_shardings = prepare_axis_resources(out_shardings, 'out_shardings')
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								  user_specified_in_shardings = (in_shardings is not None and
 								                                 not is_unspecified(in_shardings))
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances for the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
 								  in_shardings_leaves, in_shardings_treedef = none_lr.flatten(in_shardings)
 								  out_shardings_leaves, out_shardings_treedef = none_lr.flatten(out_shardings)
 								  in_layouts_leaves, in_layouts_treedef = none_lr.flatten(in_layouts)
 								  out_layouts_leaves, out_layouts_treedef = none_lr.flatten(out_layouts)
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								  fun_sourceinfo = api_util.fun_sourceinfo(fun)
 								  fun_signature = api_util.fun_signature(fun)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Only call inspect.signature once during the initial call to jit().

We call inspect.signature() once for debug information and once for argnum resolving. We can just call it once and reuse the result.

PiperOrigin-RevId: 617824439

											
										
										
											2024-03-21 06:35:20 -07:00
+								  donate_argnums, donate_argnames, static_argnums, static_argnames = resolve_argnums(
 								      fun, fun_signature, donate_argnums, donate_argnames, static_argnums,
 								      static_argnames)
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								  return PjitInfo(
 								        fun_sourceinfo=fun_sourceinfo,
 								        fun_signature=fun_signature,
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								        user_specified_in_shardings=user_specified_in_shardings,
 								        in_shardings_treedef=in_shardings_treedef,
 								        in_shardings_leaves=tuple(in_shardings_leaves),
 								        out_shardings_treedef=out_shardings_treedef,
 								        out_shardings_leaves=tuple(out_shardings_leaves),
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances for the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								        in_layouts_treedef=in_layouts_treedef,
 								        in_layouts_leaves=tuple(in_layouts_leaves),
 								        out_layouts_treedef=out_layouts_treedef,
 								        out_layouts_leaves=tuple(out_layouts_leaves),
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								        static_argnums=static_argnums,
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								        static_argnames=static_argnames, donate_argnums=donate_argnums,
 								        donate_argnames=donate_argnames, device=device, backend=backend,
 								        keep_unused=keep_unused, inline=inline,
 								        abstracted_axes=abstracted_axes,
 								        use_resource_env=use_resource_env)
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								def _make_jit_wrapper(fun: Callable, jit_info: PjitInfo):
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Make `error_test` a jax_test so that we can test other configs and fix it with `jit`/`pjit` merge.

PiperOrigin-RevId: 502743523

											
										
										
											2023-01-17 18:42:21 -08:00
+								  @api_boundary
-												Smuggle _experimental_lowering_platform via kwargs to make it hidden and extremely private temporary.

PiperOrigin-RevId: 532644979

											
										
										
											2023-05-16 19:47:19 -07:00
+								  def lower(*args, **kwargs):
-												Move `DeviceAssignmentMismatchError` exception catching code to `def lower` method of `Traced` so that all libraries calling `traced.lower()` see a better error message

PiperOrigin-RevId: 674095608

											
										
										
											2024-09-12 19:02:57 -07:00
+								    return trace(*args, **kwargs).lower()
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
+								  @api_boundary
 								  def eval_shape(*args, **kwargs):
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								    p, _ = _infer_params(fun, jit_info, args, kwargs)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								    out_s = [None if is_unspecified(s) else s for s in p.params['out_shardings']]
-												Accept layout on `ShapeDtypeStruct` on the `sharding` argument. `DeviceLocalLayout.AUTO` is not allowed on SDS.

PiperOrigin-RevId: 624982814

											
										
										
											2024-04-15 09:18:46 -07:00
+								    # TODO(yashkatariya): Add `Layout` to SDS.
-												Add weak_type to ShapeDtypeStruct because jax.Array also has it and SDS is a duck of jax.Array

This fixes a tracing cache miss issue when you eval shape with a weak_type input and get a strong type output back and pass that back in leading to a cache miss.

Fixes: https://github.com/google/jax/issues/23302
PiperOrigin-RevId: 668949430

											
										
										
											2024-08-29 08:35:00 -07:00
+								    out = [api.ShapeDtypeStruct(x.shape, x.dtype, sharding=s,
 								                                weak_type=x.weak_type)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								           for x, s in zip(p.params['jaxpr'].out_avals, out_s)]
 								    return tree_unflatten(p.out_tree, out)
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
-												Add `specialize` on jax.jit and make it a `Stage`.

Eventually, we should use this in jax.make_jaxpr and delete all the duplicated code.

PiperOrigin-RevId: 640707223

											
										
										
											2024-06-05 17:45:34 -07:00
+								  @api_boundary
-												rename `Specialized` to `Traced` (and `specialize` to `trace`)

PiperOrigin-RevId: 641076488

											
										
										
											2024-06-06 17:42:25 -07:00
+								  def trace(*args, **kwargs) -> stages.Traced:
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								    p, args_flat = _infer_params(fun, jit_info, args, kwargs)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								    donate_argnums = tuple(i for i, d in enumerate(p.donated_invars) if d)
 								    args_info = stages.make_args_info(p.in_tree, p.in_avals, donate_argnums)
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								    lower_callable = partial(_resolve_and_lower, args_flat, **p.params,
-												Add `lowering_platforms` to `traced.lower()` to allow lowering to different backends and multi-backend lowering too. In other words, enable cross-lowering!

The motivation for doing this is 2-fold:

1) This will help with deprecating and eventually deleting `jax.xla_computation` which allows for cross backend lowering.

2) Allow for cross-backend and multi-backend lowering via JAX AOT APIs, which will help clean up some hacks implemented for `jax.export`.

Note that this is only available via `.trace(*args).lower(lowering_platforms=('tpu',))`. You cannot use `.lower` directly to do cross-lowering. In the future we can introduce top-level APIs, allowing for composable AOT APIs, to make this easier if `.trace(*args).lower(lowering_platforms=...)` is cumbersome to write.

Designed with @froystig!

PiperOrigin-RevId: 644087787

											
										
										
											2024-06-17 11:58:18 -07:00
+								                             pgle_profiler=None)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								    return stages.Traced(
-												Move `DeviceAssignmentMismatchError` exception catching code to `def lower` method of `Traced` so that all libraries calling `traced.lower()` see a better error message

PiperOrigin-RevId: 674095608

											
										
										
											2024-09-12 19:02:57 -07:00
+								        p.params['jaxpr'], args_info, p.params["name"], p.out_tree,
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								        lower_callable, args_flat, p.arg_names, p.num_consts)
-												Add `specialize` on jax.jit and make it a `Stage`.

Eventually, we should use this in jax.make_jaxpr and delete all the duplicated code.

PiperOrigin-RevId: 640707223

											
										
										
											2024-06-05 17:45:34 -07:00
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  wrapped = _cpp_pjit(fun, jit_info)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  wrapped.lower = lower
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
+								  wrapped.eval_shape = eval_shape
-												rename `Specialized` to `Traced` (and `specialize` to `trace`)

PiperOrigin-RevId: 641076488

											
										
										
											2024-06-06 17:42:25 -07:00
+								  wrapped.trace = trace
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  return wrapped
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances in the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into `device_local_layout` and `sharding`.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								def make_jit(fun: Callable, in_shardings: Any, out_shardings: Any,
 								             donate_argnums: int | Sequence[int] | None,
 								             donate_argnames: str | Iterable[str] | None,
 								             static_argnums: int | Sequence[int] | None,
 								             static_argnames: str | Iterable[str] | None,
 								             device: xc.Device | None, backend: str | None,
 								             abstracted_axes: Any | None, keep_unused: bool,
 								             inline: bool, use_resource_env: bool) -> Any:
 								  """jit() and pjit() are thin wrappers around this function."""
 								  jit_info = _parse_jit_arguments(
 								        fun, in_shardings, out_shardings, donate_argnums, donate_argnames,
 								        static_argnums, static_argnames, device, backend, abstracted_axes,
 								        keep_unused, inline, use_resource_env)
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  return _make_jit_wrapper(fun, jit_info)
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								class PjitParams(NamedTuple):
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  consts: list[Any]  # Only jaxpr constants, we can't keep other arguments alive
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  params: dict[str, Any]
 								  in_avals: tuple[core.AbstractValue, ...]
 								  in_tree: PyTreeDef
 								  out_tree: PyTreeDef
 								  donated_invars: tuple[bool, ...]
 								  arg_names: tuple[str, ...] | None
 								  num_consts: int
 								  attrs_tracked: list[tuple[PyTreeDef, PyTreeDef, tuple[Any, str]]]
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								def _infer_params_impl(
 								    fun: Callable,
 								    ji: PjitInfo,
 								    pjit_mesh: mesh_lib.Mesh | None,
 								    resource_env: mesh_lib.ResourceEnv | None,
 								    args: tuple[Any, ...],
 								    kwargs: dict[str, Any],
 								    in_avals: tuple[core.AbstractValue, ...] | None,
 								) -> tuple[PjitParams, list[Any]]:
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								  have_kwargs = bool(kwargs)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  if have_kwargs and ji.user_specified_in_shardings:
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								    raise ValueError(
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								        "pjit does not support kwargs when in_shardings is specified.")
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  if pjit_mesh is not None:
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								    jit_name = 'pjit'
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								    if (ji.backend or ji.device) and not pjit_mesh.empty:
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								      raise ValueError(
 								          "Mesh context manager should not be used with jit when backend or "
 								          "device is also specified as an argument to jit.")
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  else:
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								    jit_name = 'jit'
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  axes_specs = _flat_axes_specs(ji.abstracted_axes, *args, **kwargs)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  dbg = debug_info(jit_name, ji.fun_sourceinfo, ji.fun_signature, args, kwargs,
 								                   ji.static_argnums, ji.static_argnames)
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  f = lu.wrap_init(fun)
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
+								  f, res_paths = result_paths(f)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  f, dyn_args = argnums_partial_except(f, ji.static_argnums, args, allow_invalid=True)
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
+								  del args
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  f, dyn_kwargs = argnames_partial_except(f, ji.static_argnames, kwargs)
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
+								  explicit_args, in_tree = tree_flatten((dyn_args, dyn_kwargs))
 								  flat_fun, out_tree = flatten_fun(f, in_tree)
-												[attrs] allow passing a jax-attrs object to jit functions

Currently we don't get any interesting cache hits; we only hit on an object
identity match.

											
										
										
											2024-02-13 16:45:27 -08:00
+								  flat_fun, explicit_args = hoist_obj_attrs(flat_fun, explicit_args)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  if (ji.donate_argnums or ji.donate_argnames) and not config.debug_nans.value:
 								    donated_invars = donation_vector(ji.donate_argnums, ji.donate_argnames, in_tree)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  else:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    donated_invars = (False,) * len(explicit_args)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  # If backend or device is set as an arg on jit, then resolve them to
 								  # in_shardings and out_shardings as if user passed in in_shardings
 								  # and out_shardings.
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  device_or_backend_set = bool(ji.backend or ji.device)
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								  if device_or_backend_set:
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								    sharding = _create_sharding_with_device_backend(ji.device, ji.backend)
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								    leaves, treedef = tree_flatten(sharding)
 								    in_shardings_leaves = out_shardings_leaves = tuple(leaves)
 								    in_shardings_treedef = out_shardings_treedef = treedef
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  else:
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								    in_shardings_leaves = tuple(
 								        _create_sharding_for_array(pjit_mesh, x, 'in_shardings', jit_name)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								        for x in ji.in_shardings_leaves)
 								    in_shardings_treedef = ji.in_shardings_treedef
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								    out_shardings_leaves = tuple(
 								        _create_sharding_for_array(pjit_mesh, x, 'out_shardings', jit_name)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								        for x in ji.out_shardings_leaves)
 								    out_shardings_treedef = ji.out_shardings_treedef
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								  assert None not in in_shardings_leaves
 								  assert None not in out_shardings_leaves
-												Allow None to be passed to in_shardings and out_shardings. The default is still UNSPECIFIED to handle edge cases around the old semantics where None is treated as fully replicated.

The semantics are as follows:

* if the mesh context manager is not provided, None will be treated as UNSPECIFIED for both in_shardings and out_shardings

* If the mesh context manager is provided, None will be treated as fully replicated as per the old semantics.

This will make sure that we don't break existing code depending on None meaning replicated but also start making the transition to None meaning UNSPECIFIED for jit and pjit.

PiperOrigin-RevId: 540705660

											
										
										
											2023-06-15 15:21:36 -07:00
-												Run `pyupgrade --py310-plus`.

Also apply manual fixes to import sorting and unused imports.

											
										
										
											2024-06-26 14:44:52 -04:00
+								  in_type: core.InputType | tuple[core.AbstractValue, ...]
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								  if config.dynamic_shapes.value:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    in_type = pe.infer_lambda_input_type(axes_specs, explicit_args)
-												Deprecate FROM_GDA and remove its support from pjit's code since jax.Array inside pjit has sharding inference capabilities by default.

PiperOrigin-RevId: 520067392

											
										
										
											2023-03-28 10:29:01 -07:00
+								    in_avals = tuple(a for a, e in in_type if e)
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  elif in_avals is None:
-												Report the argument path when encountering an overflow error for a Python value.

PiperOrigin-RevId: 522106244

											
										
										
											2023-04-05 11:23:02 -07:00
+								    avals = []
 								    for i, a in enumerate(explicit_args):
 								      try:
 								        avals.append(shaped_abstractify(a))
 								      except OverflowError as e:
 								        arg_path = (f"argument path is {dbg.arg_names[i]}" if dbg
 								                    else f"flattened argument number is {i}")
 								        raise OverflowError(
 								          "An overflow was encountered while parsing an argument to a jitted "
 								          f"computation, whose {arg_path}."
 								        ) from e
 								    in_type = in_avals = tuple(avals)
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  else:
 								    in_type = in_avals
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  in_shardings_flat, in_layouts_flat = _process_in_axis_resources(
 								      in_shardings_treedef, in_shardings_leaves,
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								      ji.in_layouts_treedef, ji.in_layouts_leaves,
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								      in_avals, in_tree, dbg, device_or_backend_set, have_kwargs)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  attr_token = _attr_token(flat_fun, in_type)
-												Add check_compatible_aval checks to Layout. It checks if `len(major_to_minor) == len(aval.shape)`.

PiperOrigin-RevId: 651777179

											
										
										
											2024-07-12 08:09:54 -07:00
+								  jaxpr, consts, out_avals, attrs_tracked = _create_pjit_jaxpr(
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								      flat_fun, in_type, attr_token, dbg,
 								      HashableFunction(res_paths, closure=()),
 								      IgnoreKey(ji.inline))
 								  _attr_update(flat_fun, in_type, attr_token, attrs_tracked)
-												Add `sharding` to `convert_element_type_p` primitive.

There are 2 reasons for doing this:

* Avoid an extra allocation by putting the output on the correct sharding that the user specified. If you device_put the output of `_convert_element_type`, then you pay the cost of 2 transfers which is not ideal at all since this path would be critical (when users use `device`) and we should avoid doing extra transfers at all costs.

* This will allow us to streamline `device` arguments being added to all `jnp` functions as we will have one place (`_convert_element_type`) which will handle the logic of putting things on the right device.

Also fixes: https://github.com/google/jax/issues/17422

PiperOrigin-RevId: 650621659

											
										
										
											2024-07-09 07:32:38 -07:00
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								  out_shardings_flat, out_layouts_flat = _check_and_canonicalize_out_shardings(
 								      out_shardings_treedef, out_shardings_leaves, ji.out_layouts_treedef,
 								      ji.out_layouts_leaves, HashableFunction(out_tree, closure=()),
-												Add check_compatible_aval checks to Layout. It checks if `len(major_to_minor) == len(aval.shape)`.

PiperOrigin-RevId: 651777179

											
										
										
											2024-07-12 08:09:54 -07:00
+								      tuple(out_avals), jaxpr.jaxpr.debug_info, device_or_backend_set)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  assert len(explicit_args) == len(in_shardings_flat) == len(in_layouts_flat)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								  if config.dynamic_shapes.value:
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								    implicit_args = _extract_implicit_args(
 								        cast(core.InputType, in_type), explicit_args)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  else:
 								    implicit_args = []
 								  args_flat = [*implicit_args, *explicit_args]
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Make attrs work with pytrees

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-05-22 23:30:55 -04:00
+								  num_states_in = sum(init_tree.num_leaves for init_tree, _, _ in attrs_tracked)
 								  num_extra_args = len(implicit_args) + num_states_in + len(consts)
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  in_shardings_flat = (UNSPECIFIED,) * num_extra_args + in_shardings_flat
-												Add a private API to allow setting layouts on jitted computations.

We expose 3 modes:

* `SpecifiedLayout`: User specifies the `minor_to_major` field of the layout. Tiling not exposed yet.

* `DefaultLayout`: PJRT chooses the layout. It defaults to the current behavior.

* `AUTO`: Compiler chooses the layout. This field is not a layout per se. It's a request to get the layout from the compiler. This field cannot be on an Array or other data types. It can only be on jit.

Public API coming soon.

Co-authored-by: Roy Frostig <frostig@google.com>
PiperOrigin-RevId: 582692036

											
										
										
											2023-11-15 08:48:17 -08:00
+								  in_layouts_flat = (None,) * num_extra_args + in_layouts_flat
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  donated_invars = (False,) * num_extra_args + donated_invars
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  assert (len(in_shardings_flat) == len(in_layouts_flat) ==
-												Make attrs work with pytrees

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-05-22 23:30:55 -04:00
+								          len(donated_invars) == num_states_in + len(consts) + len(args_flat))
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
 								  params = dict(
 								      jaxpr=jaxpr,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								      in_shardings=in_shardings_flat,
 								      out_shardings=out_shardings_flat,
 								      in_layouts=in_layouts_flat,
 								      out_layouts=out_layouts_flat,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								      resource_env=resource_env,
 								      donated_invars=donated_invars,
-												Prefer `__qualname__` as a pjit_p name.

If applying `jit` to a class method, it is often important to know the class name in the jaxpr.

											
										
										
											2024-07-02 13:07:46 -04:00
+								      name=fun_qual_name(flat_fun),
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								      keep_unused=ji.keep_unused,
 								      inline=ji.inline,
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  )
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  return PjitParams(consts, params, in_avals, in_tree, out_tree(),
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								                    donated_invars, dbg.arg_names if dbg else None, len(consts),
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								                    attrs_tracked), args_flat
 								class InferParamsCacheEntry:
 								  """Mutable value object for _infer_params_cached.

 								  Holds a single slot, ``pjit_params``, which is set to None on
 								  construction to denote an entry that has not been populated yet.
 								  NOTE(review): presumably the code that obtains the entry from
 								  _infer_params_cached assigns the computed PjitParams afterwards; confirm
 								  against the caller.
 								  """
 								  # Restrict instances to the one attribute below (no per-instance __dict__).
 								  __slots__ = ['pjit_params']
 								  # The stored parameters, or None while the entry is still empty.
 								  pjit_params: PjitParams | None
 								  def __init__(self):
 								    # Start empty; the value is assigned later by whoever owns the entry.
 								    self.pjit_params = None
 								# We use an outer cache that is keyed on the signature of the arguments, but
 								# when populating a cache entry using _infer_params_impl, we need to provide
 								# actual arguments. In principle we could refactor _infer_params_impl to look
 								# only at an argument signature instead of args/kwargs in those cases that we
 								# cache, but this was a more minimal change.
 								@util.weakref_lru_cache
 								def _infer_params_cached(
 								    fun: Callable,
 								    jit_info: PjitInfo,
 								    signature: jax_jit.ArgumentSignature,
 								    in_avals: tuple[core.AbstractValue, ...],
 								    pjit_mesh: mesh_lib.Mesh | None,
 								    resource_env: mesh_lib.ResourceEnv | None,
 								) -> InferParamsCacheEntry:
 								  """Return the cache entry associated with this combination of arguments.

 								  None of the parameters is read in the body: the function only constructs
 								  and returns a new, empty ``InferParamsCacheEntry`` (its ``pjit_params``
 								  is None). The parameters serve as the key for the cache applied by the
 								  ``util.weakref_lru_cache`` decorator, as described in the comment above.

 								  NOTE(review): presumably the caller fills in ``pjit_params`` on the
 								  returned entry after running _infer_params_impl with the real arguments,
 								  so that later calls with an equal key see the populated entry; confirm
 								  against _infer_params.
 								  """
 								  return InferParamsCacheEntry()
 								def _infer_params(
 								    fun: Callable, ji: PjitInfo, args: tuple[Any, ...], kwargs: dict[str, Any]
 								) -> tuple[PjitParams, list[Any]]:
 								  if ji.use_resource_env:
 								    # We need to fetch the mesh from inside the wrapped function, because
 								    # meshes are dynamically scoped (i.e., with a context manager).
 								    resource_env = mesh_lib.thread_resources.env
 								    pjit_mesh = resource_env.physical_mesh
 								  else:
 								    resource_env = None
 								    pjit_mesh = None
-												Bump minimum jaxlib version to 0.4.31. The corresponding xla_extension_version is 279 and mlir_api_version is 57

PiperOrigin-RevId: 657400413

											
										
										
											2024-07-29 18:43:56 -07:00
+								  skip_cache = config.dynamic_shapes.value
-												[JAX] Add caching to pjit._infer_params.

When tracing inner jits, we currently redo a lot of tracing work, which we can cache. Just as we have a C++ fast path for top-level jit calls, we can reuse the same logic for inner jits. We use part of the C++ fast path code to compute the signature of the arguments and split apart the dynamic arguments to compute a cache key. If we have seen the cache key before, we can avoid doing most of the work of _infer_params.

In passing, fix a bug where DynamicJaxprTracer's shaped_abstractify rule sometimes produces concrete avals.

```
name           old cpu/op   new cpu/op   delta
jit_add_chain  59.1ms ±14%  49.4ms ±10%  -16.32%  (p=0.008 n=5+5)

name           old time/op          new time/op          delta
jit_add_chain  60.3ms ±14%          50.7ms ±11%  -15.99%          (p=0.008 n=5+5)
```

PiperOrigin-RevId: 645491650

											
										
										
											2024-06-21 13:52:19 -07:00
+								  if not skip_cache:
 								    signature, dynargs = jax_jit.parse_arguments(
 								        args, tuple(kwargs.values()), tuple(kwargs.keys()), ji.static_argnums,
 								        ji.static_argnames, tree_util.default_registry)
 								    try:
 								      avals = tuple(shaped_abstractify(a) for a in dynargs)
 								    except (OverflowError, TypeError):
 								      # If we see something we don't understand, use the slow path.
 								      skip_cache = True
 								  if skip_cache:
 								    p, args_flat = _infer_params_impl(fun, ji, pjit_mesh, resource_env, args,
 								                                      kwargs, in_avals=None)
 								    return p, p.consts + args_flat
 								  entry = _infer_params_cached(
 								      fun, ji, signature, avals, pjit_mesh, resource_env)
 								  if entry.pjit_params is None:
 								    p, args_flat = _infer_params_impl(
 								        fun, ji, pjit_mesh, resource_env, args, kwargs, in_avals=avals)
 								    if p.attrs_tracked:
 								      # If there are attrs_tracked, don't use the cache.
 								      return p, p.consts + args_flat
 								    else:
 								      entry.pjit_params = p
 								  return entry.pjit_params, entry.pjit_params.consts + dynargs
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								def _extract_implicit_args(
-												Use lower-case PEP 585 names for types.

Issue https://github.com/google/jax/issues/16537

PiperOrigin-RevId: 542969282

											
										
										
											2023-06-23 15:11:37 -07:00
+								  in_type: Sequence[tuple[core.AbstractValue, bool]],
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  explicit_args: Sequence[Any]
 								) -> Sequence[core.Tracer]:
 								  """
 								  Given an input type and explicitly-passed arguments (per the user-facing API
 								  calling convention), extract implicit axis size arguments from shapes of
 								  explicit arguments (for the trace-time / jaxpr-level calling convention).
 								  """
 								  # First, using `in_type` construct a list to represent the full argument list,
 								  # leaving the implicit arguments as None placeholders for now.
 								  explicit_args_ = iter(explicit_args)
 								  args = [next(explicit_args_) if expl else None for _, expl in in_type]
 								  assert next(explicit_args_, None) is None
 								  del explicit_args, explicit_args_
 								  # Next, populate the implicit arguments using the DBIdxs in `in_type`.
 								  for i, (aval, explicit) in enumerate(in_type):
 								    if not explicit or not isinstance(aval, core.DShapedArray):
 								      continue  # can't populate an implicit argument
 								    arg = args[i]
 								    assert arg is not None
 								    for d1, d2 in zip(aval.shape, arg.aval.shape):
 								      if isinstance(d1, core.DBIdx):
 								        if args[d1.val] is None:
 								          args[d1.val] = d2
 								        assert core.same_referent(args[d1.val], d2)
 								  assert all(x is not None for x in args)
-												Merge pull request #21273 from superbobry:mypy-ruff

PiperOrigin-RevId: 636146344

											
										
										
											2024-05-22 06:35:38 -07:00
+								  return [x for x, (_, e) in zip(args, in_type) if not e]  # pytype: disable=bad-return-type
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
 								def _flat_axes_specs(abstracted_axes, *args, **kwargs
-												Upgrade remaining sources to Python 3.9

This PR is a follow up to #18881.

The changes were generated by adding

    from __future__ import annotations

to the files which did not already have them and running

    pyupgrade --py39-plus --keep-percent-format {jax,tests,jaxlib,examples,benchmarks}/**/*.py

											
										
										
											2023-12-11 13:59:29 +00:00
+								                     ) -> list[pe.AbstractedAxesSpec] | None:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  if abstracted_axes is None: return None
 								  if kwargs: raise NotImplementedError
 								  def ax_leaf(l):
 								    return (isinstance(l, dict) and all_leaves(l.values()) or
 								            isinstance(l, tuple) and all_leaves(l, lambda x: x is None))
 								  return broadcast_prefix(abstracted_axes, args, ax_leaf)
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
+								class JitWrapped(stages.Wrapped):
 								  def eval_shape(self, *args, **kwargs):
 								    """See ``jax.eval_shape``."""
 								    raise NotImplementedError
-												rename `Specialized` to `Traced` (and `specialize` to `trace`)

PiperOrigin-RevId: 641076488

											
										
										
											2024-06-06 17:42:25 -07:00
+								  def trace(self, *args, **kwargs) -> stages.Traced:
-												Add `specialize` on jax.jit and make it a `Stage`.

Eventually, we should use this in jax.make_jaxpr and delete all the duplicated code.

PiperOrigin-RevId: 640707223

											
										
										
											2024-06-05 17:45:34 -07:00
+								    raise NotImplementedError
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								# in_shardings and out_shardings can't be None as the default value
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								# because `None` means that the input is fully replicated.
 								def pjit(
 								    fun: Callable,
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    in_shardings=UNSPECIFIED,
 								    out_shardings=UNSPECIFIED,
-												Upgrade remaining sources to Python 3.9

This PR is a follow up to #18881.

The changes were generated by adding

    from __future__ import annotations

to the files which did not already have them and running

    pyupgrade --py39-plus --keep-percent-format {jax,tests,jaxlib,examples,benchmarks}/**/*.py

											
										
										
											2023-12-11 13:59:29 +00:00
+								    static_argnums: int | Sequence[int] | None = None,
 								    static_argnames: str | Iterable[str] | None = None,
 								    donate_argnums: int | Sequence[int] | None = None,
 								    donate_argnames: str | Iterable[str] | None = None,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    keep_unused: bool = False,
-												Upgrade remaining sources to Python 3.9

This PR is a follow up to #18881.

The changes were generated by adding

    from __future__ import annotations

to the files which did not already have them and running

    pyupgrade --py39-plus --keep-percent-format {jax,tests,jaxlib,examples,benchmarks}/**/*.py

											
										
										
											2023-12-11 13:59:29 +00:00
+								    device: xc.Device | None = None,
 								    backend: str | None = None,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    inline: bool = False,
-												Upgrade remaining sources to Python 3.9

This PR is a follow up to #18881.

The changes were generated by adding

    from __future__ import annotations

to the files which did not already have them and running

    pyupgrade --py39-plus --keep-percent-format {jax,tests,jaxlib,examples,benchmarks}/**/*.py

											
										
										
											2023-12-11 13:59:29 +00:00
+								    abstracted_axes: Any | None = None,
-												Make eval_shape a wrapper around `jax.jit(f).eval_shape(*args, **kwargs)`

PiperOrigin-RevId: 599724490

											
										
										
											2024-01-18 22:10:24 -08:00
+								) -> JitWrapped:
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  """Makes ``fun`` compiled and automatically partitioned across multiple devices.
-												Adds a note that pjit is equivalent to jit.

PiperOrigin-RevId: 535296532

											
										
										
											2023-05-25 10:13:50 -07:00
+								  NOTE: This function is now equivalent to jax.jit please use that instead.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  The returned function has semantics equivalent to those of ``fun``, but is
 								  compiled to an XLA computation that runs across multiple devices
 								  (e.g. multiple GPUs or multiple TPU cores). This can be useful if the jitted
 								  version of ``fun`` would not fit in a single device's memory, or to speed up
 								  ``fun`` by running each operation in parallel across multiple devices.
 								  The partitioning over devices happens automatically based on the
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  propagation of the input partitioning specified in ``in_shardings`` and
 								  the output partitioning specified in ``out_shardings``. The resources
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  specified in those two arguments must refer to mesh axes, as defined by
-												Replace uses of deprecated JAX sharding APIs with their new names in jax.sharding.

This change updates:
* {jax.experimental.maps.Mesh, jax.interpreters.pxla.Mesh} to jax.sharding.Mesh
* {jax.experimental.PartitionSpec, jax.experimental.pjit.PartitionSpec, jax.interpreters.pxla.PartitionSpec, jax.pxla.PartitionSpec} to jax.sharding.PartitionSpec
* jax.experimental.maps.NamedSharding to jax.sharding.NamedSharding.

PiperOrigin-RevId: 506994892

											
										
										
											2023-02-03 14:28:07 -08:00
+								  the :py:func:`jax.sharding.Mesh` context manager. Note that the mesh
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  definition at :func:`~pjit` application time is ignored, and the returned function
 								  will use the mesh definition available at each call site.
 								  Inputs to a :func:`~pjit`'d function will be automatically partitioned across devices
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  if they're not already correctly partitioned based on ``in_shardings``.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  In some scenarios, ensuring that the inputs are already correctly pre-partitioned
 								  can increase performance. For example, if passing the output of one
 								  :func:`~pjit`'d function to another :func:`~pjit`’d function (or the same
 								  :func:`~pjit`’d function in a loop), make sure the relevant
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  ``out_shardings`` match the corresponding ``in_shardings``.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  .. note::
 								    **Multi-process platforms:** On multi-process platforms such as TPU pods,
 								    :func:`~pjit` can be used to run computations across all available devices across
 								    processes. To achieve this, :func:`~pjit` is designed to be used in SPMD Python
 								    programs, where every process is running the same Python code such that all
 								    processes run the same :func:`~pjit`'d function in the same order.
 								    When running in this configuration, the mesh should contain devices across
-												Update the multi-process note in pjit's docstring

PiperOrigin-RevId: 632160561

											
										
										
											2024-05-09 08:37:43 -07:00
+								    all processes. All inputs arguments must be globally shaped.
 								    ``fun`` will still be executed across *all* devices in the mesh,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    including those from other processes, and will be given a global view of the
-												Update the multi-process note in pjit's docstring

PiperOrigin-RevId: 632160561

											
										
										
											2024-05-09 08:37:43 -07:00
+								    data spread across multiple processes as a single array.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								    The SPMD model also requires that the same multi-process :func:`~pjit`'d
 								    functions must be run in the same order on all processes, but they can be
 								    interspersed with arbitrary operations running in a single process.
 								  Args:
 								    fun: Function to be compiled. Should be a pure function, as side-effects may
 								      only be executed once. Its arguments and return value should be arrays,
 								      scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
 								      Positional arguments indicated by ``static_argnums`` can be anything at
 								      all, provided they are hashable and have an equality operation defined.
 								      Static arguments are included as part of a compilation cache key, which is
 								      why hash and equality operators must be defined.
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    in_shardings: Pytree of structure matching that of arguments to ``fun``,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      with all actual arguments replaced by resource assignment specifications.
 								      It is also valid to specify a pytree prefix (e.g. one value in place of a
 								      whole subtree), in which case the leaves get broadcast to all values in
 								      that subtree.
-												Improve the error message raised from jax.jit if Pspec or None is passed

PiperOrigin-RevId: 522377813

											
										
										
											2023-04-06 10:49:57 -07:00
+								      The ``in_shardings`` argument is optional. JAX will infer the shardings
 								      from the input :py:class:`jax.Array`'s, and defaults to replicating the input
 								      if the sharding cannot be inferred.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      The valid resource assignment specifications are:
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
-												Deprecate `XLACompatibleSharding` in favor of `jax.sharding.Sharding`.

PiperOrigin-RevId: 640544939

											
										
										
											2024-06-05 09:06:36 -07:00
+								      - :py:class:`Sharding`, which will decide how the value
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
+								        will be partitioned. With this, using a mesh context manager is not
 								        required.
-												Allow None to be passed to in_shardings and out_shardings. The default is still UNSPECIFIED to handle edge cases around the old semantics where None is treated as fully replicated.

The semantics are as follows:

* if the mesh context manager is not provided, None will be treated as UNSPECIFIED for both in_shardings and out_shardings

* If the mesh context manager is provided, None will be treated as fully replicated as per the old semantics.

This will make sure that we don't break existing code depending on None meaning replicated but also start making the transition to None meaning UNSPECIFIED for jit and pjit.

PiperOrigin-RevId: 540705660

											
										
										
											2023-06-15 15:21:36 -07:00
+								      - :py:obj:`None` is a special case whose semantics are:
-												Fix the docs build

											
										
										
											2023-06-16 13:14:38 -07:00
+								          - if the mesh context manager is *not* provided, JAX has the freedom to
 								            choose whatever sharding it wants.
 								            For in_shardings, JAX will mark is as replicated but this behavior
 								            can change in the future.
 								            For out_shardings, we will rely on the XLA GSPMD partitioner to
 								            determine the output shardings.
 								          - If the mesh context manager is provided, None will imply that the
 								            value will be replicated on all devices of the mesh.
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
+								      - For backwards compatibility, in_shardings still supports ingesting
-												Allow None to be passed to in_shardings and out_shardings. The default is still UNSPECIFIED to handle edge cases around the old semantics where None is treated as fully replicated.

The semantics are as follows:

* if the mesh context manager is not provided, None will be treated as UNSPECIFIED for both in_shardings and out_shardings

* If the mesh context manager is provided, None will be treated as fully replicated as per the old semantics.

This will make sure that we don't break existing code depending on None meaning replicated but also start making the transition to None meaning UNSPECIFIED for jit and pjit.

PiperOrigin-RevId: 540705660

											
										
										
											2023-06-15 15:21:36 -07:00
+								        :py:class:`PartitionSpec`. This option can *only* be used with the
 								        mesh context manager.
-												Fix the docs build

											
										
										
											2023-06-16 13:14:38 -07:00
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
+								        - :py:class:`PartitionSpec`, a tuple of length at most equal to the rank
 								          of the partitioned value. Each element can be a :py:obj:`None`, a mesh
 								          axis or a tuple of mesh axes, and specifies the set of resources assigned
 								          to partition the value's dimension matching its position in the spec.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								      The size of every dimension has to be a multiple of the total number of
 								      resources assigned to it.
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								    out_shardings: Like ``in_shardings``, but specifies resource
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      assignment for function outputs.
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
+								      The ``out_shardings`` argument is optional. If not specified, :py:func:`jax.jit`
 								      will use GSPMD's sharding propagation to determine how to shard the outputs.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    static_argnums: An optional int or collection of ints that specify which
 								      positional arguments to treat as static (compile-time constant).
 								      Operations that only depend on static arguments will be constant-folded in
 								      Python (during tracing), and so the corresponding argument values can be
 								      any Python object.
 								      Static arguments should be hashable, meaning both ``__hash__`` and
 								      ``__eq__`` are implemented, and immutable. Calling the jitted function
 								      with different values for these constants will trigger recompilation.
 								      Arguments that are not arrays or containers thereof must be marked as
 								      static.
 								      If ``static_argnums`` is not provided, no arguments are treated as static.
 								    static_argnames: An optional string or collection of strings specifying
 								      which named arguments to treat as static (compile-time constant). See the
 								      comment on ``static_argnums`` for details. If not
 								      provided but ``static_argnums`` is set, the default is based on calling
 								      ``inspect.signature(fun)`` to find corresponding named arguments.
-												jax.jit now works correctly if both donate_argnums and donate_argnames are specified.

Update the docstring and changelog too to mention `donate_argnames`.

PiperOrigin-RevId: 548223395

											
										
										
											2023-07-14 14:27:29 -07:00
+								    donate_argnums: Specify which positional argument buffers are "donated" to
 								      the computation. It is safe to donate argument buffers if you no longer
 								      need them once the computation has finished. In some cases XLA can make
 								      use of donated buffers to reduce the amount of memory needed to perform a
 								      computation, for example recycling one of your input buffers to store a
 								      result. You should not reuse buffers that you donate to a computation, JAX
 								      will raise an error if you try to. By default, no argument buffers are
 								      donated.
 								      If neither ``donate_argnums`` nor ``donate_argnames`` is provided, no
 								      arguments are donated. If ``donate_argnums`` is not provided but
 								      ``donate_argnames`` is, or vice versa, JAX uses
 								      :code:`inspect.signature(fun)` to find any positional arguments that
 								      correspond to ``donate_argnames``
 								      (or vice versa). If both ``donate_argnums`` and ``donate_argnames`` are
 								      provided, ``inspect.signature`` is not used, and only actual
 								      parameters listed in either ``donate_argnums`` or ``donate_argnames`` will
 								      be donated.
 								      For more details on buffer donation see the
 								      `FAQ <https://jax.readthedocs.io/en/latest/faq.html#buffer-donation>`_.
-												Add donate_argnames to jax.jit. This works similarly to static_argnames.

Note that if donate_argnames is not None and donate_argnums is None, then JAX will infer donate_argnums from the names, which will then be used to find the donation_vector. This is fine because currently the same thing happens for static_argnums and static_argnames.

I'll fix the TODOs, etc in follow up CLs.

Fixes https://github.com/google/jax/issues/10539

PiperOrigin-RevId: 547612861

											
										
										
											2023-07-12 15:09:18 -07:00
+								    donate_argnames: An optional string or collection of strings specifying
 								      which named arguments are donated to the computation. See the
 								      comment on ``donate_argnums`` for details. If not
 								      provided but ``donate_argnums`` is set, the default is based on calling
 								      ``inspect.signature(fun)`` to find corresponding named arguments.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    keep_unused: If `False` (the default), arguments that JAX determines to be
 								      unused by `fun` *may* be dropped from resulting compiled XLA executables.
 								      Such arguments will not be transferred to the device nor provided to the
 								      underlying executable. If `True`, unused arguments will not be pruned.
 								    device: This argument is deprecated. Please put your arguments on the
 								      device you want before passing them to jit.
 								      Optional, the Device the jitted function will run on. (Available devices
 								      can be retrieved via :py:func:`jax.devices`.) The default is inherited
 								      from XLA's DeviceAssignment logic and is usually to use
 								      ``jax.devices()[0]``.
 								    backend: This argument is deprecated. Please put your arguments on the
 								      backend you want before passing them to jit.
 								      Optional, a string representing the XLA backend: ``'cpu'``, ``'gpu'``, or
 								      ``'tpu'``.
-												fix formatting in pjit doc

											
										
										
											2023-04-07 09:35:51 -07:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  Returns:
 								    A wrapped version of ``fun``, set up for just-in-time compilation and
 								    automatically partitioned by the mesh available at each call site.
 								  For example, a convolution operator can be automatically partitioned over
 								  an arbitrary set of devices by a single :func:`~pjit` application:
 								  >>> import jax
 								  >>> import jax.numpy as jnp
 								  >>> import numpy as np
-												Move functions into `api_util.py` and `dispatch.py` to remove circular import error when pjit is imported in `api.py` for merging the `jit` and `pjit` frontend API.

PiperOrigin-RevId: 497172760

											
										
										
											2022-12-22 08:40:36 -08:00
+								  >>> from jax.sharding import Mesh, PartitionSpec
 								  >>> from jax.experimental.pjit import pjit
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  >>>
 								  >>> x = jnp.arange(8, dtype=jnp.float32)
 								  >>> f = pjit(lambda x: jax.numpy.convolve(x, jnp.asarray([0.5, 1.0, 0.5]), 'same'),
-												Add in_shardings and out_shardings argument to pjit and jit to start deprecating in_axis_resources and out_axis_resources.

PiperOrigin-RevId: 508934327

											
										
										
											2023-02-11 15:29:38 -08:00
+								  ...         in_shardings=None, out_shardings=PartitionSpec('devices'))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  >>> with Mesh(np.array(jax.devices()), ('devices',)):
 								  ...   print(f(x))  # doctest: +SKIP
 								  [ 0.5  2.   4.   6.   8.  10.  12.  10. ]
 								  """
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								  return make_jit(
-												Add donate_argnames to jax.jit. This works similarly to static_argnames.

Note that if donate_argnames is not None and donate_argnums is None, then JAX will infer donate_argnums from the names which will then be used to find the donation_vector. This is fine because currently, the same thing happens from static_argnums and static_argnames.

I'll fix the TODOs, etc in follow up CLs.

Fixes https://github.com/google/jax/issues/10539

PiperOrigin-RevId: 547612861

											
										
										
											2023-07-12 15:09:18 -07:00
+								       fun, in_shardings, out_shardings, donate_argnums, donate_argnames,
-												Refactorings to the jit implementation.

Notably:
* We can share more code between jit/pjit. There's no significant difference between the two, other than the handling of the resource environment, so we can share more of the code.
* Rather than having an infer_params callback, we can just teach common_infer_params (now named _infer_params) to handle the resource environment, which is the only meaningful difference. common_infer_params already had to understand the two cases, so there's no reason we need to hoist part of that logic into a callback.
* If we slightly alter the role of PjitInfo so it contains only the things we know about a jit() or can deduce from its arguments, we can construct it ahead of time. This does require that we split out a couple of things that we cannot deduce at that time, namely the resource environment and the two layout parameters into separate arguments, but the result reads more cleanly to me.

No functional changes intended, this is just to improve readability.

PiperOrigin-RevId: 617812557

											
										
										
											2024-03-21 05:35:44 -07:00
+								       static_argnums, static_argnames, device, backend, abstracted_axes,
 								       keep_unused, inline, use_resource_env=True)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def hashable_pytree(pytree):
 								  """Wraps `pytree` in a `HashableFunction` thunk that rebuilds it when called.
 								
 								  The flattened leaves (frozen into a tuple) and the treedef are handed to
 								  `HashableFunction` as its `closure` argument.
 								  """
 								  leaves, structure = tree_flatten(pytree)
 								  frozen_leaves = tuple(leaves)
 								  rebuild = lambda: tree_unflatten(structure, frozen_leaves)
 								  return HashableFunction(rebuild, closure=(structure, frozen_leaves))
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								def _create_sharding_for_array(mesh, x, name, api_name):
 								  """Resolves one `in_shardings`/`out_shardings` entry `x` against `mesh`.
 								
 								  Args:
 								    mesh: The mesh to build a sharding from, or None.
 								    x: The entry to resolve: None, a `Sharding`, an unspecified/auto marker,
 								      or a `ParsedPartitionSpec`.
 								    name: Name of the argument `x` came from; used in error messages, with
 								      extra hints for 'in_shardings' and 'out_shardings'.
 								    api_name: Name of the calling API; used in the empty-mesh error message.
 								
 								  Returns:
 								    `UNSPECIFIED` if `x` is None and `mesh` is None or empty; `x` itself if it
 								    is already a `Sharding` or unspecified/auto; otherwise the result of
 								    `pxla.create_mesh_pspec_sharding` for `mesh` and `x`.
 								
 								  Raises:
 								    RuntimeError: If `x` still needs a mesh but `mesh` is None or empty.
 								  """
-												Allow None to be passed to in_shardings and out_shardings. The default is still UNSPECIFIED to handle edge cases around the old semantics where None is treated as fully replicated.

The semantics are as follow:

* if the mesh context manager is not provided, None will be treated as UNSPECIFIED for both in_shardings and out_shardings

* If the mesh context manager is provided, None will be treated as fully replicated as per the old semantics.

This will make sure that we don't break existing code depending on None meaning replicated but also start making the transition to None meaning UNSPECIFIED for jit and pjit.

PiperOrigin-RevId: 540705660

											
										
										
											2023-06-15 15:21:36 -07:00
+								  if x is None and (mesh is None or mesh.empty):
 								    return UNSPECIFIED
-												Deprecate `XLACompatibleSharding` in favor of `jax.sharding.Sharding`.

PiperOrigin-RevId: 640544939

											
										
										
											2024-06-05 09:06:36 -07:00
+								  if isinstance(x, sharding.Sharding) or is_unspecified_or_auto(x):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return x
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  if mesh is None:
-												Deprecate `XLACompatibleSharding` in favor of `jax.sharding.Sharding`.

PiperOrigin-RevId: 640544939

											
										
										
											2024-06-05 09:06:36 -07:00
+								    msg = ('jax.jit only supports `Sharding`s being passed to'
-												Improve the error message raised from jax.jit if Pspec or None is passed

PiperOrigin-RevId: 522377813

											
										
										
											2023-04-06 10:49:57 -07:00
+								           f' {name}. Looks like you are passing either `PartitionSpec` or `None`'
 								           f' which is not allowed in jax.jit.\n')
 								    # Append a hint specific to the argument that triggered the error.
 								    if name == 'in_shardings':
 								      msg += (f'Note that {name} argument is optional. JAX will infer the shardings'
 								              " from the input jax.Array's and will default to replicating the"
 								              ' input if the sharding cannot be inferred.')
 								    elif name == 'out_shardings':
 								      msg += (f'Note that {name} is optional. If not specified, jax.jit will'
 								              " use GSPMD's sharding propagation to figure out what the sharding"
 								              ' of the output(s) should be.')
 								    raise RuntimeError(msg)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  if mesh.empty:
-												Improve the empty mesh error message raised in pjit if mesh is not used and Pspec is passed to in|out_shardings

PiperOrigin-RevId: 517495400

											
										
										
											2023-03-17 13:33:45 -07:00
+								    raise RuntimeError(
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								        f'{api_name} requires a non-empty mesh if you are passing'
 								        f' `PartitionSpec`s or `None` to {name}! Is a mesh defined at the call'
-												Deprecate `XLACompatibleSharding` in favor of `jax.sharding.Sharding`.

PiperOrigin-RevId: 640544939

											
										
										
											2024-06-05 09:06:36 -07:00
+								        f' site? Alternatively, provide `Sharding`s to {name} and'
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								        ' then the mesh context manager is not required.')
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  # A nice user error is raised in prepare_axis_resources.
-												Allow None to be passed to in_shardings and out_shardings. The default is still UNSPECIFIED to handle edge cases around the old semantics where None is treated as fully replicated.

The semantics are as follow:

* if the mesh context manager is not provided, None will be treated as UNSPECIFIED for both in_shardings and out_shardings

* If the mesh context manager is provided, None will be treated as fully replicated as per the old semantics.

This will make sure that we don't break existing code depending on None meaning replicated but also start making the transition to None meaning UNSPECIFIED for jit and pjit.

PiperOrigin-RevId: 540705660

											
										
										
											2023-06-15 15:21:36 -07:00
+								  assert x is None or isinstance(x, ParsedPartitionSpec), x
-												Cleanup `ParsedPartitionSpec` and remove `CanonicalizedParsedPartitionSpec`. Also mark `user_spec` as private.

PiperOrigin-RevId: 676498946

											
										
										
											2024-09-19 11:38:01 -07:00
+								  return (pxla.create_mesh_pspec_sharding(mesh, x) if x is None else
 								          pxla.create_mesh_pspec_sharding(mesh, x.get_partition_spec(), x))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def _create_sharding_with_device_backend(device, backend):
 								  """Builds a `SingleDeviceSharding` from exactly one of `device` or `backend`.
 								
 								  With `device`, the sharding targets that device; with `backend`, it targets
 								  the first entry of `xb.get_backend(backend).local_devices()`. The result is
 								  tagged with `_device_backend = True` before being returned.
 								
 								  Raises:
 								    AssertionError: If both arguments are None, or if both are set.
 								  """
 								  if device is not None:
 								    assert backend is None
 								    out = SingleDeviceSharding(device)
 								  elif backend is not None:
 								    assert device is None
-												Remove use of get_default_device_assignment().

This is the only caller of this API in JAX, and it can be simplified.

Change in preparation for removing get_default_device_assignment() from the Python bindings.

PiperOrigin-RevId: 563770199

											
										
										
											2023-09-08 09:17:53 -07:00
+								    out = SingleDeviceSharding(xb.get_backend(backend).local_devices()[0])
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								  else:
 								    raise AssertionError('Unreachable!')
 								  # Mark the sharding as coming from the `device`/`backend` arguments.
 								  out._device_backend = True
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  return out
 								def flatten_axis_resources(what, tree, shardings, tupled_args):
 								  """Flattens `shardings` against `tree`, raising a descriptive error on mismatch.
 								
 								  Args:
 								    what: Description of the specification, used in error messages.
 								    tree: Treedef that `shardings` must be a tree prefix of.
 								    shardings: The specification pytree to flatten.
 								    tupled_args: Whether `tree` represents a tuple of positional arguments;
 								      forwarded to `flatten_axes` and enables the tuple-specific checks below.
 								
 								  Returns:
 								    A tuple of the entries produced by `flatten_axes`.
 								
 								  Raises:
 								    ValueError: If `tupled_args` is set and `shardings` is not a tuple of the
 								      right length.
 								    Exception: Otherwise, the error built from the first entry returned by
 								      `prefix_errors`.
 								  """
 								  try:
 								    return tuple(flatten_axes(what, tree, shardings, tupled_args=tupled_args))
 								  except ValueError:
 								    pass  # Raise a tree prefix error below
 								  # Tree leaves are always valid prefixes, so if there was a prefix error as
 								  # assumed here, `shardings` must not be a leaf.
 								  assert not treedef_is_leaf(tree_structure(shardings))
 								  # Check the type directly rather than using isinstance because of namedtuples.
 								  if tupled_args and (type(shardings) is not tuple or
 								                      len(shardings) != len(tree.children())):
 								    # We know `shardings` is meant to be a tuple corresponding to the args
 								    # tuple, but while it is a non-leaf pytree, either it wasn't a tuple or it
 								    # wasn't the right length.
 								    msg = (f"{what} specification must be a tree prefix of the positional "
 								           f"arguments tuple passed to the `pjit`-decorated function. In "
 								           f"particular, {what} must either be a None, a PartitionSpec, or "
 								           f"a tuple of length equal to the number of positional arguments.")
 								    # If `tree` represents an args tuple, then `shardings` must be a tuple.
 								    # TODO(mattjj,apaszke): disable implicit list casts, remove 'or list' below
 								    if type(shardings) is not tuple:
 								      msg += f" But {what} is not a tuple: got {type(shardings)} instead."
 								    elif len(shardings) != len(tree.children()):
 								      msg += (f" But {what} is the wrong length: got a tuple or list of length "
 								              f"{len(shardings)} for an args tuple of length "
 								              f"{len(tree.children())}.")
 								    # As an extra hint, let's check if the user just forgot to wrap
 								    # shardings in a singleton tuple.
 								    if len(tree.children()) == 1:
 								      try: flatten_axes(what, tree, (shardings,))
 								      except ValueError: pass  # That's not the issue.
 								      else:
 								        msg += (f" Given the corresponding argument being "
 								                f"passed, it looks like {what} might need to be wrapped in "
 								                f"a singleton tuple.")
 								    raise ValueError(msg)
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  axis_tree = shardings
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												[jax2tf] Fix grad of pjit in native lowering.

Since jax2tf.convert is called recursively for the purpose of
serializing the vjp function, we must ensure that if the primal
function is a pjit with shardings then the vjp function must also
be converted as a pjit.

Without this fix the serialization with gradients of a pjit function
will fail the an error that there are shardings but not pjit at
the top-level.

											
										
										
											2023-03-21 06:01:10 +01:00
+								  # Because we only have the `tree` treedef and not the full pytree here,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  # we construct a dummy tree to compare against. Revise this in callers?
 								  dummy_tree = tree_unflatten(tree, [PytreeLeaf()] * tree.num_leaves)
 								  errors = prefix_errors(axis_tree, dummy_tree)
 								  if errors:
 								    e = errors[0]  # Only show information about the first disagreement found.
 								    raise e(what)
 								  # At this point we've failed to find a tree prefix error.
 								  assert False, "Please open a bug report!"  # This should be unreachable.
 								class PytreeLeaf:
 								  """Placeholder leaf object whose repr is the fixed string "pytree leaf"."""
 								
 								  def __repr__(self):
 								    return "pytree leaf"
-												Add `util.cache` to `jax.clear_caches` and move pjit, sharding, array, etc uses of `functools.lru_cache` to `util.cache` so that those caches will be cleared if `jax.clear_caches` is called.

PiperOrigin-RevId: 642359226

											
										
										
											2024-06-11 12:46:11 -07:00
+								@util.cache(max_size=4096, trace_context_in_key=False)
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								def _process_in_axis_resources(in_shardings_treedef, in_shardings_leaves,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								                               in_layouts_treedef, in_layouts_leaves,
 								                               in_avals, in_tree, debug_info,
-												Always flatten args and kwargs together i.e. `tree_flatten((args, kwargs))` so that we have a uniform in_tree structure everywhere.

Leads to a code cleanup and more standardization in jit.

PiperOrigin-RevId: 592388438

											
										
										
											2023-12-19 17:31:25 -08:00
+								                               device_or_backend_set, kws):
 								  """Flattens and validates the input shardings and layouts.
 								
 								  `in_shardings` and `in_layouts` arrive as (treedef, leaves) pairs and are
 								  rebuilt with `tree_unflatten` before being flattened against `in_tree`.
 								
 								  Args:
 								    in_shardings_treedef: Treedef of the flattened `in_shardings`.
 								    in_shardings_leaves: Leaves of the flattened `in_shardings`.
 								    in_layouts_treedef: Treedef of the flattened `in_layouts`.
 								    in_layouts_leaves: Leaves of the flattened `in_layouts`.
 								    in_avals: One entry per flat input; its length sizes the broadcast
 								      results and it is passed to the compatibility checks.
 								    in_tree: Treedef of the inputs that the specifications are matched to.
 								    debug_info: Optional debug info; its `arg_names` are passed to the checks.
 								    device_or_backend_set: Not read by the body.
 								      NOTE(review): presumably only distinguishes cache entries -- confirm.
 								    kws: If falsy, only the first child of `in_tree` is used.
 								
 								  Returns:
 								    A pair `(in_shardings_flat, in_layouts_flat)`.
 								  """
 								  # NOTE(review): presumably drops the kwargs subtree of an (args, kwargs)
 								  # tree when no keyword arguments were passed -- confirm.
 								  if not kws:
 								    in_tree, _ = treedef_children(in_tree)
-												Don't tree_flatten in_shardings and out_shardings each time a jit() is traced.

Do it once when the jit is constructed.

(In general we do a bit too much switching back and forth between flattened and unflattened representations, and we'd probably do well just to keep things flattened.)

PiperOrigin-RevId: 617859205

											
										
										
											2024-03-21 08:59:28 -07:00
+								  orig_in_shardings = tree_unflatten(in_shardings_treedef, in_shardings_leaves)
-												Deprecate FROM_GDA and remove its support from pjit's code since jax.Array inside pjit has sharding inference capabilities by default.

PiperOrigin-RevId: 520067392

											
										
										
											2023-03-28 10:29:01 -07:00
+								  # Only do this if original in_shardings are unspecified. If it is AUTO, go
 								  # via flatten_axis_resources.
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if is_unspecified(orig_in_shardings):
-												Deprecate FROM_GDA and remove its support from pjit's code since jax.Array inside pjit has sharding inference capabilities by default.

PiperOrigin-RevId: 520067392

											
										
										
											2023-03-28 10:29:01 -07:00
+								    in_shardings_flat = (orig_in_shardings,) * len(in_avals)
-												Don't depend on `flatten_axis_resources` which will error because `flatten_axes` passes a dummy `object()` which doesn't work with checks in user pytrees.

Only do this if the original {in|out}_shardings are _UNSPECIFIED.

PiperOrigin-RevId: 502792305

											
										
										
											2023-01-18 00:12:25 -08:00
+								  else:
 								    in_shardings_flat = flatten_axis_resources(
-												Add a private API to allow setting layouts on jitted computations.

We expose 3 modes:

* `SpecifiedLayout`: User specifies the `minor_to_major` field of the layout. Tiling not exposed yet.

* `DefaultLayout`: PJRT chooses the layout. It defaults to the current behavior.

* `AUTO`: Compiler chooses the layout. This field is not a layout per se. It's a request to get the layout from the compiler. This field cannot be on an Array or other data types. It can only be on jit.

Public API coming soon.

Co-authored-by: Roy Frostig <frostig@google.com>
PiperOrigin-RevId: 582692036

											
										
										
											2023-11-15 08:48:17 -08:00
+								        "pjit in_shardings", in_tree, orig_in_shardings, tupled_args=True)
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  in_layouts = tree_unflatten(in_layouts_treedef, in_layouts_leaves)
-												Add a private API to allow setting layouts on jitted computations.

We expose 3 modes:

* `SpecifiedLayout`: User specifies the `minor_to_major` field of the layout. Tiling not exposed yet.

* `DefaultLayout`: PJRT chooses the layout. It defaults to the current behavior.

* `AUTO`: Compiler chooses the layout. This field is not a layout per se. It's a request to get the layout from the compiler. This field cannot be on an Array or other data types. It can only be on jit.

Public API coming soon.

Co-authored-by: Roy Frostig <frostig@google.com>
PiperOrigin-RevId: 582692036

											
										
										
											2023-11-15 08:48:17 -08:00
+								  if in_layouts is None:
 								    in_layouts_flat = (in_layouts,) * len(in_avals)
 								  else:
 								    in_layouts_flat = flatten_axis_resources(
 								        "pjit in_layouts", in_tree, in_layouts, tupled_args=True)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												[attrs] allow passing a jax-attrs object to jit functions

currently we don't get any interesting cache hits; only on object identity
match

											
										
										
											2024-02-13 16:45:27 -08:00
+								  # TODO(dougalm,mattjj): enable debug info with attrs_tracked
 								  attrs_tracked = debug_info and len(debug_info.arg_names) != len(in_avals)
 								  # The compatibility checks are skipped under dynamic shapes and when the
 								  # number of debug arg names does not match the number of avals.
 								  if not config.dynamic_shapes.value and not attrs_tracked:
-												Deprecate FROM_GDA and remove its support from pjit's code since jax.Array inside pjit has sharding inference capabilities by default.

PiperOrigin-RevId: 520067392

											
										
										
											2023-03-28 10:29:01 -07:00
+								    pjit_check_aval_sharding(in_shardings_flat, in_avals,
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								                             None if debug_info is None else debug_info.arg_names,
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								                             "pjit arguments", allow_uneven_sharding=False)
-												Add check_compatible_aval checks to Layout. It checks if `len(major_to_minor) == len(aval.shape)`.

PiperOrigin-RevId: 651777179

											
										
										
											2024-07-12 08:09:54 -07:00
+								    check_aval_layout_compatibility(
 								        in_layouts_flat, in_avals,
 								        None if debug_info is None else debug_info.arg_names, "jit arguments")
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  return in_shardings_flat, in_layouts_flat
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information

The main idea here is that pointing out which parts of the cache key differs
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context manager (not in
    public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								callsites: set[str] = set()  # NOTE(review): presumably call sites already reported by explain_tracing_cache_miss -- confirm
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information

The main idea here is that pointing out which parts of the cache key differs
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context manager (not in
    public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								def explain_tracing_cache_miss(
-												``debug_info`` no longer requires non-None ``func_src_info``

I suspect in the past lack of source info meant that the function also has
no signature, but this is no longer the case.

I also removed an unused parameter from ``explain_tracing_cache_miss`` as
a drive by change.

This is a follow up to #22269.

											
										
										
											2024-07-05 09:51:02 +01:00
+								    f: Callable, unseen_f: bool, cache: dict, key: tuple):
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information

The main idea here is that pointing out which parts of the cache key differs
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context manager (not in
    public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								  if config.check_tracer_leaks.value: return
 								  def unpack(key):
-												fix weak key cache stuff

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-05-29 04:03:02 +00:00
+								    transforms, (), _, (in_type, _, debug_info, _, inline), *_, ctx = key
-												[attrs] allow passing a jax-attrs object to jit functions

currently we don't get any interesting cache hits; only on object identity
match

											
										
										
											2024-02-13 16:45:27 -08:00
+								    # TODO(dougalm,mattjj): enable cache miss explanation with attrs
-												Avoid jax_explain_cache_misses unpacking error.

PiperOrigin-RevId: 618931412

											
										
										
											2024-03-25 12:54:06 -07:00
+								    _, (_, (in_tree,)), *_ = transforms
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information.

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								    return in_tree, in_type, debug_info, inline.val, ctx
 								  in_tree, in_type, debug_info, inline, ctx = unpack(key)
 								  if inline: return
 								  msg: list[str] = []
 								  p = msg.append
 								  done = lambda: logger.log(logging.WARNING, '\n'.join(msg))
 								  callsite = source_info_util.summarize(source_info_util.current())
 								  p(f"TRACING CACHE MISS at {callsite} because:")
 								  # have we seen this function before at all?
 								  fun_name = getattr(f, '__qualname__', f)
-												Handle missing ``debug_info`` in ``explain_tracing_cache_miss``

											
										
										
											2024-07-04 09:37:18 +01:00
+								  if debug_info is not None and debug_info.func_src_info:
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information.

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								    _, _, *rest = debug_info.func_src_info.split(' ')
 								    src_info = " defined at "  + ' '.join(rest)
 								  else:
 								    src_info = ''
 								  if unseen_f:
 								    p(f"  never seen function:\n    {fun_name} id={id(f)}{src_info}")
 								    if callsite in callsites:
 								      p("  but seen another function defined on the same line; maybe the function is\n"
 								        "  being re-defined repeatedly, preventing caching?")
 								    callsites.add(callsite)
 								    return done()
 								  else:
 								    p(f"  for {fun_name}{src_info}")
 								  seen_keys = map(unpack, cache.keys())
 								  # have we maybe switched some args to be kwargs or visa-versa?
 								  args_tree, kwargs_tree = treedef_children(in_tree)
 								  args_kwargs_trees = [treedef_children(k) for k, *_ in seen_keys]
 								  args_kwargs_match = [t for t in args_kwargs_trees
 								                       if t == [args_tree, kwargs_tree]]
 								  if not args_kwargs_match:
 								    num_args = len(treedef_children(args_tree))
 								    _, kwarg_keys = kwargs_tree.node_data()  # type: ignore
 								    p(f"  never seen passing {num_args} positional args and {len(kwarg_keys)} "
 								      "keyword args with keys:\n"
 								      f"    {', '.join(map(repr, kwarg_keys))}")
 								    dont_match = [set(t[1].node_data()[1]) for t in args_kwargs_trees  # type: ignore
 								                  if t != [args_tree, kwargs_tree]]
-												Avoid "min() arg is an empty sequence" error after enabling "jax_explain_cache_misses".

PiperOrigin-RevId: 641381432

											
										
										
											2024-06-07 15:51:52 -07:00
+								    close_kwargs = min(
 								        dont_match, key=set(kwarg_keys).symmetric_difference, default=None
 								    )
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information.

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								    if not close_kwargs:
 								      p("  closest seen is passing no keyword args")
 								    else:
 								      p(f"  closest seen passes {len(close_kwargs)} keyword args with keys:\n"
 								        f"    {', '.join(map(repr, close_kwargs))}")
 								    return done()
 								  # have we never seen this tracing context before?
 								  ctxs_match = [c for *_, c in seen_keys if c == ctx]
 								  if not ctxs_match:
 								    p("  tracing context doesn't match, e.g. due to config or context manager")
 								    dont_match = [c for *_, c in seen_keys if c != ctx]
 								    closest_ctx = min(dont_match, key=lambda c: sum(map(op.ne, c, ctx)))
 								    idxs = [i for i, (c1, c2) in enumerate(zip(ctx, closest_ctx)) if c1 != c2]
 								    p("  closest seen context tuple differs at positions:\n"
 								      f"    {', '.join(map(str, idxs))}\n"
 								      "  compare to tuple returned by config._trace_context() in jax/_src/config.py.")
 								    return done()
 								  # have we never seen this input pytree before?
 								  trees_match = [k for k in seen_keys if k[0] == in_tree]
 								  if not trees_match:
 								    in_tree_str = f':\n    {in_tree}' if len(str(in_tree)) < 76 else ''
 								    p(f"  never seen input pytree{in_tree_str}")
 								    dont_match = [t for t, *_ in seen_keys if t != in_tree]
 								    closest_tree = min(dont_match, key=lambda t: abs(t.num_leaves - in_tree.num_leaves))
-												[pallas] Improve some error messages and add API tests.

We make the following improvements:

  * pytree structural disequality messages now attempt to localize the
    mismatch using tree_util.KeyPath.
  * we generate a simpler error message for when `in_specs` is not
    a sequence, instead of the current PyTreeDef mismatch error.
  * we generate an error message for when the index map function
    in a BlockSpec returns an unexpected number of results.
  * added error localization to the existing shape polymorphism
    check that the block shapes are static.
  * We check that the kernel function returns None. Without this
    we used to get `body_fun output and input must have same type structure`
    in the interpreter, `assert len(jaxpr.outvars) == 0` on GPU,
    and `INTERNAL: Mosaic failed to compile TPU kernel: has 1 operands, but enclosing function (@main) returns 0`
    on TPU.
  * we check that the rank of the block_shape matches the rank of
    the overall array. Without this we used to get a `safe_zip`
    error. We also carry the pytree paths to localize the error.

To simplify the generation of the error messages we added a helper
function `tree_util.equality_errors_pytreedef`, which is just like
`tree_util.equality_errors` but takes `PyTreeDef` inputs rather than
PyTrees. We then used this new helper function in `pjit.py` and `stages.py`.

											
										
										
											2024-07-02 00:40:13 -07:00
+								    errs = list(tree_util.equality_errors_pytreedef(in_tree, closest_tree))  # type: ignore[arg-type]
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information.

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								    p(f"  closest seen input pytree has {len(errs)} mismatches, including:")
 								    for path, thing1, thing2, explanation in errs:
 								      fst, *path = path  # type: ignore
 								      base = ['args', 'kwargs'][fst.idx]
-												Merge pull request #21273 from superbobry:mypy-ruff

PiperOrigin-RevId: 636146344

											
										
										
											2024-05-22 06:35:38 -07:00
+								      p(f"    * at {base}{keystr(tuple(path))}, seen {thing2} but now given {thing1},"
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information.

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								        f"      so {explanation}")
 								    return done()
 								  # have we never seen these input types (eg shapes, dtypes) before?
 								  types_match = [k for k in trees_match if k[1] == in_type]
 								  if not types_match:
 								    if len(in_type) < 5:
 								      in_type_str = ':\n    {}'.format(',  '.join(
 								          f'{n}: {ty.str_short(short_dtypes=True)}'
 								          for n, ty in zip(debug_info.arg_names, in_type)))
 								    else:
 								      in_type_str = ''
 								    p(f"  never seen input type signature{in_type_str}")
 								    dont_match = [t for _, t, *_ in trees_match if t != in_type]
 								    closest_ty = min(dont_match, key=lambda t: sum(map(op.ne, t, in_type)))
 								    num_mismatch = sum(map(op.ne, closest_ty, in_type))
 								    p(f"  closest seen input type signature has {num_mismatch} mismatches, including:")
 								    add_weak_type_hint = False
 								    for name, ty1, ty2 in zip(debug_info.arg_names, closest_ty, in_type):
 								      if ty1 != ty2:
 								        if type(ty1) == type(ty2) == core.ShapedArray:
 								          s1, s2 = ty1.str_short(True), ty2.str_short(True)
 								          if s1 == s2:  # weak types don't show up in str_short()
 								            assert ty1.weak_type ^ ty2.weak_type
 								            s1 += f'{{weak_type={ty1.weak_type}}}'
 								            s2 += f'{{weak_type={ty2.weak_type}}}'
 								            add_weak_type_hint = True
 								        else:
 								          s1, s2 = str(ty1), str(ty2)
 								        p(f"    * at {name}, seen {s1}, but now given {s2}")
 								    if add_weak_type_hint:
 								      p('where weak_type=True often means a Python builtin numeric value, and ')
 								      p('weak_type=False means a jax.Array.')
 								      p('See https://jax.readthedocs.io/en/latest/type_promotion.html#weak-types')
 								    return done()
 								  # we think this is unreachable...
-												Update references to the GitHub url in JAX codebase to reflect move from google/jax to jax-ml/jax

PiperOrigin-RevId: 676843138

											
										
										
											2024-09-20 07:51:48 -07:00
+								  p("explanation unavailable! please open an issue at https://github.com/jax-ml/jax")
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information.

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								  return done()
 								@partial(lu.cache, explain=explain_tracing_cache_miss)
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								def _create_pjit_jaxpr(
 								    fun: lu.WrappedFun,
 								    in_type: core.InputType | Sequence[core.AbstractValue],
 								    attr_data: int,
 								    debug_info: lu.TracingDebugInfo,
 								    out_paths: Callable,
 								    ignored_inline: IgnoreKey
 								) -> tuple[core.ClosedJaxpr, list[Any], list[core.AbstractValue],
 								           list[tuple[PyTreeDef, PyTreeDef, tuple[Any, str]]]]:
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information.

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context
    manager (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								  del ignored_inline  # just for explain_cache_miss
-												Remove the f-string evaluation during logging the elapsed time by passing in fun_name to log_elapsed_time

PiperOrigin-RevId: 532132574

											
										
										
											2023-05-15 09:15:22 -07:00
+								  with dispatch.log_elapsed_time(
-												Show elapsed time in nanoseconds

											
										
										
											2024-07-25 22:20:25 +03:00
+								      "Finished tracing + transforming {fun_name} for pjit in {elapsed_time:.9f} sec",
-												Remove the f-string evaluation during logging the elapsed time by passing in fun_name to log_elapsed_time

PiperOrigin-RevId: 532132574

											
										
										
											2023-05-15 09:15:22 -07:00
+								      fun_name=fun.__name__, event=dispatch.JAXPR_TRACE_EVENT):
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								    pe_debug = debug_info and pe.debug_info_final(fun, debug_info.traced_for)
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								    if config.dynamic_shapes.value:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								      jaxpr, global_out_avals, consts = pe.trace_to_jaxpr_dynamic2(
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
+								          lu.annotate(fun, cast(core.InputType, in_type)), debug_info=pe_debug)
-												integrate attrs in jax.jit

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-01-25 22:20:36 -08:00
+								      attrs_tracked = []
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    else:
-												integrate attrs in jax.jit

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-01-25 22:20:36 -08:00
+								      jaxpr, global_out_avals, consts, attrs_tracked = pe.trace_to_jaxpr_dynamic(
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								          fun, in_type, debug_info=pe_debug)
-												Make attrs work with pytrees

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-05-22 23:30:55 -04:00
+								      # assert attr_data is sentinel or attr_data matches attrs_tracked
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
-												integrate attrs in jax.jit

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-01-25 22:20:36 -08:00
+								  # TODO(dougalm,mattjj): enable debug info with attrs_tracked
 								  if not config.dynamic_shapes.value and not attrs_tracked:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    jaxpr = jaxpr_debug_info(jaxpr, debug_info, out_paths())
-												Fix pjit's initial style usage of consts.

Instead of smuggling them via the jaxpr, pull them out and pass them with args. This is because consts can be tracers, and that fails down the stack when lowering to MLIR.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 500544141

											
										
										
											2023-01-08 10:37:40 -08:00
-												[key reuse] rename flag to jax_debug_key_reuse

											
										
										
											2024-03-21 10:47:16 -07:00
+								  if config.debug_key_reuse.value:
-												Add experimental static key reuse checking

											
										
										
											2023-12-11 12:03:48 -08:00
+								    # Import here to avoid circular imports
 								    from jax.experimental.key_reuse._core import check_key_reuse_jaxpr
 								    check_key_reuse_jaxpr(jaxpr)
-												Fix pjit's initial style usage of consts.

Instead of smuggling them via the jaxpr, pull them out and pass them with args. This is because consts can be tracers, and that fails down the stack when lowering to MLIR.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 500544141

											
										
										
											2023-01-08 10:37:40 -08:00
+								  if any(isinstance(c, core.Tracer) for c in consts):
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								    closed_jaxpr = pe.close_jaxpr(pe.convert_constvars_jaxpr(jaxpr))
-												Fix pjit's initial style usage of consts.

Instead of smuggling them via the jaxpr, pull them out and pass them with args. This is because consts can be tracers, and that fails down the stack when lowering to MLIR.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 500544141

											
										
										
											2023-01-08 10:37:40 -08:00
+								    final_consts = consts
 								  else:
-												make mlir arg and result names work with static_argnums/argnames

This is the first step in a revision to how we handle the debug info pertaining
to staged functions' parameter names and result pytree paths. To limit
complexity, this first step adds machinery required to make our MLIR lowerings'
parameter and result names work, but it does *not* yet unify it with existing
arg-name machinery used at tracing time (in partial_eval.py, e.g.
partial_eval.DebugInfo etc). That unification will come in follow-up commits.
(I wrote the unified version first, then broke it down into this sequence of
commits.)

Another thing that will arrive in follow-up commits is pmap support (handling
static_broadcasted_argnames). This PR doesn't include support for pmap because
pmap's final style implementation requires slightly different machinery than
jit/pjit's initial style implementation. Indeed this PR removes the previous
support for pmap arg/result info, and skips the corresponding tests, because
the previous support didn't handle pmap's static_broadcasted_argnums (and I
think it could even lead to silently incorrect annotations when pmap was not at
the top-level, though I didn't work out an example case to be sure that was
possible).

This commit includes the changes from PR #15079, so that PR should be merged first.

Here's the _why_ of this change:
* The pre-existing solution (from PRs #14702, #14764, and #14813) did not
  handle static_argnums or static_argnames correctly. Instead it would fail,
  resulting in debug info being dropped from the jaxpr and ultimately the MLIR
  computation (but no Exception raised). We need to handle
  static_argnums/argnames because while the corresponding parameters remain on
  the Python callable signature, they are excluded from the args/kwargs
  pytrees; the previous solution didn't account for that divergence.
* The best way to handle static_argnums/argnames is to work out this debug info
  when we still have the original args/kwargs in hand, i.e. much earlier than
  the previous mechanism. We then just have to pass this debug info to the
  right places. Indeed we often already had to work out some debug-related
  information at these call sites (e.g. whether the function is being staged
  out for jit, or scan, or whatever), so after this change we're working out
  all the debug info at the same time.
* A side benefit is that now to get this debug info we no longer need to
  unflatten user pytree defs with dummy objects (to reconstruct dummy
  args/kwargs trees so that we can call inspect.signature(fun).bind), since we
  just use the original args/kwargs instead. Since some user pytree node types
  are not fully polymorphic in their element types (e.g. their __init__ methods
  sometimes contained assertions about their elements' shapes, expecting them
  to be arrays), that means the new mechanism is fundamentally more compatible
  with custom pytree node types.

More concretely, effecting those high-level changes led to:
* replacing the previous `core.DebugInfo` with a class `core.JaxprDebugInfo`,
  which in addition to the more precise name has fields like
  `arg_names: Tuple[Optional[str], ...]` and
  `result_paths: Tuple[Optional[str], ...]`, rather than
  `in_tree: Optional[PyTreeDef]`, reflecting the fact that we work out the
  actual debug info more eagerly than before and we don't need pytrees for
  dummy-unflattening;
* introducing the new `partial_eval.TracingDebugInfo` class representing the
  debug info about inputs which we have available at tracing time; in a
  follow-up PR, we'll adapt partial_eval.py to use this new class and we'll
  delete `partial_eval.DebugInfo` and its corresponding helper methods (not
  done in this commit just to reduce complexity of each change);
* moving the old `core.DebugInfo`, which before #14702 lived in
  partial_eval.py, back to partial_eval.py pending cleanup (deletion) of that
  partial_eval.py debug info code;
* making specific jaxpr-processing functions produce an appropriately updated
  `core.JaxprDebugInfo` object for their output (e.g. `pe.dce_jaxpr` prunes
  elements from the `arg_names` field), maintaining now-checked invariants like
  a Jaxpr's `debug_info` should have the same number of argument names as the
  jaxpr has invars (the jaxpr-processing functions updated here are enough for
  top-level jit jaxprs to have debug info attached, handling the original
  intended use case of jit(f).lower, but not e.g. grad-of-jit cases, which can
  be handled later by updating `ad.jvp_jaxpr` and the like to produce updated
  debug info on their outputs);
* add some tests for static_argnums/static_argnames.

Phew! Can't wait to land those follow-ups too :P

											
										
										
											2023-03-17 17:45:41 -07:00
+								    closed_jaxpr = core.ClosedJaxpr(jaxpr, consts)
-												Fix pjit's initial style usage of consts.

Instead of smuggling them via the jaxpr, pull them out and pass them with args. This is because consts can be tracers, and that fails down the stack when lowering to MLIR.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 500544141

											
										
										
											2023-01-08 10:37:40 -08:00
+								    final_consts = []
-												integrate attrs in jax.jit

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-01-25 22:20:36 -08:00
+								  return closed_jaxpr, final_consts, global_out_avals, attrs_tracked
-												Make the _pjit_jaxpr cache more effective by not depending on the out_shardings. If the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation is not dependent on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add `util.cache` to `jax.clear_caches` and move pjit, sharding, array, etc uses of `functools.lru_cache` to `util.cache` so that those caches will be cleared if `jax.clear_caches` is called.

PiperOrigin-RevId: 642359226

											
										
										
											2024-06-11 12:46:11 -07:00
@util.cache(max_size=4096, trace_context_in_key=False)
def _check_and_canonicalize_out_shardings(
    out_shardings_treedef, out_shardings_leaves, out_layouts_treedef,
    out_layouts_leaves, out_tree, out_avals, debug_info, device_or_backend_set):
  """Flattens output shardings and layouts to one entry per output, then checks them.

  Args:
    out_shardings_treedef: pytree structure of the requested out_shardings.
    out_shardings_leaves: leaves matching `out_shardings_treedef`.
    out_layouts_treedef: pytree structure of the requested out_layouts.
    out_layouts_leaves: leaves matching `out_layouts_treedef`.
    out_tree: zero-argument callable returning the output pytree structure.
    out_avals: flat sequence of output abstract values.
    debug_info: may be None; otherwise its `result_paths` are passed to the
      checks as the per-output names.
    device_or_backend_set: not read in the body.
      NOTE(review): presumably only takes part in the cache key -- confirm.

  Returns:
    A pair `(out_shardings_flat, out_layouts_flat)`.
  """
  orig_out_shardings = tree_unflatten(out_shardings_treedef, out_shardings_leaves)
  if (is_unspecified(orig_out_shardings) or
      isinstance(orig_out_shardings, sharding.Sharding)):
    # A single unspecified marker or a single Sharding is replicated across
    # all outputs instead of being matched against the output tree.
    out_shardings_flat = (orig_out_shardings,) * len(out_avals)
  else:
    out_shardings_flat = flatten_axis_resources(
        "pjit out_shardings", out_tree(), orig_out_shardings,
        tupled_args=False)

  out_layouts = tree_unflatten(out_layouts_treedef, out_layouts_leaves)
  if out_layouts is None:
    out_layouts_flat = (out_layouts,) * len(out_avals)
  else:
    out_layouts_flat = flatten_axis_resources(
        "pjit out_layouts", out_tree(), out_layouts, tupled_args=False)

  # The aval-based checks are skipped entirely when dynamic shapes are enabled.
  if not config.dynamic_shapes.value:
    result_paths = None if debug_info is None else debug_info.result_paths
    pjit_check_aval_sharding(
        out_shardings_flat, out_avals, result_paths,
        "pjit outputs", allow_uneven_sharding=False)
    check_aval_layout_compatibility(
        out_layouts_flat, out_avals, result_paths, "jit outputs")
  return out_shardings_flat, out_layouts_flat
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Make the _pjit_jaxpr cache more effective by not depending on the out_shardings. If the out_shardings argument of pjit changes, it should not affect the jaxpr created, because jaxpr creation is not dependent on out_shardings.

PiperOrigin-RevId: 510488544

											
										
										
											2023-02-17 12:01:50 -08:00
-												Make attrs work with pytrees

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-05-22 23:30:55 -04:00
# One record per tracked attribute:
#   (object, attribute name, pytree structure of the attribute's value,
#    abstract values of the value's leaves).
AttrRecord = tuple[object, str, PyTreeDef, list[core.AbstractValue]]

# Weakly keyed by the underlying callable (`fun.f`). Each value is a
# defaultdict(list) indexed by (fun.transforms, fun.params, in_type); see
# `seen_attrs_get`.
_seen_attrs = weakref.WeakKeyDictionary()  # type: ignore
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
def seen_attrs_get(
    fun: lu.WrappedFun,
    in_type: core.InputType | tuple[core.AbstractValue, ...]
) -> list:
  """Returns the mutable list of attr-record cases stored for `fun` at `in_type`.

  The list lives in the module-level `_seen_attrs` weak-key cache, under the
  wrapped callable `fun.f` and then under `(fun.transforms, fun.params,
  in_type)`. A missing entry is created empty on first access. The returned
  list is the cached object itself, so appending to it updates the cache.

  Args:
    fun: wrapped function; its `f`, `transforms`, `params` and `in_type`
      attributes are read.
    in_type: input type used as part of the key. If `fun.in_type` is set it
      must equal this value.
  """
  cache = _seen_attrs.setdefault(fun.f, defaultdict(list))
  assert fun.in_type is None or fun.in_type == in_type
  return cache[(fun.transforms, fun.params, in_type)]
-												Make attrs work with pytrees

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-05-22 23:30:55 -04:00
-												pjit.py cleanups.

Refactoring only, NFC intended.

* add types to more places.
* don't unpack PjitInfo positionally, since it's a 23-tuple and that seems rather error prone.
* change _infer_params to produce a new PjitParams NamedTuple, rather than having callers unpack a 9-tuple positionally.
* inline _pjit_jaxpr into its caller, since it only has one caller and the wrapper doesn't really clarify anything.
* note the return type of transformation_with_aux is a Callable.

PiperOrigin-RevId: 645068326

											
										
										
											2024-06-20 09:57:41 -07:00
def _attr_token(
    fun: lu.WrappedFun,
    in_type: core.InputType | tuple[core.AbstractValue, ...]
) -> int:
  """Returns the index of the first recorded case matching current attr state.

  Each case is a list of `(obj, attr, treedef, avals)` records. A case matches
  when, for every record, the current value of `obj.attr` flattens to the same
  tree structure and the same leaf abstract values. If no case matches, the
  number of recorded cases is returned, i.e. the index the next new case
  would get.
  """
  # Imported locally, as in the other attr helpers in this file.
  from jax.experimental.attrs import jax_getattr
  cases = seen_attrs_get(fun, in_type)
  for i, records in enumerate(cases):
    for obj, attr, treedef, avals in records:
      val = jax_getattr(obj, attr)
      vals, treedef_ = tree_flatten(val)
      # Build a real list so the comparison with the stored `avals` is an
      # element-wise equality check.
      avals_ = [shaped_abstractify(v) for v in vals]
      if treedef != treedef_ or avals != avals_: break
    else:
      # Every record in this case matched the current state.
      return i
  return len(cases)
 								def _attr_update(fun, in_type, i, attrs_tracked):
 								  from jax.experimental.attrs import jax_getattr
 								  leaves = lambda obj, attr: tree_leaves(jax_getattr(obj, attr))
 								  records = [(obj, attr, init_tree, map(shaped_abstractify, leaves(obj, attr)))
 								             for init_tree, _, (obj, attr) in attrs_tracked]
-												fix weak key cache stuff

Co-authored-by: Dougal Maclaurin <dougalm@google.com>

											
										
										
											2024-05-29 04:03:02 +00:00
+								  cases = seen_attrs_get(fun, in_type)
-												Make attrs work with pytrees

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-05-22 23:30:55 -04:00
+								  if i == len(cases):
 								    cases.append(records)
 								  else:
 								    assert i < len(cases) and cases[i] == records
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												add jax.explain_cache_misses tracing cache miss explanations

As part of making JAX's behavior more transparent, it must be clear not only
when code is slow because it's spending all its time missing caches (and hence
retracing/recompiling), but also _why_ it missed those caches. That is, just
knowing (from e.g. setting jax_log_compiles) that code is retracing a lot
doesn't tell the user what to do to fix things. But once the user knows that
the cache misses are due to changing dtypes, or due to jit being passed a new
callable object on every iteration of a loop, it's often clear what to do. And
JAX can provide that information

The main idea here is that pointing out which parts of the cache key differ
from previously-seen keys can constitute a pretty good explanation.

This PR adds an explanation mechanism. It can be enabled in a few different ways:
  * setting the `JAX_EXPLAIN_CACHE_MISSES` shell environment variable to something truthy;
  * setting the config option `jax.config.update('jax_explain_cache_misses', True)`;
  * using the `jax._src.config.explain_cache_misses` context manager
    (not in public namespace yet);
  * when parsing command line flags with absl, using the
    `--jax_explain_cache_misses` flag.

Co-authored-by: Yash Katariya <yashkatariya@google.com>

											
										
										
											2023-06-09 14:43:42 -07:00
+								@dataclasses.dataclass(frozen=True)
 								class IgnoreKey:
 								  """Wrapper whose hash and equality deliberately ignore the wrapped `val`."""
 								  # Payload carried along; it takes no part in hashing or comparison.
 								  # NOTE(review): presumably wrapped around cache-key entries that should
 								  # not cause cache misses -- confirm against callers.
 								  val: Any
 								  def __hash__(self):
 								    # Every instance hashes identically: only the class itself is hashed.
 								    return hash(self.__class__)
 								  def __eq__(self, other):
 								    # Equal to any other IgnoreKey, regardless of what either one wraps.
 								    return isinstance(other, IgnoreKey)  # ignore self.val!
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								def pjit_check_aval_sharding(
-												Upgrade remaining sources to Python 3.9

This PR is a follow up to #18881.

The changes were generated by adding

    from __future__ import annotations

to the files which did not already have them and running

    pyupgrade --py39-plus --keep-percent-format {jax,tests,jaxlib,examples,benchmarks}/**/*.py

											
										
										
											2023-12-11 13:59:29 +00:00
+								    shardings, flat_avals, names: tuple[str, ...] | None,
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								    what_aval: str, allow_uneven_sharding: bool):
 								  new_names = [''] * len(shardings) if names is None else names
 								  for aval, s, name in zip(flat_avals, shardings, new_names):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    if is_unspecified_or_auto(s):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      continue
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								    name_str = f' with pytree key path {name}' if name else ''
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    shape = aval.shape
 								    try:
-												Rename `is_compatible_aval` to `check_compatible_aval` since it returns None and not a `bool`.

PiperOrigin-RevId: 638431968

											
										
										
											2024-05-29 15:28:14 -07:00
+								      # Sharding interfaces can implement `check_compatible_aval` as an optional
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      # method to raise a more meaningful error.
-												Rename `is_compatible_aval` to `check_compatible_aval` since it returns None and not a `bool`.

PiperOrigin-RevId: 638431968

											
										
										
											2024-05-29 15:28:14 -07:00
+								      if hasattr(s, 'check_compatible_aval'):
 								        s.check_compatible_aval(shape)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      else:
-												Use `_to_xla_hlo_sharding` everywhere in JAX. Remove `_to_xla_op_sharding` in favor of `_to_xla_hlo_sharding` since constructing a C++ class is faster than protos and will help with further changes coming to HloSharding.

PiperOrigin-RevId: 537969500

											
										
										
											2023-06-05 13:40:59 -07:00
+								        s._to_xla_hlo_sharding(len(shape))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    except ValueError as e:
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								      raise ValueError(
 								          f'One of {what_aval}{name_str} is incompatible with its sharding '
-												MAINT Do not use `str()` and `repr()` in f-string replacement fields

`str()` is called by default by the formatting machinery, and `repr()` only
needs `!r`.

											
										
										
											2023-10-23 15:11:15 +01:00
+								          f'annotation {s}: {e}')
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    # Use the `HloSharding` to find out how many ways each dimension of
 								    # the aval is sharded. This approach will work across all
-												Deprecate `XLACompatibleSharding` in favor of `jax.sharding.Sharding`.

PiperOrigin-RevId: 640544939

											
										
										
											2024-06-05 09:06:36 -07:00
+								    # Sharding.
-												Use `_to_xla_hlo_sharding` everywhere in JAX. Remove `_to_xla_op_sharding` in favor of `_to_xla_hlo_sharding` since constructing a C++ class is faster than protos and will help with further changes coming to HloSharding.

PiperOrigin-RevId: 537969500

											
										
										
											2023-06-05 13:40:59 -07:00
+								    hlo_sharding = s._to_xla_hlo_sharding(len(shape))
 								    assert hlo_sharding is not None
 								    num_ways_dim_sharded, _ = op_shardings.get_num_ways_dim_sharded(hlo_sharding)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    for i, size in enumerate(num_ways_dim_sharded):
 								      if not allow_uneven_sharding and shape[i] % size != 0:
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								        raise ValueError(f"One of {what_aval}{name_str} was given the sharding "
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                         f"of {s}, which implies that "
-												Remove global_str since all avals in pjit are global

PiperOrigin-RevId: 522443476

											
										
										
											2023-04-06 14:51:30 -07:00
+								                         f"the global size of its dimension {i} should be "
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                         f"divisible by {size}, but it is equal to {shape[i]} "
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								                         f"(full shape: {shape})")
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add check_compatible_aval checks to Layout. It checks if `len(major_to_minor) == len(aval.shape)`.

PiperOrigin-RevId: 651777179

											
										
										
											2024-07-12 08:09:54 -07:00
+								def check_aval_layout_compatibility(
 								    layouts, flat_avals, names: tuple[str, ...] | None, what_aval: str):
 								  """Raises ValueError if an explicit layout is incompatible with its aval."""
 								  # `flat_avals`, `layouts` and (when given) `names` are walked in lockstep
 								  # with a plain `zip`, so iteration stops at the shortest of the three.
 								  # `what_aval` is only used as a label inside the error message.
 								  # With no names supplied, every entry gets an empty name so the error
 								  # message simply omits the pytree key path.
 								  new_names = [''] * len(layouts) if names is None else names
 								  for aval, l, name in zip(flat_avals, layouts, new_names):
 								    # Nothing to validate for an unset (`None`) or `AutoLayout` entry.
 								    if l is None or isinstance(l, AutoLayout):
 								      continue
 								    name_str = f' with pytree key path {name}' if name else ''
 								    shape = aval.shape
 								    try:
 								      l.check_compatible_aval(shape)
 								    except ValueError as e:
 								      # Re-raise with the argument/result label and key path prepended so
 								      # the offending entry can be identified from the message.
 								      raise ValueError(
 								          f'One of {what_aval}{name_str} is incompatible with its layout '
 								          f'annotation {l}: {e}')
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								# -------------------- pjit rules --------------------
-												Make `pjit` an AxisPrimitive so that it can run the batching rules even if the argument is not batched but there is an axis_index/named shapes inside the pjitted function.

PiperOrigin-RevId: 502955369

											
										
										
											2023-01-18 12:55:31 -08:00
+								pjit_p = core.AxisPrimitive("pjit")
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								pjit_p.multiple_results = True
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								def _resolve_in_layouts(args, jit_in_layouts, resolved_in_shardings, in_avals):
-												[Take 2] Expose .layout on jax.Array. Also add checks in the AOT path to make sure that the input Array's layout matches the layout given to jax.jit.

Reverts cd79e71d85621a8d6dede9a710bdb2a29bb380fd

PiperOrigin-RevId: 618878870

											
										
										
											2024-03-25 10:07:55 -07:00
+								  # If device or backend is set, return the default layout. This is because you
 								  # can pass arrays on cpu (with untiled layouts) to jit with backend='tpu'
 								  # which causes error checks to fail. Returning the default layout allows
 								  # this to exist. It's the same for handling shardings.
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  if pxla.check_device_backend_on_shardings(resolved_in_shardings):
-												[Take 2] Expose .layout on jax.Array. Also add checks in the AOT path to make sure that the input Array's layout matches the layout given to jax.jit.

Reverts cd79e71d85621a8d6dede9a710bdb2a29bb380fd

PiperOrigin-RevId: 618878870

											
										
										
											2024-03-25 10:07:55 -07:00
+								    return (None,) * len(jit_in_layouts)
 								  resolved_in_layouts = []
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  for arg, jit_in_l, rs, aval in safe_zip(
 								      args, jit_in_layouts, resolved_in_shardings, in_avals):
-												Standardize default layout to `None` in internals (dispatch, lowering and compilation) and non-default layouts to concrete layouts.

This massively simplifies the amount of checks we need and improves dispatch time too. It also fixes a donation bug being hit in serving code related to layouts and non-standardization of default layout in JAX.

PiperOrigin-RevId: 668527139

											
										
										
											2024-08-28 11:05:45 -07:00
+								    committed = getattr(arg, '_committed', True)
 								    # `arg_layout` is only used for checking purposes in the `else` branch
 								    # below. We cannot replace default layout with None to raise nicer errors.
 								    # `dispatch_arg_layout` replaces default layouts with `None` to simplify
 								    # dispatch and lowering logic downstream.
 								    if hasattr(arg, 'layout'):
 								      arg_layout = arg.layout.device_local_layout
 								      dispatch_arg_layout = (None if pxla.is_default_layout(arg_layout, rs, aval)
 								                             else arg_layout)
 								    else:
 								      arg_layout, dispatch_arg_layout = None, None
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								    # Sharding can be unspecified when array is committed if it's a PmapSharding.
 								    is_pmap_sharding = (is_unspecified(rs) or
 								                        isinstance(getattr(arg, 'sharding', None), PmapSharding))
-												[Take 2] Expose .layout on jax.Array. Also add checks in the AOT path to make sure that the input Array's layout matches the layout given to jax.jit.

Reverts cd79e71d85621a8d6dede9a710bdb2a29bb380fd

PiperOrigin-RevId: 618878870

											
										
										
											2024-03-25 10:07:55 -07:00
+								    if jit_in_l is None:
 								      if committed:
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								        if is_pmap_sharding:
 								          resolved_in_layouts.append(None)
 								        else:
-												Standardize default layout to `None` in internals (dispatch, lowering and compilation) and non-default layouts to concrete layouts.

This massively simplifies the amount of checks we need and improves dispatch time too. It also fixes a donation bug being hit in serving code related to layouts and non-standardization of default layout in JAX.

PiperOrigin-RevId: 668527139

											
										
										
											2024-08-28 11:05:45 -07:00
+								          resolved_in_layouts.append(dispatch_arg_layout)
-												[Take 2] Expose .layout on jax.Array. Also add checks in the AOT path to make sure that the input Array's layout matches the layout given to jax.jit.

Reverts cd79e71d85621a8d6dede9a710bdb2a29bb380fd

PiperOrigin-RevId: 618878870

											
										
										
											2024-03-25 10:07:55 -07:00
+								      else:
 								        resolved_in_layouts.append(None)
 								    else:
-												`device_local_layout` can be None on a jax.Array for backends that don't implement certain required methods for a jax.Array to populate the `device_local_layout`.

Skip the error checks when arr.layout.device_local_layout is None.

PiperOrigin-RevId: 622007598

											
										
										
											2024-04-04 16:41:36 -07:00
+								      # arg_layout can be None because some backends don't implement the
 								      # required layout methods. Hence `arr.layout` can return
 								      # `Layout(None, sharding)`
-												Check for layout mismatch between array's layout and layout specified via in_shardings to jit by only checking `major_to_minor` if `_tiling` is None. Otherwise, check the entire layout.

PiperOrigin-RevId: 651796471

											
										
										
											2024-07-12 09:22:44 -07:00
+								      if (committed
 								          and not is_pmap_sharding
 								          and arg_layout is not None
 								          and not pxla.is_user_xla_layout_equal(jit_in_l, arg_layout)):
-												Improve the error message when users pass DeviceLocalLayout.AUTO to `jax.jit` and a jax.Array as an argument.

PiperOrigin-RevId: 638797194

											
										
										
											2024-05-30 15:06:12 -07:00
+								        extra_msg = ''
 								        if isinstance(jit_in_l, AutoLayout):
 								          extra_msg = (
 								              ' The layout given to `jax.jit` is `DeviceLocalLayout.AUTO` but'
 								              ' the corresponding argument passed is a `jax.Array` with a'
 								              ' concrete layout. Consider passing a `jax.ShapeDtypeStruct`'
 								              ' instead of `jax.Array` as an argument to the jitted function '
 								              ' when using `DeviceLocalLayout.AUTO`.'
 								          )
-												[Take 2] Expose .layout on jax.Array. Also add checks in the AOT path to make sure that the input Array's layout matches the layout given to jax.jit.

Reverts cd79e71d85621a8d6dede9a710bdb2a29bb380fd

PiperOrigin-RevId: 618878870

											
										
										
											2024-03-25 10:07:55 -07:00
+								        raise ValueError('Layout passed to jit does not match the layout '
 								                          'on the respective arg. '
 								                          f'Got pjit layout: {jit_in_l},\n'
-												`device_local_layout` can be None on a jax.Array for backends that don't implement certain required methods for a jax.Array to populate the `device_local_layout`.

Skip the error checks when arr.layout.device_local_layout is None.

PiperOrigin-RevId: 622007598

											
										
										
											2024-04-04 16:41:36 -07:00
+								                          f'arg layout: {arg_layout} for '
-												Improve the error message when users pass DeviceLocalLayout.AUTO to `jax.jit` and a jax.Array as an argument.

PiperOrigin-RevId: 638797194

											
										
										
											2024-05-30 15:06:12 -07:00
+								                          f'arg shape: {shaped_abstractify(arg).str_short()}.'
 								                          f'{extra_msg}')
-												[Take 2] Expose .layout on jax.Array. Also add checks in the AOT path to make sure that the input Array's layout matches the layout given to jax.jit.

Reverts cd79e71d85621a8d6dede9a710bdb2a29bb380fd

PiperOrigin-RevId: 618878870

											
										
										
											2024-03-25 10:07:55 -07:00
+								      resolved_in_layouts.append(jit_in_l)
 								  return tuple(resolved_in_layouts)
-												Remove the device assignment check in _resolve_in_shardings since that's historical and not needed anymore

PiperOrigin-RevId: 674091716

											
										
										
											2024-09-12 18:47:25 -07:00
+								def _resolve_in_shardings(args, pjit_in_shardings: Sequence[PjitSharding]
 								                          ) -> Sequence[PjitSharding]:
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  # If True, means that device or backend is set by the user on pjit and it
 								  # has the same semantics as device_put i.e. doesn't matter which device the
 								  # arg is on, reshard it to the device mentioned. So don't do any of the
 								  # checks and just return the pjit_in_shardings directly. `shard_args` will
 								  # handle the resharding.
-												Prune accidental exports from jax.interpreters.pxla.

These imports do not appear to have users outside JAX itself.

PiperOrigin-RevId: 507835295

											
										
										
											2023-02-07 11:16:01 -08:00
+								  if pxla.check_device_backend_on_shardings(pjit_in_shardings):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return pjit_in_shardings
 								  committed_arg_shardings = []
 								  for a in args:
-												Dedupe shardings before passing them to _get_and_check_device_assignment

In practice, the number of different shardings is usually much smaller than
the number of inputs/outputs.

PiperOrigin-RevId: 600558309

											
										
										
											2024-01-22 13:44:34 -08:00
+								    arg_s = getattr(a, 'sharding', None)
 								    # arg sharding can be None in case of ShapeDtypeStruct. jax.Array does
 								    # not allow None as the sharding.
 								    if arg_s is None:
 								      continue
 								    # Don't consider PmapSharding inputs as committed. They will get resharded
 								    # unconditionally.
 								    if isinstance(arg_s, PmapSharding):
 								      continue
 								    if getattr(a, '_committed', True):
 								      committed_arg_shardings.append((arg_s, pxla.MismatchType.ARG_SHARDING, None))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								  resolved_in_shardings = []
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  for arg, pjit_in_s in zip(args, pjit_in_shardings):
-												Make `sharding` on ShapeDtypeStruct a property that always exists. The previous behavior was it only existed if sharding was not None.

sharding=None means that JAX is free to choose whatever sharding it wants. As it stands, jax will choose to mark the input as replicated but JAX reserves the right to change that as it sees fit.
PiperOrigin-RevId: 543630595

											
										
										
											2023-06-26 21:46:02 -07:00
+								    # arg sharding can be None in case of ShapeDtypeStruct. jax.Array does
 								    # not allow None as the sharding.
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    arg_s, committed = ((arg.sharding, getattr(arg, '_committed', True))
-												Make `sharding` on ShapeDtypeStruct a property that always exists. The previous behavior was it only existed if sharding was not None.

sharding=None means that JAX is free to choose whatever sharding it wants. As it stands, jax will choose to mark the input as replicated but JAX reserves the right to change that as it sees fit.
PiperOrigin-RevId: 543630595

											
										
										
											2023-06-26 21:46:02 -07:00
+								                        if hasattr(arg, 'sharding') and arg.sharding is not None
 								                        else (UNSPECIFIED, False))
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								    if is_unspecified(pjit_in_s):
 								      if is_unspecified(arg_s):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								        resolved_in_shardings.append(arg_s)
 								      else:
 								        if committed:
-												Reshard pmap unconditionally if arguments with PmapSharding are passed to pjit. This is to support all the jit use cases with pjit to merge their API.

PiperOrigin-RevId: 499338100

											
										
										
											2023-01-03 16:08:07 -08:00
+								          # If the arg has a PmapSharding, then reshard it unconditionally.
 								          if isinstance(arg_s, PmapSharding):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								            resolved_in_shardings.append(UNSPECIFIED)
-												Reshard pmap unconditionally if arguments with PmapSharding are passed to pjit. This is to support all the jit use cases with pjit to merge their API.

PiperOrigin-RevId: 499338100

											
										
										
											2023-01-03 16:08:07 -08:00
+								          else:
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								            resolved_in_shardings.append(arg_s)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								        else:
 								          if dispatch.is_single_device_sharding(arg_s):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								            resolved_in_shardings.append(UNSPECIFIED)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								          else:
 								            raise NotImplementedError('Having uncommitted Array sharded on '
 								                                      'multiple devices is not supported.')
 								    else:
-												Rename jax._src.sharding_utils to jax._src.op_shardings.

Move some more op_sharding related helpers to that module.

PiperOrigin-RevId: 522343010

											
										
										
											2023-04-06 08:31:47 -07:00
+								      if (isinstance(arg, np.ndarray) and
-												Add is_fully_replicated method to Shardings. This allows to scrub the usage of is_op_sharding_replicated from JAX because we can just query it on Shardings and save an expensive round trip to OpSharding creation.

PiperOrigin-RevId: 524379122

											
										
										
											2023-04-14 13:55:52 -07:00
+								          not pjit_in_s.is_fully_replicated and  # type: ignore
 								          xb.process_count() > 1):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								        raise ValueError(
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								            'Passing non-trivial shardings for numpy '
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								            'inputs is not allowed. To fix this error, either specify a '
 								            'replicated sharding explicitly or use '
 								            '`jax.experimental.multihost_utils.host_local_array_to_global_array(...)` '
 								            'to convert your host local numpy inputs to a jax.Array which you '
 								            'can pass to pjit. '
 								            'If the numpy input is the same on each process, then you can use '
 								            '`jax.make_array_from_callback(...) to create a `jax.Array` which '
 								            'you can pass to pjit. '
 								            'Please see the jax.Array migration guide for more information '
 								            'https://jax.readthedocs.io/en/latest/jax_array_migration.html#handling-of-host-local-inputs-to-pjit-like-batch-etc. '
 								            f'Got arg shape: {arg.shape}, arg value: {arg}')
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								      if not is_unspecified(arg_s):
-												[Memories] Add Memories support to jax.jit and jax.device_put!

These are the following changes:

* Add a temporary flag (`JAX_FETCH_MEMORY_KIND_ON_EXECUTABLE`) (should not be used by user but needed in C++ in pjrt-ifrt code) on whether to fetch memory kinds from executable. If it is set to True, the host runtime dep needs to be linked in and should also work in OSS (more work needs to happen for that). So only the test sets it to True for now until jax memories is under development.

* Add with_memory_kind method on Sharding to allow for easier creation of shardings with different memory kind.

* Add lowering rules for device_put and jax.jit.
  * For device_put, we always add the annotation that describes a transfer to a memory and a sharding annotation.
  * For jax.jit, if the argument is on host memory, it will have an extra attribute _xla_buffer_placement.

* Handle the correct output sharding in pxla.py by extracting the memory kind from the executable.

* Handle the caching of pjit caches by canonicalizing the memory_kinds so that `NS(mesh, pspec) == NS(mesh, pspec, memory_kind='tpu_hbm')`. Also canonicalize memory_kind in `__hash__` and `__eq__` of shardings.
  * This is to not change the StableHLO to include device placement annotations right now since the host aware passes are not enabled by default and the work is under progress to make it work everywhere.

PiperOrigin-RevId: 553833344

											
										
										
											2023-08-04 09:43:39 -07:00
+								        # jax.jit does not allow resharding across different memory kinds even
 								        # if the argument is uncommitted. Use jax.device_put for those cases,
 								        # either outside or inside jax.jit.
-												Canonicalize to default memory in init of Shardings only on the backends that support memories right now.

PiperOrigin-RevId: 553942534

											
										
										
											2023-08-04 16:26:31 -07:00
+								        if pjit_in_s.memory_kind != arg_s.memory_kind:  # type: ignore
-												[Memories] Add Memories support to jax.jit and jax.device_put!

These are the following changes:

* Add a temporary flag (`JAX_FETCH_MEMORY_KIND_ON_EXECUTABLE`) (should not be used by users, but is needed in C++ in the pjrt-ifrt code) that controls whether to fetch memory kinds from the executable. If it is set to True, the host runtime dep needs to be linked in and should also work in OSS (more work needs to happen for that). So only the test sets it to True for now, while jax memories is under development.

* Add with_memory_kind method on Sharding to allow for easier creation of shardings with different memory kind.

* Add lowering rules for device_put and jax.jit.
  * For device_put, we always add the annotation that describes a transfer to a memory and a sharding annotation.
  * For jax.jit, if the argument is on host memory, it will have an extra attribute _xla_buffer_placement.

* Handle the correct output sharding in pxla.py by extracting the memory kind from the executable.

* Handle the caching of pjit caches by canonicalizing the memory_kinds so that `NS(mesh, pspec) == NS(mesh, pspec, memory_kind='tpu_hbm')`. Also canonicalize memory_kind in `__hash__` and `__eq__` of shardings.
  * This is to not change the StableHLO to include device placement annotations right now since the host aware passes are not enabled by default and the work is under progress to make it work everywhere.

PiperOrigin-RevId: 553833344

											
										
										
											2023-08-04 09:43:39 -07:00
+								          raise ValueError(
 								              'Memory kinds passed to jax.jit does not match memory kind on the'
 								              f' respective arg. Got pjit memory kind: {pjit_in_s.memory_kind}, '  # type: ignore
-												Merge pull request #21273 from superbobry:mypy-ruff

PiperOrigin-RevId: 636146344

											
										
										
											2024-05-22 06:35:38 -07:00
+								              f'arg memory kind: {arg_s.memory_kind} for '  # pytype: disable=attribute-error
-												Print `str_short` of the arg and remove printing the value of the arg.

PiperOrigin-RevId: 559524941

											
										
										
											2023-08-23 13:24:08 -07:00
+								              f'arg shape: {shaped_abstractify(arg).str_short()}')
-												Reshard pmap unconditionally if arguments with PmapSharding are passed to pjit. This is to support all the jit use cases with pjit to merge their API.

PiperOrigin-RevId: 499338100

											
										
										
											2023-01-03 16:08:07 -08:00
+								        if (committed and
 								            not isinstance(arg_s, PmapSharding) and
-												Rename jax._src.sharding_utils to jax._src.op_shardings.

Move some more op_sharding related helpers to that module.

PiperOrigin-RevId: 522343010

											
										
										
											2023-04-06 08:31:47 -07:00
+								            not op_shardings.are_op_shardings_equal(
-												Use `_to_xla_hlo_sharding` everywhere in JAX. Remove `_to_xla_op_sharding` in favor of `_to_xla_hlo_sharding` since constructing a C++ class is faster than protos and will help with further changes coming to HloSharding.

PiperOrigin-RevId: 537969500

											
										
										
											2023-06-05 13:40:59 -07:00
+								                pjit_in_s._to_xla_hlo_sharding(arg.ndim),  # type: ignore
 								                arg_s._to_xla_hlo_sharding(arg.ndim))):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								          raise ValueError('Sharding passed to pjit does not match the sharding '
 								                           'on the respective arg. '
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								                           f'Got pjit sharding: {pjit_in_s},\n'
-												Print `str_short` of the arg and remove printing the value of the arg.

PiperOrigin-RevId: 559524941

											
										
										
											2023-08-23 13:24:08 -07:00
+								                           f'arg sharding: {arg_s} for '
 								                           f'arg shape: {shaped_abstractify(arg).str_short()}')
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      resolved_in_shardings.append(pjit_in_s)
 								  return tuple(resolved_in_shardings)
-												Share lowering code between jit and aot jit path

PiperOrigin-RevId: 622487044

											
										
										
											2024-04-06 13:43:32 -07:00
+								def _resolve_and_lower(
 								    args, jaxpr, in_shardings, out_shardings, in_layouts,
 								    out_layouts, resource_env, donated_invars, name, keep_unused, inline,
-												Add `lowering_platforms` to `traced.lower()` to allow lowering to different backends and multi-backend lowering too. In other words, enable cross-lowering!

The motivation for doing this is 2-fold:

1) This will help with deprecating and eventually deleting `jax.xla_computation` which allows for cross backend lowering.

2) Allow for cross-backend and multi-backend lowering via jax AOT APIs which will help cleanup some hacks implemented for `jax.export`.

Note that this is only available via `.trace(*args).lower(lowering_platforms=('tpu',))`. You cannot use `.lower` to do cross-lowering. We can introduce top-level APIs in the future to allow for composable AOT APIs to make this easier if `.trace(*args).lower(lowering_platforms)` is cumbersome to write.

Designed with @froystig!

PiperOrigin-RevId: 644087787

											
										
										
											2024-06-17 11:58:18 -07:00
+								    lowering_platforms, lowering_parameters, pgle_profiler):
-												Remove the device assignment check in _resolve_in_shardings since that's historical and not needed anymore

PiperOrigin-RevId: 674091716

											
										
										
											2024-09-12 18:47:25 -07:00
+								  in_shardings = _resolve_in_shardings(args, in_shardings)
-												Share lowering code between jit and aot jit path

PiperOrigin-RevId: 622487044

											
										
										
											2024-04-06 13:43:32 -07:00
+								  in_layouts = _resolve_in_layouts(args, in_layouts, in_shardings,
 								                                   jaxpr.in_avals)
 								  lowered = _pjit_lower(
 								      jaxpr, in_shardings, out_shardings, in_layouts, out_layouts, resource_env,
-												Default jax_spmd_mode to allow_jit which will allow explicit jax.jit to not raise the multihost error (since jit and pjit have been merged).

Implicit jit and apply_primitive will still raise an error though (which is recognized via inline parameter). Majority of jnp operations in JAX should be inlined.

PiperOrigin-RevId: 527398394

											
										
										
											2023-04-26 15:54:50 -07:00
+								      donated_invars, name, keep_unused, inline,
-												Add `lowering_platforms` to `traced.lower()` to allow lowering to different backends and multi-backend lowering too. In other words, enable cross-lowering!

The motivation for doing this is 2-fold:

1) This will help with deprecating and eventually deleting `jax.xla_computation` which allows for cross backend lowering.

2) Allow for cross-backend and multi-backend lowering via jax AOT APIs which will help cleanup some hacks implemented for `jax.export`.

Note that this is only available via `.trace(*args).lower(lowering_platforms=('tpu',))`. You cannot use `.lower` to do cross-lowering. We can introduce top-level APIs in the future to allow for composable AOT APIs to make this easier if `.trace(*args).lower(lowering_platforms)` is cumbersome to write.

Designed with @froystig!

PiperOrigin-RevId: 644087787

											
										
										
											2024-06-17 11:58:18 -07:00
+								      lowering_platforms=lowering_platforms,
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured), then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect profiling results. For this I added a ProfileSessionRunner class in profile.py. The class can track how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with a profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also won't recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								      lowering_parameters=lowering_parameters,
 								      pgle_profiler=pgle_profiler)
-												Share lowering code between jit and aot jit path

PiperOrigin-RevId: 622487044

											
										
										
											2024-04-06 13:43:32 -07:00
+								  return lowered
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								def _pjit_call_impl_python(
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								    *args, jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
 								    resource_env, donated_invars, name, keep_unused, inline):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  global _most_recent_pjit_call_executable
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured), then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect profiling results. For this I added a ProfileSessionRunner class in profile.py. The class can track how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with a profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also won't recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								  compile_options = None
 								  pgle_profiler = None
 								  pgle_profiler_dict = _most_recent_pjit_call_executable.weak_pgle_profiler_dict
 								  if config.enable_pgle.value and config.pgle_profiling_runs.value > 0:
 								    if jaxpr not in pgle_profiler_dict:
 								      pgle_profiler_dict[jaxpr] = profiler.PGLEProfiler(
 								          config.pgle_profiling_runs.value,
 								          config.pgle_aggregation_percentile.value)
 								    pgle_profiler = pgle_profiler_dict[jaxpr]
 								    # The method below will return FDO profile when module was profiled
 								    # config.jax_pgle_profiling_runs amount of times, otherwise the result will
 								    # be None.
 								    fdo_profile = pgle_profiler.consume_fdo_profile()
 								    if fdo_profile is not None:
 								      compile_options = {'fdo_profile': fdo_profile}
 								  # TODO(patrios): Do not pass mutable profile session through cached lowering
 								  # chain. Instead we need to move profilers dictionary to pxla module and use
 								  # module as key. Right now we can't do that since there is no way to evict _pjit_lower_cached cache for in PGLE mode.
-												Share lowering code between jit and aot jit path

PiperOrigin-RevId: 622487044

											
										
										
											2024-04-06 13:43:32 -07:00
+								  compiled = _resolve_and_lower(
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured), then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect profiling results. For this I added a ProfileSessionRunner class in profile.py. The class can track how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with a profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also won't recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								      args, jaxpr=jaxpr, in_shardings=in_shardings,
 								      out_shardings=out_shardings, in_layouts=in_layouts,
 								      out_layouts=out_layouts, resource_env=resource_env,
-												Share lowering code between jit and aot jit path

PiperOrigin-RevId: 622487044

											
										
										
											2024-04-06 13:43:32 -07:00
+								      donated_invars=donated_invars, name=name, keep_unused=keep_unused,
-												Add `lowering_platforms` to `traced.lower()` to allow lowering to different backends and multi-backend lowering too. In other words, enable cross-lowering!

The motivation for doing this is 2-fold:

1) This will help with deprecating and eventually deleting `jax.xla_computation` which allows for cross backend lowering.

2) Allow for cross-backend and multi-backend lowering via jax AOT APIs which will help cleanup some hacks implemented for `jax.export`.

Note that this is only available via `.trace(*args).lower(lowering_platforms=('tpu',))`. You cannot use `.lower` to do cross-lowering. We can introduce top-level APIs in the future to allow for composable AOT APIs to make this easier if `.trace(*args).lower(lowering_platforms)` is cumbersome to write.

Designed with @froystig!

PiperOrigin-RevId: 644087787

											
										
										
											2024-06-17 11:58:18 -07:00
+								      inline=inline, lowering_platforms=None,
 								      lowering_parameters=mlir.LoweringParameters(),
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured), then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect profiling results. For this I added a ProfileSessionRunner class in profile.py. The class can track how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with a profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also won't recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								      pgle_profiler=pgle_profiler
 								  ).compile(compile_options)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Change the _most_recent_executable logic to store a weakref dict of jaxpr -> executable so that with the inner cpp cache and outer cpp cache, we extract the correct executable.

PiperOrigin-RevId: 537908874

											
										
										
											2023-06-05 10:06:30 -07:00
+								  _most_recent_pjit_call_executable.weak_key_dict[jaxpr] = compiled
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  # This check is expensive so only do it if enable_checks is on.
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								  if compiled._auto_spmd_lowering and config.enable_checks.value:
-												[Take 2] Expose .layout on jax.Array. Also add checks in the AOT path to make sure that the input Array's layout matches the layout given to jax.jit.

Reverts cd79e71d85621a8d6dede9a710bdb2a29bb380fd

PiperOrigin-RevId: 618878870

											
										
										
											2024-03-25 10:07:55 -07:00
+								    pxla.check_array_xla_sharding_layout_match(
 								        args, compiled._in_shardings, compiled._in_layouts,
-												Remove the sharding and layout checks for non-DCE'd arguments during AOT safe call.

This is because the tracing, lowering and compilation caches do not register a miss if sharding/layout of a DCE'd arg changes when it's passed again to a jitted function.

This is not true for avals so that check still exists.

PiperOrigin-RevId: 623375760

											
										
										
											2024-04-09 22:11:17 -07:00
+								        jaxpr.jaxpr.debug_info, compiled._kept_var_idx)
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								  if config.distributed_debug.value:
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    # Defensively only perform fingerprint logic if debug logging is enabled
 								    # NOTE(skyewm): I didn't benchmark this
 								    fingerprint = None
 								    if hasattr(compiled.runtime_executable(), "fingerprint"):
 								      fingerprint = compiled.runtime_executable().fingerprint
 								    if fingerprint is not None:
 								      fingerprint = fingerprint.hex()
 								    distributed_debug_log(("Running pjit'd function", name),
 								                          ("in_shardings", in_shardings),
 								                          ("out_shardings", out_shardings),
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								                          ("in_layouts", in_layouts),
 								                          ("out_layouts", out_layouts),
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								                          ("abstract args", map(xla.abstractify, args)),
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                          ("fingerprint", fingerprint))
-												Fix debug nans test after merging `jit` and `pjit` codepaths

PiperOrigin-RevId: 501122848

											
										
										
											2023-01-10 16:26:18 -08:00
+								  try:
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								    return compiled.unsafe_call(*args), compiled
-												Replace apply_primitive internals with `jax.jit`.

This allows deletion of a lot of code and leads to ~40% eager performance speedup.

Benchmarks:

```
name                                                      old time/op          new time/op          delta
eager_unary_dispatch                                      31.3µs ± 1%          19.4µs ± 6%  -37.91%    (p=0.016 n=4+5)
eager_unary                                               32.1µs ± 0%          19.8µs ± 4%  -38.26%    (p=0.016 n=4+5)
eager_binary_dispatch                                     35.9µs ± 1%          20.5µs ± 4%  -42.93%    (p=0.016 n=4+5)
eager_binary                                              36.6µs ± 1%          21.1µs ± 4%  -42.29%    (p=0.016 n=4+5)
jit_trivial_dispatch                                      3.87µs ± 2%          4.12µs ±25%     ~       (p=1.000 n=5+5)
jit_trivial                                               4.75µs ± 2%          4.82µs ±11%     ~       (p=0.690 n=5+5)
jit_simple_dispatch                                       2.95µs ± 2%          2.97µs ± 7%     ~       (p=1.000 n=5+5)
jit_simple                                                3.52µs ± 6%          3.51µs ± 5%     ~       (p=0.841 n=5+5)
jit_simple_dispatch_array                                 2.95µs ± 2%          2.96µs ± 6%     ~       (p=1.000 n=5+5)
jit_simple_array                                          3.46µs ± 2%          3.51µs ± 5%     ~       (p=0.690 n=5+5)
jit_small_matmul                                          3.01µs ± 1%          3.00µs ± 4%     ~       (p=0.548 n=5+5)
jit_big_matmul                                            34.0µs ±18%          35.5µs ±17%     ~       (p=0.310 n=5+5)
jit_simple_many_args_dispatch/num_args:10                 6.93µs ± 6%          6.80µs ± 6%     ~     (p=0.481 n=10+10)
jit_simple_many_args_dispatch/num_args:100                47.7µs ± 7%          45.4µs ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args_dispatch/num_args:1000                545µs ± 8%           516µs ± 2%     ~      (p=0.101 n=10+8)
jit_simple_many_args_dispatch/num_args:2000               1.12ms ± 7%          1.07ms ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args/num_args:10                          7.42µs ± 5%          7.23µs ± 2%     ~      (p=0.173 n=10+8)
jit_simple_many_args/num_args:100                         48.4µs ± 7%          45.6µs ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args/num_args:1000                         542µs ± 6%           524µs ± 8%     ~     (p=0.089 n=10+10)
jit_simple_many_args/num_args:2000                        1.12ms ± 7%          1.08ms ± 1%     ~      (p=0.068 n=10+8)
jit_simple_pruned_args_dispatch_10                        4.79µs ± 8%          4.98µs ±10%     ~       (p=0.421 n=5+5)
jit_simple_pruned_args_10                                 5.32µs ± 6%          5.30µs ± 4%     ~       (p=1.000 n=5+5)
jit_simple_pruned_args_dispatch_100                       24.7µs ± 6%          23.8µs ± 8%     ~       (p=0.548 n=5+5)
jit_simple_pruned_args_100                                25.2µs ± 6%          24.4µs ± 8%     ~       (p=0.690 n=5+5)
jit_simple_pruned_args_dispatch_1000                       238µs ± 7%           232µs ± 8%     ~       (p=0.841 n=5+5)
jit_simple_pruned_args_1000                                240µs ± 7%           234µs ± 8%     ~       (p=1.000 n=5+5)
jit_simple_pruned_args_dispatch_2000                       516µs ± 6%           497µs ± 1%     ~       (p=0.413 n=5+4)
jit_simple_pruned_args_2000                                517µs ± 6%           505µs ± 7%     ~       (p=0.690 n=5+5)
jit_dispatch_without_transfer                              719µs ± 9%           751µs ± 8%     ~       (p=0.222 n=5+5)
jit_dispatch_with_transfer                                 799µs ±14%           793µs ± 9%     ~       (p=1.000 n=5+5)
pmap_trivial_2_devices                                    49.9µs ±40%          48.2µs ±42%     ~       (p=0.841 n=5+5)
pmap_trivial_dispatch_8_devices                           74.5µs ±24%          78.9µs ±29%     ~       (p=0.421 n=5+5)
pmap_trivial_8_devices                                    79.3µs ± 6%          82.7µs ±20%     ~       (p=0.841 n=5+5)
pmap_simple_2_devices                                     47.1µs ±17%          49.1µs ±20%     ~       (p=0.548 n=5+5)
pmap_simple_dispatch_8_devices                            73.4µs ±16%          76.8µs ±21%     ~       (p=0.690 n=5+5)
pmap_simple_8_devices                                     76.0µs ±10%          80.6µs ±29%     ~       (p=1.000 n=5+5)
pmap_simple_dispatch_8_devices_100_args                   1.12ms ±22%          1.08ms ±42%     ~       (p=0.841 n=5+5)
pmap_simple_8_devices_100_args                            12.5ms ± 8%          12.8ms ±10%     ~       (p=1.000 n=5+5)
sda_index_1                                                413µs ± 1%           686µs ± 4%  +66.08%    (p=0.008 n=5+5)
sda_index_2                                                850µs ± 1%          1378µs ± 4%  +62.02%    (p=0.008 n=5+5)
sda_index_8                                               3.60ms ± 1%          5.69ms ± 4%  +58.00%    (p=0.008 n=5+5)
bench_shaped_abstractify                                   300µs ± 1%           305µs ± 3%     ~       (p=0.056 n=5+5)
bench_xla_abstractify_scalar_int                          6.45µs ± 1%          6.50µs ± 3%     ~       (p=0.548 n=5+5)
bench_xla_abstractify_scalar_float                        3.73µs ± 1%          3.73µs ± 3%     ~       (p=0.690 n=5+5)
bench_xla_abstractify_scalar_numpy_int32                  4.97µs ± 1%          4.83µs ± 3%     ~       (p=0.095 n=5+5)
bench_xla_abstractify_scalar_numpy_uint32                 4.91µs ± 1%          4.75µs ± 0%   -3.30%    (p=0.016 n=5+4)
bench_xla_abstractify_numpy_random                        4.34µs ± 2%          4.31µs ± 3%     ~       (p=0.310 n=5+5)
bench_xla_abstractify_numpy_arange_100_float32            3.94µs ± 1%          3.93µs ± 3%     ~       (p=0.548 n=5+5)
bench_xla_abstractify_enum                                6.85µs ± 1%          7.06µs ± 7%   +3.07%    (p=0.032 n=5+5)
bench_are_op_shardings_equal                              26.9µs ± 2%          27.0µs ± 3%     ~       (p=0.841 n=5+5)
bench_pjit_check_aval_sharding                             691µs ± 2%           711µs ±13%     ~       (p=0.841 n=5+5)
bench_addressable_shards_index                             656ns ± 4%           688ns ± 9%     ~       (p=0.095 n=5+5)
bench_remat_eager_retracing_overheads                     12.7ms ± 4%          10.7ms ± 1%  -15.48%    (p=0.016 n=5+4)
bench_remat_eager_retracing_overheads_static_argnums      13.0ms ± 2%          11.3ms ± 6%  -13.71%    (p=0.008 n=5+5)
bench_slicing_compilation                                 12.1ms ± 1%          12.3ms ± 4%     ~       (p=0.690 n=5+5)
bench_slicing_compilation2                                11.3ms ± 0%          11.5ms ± 6%     ~       (p=0.690 n=5+5)
bench_repeated_static_indexing                            62.5ms ± 2%          40.8ms ± 8%  -34.77%    (p=0.008 n=5+5)
bench_repeated_static_slicing                             46.7ms ± 1%          31.4ms ± 2%  -32.76%    (p=0.008 n=5+5)
pjit_simple_1_device/num_args:1                           2.72µs ± 2%          2.68µs ± 5%     ~       (p=0.151 n=5+5)
pjit_simple_1_device/num_args:10                          12.6µs ± 7%          12.3µs ± 3%     ~       (p=0.310 n=5+5)
pjit_simple_1_device/num_args:100                          109µs ± 3%           108µs ± 4%     ~       (p=0.548 n=5+5)
pjit_simple_4_device/num_args:1                           38.0µs ±26%          36.8µs ±19%     ~       (p=0.690 n=5+5)
pjit_simple_4_device/num_args:10                          93.3µs ±19%          96.6µs ±23%     ~       (p=0.841 n=5+5)
pjit_simple_4_device/num_args:100                          730µs ±16%           698µs ±48%     ~       (p=0.841 n=5+5)
pjit_aot_1_device/num_args:1                              3.29µs ± 2%          3.12µs ± 4%   -5.24%    (p=0.016 n=4+5)
pjit_aot_1_device/num_args:10                             13.0µs ± 1%          12.7µs ± 2%     ~       (p=0.063 n=4+5)
pjit_aot_1_device/num_args:100                             111µs ± 5%           110µs ±11%     ~       (p=0.421 n=5+5)
pjit_aot_4_device/num_args:1                              38.4µs ±19%          38.9µs ±24%     ~       (p=1.000 n=5+5)
pjit_aot_4_device/num_args:10                             91.3µs ±15%          96.9µs ±29%     ~       (p=0.548 n=5+5)
pjit_aot_4_device/num_args:100                             676µs ±20%           689µs ±41%     ~       (p=0.841 n=5+5)
host_local_array_to_global_array                           196µs ± 6%           194µs ± 4%     ~       (p=0.548 n=5+5)
device_put                                                50.8µs ± 1%          50.7µs ± 4%     ~       (p=0.413 n=4+5)
device_put_sharded                                         176µs ± 0%           177µs ± 4%     ~       (p=0.190 n=4+5)
device_get_8_devices                                      3.96ms ± 4%          4.03ms ± 7%     ~       (p=0.413 n=4+5)
np_asarray_8_devices                                      3.34ms ±18%          3.30ms ±10%     ~       (p=0.548 n=5+5)
jax_array_arrays_8_devices                                5.01ms ±10%          5.09ms ±21%     ~       (p=0.421 n=5+5)
batch_inplace_while_scatter                                440µs ± 1%           439µs ± 1%     ~       (p=0.421 n=5+5)
batch_inplace_while_dynamic_update_slice                   454µs ± 0%           457µs ± 1%     ~       (p=0.905 n=4+5)
serial_dot_products                                       4.51µs ± 3%          4.41µs ± 2%     ~       (p=0.151 n=5+5)
bench_make_array_from_callback_fully_replicated_sharding  26.6µs ± 1%          27.0µs ± 2%     ~       (p=0.056 n=5+5)
```

PiperOrigin-RevId: 586505950

											
										
										
											2023-11-29 18:06:36 -08:00
+								  except FloatingPointError as e:
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								    assert config.debug_nans.value or config.debug_infs.value  # compiled_fun can only raise in this case
-												Catch the NaN's and raise a better error message when jax_debug_nans flag is True.

PiperOrigin-RevId: 509552717

											
										
										
											2023-02-14 09:26:53 -08:00
-												Replace apply_primitive internals with `jax.jit`.

This allows deletion of a lot of code and leads to ~40% eager performance speedup.

Benchmarks:

```
name                                                      old time/op          new time/op          delta
eager_unary_dispatch                                      31.3µs ± 1%          19.4µs ± 6%  -37.91%    (p=0.016 n=4+5)
eager_unary                                               32.1µs ± 0%          19.8µs ± 4%  -38.26%    (p=0.016 n=4+5)
eager_binary_dispatch                                     35.9µs ± 1%          20.5µs ± 4%  -42.93%    (p=0.016 n=4+5)
eager_binary                                              36.6µs ± 1%          21.1µs ± 4%  -42.29%    (p=0.016 n=4+5)
jit_trivial_dispatch                                      3.87µs ± 2%          4.12µs ±25%     ~       (p=1.000 n=5+5)
jit_trivial                                               4.75µs ± 2%          4.82µs ±11%     ~       (p=0.690 n=5+5)
jit_simple_dispatch                                       2.95µs ± 2%          2.97µs ± 7%     ~       (p=1.000 n=5+5)
jit_simple                                                3.52µs ± 6%          3.51µs ± 5%     ~       (p=0.841 n=5+5)
jit_simple_dispatch_array                                 2.95µs ± 2%          2.96µs ± 6%     ~       (p=1.000 n=5+5)
jit_simple_array                                          3.46µs ± 2%          3.51µs ± 5%     ~       (p=0.690 n=5+5)
jit_small_matmul                                          3.01µs ± 1%          3.00µs ± 4%     ~       (p=0.548 n=5+5)
jit_big_matmul                                            34.0µs ±18%          35.5µs ±17%     ~       (p=0.310 n=5+5)
jit_simple_many_args_dispatch/num_args:10                 6.93µs ± 6%          6.80µs ± 6%     ~     (p=0.481 n=10+10)
jit_simple_many_args_dispatch/num_args:100                47.7µs ± 7%          45.4µs ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args_dispatch/num_args:1000                545µs ± 8%           516µs ± 2%     ~      (p=0.101 n=10+8)
jit_simple_many_args_dispatch/num_args:2000               1.12ms ± 7%          1.07ms ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args/num_args:10                          7.42µs ± 5%          7.23µs ± 2%     ~      (p=0.173 n=10+8)
jit_simple_many_args/num_args:100                         48.4µs ± 7%          45.6µs ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args/num_args:1000                         542µs ± 6%           524µs ± 8%     ~     (p=0.089 n=10+10)
jit_simple_many_args/num_args:2000                        1.12ms ± 7%          1.08ms ± 1%     ~      (p=0.068 n=10+8)
jit_simple_pruned_args_dispatch_10                        4.79µs ± 8%          4.98µs ±10%     ~       (p=0.421 n=5+5)
jit_simple_pruned_args_10                                 5.32µs ± 6%          5.30µs ± 4%     ~       (p=1.000 n=5+5)
jit_simple_pruned_args_dispatch_100                       24.7µs ± 6%          23.8µs ± 8%     ~       (p=0.548 n=5+5)
jit_simple_pruned_args_100                                25.2µs ± 6%          24.4µs ± 8%     ~       (p=0.690 n=5+5)
jit_simple_pruned_args_dispatch_1000                       238µs ± 7%           232µs ± 8%     ~       (p=0.841 n=5+5)
jit_simple_pruned_args_1000                                240µs ± 7%           234µs ± 8%     ~       (p=1.000 n=5+5)
jit_simple_pruned_args_dispatch_2000                       516µs ± 6%           497µs ± 1%     ~       (p=0.413 n=5+4)
jit_simple_pruned_args_2000                                517µs ± 6%           505µs ± 7%     ~       (p=0.690 n=5+5)
jit_dispatch_without_transfer                              719µs ± 9%           751µs ± 8%     ~       (p=0.222 n=5+5)
jit_dispatch_with_transfer                                 799µs ±14%           793µs ± 9%     ~       (p=1.000 n=5+5)
pmap_trivial_2_devices                                    49.9µs ±40%          48.2µs ±42%     ~       (p=0.841 n=5+5)
pmap_trivial_dispatch_8_devices                           74.5µs ±24%          78.9µs ±29%     ~       (p=0.421 n=5+5)
pmap_trivial_8_devices                                    79.3µs ± 6%          82.7µs ±20%     ~       (p=0.841 n=5+5)
pmap_simple_2_devices                                     47.1µs ±17%          49.1µs ±20%     ~       (p=0.548 n=5+5)
pmap_simple_dispatch_8_devices                            73.4µs ±16%          76.8µs ±21%     ~       (p=0.690 n=5+5)
pmap_simple_8_devices                                     76.0µs ±10%          80.6µs ±29%     ~       (p=1.000 n=5+5)
pmap_simple_dispatch_8_devices_100_args                   1.12ms ±22%          1.08ms ±42%     ~       (p=0.841 n=5+5)
pmap_simple_8_devices_100_args                            12.5ms ± 8%          12.8ms ±10%     ~       (p=1.000 n=5+5)
sda_index_1                                                413µs ± 1%           686µs ± 4%  +66.08%    (p=0.008 n=5+5)
sda_index_2                                                850µs ± 1%          1378µs ± 4%  +62.02%    (p=0.008 n=5+5)
sda_index_8                                               3.60ms ± 1%          5.69ms ± 4%  +58.00%    (p=0.008 n=5+5)
bench_shaped_abstractify                                   300µs ± 1%           305µs ± 3%     ~       (p=0.056 n=5+5)
bench_xla_abstractify_scalar_int                          6.45µs ± 1%          6.50µs ± 3%     ~       (p=0.548 n=5+5)
bench_xla_abstractify_scalar_float                        3.73µs ± 1%          3.73µs ± 3%     ~       (p=0.690 n=5+5)
bench_xla_abstractify_scalar_numpy_int32                  4.97µs ± 1%          4.83µs ± 3%     ~       (p=0.095 n=5+5)
bench_xla_abstractify_scalar_numpy_uint32                 4.91µs ± 1%          4.75µs ± 0%   -3.30%    (p=0.016 n=5+4)
bench_xla_abstractify_numpy_random                        4.34µs ± 2%          4.31µs ± 3%     ~       (p=0.310 n=5+5)
bench_xla_abstractify_numpy_arange_100_float32            3.94µs ± 1%          3.93µs ± 3%     ~       (p=0.548 n=5+5)
bench_xla_abstractify_enum                                6.85µs ± 1%          7.06µs ± 7%   +3.07%    (p=0.032 n=5+5)
bench_are_op_shardings_equal                              26.9µs ± 2%          27.0µs ± 3%     ~       (p=0.841 n=5+5)
bench_pjit_check_aval_sharding                             691µs ± 2%           711µs ±13%     ~       (p=0.841 n=5+5)
bench_addressable_shards_index                             656ns ± 4%           688ns ± 9%     ~       (p=0.095 n=5+5)
bench_remat_eager_retracing_overheads                     12.7ms ± 4%          10.7ms ± 1%  -15.48%    (p=0.016 n=5+4)
bench_remat_eager_retracing_overheads_static_argnums      13.0ms ± 2%          11.3ms ± 6%  -13.71%    (p=0.008 n=5+5)
bench_slicing_compilation                                 12.1ms ± 1%          12.3ms ± 4%     ~       (p=0.690 n=5+5)
bench_slicing_compilation2                                11.3ms ± 0%          11.5ms ± 6%     ~       (p=0.690 n=5+5)
bench_repeated_static_indexing                            62.5ms ± 2%          40.8ms ± 8%  -34.77%    (p=0.008 n=5+5)
bench_repeated_static_slicing                             46.7ms ± 1%          31.4ms ± 2%  -32.76%    (p=0.008 n=5+5)
pjit_simple_1_device/num_args:1                           2.72µs ± 2%          2.68µs ± 5%     ~       (p=0.151 n=5+5)
pjit_simple_1_device/num_args:10                          12.6µs ± 7%          12.3µs ± 3%     ~       (p=0.310 n=5+5)
pjit_simple_1_device/num_args:100                          109µs ± 3%           108µs ± 4%     ~       (p=0.548 n=5+5)
pjit_simple_4_device/num_args:1                           38.0µs ±26%          36.8µs ±19%     ~       (p=0.690 n=5+5)
pjit_simple_4_device/num_args:10                          93.3µs ±19%          96.6µs ±23%     ~       (p=0.841 n=5+5)
pjit_simple_4_device/num_args:100                          730µs ±16%           698µs ±48%     ~       (p=0.841 n=5+5)
pjit_aot_1_device/num_args:1                              3.29µs ± 2%          3.12µs ± 4%   -5.24%    (p=0.016 n=4+5)
pjit_aot_1_device/num_args:10                             13.0µs ± 1%          12.7µs ± 2%     ~       (p=0.063 n=4+5)
pjit_aot_1_device/num_args:100                             111µs ± 5%           110µs ±11%     ~       (p=0.421 n=5+5)
pjit_aot_4_device/num_args:1                              38.4µs ±19%          38.9µs ±24%     ~       (p=1.000 n=5+5)
pjit_aot_4_device/num_args:10                             91.3µs ±15%          96.9µs ±29%     ~       (p=0.548 n=5+5)
pjit_aot_4_device/num_args:100                             676µs ±20%           689µs ±41%     ~       (p=0.841 n=5+5)
host_local_array_to_global_array                           196µs ± 6%           194µs ± 4%     ~       (p=0.548 n=5+5)
device_put                                                50.8µs ± 1%          50.7µs ± 4%     ~       (p=0.413 n=4+5)
device_put_sharded                                         176µs ± 0%           177µs ± 4%     ~       (p=0.190 n=4+5)
device_get_8_devices                                      3.96ms ± 4%          4.03ms ± 7%     ~       (p=0.413 n=4+5)
np_asarray_8_devices                                      3.34ms ±18%          3.30ms ±10%     ~       (p=0.548 n=5+5)
jax_array_arrays_8_devices                                5.01ms ±10%          5.09ms ±21%     ~       (p=0.421 n=5+5)
batch_inplace_while_scatter                                440µs ± 1%           439µs ± 1%     ~       (p=0.421 n=5+5)
batch_inplace_while_dynamic_update_slice                   454µs ± 0%           457µs ± 1%     ~       (p=0.905 n=4+5)
serial_dot_products                                       4.51µs ± 3%          4.41µs ± 2%     ~       (p=0.151 n=5+5)
bench_make_array_from_callback_fully_replicated_sharding  26.6µs ± 1%          27.0µs ± 2%     ~       (p=0.056 n=5+5)
```

PiperOrigin-RevId: 586505950

											
										
										
											2023-11-29 18:06:36 -08:00
+								    if len(jaxpr.eqns) > 1:
 								      _ = core.jaxpr_as_fun(jaxpr)(*args)  # may raise, not return
-												Catch the NaN's and raise a better error message when jax_debug_nans flag is True.

PiperOrigin-RevId: 509552717

											
										
										
											2023-02-14 09:26:53 -08:00
 								    # If control reaches this line, we got a NaN on the output of `compiled`
 								    # but not `fun.call_wrapped` on the same arguments. Let's tell the user.
-												Replace apply_primitive internals with `jax.jit`.

This allows deletion of a lot of code and leads to ~40% eager performance speedup.

Benchmarks:

```
name                                                      old time/op          new time/op          delta
eager_unary_dispatch                                      31.3µs ± 1%          19.4µs ± 6%  -37.91%    (p=0.016 n=4+5)
eager_unary                                               32.1µs ± 0%          19.8µs ± 4%  -38.26%    (p=0.016 n=4+5)
eager_binary_dispatch                                     35.9µs ± 1%          20.5µs ± 4%  -42.93%    (p=0.016 n=4+5)
eager_binary                                              36.6µs ± 1%          21.1µs ± 4%  -42.29%    (p=0.016 n=4+5)
jit_trivial_dispatch                                      3.87µs ± 2%          4.12µs ±25%     ~       (p=1.000 n=5+5)
jit_trivial                                               4.75µs ± 2%          4.82µs ±11%     ~       (p=0.690 n=5+5)
jit_simple_dispatch                                       2.95µs ± 2%          2.97µs ± 7%     ~       (p=1.000 n=5+5)
jit_simple                                                3.52µs ± 6%          3.51µs ± 5%     ~       (p=0.841 n=5+5)
jit_simple_dispatch_array                                 2.95µs ± 2%          2.96µs ± 6%     ~       (p=1.000 n=5+5)
jit_simple_array                                          3.46µs ± 2%          3.51µs ± 5%     ~       (p=0.690 n=5+5)
jit_small_matmul                                          3.01µs ± 1%          3.00µs ± 4%     ~       (p=0.548 n=5+5)
jit_big_matmul                                            34.0µs ±18%          35.5µs ±17%     ~       (p=0.310 n=5+5)
jit_simple_many_args_dispatch/num_args:10                 6.93µs ± 6%          6.80µs ± 6%     ~     (p=0.481 n=10+10)
jit_simple_many_args_dispatch/num_args:100                47.7µs ± 7%          45.4µs ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args_dispatch/num_args:1000                545µs ± 8%           516µs ± 2%     ~      (p=0.101 n=10+8)
jit_simple_many_args_dispatch/num_args:2000               1.12ms ± 7%          1.07ms ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args/num_args:10                          7.42µs ± 5%          7.23µs ± 2%     ~      (p=0.173 n=10+8)
jit_simple_many_args/num_args:100                         48.4µs ± 7%          45.6µs ± 2%     ~      (p=0.237 n=10+8)
jit_simple_many_args/num_args:1000                         542µs ± 6%           524µs ± 8%     ~     (p=0.089 n=10+10)
jit_simple_many_args/num_args:2000                        1.12ms ± 7%          1.08ms ± 1%     ~      (p=0.068 n=10+8)
jit_simple_pruned_args_dispatch_10                        4.79µs ± 8%          4.98µs ±10%     ~       (p=0.421 n=5+5)
jit_simple_pruned_args_10                                 5.32µs ± 6%          5.30µs ± 4%     ~       (p=1.000 n=5+5)
jit_simple_pruned_args_dispatch_100                       24.7µs ± 6%          23.8µs ± 8%     ~       (p=0.548 n=5+5)
jit_simple_pruned_args_100                                25.2µs ± 6%          24.4µs ± 8%     ~       (p=0.690 n=5+5)
jit_simple_pruned_args_dispatch_1000                       238µs ± 7%           232µs ± 8%     ~       (p=0.841 n=5+5)
jit_simple_pruned_args_1000                                240µs ± 7%           234µs ± 8%     ~       (p=1.000 n=5+5)
jit_simple_pruned_args_dispatch_2000                       516µs ± 6%           497µs ± 1%     ~       (p=0.413 n=5+4)
jit_simple_pruned_args_2000                                517µs ± 6%           505µs ± 7%     ~       (p=0.690 n=5+5)
jit_dispatch_without_transfer                              719µs ± 9%           751µs ± 8%     ~       (p=0.222 n=5+5)
jit_dispatch_with_transfer                                 799µs ±14%           793µs ± 9%     ~       (p=1.000 n=5+5)
pmap_trivial_2_devices                                    49.9µs ±40%          48.2µs ±42%     ~       (p=0.841 n=5+5)
pmap_trivial_dispatch_8_devices                           74.5µs ±24%          78.9µs ±29%     ~       (p=0.421 n=5+5)
pmap_trivial_8_devices                                    79.3µs ± 6%          82.7µs ±20%     ~       (p=0.841 n=5+5)
pmap_simple_2_devices                                     47.1µs ±17%          49.1µs ±20%     ~       (p=0.548 n=5+5)
pmap_simple_dispatch_8_devices                            73.4µs ±16%          76.8µs ±21%     ~       (p=0.690 n=5+5)
pmap_simple_8_devices                                     76.0µs ±10%          80.6µs ±29%     ~       (p=1.000 n=5+5)
pmap_simple_dispatch_8_devices_100_args                   1.12ms ±22%          1.08ms ±42%     ~       (p=0.841 n=5+5)
pmap_simple_8_devices_100_args                            12.5ms ± 8%          12.8ms ±10%     ~       (p=1.000 n=5+5)
sda_index_1                                                413µs ± 1%           686µs ± 4%  +66.08%    (p=0.008 n=5+5)
sda_index_2                                                850µs ± 1%          1378µs ± 4%  +62.02%    (p=0.008 n=5+5)
sda_index_8                                               3.60ms ± 1%          5.69ms ± 4%  +58.00%    (p=0.008 n=5+5)
bench_shaped_abstractify                                   300µs ± 1%           305µs ± 3%     ~       (p=0.056 n=5+5)
bench_xla_abstractify_scalar_int                          6.45µs ± 1%          6.50µs ± 3%     ~       (p=0.548 n=5+5)
bench_xla_abstractify_scalar_float                        3.73µs ± 1%          3.73µs ± 3%     ~       (p=0.690 n=5+5)
bench_xla_abstractify_scalar_numpy_int32                  4.97µs ± 1%          4.83µs ± 3%     ~       (p=0.095 n=5+5)
bench_xla_abstractify_scalar_numpy_uint32                 4.91µs ± 1%          4.75µs ± 0%   -3.30%    (p=0.016 n=5+4)
bench_xla_abstractify_numpy_random                        4.34µs ± 2%          4.31µs ± 3%     ~       (p=0.310 n=5+5)
bench_xla_abstractify_numpy_arange_100_float32            3.94µs ± 1%          3.93µs ± 3%     ~       (p=0.548 n=5+5)
bench_xla_abstractify_enum                                6.85µs ± 1%          7.06µs ± 7%   +3.07%    (p=0.032 n=5+5)
bench_are_op_shardings_equal                              26.9µs ± 2%          27.0µs ± 3%     ~       (p=0.841 n=5+5)
bench_pjit_check_aval_sharding                             691µs ± 2%           711µs ±13%     ~       (p=0.841 n=5+5)
bench_addressable_shards_index                             656ns ± 4%           688ns ± 9%     ~       (p=0.095 n=5+5)
bench_remat_eager_retracing_overheads                     12.7ms ± 4%          10.7ms ± 1%  -15.48%    (p=0.016 n=5+4)
bench_remat_eager_retracing_overheads_static_argnums      13.0ms ± 2%          11.3ms ± 6%  -13.71%    (p=0.008 n=5+5)
bench_slicing_compilation                                 12.1ms ± 1%          12.3ms ± 4%     ~       (p=0.690 n=5+5)
bench_slicing_compilation2                                11.3ms ± 0%          11.5ms ± 6%     ~       (p=0.690 n=5+5)
bench_repeated_static_indexing                            62.5ms ± 2%          40.8ms ± 8%  -34.77%    (p=0.008 n=5+5)
bench_repeated_static_slicing                             46.7ms ± 1%          31.4ms ± 2%  -32.76%    (p=0.008 n=5+5)
pjit_simple_1_device/num_args:1                           2.72µs ± 2%          2.68µs ± 5%     ~       (p=0.151 n=5+5)
pjit_simple_1_device/num_args:10                          12.6µs ± 7%          12.3µs ± 3%     ~       (p=0.310 n=5+5)
pjit_simple_1_device/num_args:100                          109µs ± 3%           108µs ± 4%     ~       (p=0.548 n=5+5)
pjit_simple_4_device/num_args:1                           38.0µs ±26%          36.8µs ±19%     ~       (p=0.690 n=5+5)
pjit_simple_4_device/num_args:10                          93.3µs ±19%          96.6µs ±23%     ~       (p=0.841 n=5+5)
pjit_simple_4_device/num_args:100                          730µs ±16%           698µs ±48%     ~       (p=0.841 n=5+5)
pjit_aot_1_device/num_args:1                              3.29µs ± 2%          3.12µs ± 4%   -5.24%    (p=0.016 n=4+5)
pjit_aot_1_device/num_args:10                             13.0µs ± 1%          12.7µs ± 2%     ~       (p=0.063 n=4+5)
pjit_aot_1_device/num_args:100                             111µs ± 5%           110µs ±11%     ~       (p=0.421 n=5+5)
pjit_aot_4_device/num_args:1                              38.4µs ±19%          38.9µs ±24%     ~       (p=1.000 n=5+5)
pjit_aot_4_device/num_args:10                             91.3µs ±15%          96.9µs ±29%     ~       (p=0.548 n=5+5)
pjit_aot_4_device/num_args:100                             676µs ±20%           689µs ±41%     ~       (p=0.841 n=5+5)
host_local_array_to_global_array                           196µs ± 6%           194µs ± 4%     ~       (p=0.548 n=5+5)
device_put                                                50.8µs ± 1%          50.7µs ± 4%     ~       (p=0.413 n=4+5)
device_put_sharded                                         176µs ± 0%           177µs ± 4%     ~       (p=0.190 n=4+5)
device_get_8_devices                                      3.96ms ± 4%          4.03ms ± 7%     ~       (p=0.413 n=4+5)
np_asarray_8_devices                                      3.34ms ±18%          3.30ms ±10%     ~       (p=0.548 n=5+5)
jax_array_arrays_8_devices                                5.01ms ±10%          5.09ms ±21%     ~       (p=0.421 n=5+5)
batch_inplace_while_scatter                                440µs ± 1%           439µs ± 1%     ~       (p=0.421 n=5+5)
batch_inplace_while_dynamic_update_slice                   454µs ± 0%           457µs ± 1%     ~       (p=0.905 n=4+5)
serial_dot_products                                       4.51µs ± 3%          4.41µs ± 2%     ~       (p=0.151 n=5+5)
bench_make_array_from_callback_fully_replicated_sharding  26.6µs ± 1%          27.0µs ± 2%     ~       (p=0.056 n=5+5)
```

PiperOrigin-RevId: 586505950

											
										
										
											2023-11-29 18:06:36 -08:00
+								    msg = (f"{str(e)}. Because "
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								           "jax_config.debug_nans.value and/or config.jax_debug_infs is set, the "
-												Fix debug nans test after merging `jit` and `pjit` codepaths

PiperOrigin-RevId: 501122848

											
										
										
											2023-01-10 16:26:18 -08:00
+								           "de-optimized function (i.e., the function as if the `jit` "
 								           "decorator were removed) was called in an attempt to get a more "
 								           "precise error message. However, the de-optimized function did not "
 								           "produce invalid values during its execution. This behavior can "
-												Fix typo "invalud" -> "invalid" in error message.

PiperOrigin-RevId: 503452691

											
										
										
											2023-01-20 08:47:45 -08:00
+								           "result from `jit` optimizations causing the invalid value to be "
-												Fix debug nans test after merging `jit` and `pjit` codepaths

PiperOrigin-RevId: 501122848

											
										
										
											2023-01-10 16:26:18 -08:00
+								           "produced. It may also arise from having nan/inf constants as "
 								           "outputs, like `jax.jit(lambda ...: jax.numpy.nan)(...)`. "
 								           "\n\n"
 								           "It may be possible to avoid the invalid value by removing the "
 								           "`jit` decorator, at the cost of losing optimizations. "
 								           "\n\n"
 								           "If you see this error, consider opening a bug report at "
-												Update references to the GitHub url in JAX codebase to reflect move from google/jax to jax-ml/jax

PiperOrigin-RevId: 676843138

											
										
										
											2024-09-20 07:51:48 -07:00
+								           "https://github.com/jax-ml/jax.")
-												Fix debug nans test after merging `jit` and `pjit` codepaths

PiperOrigin-RevId: 501122848

											
										
										
											2023-01-10 16:26:18 -08:00
+								    raise FloatingPointError(msg)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
 								@weakref_lru_cache
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances for the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								def _get_jaxpr_as_fun(jaxpr, in_shardings, out_shardings, in_layouts,
 								                      out_layouts, resource_env, donated_invars, name,
 								                      keep_unused, inline):
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								  # The input jaxpr to `_get_jaxpr_as_fun` is under a weakref_lru_cache so
 								  # returning `core.jaxpr_as_fun(jaxpr)` directly creates a strong reference to
 								  # the jaxpr defeating the purpose of weakref_lru_cache. So return a function
 								  # that closes over a weakrefed jaxpr and gets called inside that function.
 								  # This way there won't be a strong reference to the jaxpr from the output
 								  # function.
 								  jaxpr = weakref.ref(jaxpr)
 								  return lambda *args: core.jaxpr_as_fun(jaxpr())(*args)  # pylint: disable=unnecessary-lambda
 								def _pjit_call_impl(*args, jaxpr,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances for the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								                    in_shardings, out_shardings, in_layouts, out_layouts,
 								                    resource_env,
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								                    donated_invars, name, keep_unused, inline):
 								  def call_impl_cache_miss(*args_, **kwargs_):
 								    out_flat, compiled = _pjit_call_impl_python(
 								        *args, jaxpr=jaxpr, in_shardings=in_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances for the `in_shardings` and `out_shardings` arguments. The major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that the public API is `Layout(device_local_layout, sharding)`, which is how users will pass us the Layout, but internally we split it apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								        out_shardings=out_shardings, in_layouts=in_layouts,
 								        out_layouts=out_layouts, resource_env=resource_env,
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								        donated_invars=donated_invars, name=name, keep_unused=keep_unused,
 								        inline=inline)
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall, the idea is to collect profile data for each module a given number of times (which can be configured), then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class at profile.py. The class can track how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter. To do this I added a dictionary to pjit.py which associates a Jaxpr with a profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but also so that JAX will not recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								    pgle_profiler = _read_pgle_profiler(jaxpr)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								    fastpath_data = _get_fastpath_data(
-												Remove `_python_pjit` and make `_cpp_pjit` the only function wrapper.

PiperOrigin-RevId: 617846352

											
										
										
											2024-03-21 08:09:37 -07:00
+								        compiled, tree_structure(out_flat), args, out_flat, [], jaxpr.effects,
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall the idea is to collect profile data for each module given amount of times (which can be configured) then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class in profile.py. The class tracks how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with its profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but JAX also will not recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								        jaxpr.consts, None, pgle_profiler)
-												Bump minimum jaxlib version to v0.4.30.

This corresponds to xla_extension_version 271 and mlir_api_version 57.

											
										
										
											2024-06-18 11:31:09 -04:00
+								    return out_flat, fastpath_data, _need_to_rebuild_with_fdo(pgle_profiler)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
 								  f = _get_jaxpr_as_fun(
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								      jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
+								      resource_env, donated_invars, name, keep_unused, inline)
-												[Take 2] Generalize global jit cpp cache keys so we can add more keys than the current donate_argnums.

This allows us to get more cache hits globally. For example:

Before:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache miss
After:

jax.jit(f, out_shardings=s)(arr)
jax.jit(f, out_shardings=s)(arr)  # cpp cache hit

Reverts b615266175effe4aefeb903620a19f3719a604da

PiperOrigin-RevId: 675746175

											
										
										
											2024-09-17 16:10:41 -07:00
+								  donated_argnums = tuple(i for i, d in enumerate(donated_invars) if d)
 								  if xla_extension_version >= 286:
 								    cache_key = pxla.JitGlobalCppCacheKeys(
 								        donate_argnums=donated_argnums, donate_argnames=None,
 								        device=None, backend=None,
 								        in_shardings_treedef=None, in_shardings_leaves=in_shardings,
 								        out_shardings_treedef=None, out_shardings_leaves=out_shardings,
 								        in_layouts_treedef=None, in_layouts_leaves=in_layouts,
 								        out_layouts_treedef=None, out_layouts_leaves=out_layouts,
 								        use_resource_env=resource_env is not None)
 								    return xc._xla.pjit(
 								        name, f, call_impl_cache_miss, [], [], cache_key,
 								        tree_util.dispatch_registry, pxla.cc_shard_arg,
 								        _get_cpp_global_cache(cache_key.contains_explicit_attributes))(*args)
 								  else:
 								    has_explicit_sharding = _pjit_explicit_sharding_and_layout(
 								        in_shardings, out_shardings, in_layouts, out_layouts, None, None)
 								    return xc._xla.pjit(
 								        name, f, call_impl_cache_miss, [], [], donated_argnums,
 								        tree_util.dispatch_registry, pxla.cc_shard_arg,
 								        _get_cpp_global_cache(has_explicit_sharding))(*args)
-												Make pjit_call_impl go via C++ dispatch.

This is required for APIs like `eval_jaxpr` and `jaxpr_as_fun` that don't call the top level pjit/jit function but rather go via pjit_p.bind directly which calls into _pjit_call_impl.

PiperOrigin-RevId: 535630905

											
										
										
											2023-05-26 08:56:56 -07:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								pjit_p.def_impl(_pjit_call_impl)
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								def _pjit_lower(*args, **kwargs):
 								  """Forward all positional and keyword arguments to `_pjit_lower_cached`."""
 								  # Thin indirection over the `weakref_lru_cache`-decorated implementation;
 								  # no logic of its own is added here.
 								  return _pjit_lower_cached(*args, **kwargs)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								@weakref_lru_cache
 								def _pjit_lower_cached(
 								    jaxpr: core.ClosedJaxpr,
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								    in_shardings,
 								    out_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								    in_layouts: pxla.MaybeLayout,
 								    out_layouts: pxla.MaybeLayout,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    resource_env,
 								    donated_invars,
 								    name: str,
 								    keep_unused: bool,
-												Default jax_spmd_mode to allow_jit which will allow explicit jax.jit to not raise the multihost error (since jit and pjit have been merged).

Implicit jit and apply_primitive will still raise an error though (which is recognized via inline parameter). Majority of jnp operations in JAX should be inlined.

PiperOrigin-RevId: 527398394

											
										
										
											2023-04-26 15:54:50 -07:00
+								    inline: bool,
-												[jax2tf] Clean up the support for cross-lowering.

In a previous CL we introduced cross-lowering support without any
changes in JAX core, but at the expense of some overly complex code
in jax2tf, along with overriding a JAX core function. Plus, those
changes were not enough to handle some xmap and pmap cases.

Here we introduce a `_experimental_lowering_platform: Optional[str]` parameter
to the `.lower()` methods and then we thread the `lowering_platform`
all the way to the calls to `mlir.lower_jaxpr_to_module2`. That's it.

Note that this parameter to `.lower()` is experimental and not supposed
to be used outside jax2tf. It may also gobble user kwargs.

											
										
										
											2023-02-28 11:30:23 +01:00
+								    *,
-												Add `lowering_platforms` to `traced.lower()` to allow lowering to different backends and multi-backend lowering too. In other words, enable cross-lowering!

The motivation for doing this is 2-fold:

1) This will help with deprecating and eventually deleting `jax.xla_computation` which allows for cross backend lowering.

2) Allow for cross-backend and multi-backend lowering via jax AOT APIs which will help cleanup some hacks implemented for `jax.export`.

Note that this is only available by `.trace.lower(lowering_platforms=('tpu',))`. You cannot use `.lower` to do cross-lowering. We can introduce top-level APIs in the future to allow for composable aot apis to make this easier if `.trace(*args).lower(lowering_platforms)` is cumbersome to write.

Designed with @froystig!

PiperOrigin-RevId: 644087787

											
										
										
											2024-06-17 11:58:18 -07:00
+								    lowering_platforms: tuple[str, ...] | None,
-												[JAX] Automatically share PGO data for GPU latency-hiding scheduler.

Overall the idea is to collect profile data for each module given amount of times (which can be configured) then recompile the module with the aggregated profile data.

1. We need to track how many times each module was profiled and collect the profiling results. For this I added a ProfileSessionRunner class in profile.py. The class tracks how many times an instance of it was called to profile a session and can also aggregate profile results.

2. We need to associate a profiling session with the module at the interpreter level. To do this I added a dictionary to pjit.py which associates a Jaxpr with its profile session runner.

3. The profile session runner should be passed to pxla.py and then called.

4. We need to correctly deal with the fast path at the interpreter level, so that JAX won't use HLO directly if PGLE data needs to be collected, but JAX also will not recompile the module only for PGLE. See changes in pjit.py and in lru_cache.h

5. Once FDO is collected we need to share it between hosts to keep deterministic compilation.

PiperOrigin-RevId: 638197166

											
										
										
											2024-05-29 01:49:06 -07:00
+								    lowering_parameters: mlir.LoweringParameters,
 								    pgle_profiler: profiler.PGLEProfiler | None):
-												Skip the global jit cpp cache if in/out_layouts are not None

PiperOrigin-RevId: 665085182

											
										
										
											2024-08-19 18:42:45 -07:00
+								  mesh, api_name = ((resource_env.physical_mesh, 'pjit')
 								                    if resource_env is not None else (None, 'jit'))
-												Remove dead code now that xmap is deleted

PiperOrigin-RevId: 655664512

											
										
										
											2024-07-24 12:39:42 -07:00
+								  return pxla.lower_sharding_computation(
 								      jaxpr, api_name, name, in_shardings, out_shardings,
 								      in_layouts, out_layouts, tuple(donated_invars),
-												Thread the mesh context manager to the place where we recover out_shardings back from GSPMDShardings. Before if you had a program like this:

```
with mesh:
  out = pjit(lambda: 1)()
```

The sharding of `out` was a `GSPMDSharding` which is not ideal. This change fixes that and returns a `NamedSharding` instead.

This is also required for `Shardy` integration.

PiperOrigin-RevId: 658842350

											
										
										
											2024-08-02 11:04:01 -07:00
+								      keep_unused=keep_unused, context_mesh=mesh,
-												Add `lowering_platforms` to `traced.lower()` to allow lowering to different backends and multi-backend lowering too. In other words, enable cross-lowering!

The motivation for doing this is 2-fold:

1) This will help with deprecating and eventually deleting `jax.xla_computation` which allows for cross backend lowering.

2) Allow for cross-backend and multi-backend lowering via jax AOT APIs which will help cleanup some hacks implemented for `jax.export`.

Note that this is only available by `.trace.lower(lowering_platforms=('tpu',))`. You cannot use `.lower` to do cross-lowering. We can introduce top-level APIs in the future to allow for composable aot apis to make this easier if `.trace(*args).lower(lowering_platforms)` is cumbersome to write.

Designed with @froystig!

PiperOrigin-RevId: 644087787

											
										
										
											2024-06-17 11:58:18 -07:00
+								      lowering_platforms=lowering_platforms,
-												Remove dead code now that xmap is deleted

PiperOrigin-RevId: 655664512

											
										
										
											2024-07-24 12:39:42 -07:00
+								      lowering_parameters=lowering_parameters,
 								      pgle_profiler=pgle_profiler)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def pjit_staging_rule(trace, *args, **params):
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								  jaxpr, in_fwd, out_shardings, out_layouts = _pjit_forwarding(
 								      params['jaxpr'], params['out_shardings'], params['out_layouts'])
 								  params = dict(params, jaxpr=jaxpr, out_shardings=out_shardings,
 								                out_layouts=out_layouts)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  if (params["inline"] and
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								      all(is_unspecified(i) for i in params["in_shardings"]) and
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								      all(is_unspecified(o) for o in params["out_shardings"]) and
 								      all(i is None for i in params["in_layouts"]) and
 								      all(o is None for o in params["out_layouts"])):
-												Don't recompute abstract eval rules when inlining a jit jaxpr.

The current implementation of jit inlining uses core.eval_jaxpr() and retraces the subjaxpr. This ends up performing abstract evaluation a second time. Instead, write a direct implementation of inlining that doesn't use the tracing machinery.

PiperOrigin-RevId: 607418006

											
										
										
											2024-02-15 12:27:13 -08:00
+								    if config.dynamic_shapes.value:
 								      # Inline jaxpr doesn't handle dynamic shapes when inlining. If dynamic
 								      # shapes are enabled, use eval_jaxpr, which uses the tracing machinery,
 								      # but redundantly performs abstract evaluation again.
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								      out_tracers = core.eval_jaxpr(jaxpr.jaxpr, jaxpr.consts, *args,
 								                                    propagate_source_info=False)
-												Don't recompute abstract eval rules when inlining a jit jaxpr.

The current implementation of jit inlining uses core.eval_jaxpr() and retraces the subjaxpr. This ends up performing abstract evaluation a second time. Instead, write a direct implementation of inlining that doesn't use the tracing machinery.

PiperOrigin-RevId: 607418006

											
										
										
											2024-02-15 12:27:13 -08:00
+								    else:
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								      out_tracers = pe.inline_jaxpr_into_trace(
 								          trace, jaxpr.jaxpr, jaxpr.consts, *args)
-												Migrate a subset of internal modules to use state objects

The motivation here is to gradually replace all dynamic lookups on `jax.config`
with statically-typed state objects, which are more type checker/IDE friendly.

PiperOrigin-RevId: 571932143

											
										
										
											2023-10-09 07:28:18 -07:00
+								  elif config.dynamic_shapes.value:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    source_info = source_info_util.current()
 								    out_tracers = []
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								    for aval in _out_type(jaxpr):
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								      if type(aval) is core.DShapedArray:
 								        shape = [args[d.val] if type(d) is core.InDBIdx else
 								                 out_tracers[d.val] if type(d) is core.OutDBIdx else
 								                 d for d in aval.shape]
 								        aval = aval.update(shape=tuple(core.get_referent(d) for d in shape))
 								      out_tracers.append(pe.DynamicJaxprTracer(trace, aval, source_info))
 								    eqn = core.new_jaxpr_eqn(
 								      map(trace.getvar, args), map(trace.makevar, out_tracers), pjit_p, params,
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								      jaxpr.effects, source_info)
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    trace.frame.add_eqn(eqn)
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								  elif any(isinstance(c, core.MutableArray) for c in jaxpr.consts):
 								    jaxpr, consts = pxla._move_mutable_consts(jaxpr)
-												[mutable-arrays] support closed-over mutable arrays in jit

											
										
										
											2024-03-05 16:20:24 -08:00
+								    consts = map(trace.instantiate_const, consts)
 								    in_shardings = (*params['in_shardings'],) + (UNSPECIFIED,) * len(consts)
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								    in_layouts = (*params['in_layouts'],) + (None,) * len(consts)
-												[mutable-arrays] support closed-over mutable arrays in jit

											
										
										
											2024-03-05 16:20:24 -08:00
+								    donated_invars = (*params['donated_invars'],) + (False,) * len(consts)
 								    new_params = dict(params, jaxpr=jaxpr, in_shardings=in_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								                      in_layouts=in_layouts, donated_invars=donated_invars)
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								    out_tracers = trace.default_process_primitive(
 								        pjit_p, (*args, *consts), new_params)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  else:
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								    out_tracers = trace.default_process_primitive(pjit_p, args, params)
 								  out_tracers_ = iter(out_tracers)
 								  out_tracers = [args[f] if type(f) is int else next(out_tracers_)
 								                 for f in in_fwd]
 								  assert next(out_tracers_, None) is None
 								  return out_tracers
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								pe.custom_staging_rules[pjit_p] = pjit_staging_rule
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
-												add pjit forwarding rule

Co-authored-by: Roy Frostig <frostig@google.com>

											
										
										
											2024-05-24 01:14:16 +00:00
+								def _pjit_forwarding(jaxpr, out_shardings, out_layouts):
 								  in_fwd: list[int | None] = pe._jaxpr_forwarding(jaxpr.jaxpr)
 								  in_fwd = [fwd if is_unspecified(os) and ol is None else None for fwd, os, ol
 								            in zip(in_fwd, out_shardings, out_layouts)]
 								  keep = [f is None for f in in_fwd]
 								  jaxpr = pe.prune_closed_jaxpr_outputs(jaxpr, keep)
 								  out_shardings = [o for o, k in zip(out_shardings, keep) if k]
 								  out_layouts   = [o for o, k in zip(out_layouts  , keep) if k]
 								  return jaxpr, in_fwd, out_shardings, out_layouts
 								def pjit_forwarding_rule(eqn):
 								  """Forwarding rule for `pjit_p` equations.
 								
 								  Uses `_pjit_forwarding` to drop outputs of the inner jaxpr that are
 								  forwarded inputs, and rewrites the equation accordingly.
 								
 								  Returns `(fwd_vars, new_eqn)` where `fwd_vars` has one entry per original
 								  outvar (the forwarded invar of `eqn`, or None when the output is kept) and
 								  `new_eqn` is `eqn` with pruned params and only the kept outvars.
 								  """
 								  params = eqn.params
 								  pruned_jaxpr, in_fwd, shardings, layouts = _pjit_forwarding(
 								      params['jaxpr'], params['out_shardings'], params['out_layouts'])
 								  kept_outvars = [var for var, fwd in zip(eqn.outvars, in_fwd) if fwd is None]
 								  # The rewritten params store shardings/layouts as tuples.
 								  updated_params = dict(params, jaxpr=pruned_jaxpr,
 								                        out_shardings=tuple(shardings),
 								                        out_layouts=tuple(layouts))
 								  rewritten_eqn = eqn.replace(params=updated_params, outvars=kept_outvars)
 								  forwarded = [None if fwd is None else eqn.invars[fwd] for fwd in in_fwd]
 								  return forwarded, rewritten_eqn
 								pe.forwarding_rules[pjit_p] = pjit_forwarding_rule
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								# TODO(mattjj): remove/trivialize this when jaxprs have type annotation on them,
 								# since it's actually not possible in general to infer the type from the term
-												Use lower-case PEP 585 names for types.

Issue https://github.com/google/jax/issues/16537

PiperOrigin-RevId: 542969282

											
										
										
											2023-06-23 15:11:37 -07:00
+								def _out_type(jaxpr: core.ClosedJaxpr) -> list[core.AbstractValue]:
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								  out = []
 								  in_idx = {v: i for i, v in enumerate(jaxpr.jaxpr.invars)}
 								  out_idx = {x: i for i, x in enumerate(jaxpr.jaxpr.invars)
 								             if type(x) is core.Var}
 								  for x in jaxpr.jaxpr.outvars:
 								    aval = x.aval
 								    if type(aval) is core.DShapedArray:
 								      shape = [core.InDBIdx(in_idx[d]) if d in in_idx else
 								               core.OutDBIdx(out_idx[d]) if d in out_idx else
 								               d for d in x.aval.shape]
 								      aval = aval.update(shape=tuple(shape))
 								    out.append(aval)
 								  return out
-												enable pjit recursive typechecking

Give pjit_p a custom typecheck rule, which basically just calls the
core._check_call utility (which was made for xla_call_p and core.call_p).

This revealed the need for a slight generalization of the custom_typecheck rule
signature, for better "context-aware" printing of jaxpr type errors: the rules
should have a `ctx_factory` first argument. **The reason this PR touches so
many files is just that it makes the trivial tweaks to all existing typecheck
rules to accommodate that new signature.** I didn't adapt any other higher-order
primitives' rules to actually use the context, but presumably errors for HOPs
like scan would be improved by using it. Follow-up work!

It's key that core._check_call works with dynamic shapes; this PR is soon to be
followed by some djax+pjit PRs!

											
										
										
											2023-03-21 21:43:20 -07:00
+								def _pjit_typecheck(ctx_factory, *in_atoms, jaxpr, **params):
 								  return core._check_call(ctx_factory, pjit_p, in_atoms,
 								                          dict(params, call_jaxpr=jaxpr.jaxpr))
 								core.custom_typechecks[pjit_p] = _pjit_typecheck
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								def _pjit_abstract_eval(*args, jaxpr, **_):
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  return jaxpr.out_avals, jaxpr.effects
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								pjit_p.def_effectful_abstract_eval(_pjit_abstract_eval)
-												[Try again] For nested pjit's cache the generation of StableHLO if it satifies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
+								def _pjit_cached_lower_jaxpr_to_fun(ctx, name, jaxpr, effects, in_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								                                    out_shardings, in_layouts, out_layouts,
 								                                    api_name):
-												[Try again] For nested pjit's cache the generation of StableHLO if it satifies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
+								  mod_ctx = ctx.module_context
 								  axis_ctx = ctx.module_context.axis_context
-												Make lowering oblivious to real physical devices. Instead cache lowering on HloSharding only (which is based on logical device numbers)

Make an exception for callbacks and custom_partitioning because they need access to device_assignment during lowering.

PiperOrigin-RevId: 589244695

											
										
										
											2023-12-08 14:35:27 -08:00
+								  num_devices = None
-												[Try again] For nested pjit's cache the generation of StableHLO if it satifies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
+								  if isinstance(axis_ctx, sharding_impls.ShardingContext):
-												Make lowering oblivious to real physical devices. Instead cache lowering on HloSharding only (which is based on logical device numbers)

Make an exception for callbacks and custom_partitioning because they need access to device_assignment during lowering.

PiperOrigin-RevId: 589244695

											
										
										
											2023-12-08 14:35:27 -08:00
+								    num_devices = axis_ctx.num_devices
-												[Try again] For nested pjit's cache the generation of StableHLO if it satisfies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
+								  elif isinstance(axis_ctx, sharding_impls.SPMDAxisContext):
-												Make lowering oblivious to real physical devices. Instead cache lowering on HloSharding only (which is based on logical device numbers)

Make an exception for callbacks and custom_partitioning because they need access to device_assignment during lowering.

PiperOrigin-RevId: 589244695

											
										
										
											2023-12-08 14:35:27 -08:00
+								    num_devices = axis_ctx.mesh.size
 								  key = (pjit_p, name, jaxpr, effects, num_devices,
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								         pxla.SemanticallyEqualShardings(in_shardings, jaxpr.in_avals),
 								         pxla.SemanticallyEqualShardings(out_shardings, jaxpr.out_avals),
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								         in_layouts, out_layouts, api_name)
-												[Try again] For nested pjit's cache the generation of StableHLO if it satisfies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
 								  func = mod_ctx.cached_primitive_lowerings.get(key, None)
 								  if func is None:
-												Remove `physical_hlo_sharding` from TyRules.

The only caller of `physical_op_sharding` outside of TyRules was mlir.py. This CL also changes lower_jaxpr_to_fun to only accept logical arg_shardings and result_shardings which are XLACompatibleShardings.

PiperOrigin-RevId: 616267810

											
										
										
											2024-03-15 16:01:13 -07:00
+								    arg_shardings = [None if is_unspecified(i) else i for i in in_shardings]
 								    result_shardings = [None if is_unspecified(o) else o for o in out_shardings]
-												[Try again] For nested pjit's cache the generation of StableHLO if it satisfies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
+								    # TODO(b/228598865): inlined calls cannot have shardings set directly on the
 								    # inputs or outputs because they are lost during MLIR->HLO conversion.
 								    # using_sharding_annotation=False means we add an identity operation instead.
 								    func = mlir.lower_jaxpr_to_fun(
-												Split name_stack out of mlir.ModuleContext.

A unique name_stack is built for every equation, which means that we're constantly rebuilding ModuleContext objects, even though the lifetime of almost everything else (naturally) is the Module scope. Split name_stack into an object that is threaded separately, including as part of mlir.LoweringRuleContext.

PiperOrigin-RevId: 608594374

											
										
										
											2024-02-20 07:16:38 -08:00
+								        mod_ctx, name, jaxpr, effects, ctx.name_stack,
 								        arg_shardings=arg_shardings, result_shardings=result_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								        use_sharding_annotations=False, api_name=api_name,
 								        arg_layouts=in_layouts, result_layouts=out_layouts)
-												[Try again] For nested pjit's cache the generation of StableHLO if it satisfies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
+								    mod_ctx.cached_primitive_lowerings[key] = func
 								  return func
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								def _pjit_lowering(ctx, *args, name, jaxpr, in_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								                   out_shardings, in_layouts, out_layouts, resource_env,
 								                   donated_invars, keep_unused, inline):
-												Fix nondeterminism issue with ordered effects

											
										
										
											2023-02-23 16:03:00 -08:00
+								  effects = list(ctx.tokens_in.effects())
-												Don't wrap singleton ir.Types during HLO lowering.

This is similar to https://github.com/google/jax/pull/22211, but for MLIR types instead of MLIR values.

											
										
										
											2024-07-03 16:38:18 -04:00
+								  output_types = map(mlir.aval_to_ir_type, ctx.avals_out)
-												Fix debugging primitives for pjit. This came up during jit/pjit merge

PiperOrigin-RevId: 501710198

											
										
										
											2023-01-12 17:40:06 -08:00
+								  output_types = [mlir.token_type()] * len(effects) + output_types
-												Don't wrap singleton ir.Types during HLO lowering.

This is similar to https://github.com/google/jax/pull/22211, but for MLIR types instead of MLIR values.

											
										
										
											2024-07-03 16:38:18 -04:00
+								  flat_output_types = mlir.flatten_ir_types(output_types)
-												For nested pjit's cache the generation of StableHLO if it satisfies the key. This should help in improving the tracing time.

PiperOrigin-RevId: 532155068

											
										
										
											2023-05-15 10:31:38 -07:00
-												[Try again] For nested pjit's cache the generation of StableHLO if it satisfies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
+								  func = _pjit_cached_lower_jaxpr_to_fun(
 								      ctx, name, jaxpr, tuple(effects), in_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								      out_shardings, in_layouts, out_layouts,
 								      api_name=('jit' if resource_env is None else 'pjit'))
-												[Try again] For nested pjit's cache the generation of StableHLO if it satisfies the key. This should help in improving the lowering time.

Reverts 4a5c6f82009dee9c30ca4a85359a702d745ed035

PiperOrigin-RevId: 577974380

											
										
										
											2023-10-30 15:27:17 -07:00
-												Fix nondeterminism issue with ordered effects

											
										
										
											2023-02-23 16:03:00 -08:00
+								  tokens_in = [ctx.tokens_in.get(eff) for eff in effects]
 								  args = (*ctx.dim_var_values, *tokens_in, *args)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  call = func_dialect.CallOp(flat_output_types,
 								                             ir.FlatSymbolRefAttr.get(func.name.value),
-												Don't wrap singleton ir.Values with tuples during HLO lowering.

In general a JAX value might correspond to multiple HLO values, which is why the HLO lowering represents each value as a tuple of zero or more ir.Values. However, the common case is that there is exactly one value, and almost all such lists are singletons.

To reduce the number of singleton list and tuple objects allocated during MLIR lowering, instead represent singleton values as unwrapped ir.Values, and only use a tuple if there is not exactly one ir.Value backing a JAX value.

											
										
										
											2024-07-01 08:42:48 -04:00
+								                             mlir.flatten_ir_values(args))
-												Support eager unified memory computations

PiperOrigin-RevId: 638073121

											
										
										
											2024-05-28 16:58:33 -07:00
+								  mlir.wrap_compute_type_in_place(ctx, call)
-												Don't wrap singleton ir.Types during HLO lowering.

This is similar to https://github.com/google/jax/pull/22211, but for MLIR types instead of MLIR values.

											
										
										
											2024-07-03 16:38:18 -04:00
+								  out_nodes = mlir.unflatten_ir_values_like_types(call.results, output_types)
-												Fix debugging primitives for pjit. This came up during jit/pjit merge

PiperOrigin-RevId: 501710198

											
										
										
											2023-01-12 17:40:06 -08:00
+								  tokens, out_nodes = split_list(out_nodes, [len(effects)])
 								  tokens_out = ctx.tokens_in.update_tokens(mlir.TokenSet(zip(effects, tokens)))
 								  ctx.set_tokens_out(tokens_out)
 								  return out_nodes
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								mlir.register_lowering(pjit_p, _pjit_lowering)
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								def _pjit_batcher(spmd_axis_name, axis_size, axis_name, main_type,
 								                  vals_in, dims_in, jaxpr, in_shardings, out_shardings,
 								                  in_layouts, out_layouts, resource_env, donated_invars, name,
 								                  keep_unused, inline):
-												Indirectify ragged axes across jitting boundaries, input- and output-side.

Also propagate DShapedArray through at least the simple cases of
shardings that show up in test cases.

Co-authored-by: Alexey Radul <axch@google.com>

											
										
										
											2023-06-30 14:34:48 -07:00
+								  segment_lens, dims_in = batching.indirectify_ragged_axes(dims_in)
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								  new_jaxpr, axes_out = batching.batch_jaxpr2(
-												Plumb spmd_axis_name through batch_jaxpr2 and batch_jaxpr

PiperOrigin-RevId: 509341618

											
										
										
											2023-02-13 14:57:50 -08:00
+								      jaxpr, axis_size, dims_in, axis_name=axis_name,
 								      spmd_axis_name=spmd_axis_name, main_type=main_type)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Make `jit` a thin wrapper around `pjit` which ignores the mesh context manager (just like how it is today)

Pass `None` as the resource_env via `jit` because `jit(pjit)` will ignore the outer mesh because `jit` will set the resource env to empty mesh.

This does not make `jit` and `pjit` the same API but it shares all the code between both the APIs (cpp and python) while preserving the current semantics of both `jit` and `pjit`.

PiperOrigin-RevId: 501707496

											
										
										
											2023-01-12 17:23:55 -08:00
+								  if resource_env is not None:
 								    mesh = resource_env.physical_mesh
 								  else:
 								    mesh = None
-												Indirectify ragged axes across jitting boundaries, input- and output-side.

Also propagate DShapedArray through at least the simple cases of
shardings that show up in test cases.

Co-authored-by: Alexey Radul <axch@google.com>

											
										
										
											2023-06-30 14:34:48 -07:00
+								  # TODO(axch): prepend with Nones (?) to account for new segment_lens inputs
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  in_shardings = tuple(
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								      _pjit_batcher_for_sharding(i, axis_in, spmd_axis_name, mesh, aval.ndim)
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								      if axis_in is not None else i
 								      for axis_in, i, aval in zip(dims_in, in_shardings, new_jaxpr.in_avals))
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  out_shardings = tuple(
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								      _pjit_batcher_for_sharding(o, axis_out, spmd_axis_name, mesh, aval.ndim)
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
+								      if axis_out is not None else o
 								      for axis_out, o, aval in zip(axes_out, out_shardings, new_jaxpr.out_avals))
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  # TODO(yashkatariya): Figure out layouts should change under vmap.
 								  if not (all(l is None for l in in_layouts) and
 								          all(l is None for l in out_layouts)):
-												Make the vmap(jit) or vmap(wsc) with a concrete layout error more informative

PiperOrigin-RevId: 656176702

											
										
										
											2024-07-25 18:31:50 -07:00
+								    raise NotImplementedError(
 								        'Concrete layouts are not supported for vmap(jit).')
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  vals_out = pjit_p.bind(
 								    *vals_in,
 								    jaxpr=new_jaxpr,
 								    in_shardings=in_shardings,
 								    out_shardings=out_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								    in_layouts=in_layouts,
 								    out_layouts=out_layouts,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    resource_env=resource_env,
 								    donated_invars=donated_invars,
 								    name=name,
 								    keep_unused=keep_unused,
 								    inline=inline)
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
-												jax.jit now works correctly if both donate_argnums and donate_argnames are specified.

Update the docstring and changelog too to mention `donate_argnames`.

PiperOrigin-RevId: 548223395

											
										
										
											2023-07-14 14:27:29 -07:00
+								  resolved_axes_out = batching.resolve_ragged_axes_against_inputs_outputs(
-												Indirectify ragged axes across jitting boundaries, input- and output-side.

Also propagate DShapedArray through at least the simple cases of
shardings that show up in test cases.

Co-authored-by: Alexey Radul <axch@google.com>

											
										
										
											2023-06-30 14:34:48 -07:00
+								      vals_in, vals_out, axes_out)
-												jax.jit now works correctly if both donate_argnums and donate_argnames are specified.

Update the docstring and changelog too to mention `donate_argnames`.

PiperOrigin-RevId: 548223395

											
										
										
											2023-07-14 14:27:29 -07:00
+								  return vals_out, resolved_axes_out
-												Add batch_jaxpr2 which tells the caller where batch dims are.

Co-authored-by: Matthew Johnson <mattjj@google.com>
PiperOrigin-RevId: 501746795

											
										
										
											2023-01-12 21:16:18 -08:00
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								batching.spmd_axis_primitive_batchers[pjit_p] = _pjit_batcher
 								batching.axis_primitive_batchers[pjit_p] = partial(_pjit_batcher, None)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								def _pjit_batcher_for_sharding(
-												Deprecate `XLACompatibleSharding` in favor of `jax.sharding.Sharding`.

PiperOrigin-RevId: 640544939

											
										
										
											2024-06-05 09:06:36 -07:00
+								    s: sharding.Sharding | UnspecifiedValue,
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								    dim: int, spmd_axis_name: tuple[str, ...] | None, mesh, ndim: int):
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if is_unspecified(s):
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    return s
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								  hlo_s = s._to_xla_hlo_sharding(ndim)  # type: ignore
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								  if spmd_axis_name is None:
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								    if sharding_impls.is_op_sharding_replicated(hlo_s):
-												Fix pjit + vmap when `device` is passed as an argument to pjit/jit

PiperOrigin-RevId: 529155035

											
										
										
											2023-05-03 11:54:46 -07:00
+								      return s
-												Introduce `jax.sharding.AbstractMesh(shape_tuple: tuple[tuple[str, int], ...])` and allow `with_sharding_constraint` and `shard_map` to accept an abstract mesh as input (`with_sharding_constraint` is via `NamedSharding(abstract_mesh, pspec)`).

**Semantics**

Inside jit, we don't need to talk about concrete devices ever so the semantics stay the same as today i.e. we can lower a NamedSharding with abstract mesh with only mesh axis names and sizes and PartitionSpec. The only restriction is that the number of devices needs to be consistent throughout the program when we are tracing.
During compilation, the order of devices throughout the program needs to be consistent (same as before this change).

Outside jit i.e. eager mode, if a `shard_map` or `with_sharding_constraint` contains AbstractMesh, then the input to those primitives should contain a concrete Mesh with the same shape and names as the abstract mesh.

**Why do this?**

There are cases where you want to change the devices in the mesh but keep the mesh shape the same (axis names and axis sizes). But this leads to a device mismatch error if you have `with_sharding_constraint` or `shard_map` in your computation because they embed concrete devices in their signature.

So to fix the error, you need to change the mesh in `wsc` and `shmap` which will lead to a tracing cache miss (because function id is now different) and consequently a lowering to stableHLO cache miss. Explaining via an example:

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(mesh1, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # DEVICE MISMATCH ERROR!
```

The same problem exists for `shard_map` since it takes a mesh with concrete devices in its signature.

**Okay, so how do you fix this?**

As mentioned above, we need the above program to work and get tracing and lowering cache hits (**cache hits are the most important** part here)

The approach in this change allows `with_sharding_constraint` to accept a `NamedSharding(abstract_mesh, pspec)` as input. This leads to no errors downstream and we get tracing and lowering cache hits since we don't encode the concrete devices anymore. Just the axis_names and axis_size of the mesh.

**The important part is that the concrete device information should only come from the arguments. Inside `jax.jit`, you should never reference concrete devices ever.**

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(arr_mesh1.shape_tuple)

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(abstract_mesh, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

**One caveat is that this only works with `jax.NamedSharding` but that's fine because `NamedSharding` is the most used `Sharding` in JAX.**

**What about `shard_map`?**

shard_map's signature will be: `shmap(f, mesh: Mesh | AbstractMesh, in_specs: Specs, out_specs: Specs)`.

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(arr_mesh1.shape_tuple)

@jax.jit
def f(x):
  y = shard_map(lambda x: x, mesh=abstract_mesh, in_specs=P('x'), out_specs=P('x'))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

This is a fully backwards-compatible change. So your current code will continue to work as is but you can opt into this new behavior and get all the benefits!

PiperOrigin-RevId: 662670932

											
										
										
											2024-08-13 15:17:30 -07:00
+								    if isinstance(s, NamedSharding) and isinstance(s.mesh, AbstractMesh):
 								      parsed_pspec = s._parsed_pspec.insert_axis_partitions(dim, None)
 								      return NamedSharding._from_parsed_pspec(s.mesh, parsed_pspec)
-												Removed noop # type: ignore comments

mypy should now flag these by default.

											
										
										
											2024-05-17 09:46:36 +01:00
+								    new_op = hlo_s.to_proto().clone()
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    tad = list(new_op.tile_assignment_dimensions)
 								    tad.insert(dim, 1)
 								    new_op.tile_assignment_dimensions = tad
-												Bump minimum jaxlib version to 0.4.27

xla_extension_version is 261 and mlir_api_version is 56

PiperOrigin-RevId: 631579739

											
										
										
											2024-05-07 16:06:48 -07:00
+								    new_gs = GSPMDSharding(
 								        s._device_assignment, new_op,  # type: ignore
 								        _device_list=getattr(s, '_internal_device_list', None))
-												Removed noop # type: ignore comments

mypy should now flag these by default.

											
										
										
											2024-05-17 09:46:36 +01:00
+								    return pxla._get_out_sharding_from_orig_sharding([new_gs], [None], s, None)[0]
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  else:
-												Introduce `jax.sharding.AbstractMesh(shape_tuple: tuple[tuple[str, int], ...])` and allow `with_sharding_constraint` and `shard_map` to accept an abstract mesh as input (`with_sharding_constraint` is via `NamedSharding(abstract_mesh, pspec)`).

**Semantics**

Inside jit, we don't need to talk about concrete devices ever so the semantics stay the same as today i.e. we can lower a NamedSharding with abstract mesh with only mesh axis names and sizes and PartitionSpec. The only restriction is that the number of devices needs to be consistent throughout the program when we are tracing.
During compilation, the order of devices throughout the program needs to be consistent (same as before this change).

Outside jit i.e. eager mode, if a `shard_map` or `with_sharding_constraint` contains AbstractMesh, then the input to those primitives should contain a concrete Mesh with the same shape and names as the abstract mesh.

**Why do this?**

There are cases where you want to change the devices in the mesh but keep the mesh shape the same (axis names and axis sizes). But this leads to a device mismatch error if you have `with_sharding_constraint` or `shard_map` in your computation because they embed concrete devices in their signature.

So to fix the error, you need to change the mesh in `wsc` and `shmap` which will lead to a tracing cache miss (because function id is now different) and consequently a lowering to stableHLO cache miss. Explaining via an example:

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(mesh1, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # DEVICE MISMATCH ERROR!
```

The same problem exists for `shard_map` since it takes a mesh with concrete devices in its signature.

**Okay, so how do you fix this?**

As mentioned above, we need the above program to work and get tracing and lowering cache hits (**cache hits are the most important** part here).

The approach in this change allows `with_sharding_constraint` to accept a `NamedSharding(abstract_mesh, pspec)` as input. This leads to no errors downstream and we get tracing and lowering cache hits since we don't encode the concrete devices anymore, just the axis names and axis sizes of the mesh.

**The important part is that the concrete device information should only come from the arguments. Inside `jax.jit`, you should never reference concrete devices ever.**

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(arr_mesh1.shape_tuple)

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(abstract_mesh, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

**One caveat is that this only works with `jax.NamedSharding` but that's fine because `NamedSharding` is the most used `Sharding` in JAX.**

**What about `shard_map`?**

shard_map's signature will be: `shmap(f, mesh: Mesh | AbstractMesh, in_specs: Specs, out_specs: Specs)`.

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(arr_mesh1.shape_tuple)

@jax.jit
def f(x):
  y = shard_map(lambda x: x, mesh=abstract_mesh, in_specs=P('x'), out_specs=P('x'))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

This is a fully backwards-compatible change, so your current code will continue to work as is, but you can opt into this new behavior and get all the benefits!

PiperOrigin-RevId: 662670932

											
										
										
											2024-08-13 15:17:30 -07:00
+								    if isinstance(s, NamedSharding) and isinstance(s.mesh, AbstractMesh):
 								      parsed_pspec = s._parsed_pspec.insert_axis_partitions(dim, spmd_axis_name)
 								      return NamedSharding._from_parsed_pspec(s.mesh, parsed_pspec)
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								    if isinstance(s, NamedSharding):
-												Removed noop # type: ignore comments

mypy should now flag these by default.

											
										
										
											2024-05-17 09:46:36 +01:00
+								      mesh = s.mesh
-												Raise a good error message when mesh is not provided to jax.jit when using spmd_axis_name parameter of jax.vmap

PiperOrigin-RevId: 561217612

											
										
										
											2023-08-29 20:58:20 -07:00
+								    if mesh is None or mesh.empty:
 								      raise ValueError(
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								          'If you are using spmd_axis_name parameter of jax.vmap,'
-												Raise a good error message when mesh is not provided to jax.jit when using spmd_axis_name parameter of jax.vmap

PiperOrigin-RevId: 561217612

											
										
										
											2023-08-29 20:58:20 -07:00
+								          ' please make sure to run your jitted function inside the mesh'
 								          ' context manager. Only `jax.lax.with_sharding_constraint` with'
 								          ' `jax.sharding.NamedSharding` as an input can be transformed with'
 								          ' spmd_axis_name batching rules outside of an explicit mesh context'
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								          f' manager scope{s!r}')
-												Removed noop # type: ignore comments

mypy should now flag these by default.

											
										
										
											2024-05-17 09:46:36 +01:00
+								    parsed_pspec = parse_flatten_op_sharding(hlo_s, mesh)[0]
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								    parsed_pspec = parsed_pspec.insert_axis_partitions(dim, spmd_axis_name)
-												Remove the canonicalization to GSPMDSharding internally in jit. This is not required anymore since the caches are split into tracing, lowering and compilation.

The canonicalization doesn't provide any value anymore and only makes the internals more complicated.

The canonicalization can be done by lowering to HloSharding in places where required and there are utilities to help with that.

PiperOrigin-RevId: 619292757

											
										
										
											2024-03-26 13:28:03 -07:00
+								    return NamedSharding._from_parsed_pspec(mesh, parsed_pspec)
def _pjit_jvp(primals_in, tangents_in,
              jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
              resource_env, donated_invars, name, keep_unused, inline):
  """JVP rule for ``pjit_p``.

  The closed ``jaxpr`` is differentiated with ``ad.jvp_jaxpr`` and bound as a
  single ``pjit_p`` whose inputs are the primals followed by the tangents that
  are not symbolic zeros. Every per-input parameter (sharding, layout, donation
  flag) of a tangent input is copied from its primal input, and likewise every
  per-output parameter of a tangent output is copied from its primal output.

  Returns:
    A pair ``(primals_out, tangents_out)``; entries of ``tangents_out`` whose
    tangent is a symbolic zero are ``ad.Zero(aval)``.
  """
  if any(isinstance(const, core.MutableArray) for const in jaxpr.consts):
    # Mutable-array consts are moved out of the jaxpr and passed as extra
    # trailing primal inputs, each paired with a zero tangent and default
    # per-input parameters.
    jaxpr, mut_primals = pxla._move_mutable_consts(jaxpr)
    num_mut = len(mut_primals)
    mut_tangents = map(ad_util.zeros_like_jaxval, mut_primals)
    primals_in = [*primals_in, *mut_primals]
    tangents_in = [*tangents_in, *mut_tangents]
    in_shardings = (*in_shardings,) + (UNSPECIFIED,) * num_mut
    in_layouts = (*in_layouts,) + (None,) * num_mut
    donated_invars = (*donated_invars,) + (False,) * num_mut

  # Inputs whose aval is an AbstractRef always get a zero tangent built from
  # the aval, replacing whatever tangent was supplied.
  tangents_in = [
      ad_util.zeros_like_aval(aval) if isinstance(aval, AbstractRef) else tan
      for tan, aval in zip(tangents_in, jaxpr.in_avals)]

  nz_in = [type(tan) is not ad.Zero for tan in tangents_in]
  jaxpr_jvp, nz_out = ad.jvp_jaxpr(jaxpr, nz_in, instantiate=False)

  def _nonzero_only(mask, items):
    # Keep the entries of `items` whose tangent is not a symbolic zero.
    return [item for is_nz, item in zip(mask, items) if is_nz]

  outputs = pjit_p.bind(
      *primals_in, *_nonzero_only(nz_in, tangents_in),
      jaxpr=jaxpr_jvp,
      in_shardings=(*in_shardings, *_nonzero_only(nz_in, in_shardings)),
      out_shardings=(*out_shardings, *_nonzero_only(nz_out, out_shardings)),
      in_layouts=(*in_layouts, *_nonzero_only(nz_in, in_layouts)),
      out_layouts=(*out_layouts, *_nonzero_only(nz_out, out_layouts)),
      resource_env=resource_env,
      donated_invars=(*donated_invars, *_nonzero_only(nz_in, donated_invars)),
      name=name,
      keep_unused=keep_unused,
      inline=inline)

  num_primal_outs = len(jaxpr.jaxpr.outvars)
  primals_out, nz_tangents_out = split_list(outputs, [num_primal_outs])
  assert len(primals_out) == num_primal_outs

  # Re-expand the tangent outputs, filling symbolic zeros back in.
  remaining = iter(nz_tangents_out)
  tangents_out = []
  for is_nz, aval in zip(nz_out, jaxpr.out_avals):
    tangents_out.append(next(remaining) if is_nz else ad.Zero(aval))
  return primals_out, tangents_out
ad.primitive_jvps[pjit_p] = _pjit_jvp
-												Add forwarding support to pjit which was introduced as an optimization. The inputs that are forwarded to outputs are pruned from the outputs of a known_jaxpr.

PiperOrigin-RevId: 503559787

											
										
										
											2023-01-20 18:03:24 -08:00
@weakref_lru_cache
def _known_jaxpr_fwd(known_jaxpr: core.ClosedJaxpr,
                     in_fwd: tuple[int | None, ...]) -> core.ClosedJaxpr:
  """Return ``known_jaxpr`` without the outputs that are forwarded inputs.

  An output is kept only when its entry in ``in_fwd`` is ``None``; outputs with
  a non-``None`` entry are dropped from the jaxpr's ``outvars``.
  """
  inner = known_jaxpr.jaxpr
  kept_outvars = []
  for outvar, fwd_idx in zip(inner.outvars, in_fwd):
    if fwd_idx is None:
      kept_outvars.append(outvar)
  pruned = inner.replace(outvars=kept_outvars)
  return known_jaxpr.replace(jaxpr=pruned)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
def _pjit_partial_eval(trace, *in_tracers,
                       jaxpr, in_shardings, out_shardings,
                       in_layouts, out_layouts, resource_env, donated_invars,
                       name, keep_unused, inline):
  """Partial-evaluation rule for ``pjit_p``.

  The jaxpr is split by ``pe.partial_eval_jaxpr_nounits`` into a known part,
  which is bound immediately through ``pjit_p``, and an unknown part, which is
  staged out as an equation recipe on fresh ``JaxprTracer`` outputs.

  Two prunings are applied to the known jaxpr before it is bound:

  * input-to-output forwarding: outputs that are just forwarded inputs are
    removed (primal outputs only when their out sharding is unspecified and
    their out layout is ``None``);
  * output-to-output forwarding: residuals that are the same variable as a
    primal output are removed.

  The pruned values are substituted back into the output list after the bind.

  Returns:
    The outputs in original order, merged from known values and unknown
    tracers according to ``unknown_outs``.
  """
  def select(items, mask):
    # Entries of `items` at the positions where `mask` is truthy.
    return tuple(item for item, flag in zip(items, mask) if flag)

  in_pvals = [tracer.pval for tracer in in_tracers]
  known_ins = tuple(pval.is_known() for pval in in_pvals)
  unknown_ins = tuple(not known for known in known_ins)

  known_jaxpr, unknown_jaxpr, unknown_outs, res_avals = (
      pe.partial_eval_jaxpr_nounits(jaxpr, unknown_ins, instantiate=False))
  unknown_outs = tuple(unknown_outs)
  known_outs = tuple(not unknown for unknown in unknown_outs)

  # Residuals carry no user-specified sharding or layout.
  num_residuals = len(res_avals)
  res_shardings = (UNSPECIFIED,) * num_residuals
  res_layouts = (None,) * num_residuals

  primal_out_shardings = select(out_shardings, known_outs)
  primal_out_layouts = select(out_layouts, known_outs)
  known_out_shardings = primal_out_shardings + res_shardings
  known_out_layouts = primal_out_layouts + res_layouts

  # --- Input-to-output forwarding -------------------------------------------
  # Find the known outputs that are just forwarded inputs.
  num_out_primals = len(known_jaxpr.out_avals) - num_residuals
  raw_fwd = pe._jaxpr_forwarding(known_jaxpr.jaxpr)
  primal_fwd, res_fwd = split_list(raw_fwd, [num_out_primals])
  # A primal output is only forwarded when its out sharding is UNSPECIFIED and
  # its out layout is None; residual forwards are taken as-is.
  in_fwd: list[int | None] = [
      fwd if is_unspecified(sharding) and layout is None else None
      for sharding, layout, fwd in zip(
          primal_out_shardings, primal_out_layouts, primal_fwd)
  ] + res_fwd
  # Drop the forwarded outputs from the jaxpr and from the per-output params.
  not_in_forwarded = [fwd is None for fwd in in_fwd]
  known_jaxpr = pe.prune_closed_jaxpr_outputs(known_jaxpr, not_in_forwarded)
  known_out_shardings = select(known_out_shardings, not_in_forwarded)
  known_out_layouts = select(known_out_layouts, not_in_forwarded)
  # The number of primal outputs shrinks by however many were pruned.
  num_out_primals = sum(not_in_forwarded[:num_out_primals])

  # --- Output-to-output forwarding ------------------------------------------
  # Find the residuals that are the same variable as a remaining primal output.
  out_vars, res_vars = split_list(known_jaxpr.jaxpr.outvars, [num_out_primals])
  position_of = {id(var): idx for idx, var in enumerate(out_vars)}
  out_fwd = [None] * num_out_primals
  out_fwd.extend(position_of.get(id(var)) for var in res_vars)
  # Drop the forwarded residuals from the jaxpr and from the per-output params.
  not_out_forwarded = [fwd is None for fwd in out_fwd]
  known_jaxpr = pe.prune_closed_jaxpr_outputs(known_jaxpr, not_out_forwarded)
  known_out_shardings = select(known_out_shardings, not_out_forwarded)
  known_out_layouts = select(known_out_layouts, not_out_forwarded)

  known_params = {
      'jaxpr': known_jaxpr,
      'in_shardings': select(in_shardings, known_ins),
      'out_shardings': known_out_shardings,
      'in_layouts': select(in_layouts, known_ins),
      'out_layouts': known_out_layouts,
      'resource_env': resource_env,
      'donated_invars': select(donated_invars, known_ins),
      'name': name,
      'keep_unused': keep_unused,
      'inline': inline,
  }
  num_known_outs = len(known_jaxpr.out_avals)
  assert len(known_out_shardings) == num_known_outs
  assert len(known_out_layouts) == num_known_outs

  # Bind the known computation.
  known_inputs = [pval.get_known() for pval in in_pvals if pval.is_known()]
  all_known_outs = pjit_p.bind(*known_inputs, **known_params)
  # Put the output-forwarded values back, then the input-forwarded ones.
  all_known_outs = subs_list(out_fwd, all_known_outs, all_known_outs)
  all_known_outs = subs_list(in_fwd, known_inputs, all_known_outs)

  num_known_primals = len(all_known_outs) - num_residuals
  known_out_vals, residual_vals = split_list(all_known_outs,
                                             [num_known_primals])
  residual_tracers = map(trace.new_instantiated_const, residual_vals)

  # partial_eval_jaxpr_nounits places the residual binders at the front of the
  # unknown jaxpr. Both the equation built below and the pjit transpose rule
  # assume residual inputs come last, so move them to the back.
  residuals_first = [True] * num_residuals + [False] * sum(unknown_ins)
  unknown_jaxpr = pe.move_binders_to_back(unknown_jaxpr, residuals_first)

  # Parameters and tracers for the staged-out (unknown) pjit equation.
  unknown_params = {
      'jaxpr': unknown_jaxpr,
      'in_shardings': select(in_shardings, unknown_ins) + res_shardings,
      'out_shardings': select(out_shardings, unknown_outs),
      'in_layouts': select(in_layouts, unknown_ins) + res_layouts,
      'out_layouts': select(out_layouts, unknown_outs),
      'resource_env': resource_env,
      'donated_invars': (select(donated_invars, unknown_ins) +
                         (False,) * num_residuals),
      'name': name,
      'keep_unused': keep_unused,
      'inline': inline,
  }
  unknown_tracers_in = [
      tracer for tracer in in_tracers if not tracer.pval.is_known()]
  unknown_tracers_out = [
      pe.JaxprTracer(trace, pe.PartialVal.unknown(aval), None)
      for aval in unknown_jaxpr.out_avals
  ]
  eqn = pe.new_eqn_recipe((*unknown_tracers_in, *residual_tracers),
                          unknown_tracers_out,
                          pjit_p,
                          unknown_params,
                          unknown_jaxpr.effects,
                          source_info_util.current())
  for out_tracer in unknown_tracers_out:
    out_tracer.recipe = eqn
  return merge_lists(unknown_outs, known_out_vals, unknown_tracers_out)

pe.custom_partial_eval_rules[pjit_p] = _pjit_partial_eval
-												Make pickle_test.py pass with jit/pjit api merge. Also rename and move some functions around

PiperOrigin-RevId: 501878555

											
										
										
											2023-01-13 10:15:30 -08:00
def _pjit_partial_eval_custom_params_updater(
    unks_in: Sequence[bool], inst_in: Sequence[bool],
    kept_outs_known: Sequence[bool], kept_outs_staged: Sequence[bool],
    num_res_out: int, num_res_in: int, params_known: dict, params_staged: dict
  ) -> tuple[dict, dict]:
  """Rebuild the per-argument pjit params for the known and staged equations.

  This is the params updater handed to
  `pe.closed_call_partial_eval_custom_rule` in the registration that follows
  this function.

  Args:
    unks_in: mask used with `pe.partition_list` to prune the per-input params
      of `params_known` (the first partition is kept).
    inst_in: mask used with `pe.partition_list` to prune the per-input params
      of `params_staged` (the second partition is kept).
    kept_outs_known: mask selecting the per-output params of `params_known`
      (the second partition is kept).
    kept_outs_staged: mask selecting the per-output params of `params_staged`
      (the second partition is kept).
    num_res_out: number of placeholder entries appended to the known
      equation's `out_shardings` / `out_layouts`.
    num_res_in: number of placeholder entries prepended to the staged
      equation's `in_shardings` / `in_layouts` / `donated_invars`.
    params_known: params dict of the known equation; not mutated.
    params_staged: params dict of the staged equation; not mutated.

  Returns:
    `(new_params_known, new_params_staged)`: copies of the two input dicts in
    which `in_shardings`, `out_shardings`, `in_layouts`, `out_layouts` and
    `donated_invars` have been replaced by freshly built tuples.
  """
  def first_part(mask, values):
    kept, _ = pe.partition_list(mask, values)
    return kept

  def second_part(mask, values):
    _, kept = pe.partition_list(mask, values)
    return kept

  def check_lengths(new_params, closed_jaxpr_params):
    # Every per-argument tuple must line up with the jaxpr it now describes.
    for key, avals_attr in (('in_shardings', 'in_avals'),
                            ('out_shardings', 'out_avals'),
                            ('in_layouts', 'in_avals'),
                            ('out_layouts', 'out_avals')):
      assert len(new_params[key]) == len(
          getattr(closed_jaxpr_params['jaxpr'], avals_attr))

  # Known side: prune inputs according to unks_in, select outputs according to
  # kept_outs_known, then append one placeholder per residual output.
  known_donated = first_part(unks_in, params_known['donated_invars'])
  known_in_shardings = first_part(unks_in, params_known['in_shardings'])
  known_out_shardings = second_part(kept_outs_known,
                                    params_known['out_shardings'])
  known_in_layouts = first_part(unks_in, params_known['in_layouts'])
  known_out_layouts = second_part(kept_outs_known, params_known['out_layouts'])

  known_overrides = {
      'in_shardings': tuple(known_in_shardings),
      'out_shardings': tuple(known_out_shardings)
                       + (UNSPECIFIED,) * num_res_out,
      'in_layouts': tuple(known_in_layouts),
      'out_layouts': tuple(known_out_layouts) + (None,) * num_res_out,
      'donated_invars': tuple(known_donated),
  }
  new_params_known = dict(params_known, **known_overrides)
  check_lengths(new_params_known, params_known)

  # Staged side: num_res_in new inputs were added to the front of the staged
  # jaxpr; the remaining inputs are pruned according to inst_in.
  staged_donated = second_part(inst_in, params_staged['donated_invars'])
  staged_donated = [False] * num_res_in + staged_donated
  staged_in_shardings = second_part(inst_in, params_staged['in_shardings'])
  staged_in_shardings = [UNSPECIFIED] * num_res_in + staged_in_shardings
  staged_out_shardings = second_part(kept_outs_staged,
                                     params_staged['out_shardings'])
  staged_in_layouts = second_part(inst_in, params_staged['in_layouts'])
  staged_in_layouts = [None] * num_res_in + staged_in_layouts
  staged_out_layouts = second_part(kept_outs_staged,
                                   params_staged['out_layouts'])

  staged_overrides = {
      'in_shardings': tuple(staged_in_shardings),
      'out_shardings': tuple(staged_out_shardings),
      'in_layouts': tuple(staged_in_layouts),
      'out_layouts': tuple(staged_out_layouts),
      'donated_invars': tuple(staged_donated),
  }
  new_params_staged = dict(params_staged, **staged_overrides)
  check_lengths(new_params_staged, params_staged)

  return new_params_known, new_params_staged

# Install the closed-call custom partial-eval rule for pjit_p, parameterised by
# the 'jaxpr' param name and the updater above.
pe.partial_eval_jaxpr_custom_rules[pjit_p] = partial(
    pe.closed_call_partial_eval_custom_rule, 'jaxpr',
    _pjit_partial_eval_custom_params_updater)
-												Cache the creation of ClosedJaxpr in pjit_transpose which if not cached breaks the compilation cache.

PiperOrigin-RevId: 504304311

											
										
										
											2023-01-24 09:57:55 -08:00
@lu.cache
def _pjit_transpose_trace(fun, in_avals):
  """Trace `fun` at `in_avals` and return it as a closed jaxpr.

  Wrapped in `lu.cache` so the `core.ClosedJaxpr` is not rebuilt on every call.

  Returns:
    `(closed_jaxpr, attrs_tracked)`, where `closed_jaxpr` pairs the traced
    jaxpr with its constants and `attrs_tracked` is passed through unchanged
    from `pe.trace_to_jaxpr_dynamic`.
  """
  traced = pe.trace_to_jaxpr_dynamic(fun, in_avals)
  # The second element of the trace result is not needed here.
  open_jaxpr, _, consts, attrs_tracked = traced
  return core.ClosedJaxpr(open_jaxpr, consts), attrs_tracked
-												Cache the creation of ClosedJaxpr in pjit_transpose which if not cached breaks the compilation cache.

PiperOrigin-RevId: 504304311

											
										
										
											2023-01-24 09:57:55 -08:00
-												[xmap-removal] remove reduce_axes from grad / vjp / backward_pass

The reduce_axes machinery was planned to be used for xmap. It's not needed for
e.g. shard_map, see https://jax.readthedocs.io/en/latest/jep/17111-shmap-transpose.html.

											
										
										
											2024-02-24 16:11:41 -08:00
def _pjit_transpose(cts_in, *primals_in,
                    jaxpr, in_shardings, out_shardings, in_layouts, out_layouts,
                    resource_env, donated_invars, name, keep_unused, inline):
  """Transpose rule for `pjit_p`: bind a new `pjit_p` running the backward pass.

  The backward pass (`ad.closed_backward_pass` applied to `jaxpr`) is traced to
  a closed jaxpr via `_pjit_transpose_trace` and bound as a fresh `pjit_p`
  equation. Shardings and layouts of the new equation are derived from the
  original ones by dropping the entries whose corresponding argument or result
  is not passed through: `ad.UndefinedPrimal` primals and `ad.Zero` cotangents
  on the input side, `ad.Zero` cotangents on the output side.

  When the trace reports tracked attrs, their current states are prepended to
  the inputs (with `UNSPECIFIED` shardings and `None` layouts), and the leading
  outputs are written back with `_set_states`.

  `donated_invars` is accepted but not read: the new equation is bound with
  donation disabled for every input.

  Returns:
    The output cotangents, unflattened with the backward pass's output treedef.
  """
  def drop_matching(kind, values, markers):
    # Keep values[i] unless markers[i] is exactly of type `kind`.
    return tuple(v for v, m in zip(values, markers) if type(m) is not kind)

  backward = lu.hashable_partial(
      lu.wrap_init(ad.closed_backward_pass), jaxpr, False)
  flat_args, in_treedef = tree_flatten((primals_in, cts_in))
  backward, out_treedef_thunk = flatten_fun_nokwargs(backward, in_treedef)

  bwd_in_shardings = (
      drop_matching(ad.UndefinedPrimal, in_shardings, primals_in)
      + drop_matching(ad.Zero, out_shardings, cts_in))
  bwd_in_layouts = (
      drop_matching(ad.UndefinedPrimal, in_layouts, primals_in)
      + drop_matching(ad.Zero, out_layouts, cts_in))

  flat_avals = tuple(
      [core.raise_to_shaped(core.get_aval(arg)) for arg in flat_args])
  bwd_jaxpr, attrs_tracked = _pjit_transpose_trace(backward, flat_avals)

  # The thunk is only read after the trace call above.
  cts_out_treedef = out_treedef_thunk()
  # Rebuild the output structure with placeholder leaves so the ad.Zero
  # entries can be located by type.
  placeholder_cts = tree_unflatten(
      cts_out_treedef, [object()] * cts_out_treedef.num_leaves)
  bwd_out_shardings = drop_matching(ad.Zero, in_shardings, placeholder_cts)
  bwd_out_layouts = drop_matching(ad.Zero, in_layouts, placeholder_cts)

  if attrs_tracked:
    init_states = _get_states(attrs_tracked)
    flat_args = [*init_states, *flat_args]
    num_attrs = len(attrs_tracked)
    unspecified_pad = (UNSPECIFIED,) * num_attrs
    none_pad = (None,) * num_attrs
    bwd_in_shardings = unspecified_pad + bwd_in_shardings
    bwd_out_shardings = unspecified_pad + bwd_out_shardings
    bwd_in_layouts = none_pad + bwd_in_layouts
    bwd_out_layouts = none_pad + bwd_out_layouts

  bind_params = dict(
      jaxpr=bwd_jaxpr,
      in_shardings=bwd_in_shardings,
      out_shardings=bwd_out_shardings,
      in_layouts=bwd_in_layouts,
      out_layouts=bwd_out_layouts,
      resource_env=resource_env,
      donated_invars=(False,) * len(flat_args),
      name=name,
      keep_unused=keep_unused,
      inline=inline,
  )
  nz_cts_out = pjit_p.bind(*flat_args, **bind_params)

  if attrs_tracked:
    # The leading outputs are the final attr states; the rest are cotangents.
    final_states, nz_cts_out = split_list(nz_cts_out, [len(init_states)])
    _set_states(attrs_tracked, final_states)

  return tree_unflatten(cts_out_treedef, nz_cts_out)

# Register the rule above as the reducing transpose for pjit_p.
ad.reducing_transposes[pjit_p] = _pjit_transpose
-												Add dce_rules for pjit primitive so that remat can DCE through the pjit primitive and remove unused residuals

PiperOrigin-RevId: 504123801

											
										
										
											2023-01-23 17:31:33 -08:00
@weakref_lru_cache
def _dce_jaxpr_pjit(
    jaxpr: core.ClosedJaxpr, used_outputs: tuple[bool, ...]
) -> tuple[core.ClosedJaxpr, list[bool]]:
  """Run `pe.dce_jaxpr` on a closed jaxpr and re-close the result.

  Args:
    jaxpr: the closed jaxpr whose inner `jaxpr` is passed to `pe.dce_jaxpr`.
    used_outputs: per-output flags forwarded to `pe.dce_jaxpr`; a tuple so the
      call can be cached by `weakref_lru_cache`.

  Returns:
    `(closed_jaxpr, used_inputs)`: the jaxpr returned by `pe.dce_jaxpr`,
    closed over the original `jaxpr.consts`, and the used-inputs value
    returned by `pe.dce_jaxpr`.
  """
  pruned_jaxpr, used_inputs = pe.dce_jaxpr(jaxpr.jaxpr, used_outputs)
  closed = core.ClosedJaxpr(pruned_jaxpr, jaxpr.consts)
  return closed, used_inputs
-												Use lower-case PEP 585 names for types.

Issue https://github.com/google/jax/issues/16537

PiperOrigin-RevId: 542969282

											
										
										
											2023-06-23 15:11:37 -07:00
+								def dce_jaxpr_pjit_rule(used_outputs: list[bool], eqn: core.JaxprEqn
-												Upgrade remaining sources to Python 3.9

This PR is a follow up to #18881.

The changes were generated by adding

    from __future__ import annotations

to the files which did not already have them and running

    pyupgrade --py39-plus --keep-percent-format {jax,tests,jaxlib,examples,benchmarks}/**/*.py

											
										
										
											2023-12-11 13:59:29 +00:00
+								                        ) -> tuple[list[bool], core.JaxprEqn | None]:
-												Add dce_rules for pjit primitive so that remat can DCE through the pjit primitive and remove unused residuals

PiperOrigin-RevId: 504123801

											
										
										
											2023-01-23 17:31:33 -08:00
+								  dced_jaxpr, used_inputs = _dce_jaxpr_pjit(
 								      eqn.params['jaxpr'], tuple(used_outputs))
 								  def keep_where(xs, keeps):
-												[dynamic-shapes] make dynamic shape staging-to-jaxpr work with pjit

											
										
										
											2023-03-22 20:54:45 -07:00
+								    return tuple(x for x, keep in zip(xs, keeps) if keep)
-												Add dce_rules for pjit primitive so that remat can DCE through the pjit primitive and remove unused residuals

PiperOrigin-RevId: 504123801

											
										
										
											2023-01-23 17:31:33 -08:00
 								  eqn_params = eqn.params
 								  new_params = dict(
 								      eqn_params,
 								      jaxpr=dced_jaxpr,
 								      in_shardings=keep_where(eqn_params["in_shardings"], used_inputs),
 								      out_shardings=keep_where(eqn_params["out_shardings"], used_outputs),
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								      in_layouts=keep_where(eqn_params["in_layouts"], used_inputs),
 								      out_layouts=keep_where(eqn_params["out_layouts"], used_outputs),
-												Add dce_rules for pjit primitive so that remat can DCE through the pjit primitive and remove unused residuals

PiperOrigin-RevId: 504123801

											
										
										
											2023-01-23 17:31:33 -08:00
+								      donated_invars=keep_where(eqn_params["donated_invars"], used_inputs),
 								  )
 								  if not any(used_inputs) and not any(used_outputs) and not dced_jaxpr.effects:
 								    return used_inputs, None
 								  else:
 								    new_eqn = core.new_jaxpr_eqn(
 								        [v for v, used in zip(eqn.invars, used_inputs) if used],
 								        [v for v, used in zip(eqn.outvars, used_outputs) if used],
-												Initial commit for `jax.experimental.compute_on` API.

The currently supported values for compute type are `device_host` and `device`. `device_sparse` will be allowed in a follow-up CL. Using `device_host` means that the device's PJRT client will be orchestrating the execution of the computation on the host.

`cpu` as a compute_type is reserved for pure CPU only computations without a device's pjrt client orchestrating the computation.

PiperOrigin-RevId: 634909918

											
										
										
											2024-05-17 15:58:25 -07:00
+								        eqn.primitive, new_params, dced_jaxpr.effects, eqn.source_info, eqn.ctx)
-												Add dce_rules for pjit primitive so that remat can DCE through the pjit primitive and remove unused residuals

PiperOrigin-RevId: 504123801

											
										
										
											2023-01-23 17:31:33 -08:00
+								    return used_inputs, new_eqn
 								pe.dce_rules[pjit_p] = dce_jaxpr_pjit_rule
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								def _pjit_pp_rule(eqn, context, settings):
 								  params = dict(eqn.params)
 								  del params['inline']
 								  if not any(params['donated_invars']):
 								    del params['donated_invars']
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if all(is_unspecified(s) for s in params['in_shardings']):
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								    del params['in_shardings']
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  if all(is_unspecified(s) for s in params['out_shardings']):
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								    del params['out_shardings']
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  if all(l is None for l in params['in_layouts']):
 								    del params['in_layouts']
 								  if all(l is None for l in params['out_layouts']):
 								    del params['out_layouts']
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								  if not params['keep_unused']:
 								    del params['keep_unused']
 								  if (params['resource_env'] is None or
 								      params['resource_env'].physical_mesh.empty):
 								    del params['resource_env']
-												Print pjit name= before other params

The jaxpr sometimes gets pretty big, making it hard to see the name.

											
										
										
											2023-12-07 15:56:56 +00:00
 								  # Move name= to the front to make the resulting equation easier to scan.
 								  del params["name"]
 								  return core._pp_eqn(eqn, context, settings, params=["name"] + sorted(params))
-												simpler pretty-print for pjit, tweak custom pp rule signature

											
										
										
											2023-02-09 11:02:24 -08:00
+								core.pp_eqn_rules[pjit_p] = _pjit_pp_rule
-												[run_state] add pjit run_state discharge rule and basic test

											
										
										
											2023-10-04 12:57:17 -07:00
+								def _pjit_state_discharge_rule(
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								    in_avals, out_avals, *args, jaxpr, in_shardings, out_shardings,
 								    in_layouts, out_layouts, **params):
-												[run_state] add pjit run_state discharge rule and basic test

											
										
										
											2023-10-04 12:57:17 -07:00
+								  if not (all(map(is_unspecified, in_shardings)) and
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								          all(map(is_unspecified, out_shardings))):
 								    raise NotImplementedError
 								  if not (all(l is None for l in in_layouts) and
 								          all(l is None for l in out_layouts)):
 								    raise NotImplementedError
-												[run_state] add pjit run_state discharge rule and basic test

											
										
										
											2023-10-04 12:57:17 -07:00
+								  jaxpr, consts = jaxpr.jaxpr, jaxpr.consts
 								  num_outs = len(jaxpr.outvars)
 								  discharged_jaxpr, discharged_consts = state_discharge.discharge_state(jaxpr, consts)
 								  discharged_closed_jaxpr = core.ClosedJaxpr(discharged_jaxpr, discharged_consts)
 								  new_in_shardings = (UnspecifiedValue(),) * len(discharged_jaxpr.invars)
 								  new_out_shardings = (UnspecifiedValue(),) * len(discharged_jaxpr.outvars)
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								  new_in_layouts = (None,) * len(discharged_jaxpr.invars)
 								  new_out_layouts = (None,) * len(discharged_jaxpr.outvars)
-												[run_state] add pjit run_state discharge rule and basic test

											
										
										
											2023-10-04 12:57:17 -07:00
+								  out_and_ref_vals = pjit_p.bind(
 								      *args, jaxpr=discharged_closed_jaxpr, in_shardings=new_in_shardings,
-												Add `Layout` support to `jax.jit`.

`jax.jit` now accepts `Layout` instances to the `in_shardings` and `out_shardings` argument. Major changes are just plumbing `in_layouts` and `out_layouts` everywhere.

Note that public api is `Layout(device_local_layout, sharding)` which is how users will pass us the Layout but internally we split them apart into device_local_layout and sharding.

Docs are coming up on how to use the API and what Layouts mean and how to make sense of them (especially on TPU).

PiperOrigin-RevId: 622352537

											
										
										
											2024-04-05 20:08:48 -07:00
+								      out_shardings=new_out_shardings, in_layouts=new_in_layouts,
 								      out_layouts=new_out_layouts, **params)
-												[run_state] add pjit run_state discharge rule and basic test

											
										
										
											2023-10-04 12:57:17 -07:00
+								  out_vals, ref_vals = split_list(out_and_ref_vals, [num_outs])
 								  ref_vals_iter = iter(ref_vals)
-												Add a zeros rule for mutable arrays and test it using a custom vjp.

add jit compatibility (have pjit jvp instantiate all ref tangents)

Co-authored-by: Matt Johnson <mattjj@google.com>

											
										
										
											2024-04-04 14:33:06 -04:00
+								  new_invals = tuple(next(ref_vals_iter) if isinstance(aval, AbstractRef)
-												[run_state] add pjit run_state discharge rule and basic test

											
										
										
											2023-10-04 12:57:17 -07:00
+								                     else None for aval in in_avals)
 								  sentinel = object()
 								  assert next(ref_vals_iter, sentinel) is sentinel
 								  return new_invals, out_vals
 								state_discharge.register_discharge_rule(pjit_p)(_pjit_state_discharge_rule)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								# -------------------- with_sharding_constraint --------------------
-												Remove axis_resources from with_sharding_constraint since it has been 3 months since the deprecation as per the API deprecation policy.

PiperOrigin-RevId: 535687618

											
										
										
											2023-05-26 12:34:32 -07:00
+								def with_sharding_constraint(x, shardings):
-												Document jax.lax.with_sharding_constraint

											
										
										
											2023-04-26 10:19:04 -07:00
+								  """Mechanism to constrain the sharding of an Array inside a jitted computation
 								  This is a strict constraint for the GSPMD partitioner and not a hint. For examples
 								  of how to use this function, see `Distributed arrays and automatic parallelization`_.
 								  Args:
-												Fix typos across the package

											
										
										
											2023-09-22 14:54:31 -07:00
+								    x: PyTree of jax.Arrays which will have their shardings constrained
-												Document jax.lax.with_sharding_constraint

											
										
										
											2023-04-26 10:19:04 -07:00
+								    shardings: PyTree of sharding specifications. Valid values are the same as for
 								      the ``in_shardings`` argument of :func:`jax.experimental.pjit`.
 								  Returns:
 								    x_with_shardings: PyTree of jax.Arrays with specified sharding constraints.
 								  .. _Distributed arrays and automatic parallelization: https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html
 								  """
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  x_flat, tree = tree_flatten(x)
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
 								  layouts, shardings = _split_layout_and_sharding(shardings)
-												Remove the unused return from prepare_axis_resources

PiperOrigin-RevId: 621738698

											
										
										
											2024-04-03 22:38:45 -07:00
+								  user_shardings = prepare_axis_resources(
-												Remove axis_resources from with_sharding_constraint since it has been 3 months since the deprecation as per the API deprecation policy.

PiperOrigin-RevId: 535687618

											
										
										
											2023-05-26 12:34:32 -07:00
+								      shardings, "shardings", allow_unconstrained_dims=True)
 								  del shardings
-												Change the `axis_resources` argument of `with_sharding_constraint` to `shardings` to match `pjit` and `jit`.

PiperOrigin-RevId: 509275107

											
										
										
											2023-02-13 10:53:21 -08:00
 								  user_shardings_flat = tuple(
 								      flatten_axes("with_sharding_constraint shardings", tree, user_shardings))
 								  del user_shardings
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								  user_layouts_flat = tuple(
 								      flatten_axes("with_sharding_constraint layouts", tree, layouts))
 								  del layouts
-												Avoid imports from the public jax.* namespace in more places internally.

This change is in preparation for more cycle breaking in the Bazel dependency graph.

PiperOrigin-RevId: 521822756

											
										
										
											2023-04-04 11:41:00 -07:00
+								  resource_env = mesh_lib.thread_resources.env
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  mesh = resource_env.physical_mesh
-												Improve the error raised when wsc is passed a PartitionSpec without a mesh context manager

PiperOrigin-RevId: 529260748

											
										
										
											2023-05-03 19:28:54 -07:00
+								  shardings_flat = [_create_sharding_for_array(mesh, a, 'shardings',
 								                                               'with_sharding_constraint')
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								                    for a in user_shardings_flat]
-												#sdy Support with_sharding_constraint lowering through Shardy.

PiperOrigin-RevId: 655905063

											
										
										
											2024-07-25 04:20:09 -07:00
+								  # TODO(bartchr): remove `unconstrained_dims` after migrating to Shardy. It's
 								  # already part of the shardings.
-												Remove references to jax.config.jax_array, which is always True at head.

PiperOrigin-RevId: 516970232

											
										
										
											2023-03-15 17:08:21 -07:00
+								  unconstrained_dims = [get_unconstrained_dims(s)
 								                        if isinstance(s, NamedSharding) else {}
 								                        for s in shardings_flat]
-												Change the `axis_resources` argument of `with_sharding_constraint` to `shardings` to match `pjit` and `jit`.

PiperOrigin-RevId: 509275107

											
										
										
											2023-02-13 10:53:21 -08:00
+								  del user_shardings_flat
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Improve the shape incompatible error message by adding the argument/result name path to it.

PiperOrigin-RevId: 529605855

											
										
										
											2023-05-04 21:49:28 -07:00
+								  pjit_check_aval_sharding(
 								      shardings_flat, x_flat, None, "with_sharding_constraint arguments",
 								      allow_uneven_sharding=True)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Add check_compatible_aval checks to Layout. It checks if `len(major_to_minor) == len(aval.shape)`.

PiperOrigin-RevId: 651777179

											
										
										
											2024-07-12 08:09:54 -07:00
+								  check_aval_layout_compatibility(user_layouts_flat, x_flat, None,
 								                                  "with_sharding_constraint arguments")
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								  outs = [sharding_constraint_p.bind(xf, sharding=s, layout=l,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                                     resource_env=resource_env,
 								                                     unconstrained_dims=ud)
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								          for xf, s, l, ud in zip(x_flat, shardings_flat, user_layouts_flat,
 								                                  unconstrained_dims)]
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  return tree_unflatten(tree, outs)
-												Add impl rule for with_sharding_constraint so that users can use their functions with and without a jit.

The semantics of eager wsc is the same as within a jit i.e. it will reshard to the given sharding only if the devices are the same and in the same order.

Eager wsc won't work as expected with AD transpose because there is no `src` argument to reverse the shardings when transposing; it was decided that this is fine for now. `jax.device_put` should be the API to use for that.

PiperOrigin-RevId: 532858670

											
										
										
											2023-05-17 11:49:31 -07:00
+								def _identity_fn(x): return x
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								def _sharding_constraint_impl(x, sharding, layout, resource_env,
 								                              unconstrained_dims):
-												Introduce `jax.sharding.AbstractMesh(shape_tuple: tuple[tuple[str, int], ...])` and allow `with_sharding_constraint` and `shard_map` to accept an abstract mesh as input (`with_sharding_constraint` is via `NamedSharding(abstract_mesh, pspec)`).

**Semantics**

Inside jit, we don't need to talk about concrete devices ever, so the semantics stay the same as today, i.e. we can lower a NamedSharding with an abstract mesh using only the mesh axis names and sizes and the PartitionSpec. The only restriction is that the number of devices needs to be consistent throughout the program when we are tracing.
During compilation, the order of devices throughout the program needs to be consistent (same as before this change).

Outside jit i.e. eager mode, if a `shard_map` or `with_sharding_constraint` contains AbstractMesh, then the input to those primitives should contain a concrete Mesh with the same shape and names as the abstract mesh.

**Why do this?**

There are cases where you want to change the devices in the mesh but keep the mesh shape the same (axis names and axis sizes). But this leads to a device mismatch error if you have `with_sharding_constraint` or `shard_map` in your computation because they embed concrete devices in their signature.

So to fix the error, you need to change the mesh in `wsc` and `shmap` which will lead to a tracing cache miss (because function id is now different) and consequently a lowering to stableHLO cache miss. Explaining via an example:

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(mesh1, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # DEVICE MISMATCH ERROR!
```

The same problem exists for `shard_map` since it takes a mesh with concrete devices in its signature.

**Okay, so how do you fix this?**

As mentioned above, we need the above program to work and get tracing and lowering cache hits (**cache hits are the most important** part here).

The approach in this change allows `with_sharding_constraint` to accept a `NamedSharding(abstract_mesh, pspec)` as input. This leads to no errors downstream, and we get tracing and lowering cache hits since we don't encode the concrete devices anymore, just the axis names and axis sizes of the mesh.

**The important part is that the concrete device information should only come from the arguments. Inside `jax.jit`, you should never reference concrete devices ever.**

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(abstract_mesh, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

**One caveat is that this only works with `jax.NamedSharding` but that's fine because `NamedSharding` is the most used `Sharding` in JAX.**

**What about `shard_map`?**

shard_map's signature will be: `shmap(f, mesh: Mesh | AbstractMesh, in_specs: Specs, out_specs: Specs)`.

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = shard_map(lambda x: x, mesh=abstract_mesh, in_specs=P('x'), out_specs=P('x'))(x)
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

This is a fully backwards-compatible change, so your current code will continue to work as is, but you can opt into this new behavior and get all the benefits!

PiperOrigin-RevId: 662670932

											
										
										
											2024-08-13 15:17:30 -07:00
+								  if (isinstance(sharding, NamedSharding) and
 								      isinstance(sharding.mesh, AbstractMesh)):
-												Improve the error message to specify shapes too

PiperOrigin-RevId: 668117141

											
										
										
											2024-08-27 13:30:12 -07:00
+								    aval = shaped_abstractify(x)
-												Introduce `jax.sharding.AbstractMesh(shape_tuple: tuple[tuple[str, int], ...])` and allow `with_sharding_constraint` and `shard_map` to accept an abstract mesh as input (`with_sharding_constraint` is via `NamedSharding(abstract_mesh, pspec)`).

**Semantics**

Inside jit, we don't need to talk about concrete devices ever, so the semantics stay the same as today, i.e. we can lower a NamedSharding with an abstract mesh using only the mesh axis names and sizes and the PartitionSpec. The only restriction is that the number of devices needs to be consistent throughout the program when we are tracing.
During compilation, the order of devices throughout the program needs to be consistent (same as before this change).

Outside jit i.e. eager mode, if a `shard_map` or `with_sharding_constraint` contains AbstractMesh, then the input to those primitives should contain a concrete Mesh with the same shape and names as the abstract mesh.

**Why do this?**

There are cases where you want to change the devices in the mesh but keep the mesh shape the same (axis names and axis sizes). But this leads to a device mismatch error if you have `with_sharding_constraint` or `shard_map` in your computation because they embed concrete devices in their signature.

So to fix the error, you need to change the mesh in `wsc` and `shmap` which will lead to a tracing cache miss (because function id is now different) and consequently a lowering to stableHLO cache miss. Explaining via an example:

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(mesh1, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # DEVICE MISMATCH ERROR!
```

The same problem exists for `shard_map` since it takes a mesh with concrete devices in its signature.

**Okay, so how do you fix this?**

As mentioned above, we need the above program to work and get tracing and lowering cache hits (**cache hits are the most important** part here).

The approach in this change allows `with_sharding_constraint` to accept a `NamedSharding(abstract_mesh, pspec)` as input. This leads to no errors downstream, and we get tracing and lowering cache hits since we don't encode the concrete devices anymore, just the axis names and axis sizes of the mesh.

**The important part is that the concrete device information should only come from the arguments. Inside `jax.jit`, you should never reference concrete devices ever.**

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(abstract_mesh, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

**One caveat is that this only works with `jax.NamedSharding` but that's fine because `NamedSharding` is the most used `Sharding` in JAX.**

**What about `shard_map`?**

shard_map's signature will be: `shmap(f, mesh: Mesh | AbstractMesh, in_specs: Specs, out_specs: Specs)`.

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = shard_map(lambda x: x, mesh=abstract_mesh, in_specs=P('x'), out_specs=P('x'))(x)
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

This is a fully backwards-compatible change, so your current code will continue to work as is, but you can opt into this new behavior and get all the benefits!

PiperOrigin-RevId: 662670932

											
										
										
											2024-08-13 15:17:30 -07:00
+								    if not hasattr(x, 'sharding'):
 								      raise ValueError(
 								          'Target sharding contains a `jax.sharding.AbstractMesh` which'
 								          ' requires the input passed should be a `jax.Array`. Got'
 								          f' {type(x)} with shape {aval.str_short()}')
 								    if not isinstance(x.sharding, NamedSharding):
 								      raise TypeError(
 								          'The sharding on the input must be a `NamedSharding` since the target'
 								          ' sharding has an `AbstractMesh` in it. Got sharding type'
-												Improve the error message to specify shapes too

PiperOrigin-RevId: 668117141

											
										
										
											2024-08-27 13:30:12 -07:00
+								          f' {type(x.sharding)} for shape {aval.str_short()}')
-												Introduce `jax.sharding.AbstractMesh(shape_tuple: tuple[tuple[str, int], ...])` and allow `with_sharding_constraint` and `shard_map` to accept an abstract mesh as input (`with_sharding_constraint` is via `NamedSharding(abstract_mesh, pspec)`).

**Semantics**

Inside jit, we don't need to talk about concrete devices ever, so the semantics stay the same as today, i.e. we can lower a NamedSharding with an abstract mesh using only mesh axis names and sizes and the PartitionSpec. The only restriction is that the number of devices needs to be consistent throughout the program when we are tracing.
During compilation, the order of devices throughout the program needs to be consistent (same as before this change).

Outside jit i.e. eager mode, if a `shard_map` or `with_sharding_constraint` contains AbstractMesh, then the input to those primitives should contain a concrete Mesh with the same shape and names as the abstract mesh.

**Why do this?**

There are cases where you want to change the devices in the mesh but keep the mesh shape the same (axis names and axis sizes). But this leads to a device mismatch error if you have `with_sharding_constraint` or `shard_map` in your computation because they embed concrete devices in their signature.

So to fix the error, you need to change the mesh in `wsc` and `shmap` which will lead to a tracing cache miss (because function id is now different) and consequently a lowering to stableHLO cache miss. Explaining via an example:

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(mesh1, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # DEVICE MISMATCH ERROR!
```

The same problem exists for `shard_map` since it takes a mesh with concrete devices in its signature.

**Okay, so how do you fix this?**

As mentioned above, we need the above program to work and get tracing and lowering cache hits (**cache hits is the most important** part here)

The approach in this change allows `with_sharding_constraint` to accept a `NamedSharding(abstract_mesh, pspec)` as input. This leads to no errors downstream and we get tracing and lowering cache hits since we don't encode the concrete devices anymore, just the axis names and axis sizes of the mesh.

**The important part is that the concrete device information should only come from the arguments. Inside `jax.jit`, you should never reference concrete devices ever.**

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(abstract_mesh, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

**One caveat is that this only works with `jax.NamedSharding` but that's fine because `NamedSharding` is the most used `Sharding` in JAX.**

**What about `shard_map`?**

shard_map's signature will be: `shmap(f, mesh: Mesh | AbstractMesh, in_specs: Specs, out_specs: Specs)`.

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = shard_map(lambda x: x, mesh=abstract_mesh, in_specs=P('x'), out_specs=P('x'))(x)
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

This is a fully backwards-compatible change, so your current code will continue to work as is, but you can opt into this new behavior and get all the benefits!

PiperOrigin-RevId: 662670932

											
										
										
											2024-08-13 15:17:30 -07:00
+								    if x.sharding.mesh.shape_tuple != sharding.mesh.shape_tuple:
 								      raise ValueError(
 								          f'Mesh shape of the input {x.sharding.mesh.shape_tuple} does not'
 								          ' match the mesh shape of the target sharding'
-												Improve the error message to specify shapes too

PiperOrigin-RevId: 668117141

											
										
										
											2024-08-27 13:30:12 -07:00
+								          f' {sharding.mesh.shape_tuple} for shape {aval.str_short()}')
-												Introduce `jax.sharding.AbstractMesh(shape_tuple: tuple[tuple[str, int], ...])` and allow `with_sharding_constraint` and `shard_map` to accept an abstract mesh as input (`with_sharding_constraint` is via `NamedSharding(abstract_mesh, pspec)`).

**Semantics**

Inside jit, we don't need to talk about concrete devices ever, so the semantics stay the same as today, i.e. we can lower a NamedSharding with an abstract mesh using only mesh axis names and sizes and the PartitionSpec. The only restriction is that the number of devices needs to be consistent throughout the program when we are tracing.
During compilation, the order of devices throughout the program needs to be consistent (same as before this change).

Outside jit i.e. eager mode, if a `shard_map` or `with_sharding_constraint` contains AbstractMesh, then the input to those primitives should contain a concrete Mesh with the same shape and names as the abstract mesh.

**Why do this?**

There are cases where you want to change the devices in the mesh but keep the mesh shape the same (axis names and axis sizes). But this leads to a device mismatch error if you have `with_sharding_constraint` or `shard_map` in your computation because they embed concrete devices in their signature.

So to fix the error, you need to change the mesh in `wsc` and `shmap` which will lead to a tracing cache miss (because function id is now different) and consequently a lowering to stableHLO cache miss. Explaining via an example:

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(mesh1, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # DEVICE MISMATCH ERROR!
```

The same problem exists for `shard_map` since it takes a mesh with concrete devices in its signature.

**Okay, so how do you fix this?**

As mentioned above, we need the above program to work and get tracing and lowering cache hits (**cache hits is the most important** part here)

The approach in this change allows `with_sharding_constraint` to accept a `NamedSharding(abstract_mesh, pspec)` as input. This leads to no errors downstream and we get tracing and lowering cache hits since we don't encode the concrete devices anymore, just the axis names and axis sizes of the mesh.

**The important part is that the concrete device information should only come from the arguments. Inside `jax.jit`, you should never reference concrete devices ever.**

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = with_sharding_constraint(x, NamedSharding(abstract_mesh, P('x')))
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

**One caveat is that this only works with `jax.NamedSharding` but that's fine because `NamedSharding` is the most used `Sharding` in JAX.**

**What about `shard_map`?**

shard_map's signature will be: `shmap(f, mesh: Mesh | AbstractMesh, in_specs: Specs, out_specs: Specs)`.

```
mesh1 = Mesh(jax.devices()[:2], 'x')
mesh2 = Mesh(jax.devices()[2:4], 'x')

arr_mesh1 = jax.device_put(np.arange(8), NamedSharding(mesh1, P()))
arr_mesh2 = jax.device_put(np.arange(8), NamedSharding(mesh2, P()))

# Creating abstract mesh with mesh1 but since both meshes have the same shape (names
# and axis size), it should be ok.
abstract_mesh = jax.sharding.AbstractMesh(mesh1.shape_tuple)

@jax.jit
def f(x):
  y = shard_map(lambda x: x, mesh=abstract_mesh, in_specs=P('x'), out_specs=P('x'))(x)
  return y * 2

f(arr_mesh1)
f(arr_mesh2)  # tracing and lowering cache hit
```

This is a fully backwards-compatible change, so your current code will continue to work as is, but you can opt into this new behavior and get all the benefits!

PiperOrigin-RevId: 662670932

											
										
										
											2024-08-13 15:17:30 -07:00
+								    sharding = NamedSharding._from_parsed_pspec(
 								        x.sharding.mesh, sharding._parsed_pspec)
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								  if layout is None:
 								    if hasattr(x, 'sharding') and x.sharding.is_equivalent_to(sharding, x.ndim):
 								      return x
 								    # Run a jit here to raise good errors when device assignment don't match.
 								    return api.jit(_identity_fn, out_shardings=sharding)(x)
 								  else:
 								    if (hasattr(x, 'layout') and x.layout.device_local_layout == layout and
 								        x.sharding.is_equivalent_to(sharding, x.ndim)):
 								      return x
 								    return api.jit(_identity_fn, out_shardings=Layout(layout, sharding))(x)
-												Add impl rule for with_sharding_constraint so that users can use their functions with and without a jit.

The semantics of eager wsc is the same as within a jit i.e. it will reshard to the given sharding only if the devices are the same and in the same order.

Eager wsc won't work as expected with AD transpose because there is no `src` argument to reverse the shardings when transposing; it was decided that this is fine for now. `jax.device_put` should be the API to use for that.

PiperOrigin-RevId: 532858670

											
										
										
											2023-05-17 11:49:31 -07:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								sharding_constraint_p = core.Primitive("sharding_constraint")
 								sharding_constraint_p.def_impl(_sharding_constraint_impl)
 								sharding_constraint_p.def_abstract_eval(lambda x, **_: x)
 								ad.deflinear2(sharding_constraint_p,
 								              lambda ct, _, **params: (sharding_constraint_p.bind(ct, **params),))
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								def _sharding_constraint_hlo_lowering(ctx, x_node, *, sharding, layout,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								                                      resource_env, unconstrained_dims):
 								  aval, = ctx.avals_in
-												[shape_poly] Lowering sharding annotations in presence of dynamic shapes

Sharding annotations are lowered to custom calls, and in presence of dynamic shapes
we must use the `indices_of_shape_operands` attribute to hlo.CustomCall.
In order to be able to generate the code to compute the result shapes
we must pass the `LoweringRuleContext` and the result abstract value
to the lowering helpers that generate the custom calls.

The above is easy everywhere, except for the sharding annotations for
the inputs and outputs for a function, because we do not yet have
a LoweringRuleContext available.

This code is tested by tests that are still disabled in sharding_test.
They can be enabled once StableHLO improves the support for
dynamic shapes for custom calls: https://github.com/openxla/stablehlo/issues/1367

											
										
										
											2023-04-05 09:38:37 +02:00
+								  out_aval, = ctx.avals_out
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  axis_ctx = ctx.module_context.axis_context
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								  if (isinstance(axis_ctx, sharding_impls.SPMDAxisContext) and
 								      axis_ctx.manual_axes):
 								    sharding = mlir.add_manual_axes(axis_ctx, sharding, aval.ndim)
-												#sdy Support with_sharding_constraint lowering through Shardy.

PiperOrigin-RevId: 655905063

											
										
										
											2024-07-25 04:20:09 -07:00
+								  if config.use_shardy_partitioner.value:
 								    sharding = sharding._to_sdy_sharding(aval.ndim)
 								  else:
 								    sharding = sharding._to_xla_hlo_sharding(aval.ndim).to_proto()
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								  out = mlir.wrap_with_sharding_op(
-												#sdy Support with_sharding_constraint lowering through Shardy.

PiperOrigin-RevId: 655905063

											
										
										
											2024-07-25 04:20:09 -07:00
+								      ctx, x_node, out_aval, sharding, unspecified_dims=unconstrained_dims)
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								  if layout is not None:
 								    out = mlir.wrap_with_layout_op(ctx, out, out_aval, layout, aval)
 								  return [out]
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								mlir.register_lowering(sharding_constraint_p,
 								                       _sharding_constraint_hlo_lowering)
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								def _sharding_constraint_batcher(
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								    spmd_axis_name, axis_size, axis_name, main_type, vals_in,
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								    dims_in, sharding, layout, resource_env, unconstrained_dims):
-												add error checks for vmap spmd_axis_name

											
										
										
											2024-05-04 03:27:31 +00:00
+								  if spmd_axis_name is not None and isinstance(sharding, NamedSharding):
 								    used = {n for ns in sharding.spec
 								            for n in (ns if isinstance(ns, tuple) else (ns,))}
 								    if set(spmd_axis_name) & used:
-												Improve the error message when users pass DeviceLocalLayout.AUTO to `jax.jit` and a jax.Array as an argument.

PiperOrigin-RevId: 638797194

											
										
										
											2024-05-30 15:06:12 -07:00
+								      raise ValueError(f"vmap spmd_axis_name {spmd_axis_name} cannot appear in "
 								                       "with_sharding_constraint spec, but got spec "
 								                       f"{sharding.spec}")
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  x, = vals_in
 								  d, = dims_in
-												Make sure that the sharding and unconstrained_dims in with_sharding_constraint are correct when wsc is vmapped.

In other words, if unconstrained_dims is specified, then the sharding should also contain P.UNCONSTRAINED under vmap.

PiperOrigin-RevId: 638843222

											
										
										
											2024-05-30 17:42:14 -07:00
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								  unconstrained_dims = {ud + (d <= ud) for ud in unconstrained_dims}
 								  if spmd_axis_name is None:
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								    unconstrained_dims.add(d)
-												Make sure that the sharding and unconstrained_dims in with_sharding_constraint are correct when wsc is vmapped.

In other words, if unconstrained_dims is specified, then the sharding should also contain P.UNCONSTRAINED under vmap.

PiperOrigin-RevId: 638843222

											
										
										
											2024-05-30 17:42:14 -07:00
 								  vmapped_sharding = _pjit_batcher_for_sharding(
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								      sharding, d, spmd_axis_name, resource_env.physical_mesh, x.ndim)
-												Make sure that the sharding and unconstrained_dims in with_sharding_constraint are correct when wsc is vmapped.

In other words, if unconstrained_dims is specified, then the sharding should also contain P.UNCONSTRAINED under vmap.

PiperOrigin-RevId: 638843222

											
										
										
											2024-05-30 17:42:14 -07:00
+								  if unconstrained_dims and isinstance(vmapped_sharding, NamedSharding):
 								    new_spec = list(vmapped_sharding.spec) + [None] * (x.ndim - len(vmapped_sharding.spec))
 								    for u in unconstrained_dims:
 								      new_spec[u] = PartitionSpec.UNCONSTRAINED
 								    vmapped_sharding = NamedSharding(
 								        vmapped_sharding.mesh, PartitionSpec(*new_spec))
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								  # TODO(yashkatariya): Figure out layouts should change under vmap.
 								  if layout is not None:
-												Make the vmap(jit) or vmap(wsc) with a concrete layout error more informative

PiperOrigin-RevId: 656176702

											
										
										
											2024-07-25 18:31:50 -07:00
+								    raise NotImplementedError(
 								        'Concrete layout is not supported for vmap(with_sharding_constraint). '
 								        f'Got layout {layout}')
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  y = sharding_constraint_p.bind(
 								      x,
-												Make sure that the sharding and unconstrained_dims in with_sharding_constraint are correct when wsc is vmapped.

In other words, if unconstrained_dims is specified, then the sharding should also contain P.UNCONSTRAINED under vmap.

PiperOrigin-RevId: 638843222

											
										
										
											2024-05-30 17:42:14 -07:00
+								      sharding=vmapped_sharding,
-												Add concrete layout API to JAX. The API takes `major_to_minor: tuple[int, ...]` and `tiling: tuple[tuple[int, ...], ...]` as the arguments. Allows users to pass layouts to `with_sharding_constraint` to constrain the layout + sharding.

`sub_byte_element_size_in_bits` is a lowering only thing for now (since we know the dtype of the aval so JAX can add the appropriate value). We can expose it to the user API if required.

memory space is exposed via JAX memories API so it doesn't have to be in the layout API.

Also expose `_xla_layout` as a private API from `PJRTLayout` so that we can access fields to create JAX layouts.

Add constructors to `xla::Layout` so that JAX can create Layouts with minor_to_major and tiling information.

PiperOrigin-RevId: 647487510

											
										
										
											2024-06-27 16:46:44 -07:00
+								      layout=layout,
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								      resource_env=resource_env,
 								      unconstrained_dims=unconstrained_dims)
 								  return y, d
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								batching.spmd_axis_primitive_batchers[sharding_constraint_p] = _sharding_constraint_batcher
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								batching.axis_primitive_batchers[sharding_constraint_p] = partial(
-												Simplify pjit's batching rule now that xmap is deleted. Also do cleanup around adding manual axes under shard_map

PiperOrigin-RevId: 655776234

											
										
										
											2024-07-24 19:01:31 -07:00
+								    _sharding_constraint_batcher, None)
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
 								# -------------------- helpers --------------------
 								def get_unconstrained_dims(sharding: NamedSharding):
-												Improve pytype inference for Sharding type.

* Define use_cpp_class and use_cpp_method decorators as no-ops for type checking.
* Remove the use of abc.ABC when defining the Sharding type. This triggers a pytype bug: the easiest fix seems to be to skip the use of the ABC.
* Write use_cpp_class decorator differently on ArrayImpl to work around pytype bug.
* Fix a few new type errors.

PiperOrigin-RevId: 516631428

											
										
										
											2023-03-14 14:19:25 -07:00
+								  assert sharding._parsed_pspec is not None
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
+								  return {i for i, axes in enumerate(sharding._parsed_pspec)
 								          if axes is None}
-												Make looking up shardings from executable consistent. If `out_shardings` are specified on `jit`, always check it against the `get_output_shardings` from the executable.

PiperOrigin-RevId: 583456869

											
										
										
											2023-11-17 12:18:46 -08:00
+								def _get_partition_spec(
 								    ppspec: Sequence[ParsedPartitionSpec]) -> Sequence[PartitionSpec]:
-												Split sharding_impls into its own Bazel target.

* Move dependencies of sharding_impls into sharding_impls to avoid creating cyclic dependencies.
* Fix a handful of new pytype errors.

PiperOrigin-RevId: 523146076

											
										
										
											2023-04-10 10:15:08 -07:00
+								  return [get_single_pspec(p) for p in ppspec]
-												Move `pjit.py` to `jax/_src` in preparation for merging the `jit` and `pjit` frontend APIs

PiperOrigin-RevId: 495944279

											
										
										
											2022-12-16 13:06:38 -08:00
-												Prune some exports from jax.experimental.pjit.

jax.experimental.pjit is deprecated in its entirety (use "jit" instead), and experimental APIs have no stability promises.

PiperOrigin-RevId: 552903601

											
										
										
											2023-08-01 13:26:43 -07:00
def get_op_sharding_from_executable(
    executable) -> tuple[Sequence[xc.OpSharding], Sequence[xc.OpSharding]]:
  """Returns the parameter and output op shardings reported by an executable.

  Args:
    executable: An object exposing ``get_parameter_shardings()`` and
      ``get_output_shardings()``. Either method may return ``None``.

  Returns:
    A pair ``(in_op_shardings, out_op_shardings)``. Each element is whatever
    the corresponding executable method returned, or an empty list when that
    method returned ``None``.
  """
  # Default to "no shardings" and only overwrite when the executable actually
  # reports something; an empty (but non-None) result is passed through as is.
  in_op_shardings: Sequence[xc.OpSharding] = []
  parameter_shardings_from_xla = executable.get_parameter_shardings()
  if parameter_shardings_from_xla is not None:
    in_op_shardings = parameter_shardings_from_xla

  out_op_shardings: Sequence[xc.OpSharding] = []
  output_shardings_from_xla = executable.get_output_shardings()
  if output_shardings_from_xla is not None:
    out_op_shardings = output_shardings_from_xla

  return in_op_shardings, out_op_shardings
-												Make looking up shardings from executable consistent. If `out_shardings` are specified on `jit`, always check it against the `get_output_shardings` from the executable.

PiperOrigin-RevId: 583456869

											
										
										
											2023-11-17 12:18:46 -08:00
def _get_ppspec_from_executable(
    executable, mesh
) -> tuple[Sequence[ParsedPartitionSpec], Sequence[ParsedPartitionSpec]]:
  """Returns parsed partition specs for an executable's inputs and outputs.

  The op shardings are read directly from ``executable`` through
  ``get_op_sharding_from_executable``. Each one is then expanded with
  ``parse_flatten_op_sharding(op_sharding, mesh)`` and the results are
  concatenated in order.

  Args:
    executable: The executable to query; forwarded unchanged to
      ``get_op_sharding_from_executable``.
    mesh: Forwarded unchanged as the second argument of
      ``parse_flatten_op_sharding``.

  Returns:
    A pair ``(in_ppspec, out_ppspec)`` of flat lists. Either list is empty
    when the executable reports no shardings for that side.
  """
  input_op_shardings, output_op_shardings = get_op_sharding_from_executable(
      executable)

  # One op sharding may contribute several entries, hence ``extend``.
  in_ppspec: list[ParsedPartitionSpec] = []
  for s in input_op_shardings:
    in_ppspec.extend(parse_flatten_op_sharding(s, mesh))

  out_ppspec: list[ParsedPartitionSpec] = []
  for s in output_op_shardings:
    out_ppspec.extend(parse_flatten_op_sharding(s, mesh))

  return in_ppspec, out_ppspec
-												Prune some exports from jax.experimental.pjit.

jax.experimental.pjit is deprecated in its entirety (use "jit" instead), and experimental APIs have no stability promises.

PiperOrigin-RevId: 552903601

											
										
										
											2023-08-01 13:26:43 -07:00
def get_pspec_from_executable(
    executable, mesh: pxla.Mesh
) -> tuple[tuple[PartitionSpec, ...], tuple[PartitionSpec, ...]]:
  """Returns the input and output ``PartitionSpec``s of an executable.

  Args:
    executable: The executable to query; forwarded unchanged to
      ``_get_ppspec_from_executable``.
    mesh: The mesh forwarded to ``_get_ppspec_from_executable``.

  Returns:
    A pair ``(in_partition_specs, out_partition_specs)``, each a tuple built
    by passing the corresponding parsed specs through ``_get_partition_spec``.
  """
  in_ppspec, out_ppspec = _get_ppspec_from_executable(executable, mesh)
  out_partition_spec = _get_partition_spec(out_ppspec)
  in_partition_spec = _get_partition_spec(in_ppspec)
  return tuple(in_partition_spec), tuple(out_partition_spec)