# Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ Implements the NumPy API, using the primitives in :mod:`jax.lax`. NumPy operations are implemented in Python in terms of the primitive operations in :mod:`jax.lax`. Since NumPy operations are not primitive and instead are implemented in terms of :mod:`jax.lax` operations, we do not need to define transformation rules such as gradient or batching rules. Instead, transformations for NumPy primitives can be derived from the transformation rules for the underlying :code:`lax` primitives. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from distutils.util import strtobool import collections try: from collections.abc import Sequence except ImportError: # python 2 from collections import Sequence import itertools import os import re import string import types import warnings import numpy as onp import opt_einsum import six from six.moves import builtins, xrange from jax import jit, device_put, custom_transforms, defjvp from .. import core from .. import dtypes from ..abstract_arrays import UnshapedArray, ShapedArray, ConcreteArray from ..config import flags from ..interpreters.xla import DeviceArray from .. import lax from ..util import partial, get_module_functions, unzip2, prod as _prod from ..lib import pytree from ..lib import xla_client FLAGS = flags.FLAGS flags.DEFINE_enum( 'jax_numpy_rank_promotion', os.getenv('JAX_NUMPY_RANK_PROMOTION', 'allow'), enum_values=['allow', 'warn', 'raise'], help= 'Control NumPy-style automatic rank promotion broadcasting ' '("allow", "warn", or "raise").') if six.PY3: def removechars(s, chars): return s.translate(str.maketrans(dict.fromkeys(chars))) else: def removechars(s, chars): return s.translate(None, ''.join(chars)) newaxis = None # We replace some builtin names to follow Numpy's API, so we capture here. _abs = builtins.abs _all = builtins.all _any = builtins.any _max = builtins.max _min = builtins.min _sum = builtins.sum # We need some numpy scalars pi = onp.pi e = onp.e inf = onp.inf NINF = onp.NINF nan = onp.nan # And some numpy utility functions set_printoptions = onp.set_printoptions # We want isinstance(x, np.ndarray) checks in user code to work with the our # array-like types, including DeviceArray and UnshapedArray (i.e. the abstract # array base class). We can override the isinstance behavior directly, without # having the complexity of multiple inheritance on those classes, by defining # the ndarray class to have a metaclass with special __instancecheck__ behavior. _arraylike_types = (onp.ndarray, UnshapedArray, DeviceArray) class _ArrayMeta(type(onp.ndarray)): """Metaclass for overriding ndarray isinstance checks.""" def __instancecheck__(self, instance): try: return isinstance(instance.aval, _arraylike_types) except AttributeError: return isinstance(instance, _arraylike_types) class ndarray(six.with_metaclass(_ArrayMeta, onp.ndarray)): def __init__(shape, dtype=None, buffer=None, offset=0, strides=None, order=None): raise TypeError("jax.numpy.ndarray() should not be instantiated explicitly." " Use jax.numpy.array, or jax.numpy.zeros instead.") iscomplexobj = onp.iscomplexobj shape = _shape = onp.shape ndim = _ndim = onp.ndim size = onp.size _dtype = dtypes.result_type bool_ = onp.bool_ int_ = dtypes.int_ float_ = dtypes.float_ complex_ = dtypes.complex_ uint8 = onp.uint8 uint16 = onp.uint16 uint32 = onp.uint32 uint64 = onp.uint64 int8 = onp.int8 int16 = onp.int16 int32 = onp.int32 int64 = onp.int64 bfloat16 = dtypes.bfloat16 float16 = onp.float16 float32 = single = onp.float32 float64 = double = onp.float64 complex64 = csingle = onp.complex64 complex128 = cdouble = onp.complex128 flexible = onp.flexible character = onp.character object_ = onp.object_ number = onp.number inexact = onp.inexact complexfloating = onp.complexfloating floating = onp.floating integer = onp.integer signedinteger = onp.signedinteger unsignedinteger = onp.unsignedinteger iinfo = dtypes.iinfo dtype = onp.dtype can_cast = dtypes.can_cast issubsctype = dtypes.issubsctype result_type = dtypes.result_type promote_types = dtypes.promote_types ComplexWarning = onp.ComplexWarning array_str = onp.array_str array_repr = onp.array_repr save = onp.save savez = onp.savez load = onp.load ### utility functions def _promote_shapes(fun_name, *args): """Prepend implicit leading singleton dimensions for Numpy broadcasting.""" if len(args) < 2: return args else: shapes = [shape(arg) for arg in args] nonscalar_ranks = [len(shp) for shp in shapes if shp] if not nonscalar_ranks or len(set(nonscalar_ranks)) == 1: return args else: if FLAGS.jax_numpy_rank_promotion != "allow": _rank_promotion_warning_or_error(fun_name, shapes) result_rank = len(lax.broadcast_shapes(*shapes)) return [lax.reshape(arg, (1,) * (result_rank - len(shp)) + shp) if shp and len(shp) != result_rank else arg for arg, shp in zip(args, shapes)] def _rank_promotion_warning_or_error(fun_name, shapes): if FLAGS.jax_numpy_rank_promotion == "warn": msg = ("Following NumPy automatic rank promotion for {} on shapes {}. " "Set the jax_numpy_rank_promotion config option to 'allow' to " "disable this warning; for more information, see " "https://jax.readthedocs.io/en/latest/rank_promotion_warning.html.") warnings.warn(msg.format(fun_name, ' '.join(map(str, shapes)))) elif FLAGS.jax_numpy_rank_promotion == "raise": msg = ("Operands could not be broadcast together for {} on shapes {} " "and with the config option jax_numpy_rank_promotion='raise'. " "For more information, see " "https://jax.readthedocs.io/en/latest/rank_promotion_warning.html.") raise ValueError(msg.format(fun_name, ' '.join(map(str, shapes)))) def _promote_dtypes(*args): """Convenience function to apply Numpy argument dtype promotion.""" # TODO(dougalm,mattjj): This is a performance bottleneck. Consider memoizing. if len(args) < 2: return args else: to_dtype = result_type(*args) return [lax.convert_element_type(x, to_dtype) for x in args] def _promote_to_result_dtype(op, *args): """Convenience function to promote args directly to the op's result dtype.""" to_dtype = _result_dtype(op, *args) return [lax.convert_element_type(arg, to_dtype) for arg in args] def _result_dtype(op, *args): """Compute result dtype of applying op to arguments with given dtypes.""" args = [onp.ones((0,) * ndim(arg), _dtype(arg)) for arg in args] return _dtype(op(*args)) def _arraylike(x): return isinstance(x, ndarray) or isscalar(x) def _check_arraylike(fun_name, *args): """Check if all args fit JAX's definition of arraylike (ndarray or scalar).""" if _any(not _arraylike(arg) for arg in args): pos, arg = next((i, arg) for i, arg in enumerate(args) if not _arraylike(arg)) msg = "{} requires ndarray or scalar arguments, got {} at position {}." raise TypeError(msg.format(fun_name, type(arg), pos)) def _promote_args(fun_name, *args): """Convenience function to apply Numpy argument shape and dtype promotion.""" _check_arraylike(fun_name, *args) return _promote_shapes(fun_name, *_promote_dtypes(*args)) def _promote_args_like(op, *args): """Convenience function to apply shape and dtype promotion to result type.""" _check_arraylike(op.__name__, *args) return _promote_shapes(op.__name__, *_promote_to_result_dtype(op, *args)) def _constant_like(x, const): return onp.array(const, dtype=_dtype(x)) def update_numpydoc(docstr, fun, op): '''Transforms the numpy docstring to remove references of parameters that are supported by the numpy version but not the JAX version''' #Some numpy functions have an extra tab at the beginning of each line, #If this function is one of those we remove this extra tab from all the lines if not hasattr(op, '__code__'): return docstr if docstr[:4] == ' ': lines = docstr.split('\n') for idx, line in enumerate(lines): lines[idx] = line.replace(' ', '', 1) docstr = '\n'.join(lines) begin_idx = docstr.find("Parameters") begin_idx = docstr.find("--\n", begin_idx) + 2 end_idx = docstr.find("Returns", begin_idx) parameters = docstr[begin_idx:end_idx] param_list = parameters.replace('\n ', '@@').split('\n') for idx, p in enumerate(param_list): param = p[:p.find(' : ')].split(", ")[0] if param not in op.__code__.co_varnames: param_list[idx] = '' param_list = [param for param in param_list if param != ''] parameters = '\n'.join(param_list).replace('@@', '\n ') return docstr[:begin_idx + 1] + parameters + docstr[end_idx - 2:] _numpy_signature_re = re.compile(r'^([\w., ]+=)?\s*[\w\.]+\(.*\)$') def _wraps(fun, update_doc=True, lax_description=""): """Like functools.wraps but works with numpy.ufuncs. It is important that when wrapping numpy functions the parameters names in the original function and in the JAX version are the same Parameters: fun: The function being wrapped update_doc: whether to transform the numpy docstring to remove references of parameters that are supported by the numpy version but not the JAX version. If False, include the numpy docstring verbatim. """ def wrap(op): if not hasattr(fun, '__doc__') or fun.__doc__ is None: return op try: # Numpy doc comments have the form: # fn(x, y, z) (optional) # # A one-line summary # # ... everything else ... # We (a) move the summary to the top, since it is what the Sphinx # autosummary extension expects, and (b) add a comment below the summary # to the effect that this is a LAX wrapper of a Numpy function. sections = fun.__doc__.split("\n\n") signatures = [] summary = None for i in xrange(len(sections)): if _numpy_signature_re.match(sections[i]): signatures.append(sections[i]) else: summary = sections[i].strip() break body = "\n\n".join(signatures + sections[i + 1:]) if update_doc: body = update_numpydoc(body, fun, op) desc = lax_description + "\n" if lax_description else "" docstr = ( "{summary}\n\nLAX-backend implementation of :func:`{fun}`.\n" "{lax_description}Original docstring below.\n\n{body}" .format(summary=summary, lax_description=desc, fun=fun.__name__, body=body)) op.__name__ = fun.__name__ op.__doc__ = docstr finally: return op return wrap def _canonicalize_axis(axis, num_dims): """Canonicalize an axis in (-num_dims, num_dims) to [0, num_dims).""" axis = int(axis) if axis < 0: axis = axis + num_dims if axis < 0 or axis >= num_dims: raise ValueError( "axis {} is out of bounds for array of dimension {}".format( axis, num_dims)) return axis ### implementations of numpy functions in terms of lax @_wraps(onp.finfo) def finfo(dtype): return dtypes.finfo(dtype) @_wraps(onp.issubdtype) def issubdtype(arg1, arg2): return dtypes.issubdtype(arg1, arg2) issubdtype = dtypes.issubdtype @_wraps(onp.isscalar) def isscalar(num): return dtypes.is_python_scalar(num) or onp.isscalar(num) @_wraps(onp.result_type) def result_type(*args): return dtypes.result_type(*args) def _one_to_one_unop(numpy_fn, lax_fn, promote_like=False): if promote_like: fn = lambda x: lax_fn(lax.convert_element_type(x, _result_dtype(numpy_fn, x))) else: fn = lambda x: lax_fn(x) return _wraps(numpy_fn)(fn) def _one_to_one_binop(numpy_fn, lax_fn, promote_like=False): if promote_like: fn = lambda x1, x2: lax_fn(*_promote_args_like(numpy_fn, x1, x2)) else: fn = lambda x1, x2: lax_fn(*_promote_args(numpy_fn.__name__, x1, x2)) return _wraps(numpy_fn)(fn) absolute = abs = _one_to_one_unop(onp.absolute, lax.abs) fabs = _one_to_one_unop(onp.fabs, lax.abs, True) bitwise_not = _one_to_one_unop(onp.bitwise_not, lax.bitwise_not) negative = _one_to_one_unop(onp.negative, lax.neg) positive = _one_to_one_unop(onp.positive, lambda x: x) sign = _one_to_one_unop(onp.sign, lax.sign) floor = _one_to_one_unop(onp.floor, lax.floor, True) ceil = _one_to_one_unop(onp.ceil, lax.ceil, True) exp = _one_to_one_unop(onp.exp, lax.exp, True) log = _one_to_one_unop(onp.log, lax.log, True) expm1 = _one_to_one_unop(onp.expm1, lax.expm1, True) log1p = _one_to_one_unop(onp.log1p, lax.log1p, True) sin = _one_to_one_unop(onp.sin, lax.sin, True) cos = _one_to_one_unop(onp.cos, lax.cos, True) tan = _one_to_one_unop(onp.tan, lax.tan, True) arcsin = _one_to_one_unop(onp.arcsin, lax.asin, True) arccos = _one_to_one_unop(onp.arccos, lax.acos, True) arctan = _one_to_one_unop(onp.arctan, lax.atan, True) sinh = _one_to_one_unop(onp.sinh, lax.sinh, True) cosh = _one_to_one_unop(onp.cosh, lax.cosh, True) tanh = _one_to_one_unop(onp.tanh, lax.tanh, True) sqrt = _one_to_one_unop(onp.sqrt, lax.sqrt, True) add = _one_to_one_binop(onp.add, lax.add) bitwise_and = _one_to_one_binop(onp.bitwise_and, lax.bitwise_and) bitwise_or = _one_to_one_binop(onp.bitwise_or, lax.bitwise_or) bitwise_xor = _one_to_one_binop(onp.bitwise_xor, lax.bitwise_xor) right_shift = _one_to_one_binop(onp.right_shift, lax.shift_right_arithmetic) left_shift = _one_to_one_binop(onp.left_shift, lax.shift_left) equal = _one_to_one_binop(onp.equal, lax.eq) multiply = _one_to_one_binop(onp.multiply, lax.mul) not_equal = _one_to_one_binop(onp.not_equal, lax.ne) subtract = _one_to_one_binop(onp.subtract, lax.sub) arctan2 = _one_to_one_binop(onp.arctan2, lax.atan2, True) minimum = _one_to_one_binop(onp.minimum, lax.min) maximum = _one_to_one_binop(onp.maximum, lax.max) float_power = _one_to_one_binop(onp.float_power, lax.pow, True) def _comparison_op(numpy_fn, lax_fn): def fn(x1, x2): x1, x2 = _promote_args(numpy_fn.__name__, x1, x2) # Comparison on complex types are defined as a lexicographic ordering on # the (real, imag) pair. if issubdtype(_dtype(x1), complexfloating): rx = lax.real(x1) ry = lax.real(x2) return lax.select(lax.eq(rx, ry), lax_fn(lax.imag(x1), lax.imag(x2)), lax_fn(rx, ry)) return lax_fn(x1, x2) return _wraps(numpy_fn)(fn) greater_equal = _comparison_op(onp.greater_equal, lax.ge) greater = _comparison_op(onp.greater, lax.gt) less_equal = _comparison_op(onp.less_equal, lax.le) less = _comparison_op(onp.less, lax.lt) def _logical_op(np_op, bitwise_op): @_wraps(np_op, update_doc=False) def op(*args): zero = lambda x: lax.full_like(x, shape=(), fill_value=0) args = (x if issubdtype(_dtype(x), onp.bool_) else lax.ne(x, zero(x)) for x in args) return bitwise_op(*_promote_args(np_op.__name__, *args)) return op logical_and = _logical_op(onp.logical_and, lax.bitwise_and) logical_not = _logical_op(onp.logical_not, lax.bitwise_not) logical_or = _logical_op(onp.logical_or, lax.bitwise_or) logical_xor = _logical_op(onp.logical_xor, lax.bitwise_xor) @_wraps(onp.true_divide) def true_divide(x1, x2): result_dtype = _result_dtype(onp.true_divide, x1, x2) x1, x2 = _promote_shapes("true_divide", x1, x2) return lax.div(lax.convert_element_type(x1, result_dtype), lax.convert_element_type(x2, result_dtype)) @_wraps(onp.divide) def divide(x1, x2): # decide whether to perform integer division based on Numpy result dtype, as a # way to check whether Python 3 style division is active in Numpy result_dtype = _result_dtype(onp.divide, x1, x2) if issubdtype(result_dtype, onp.integer): return floor_divide(x1, x2) else: return true_divide(x1, x2) @_wraps(onp.floor_divide) def floor_divide(x1, x2): x1, x2 = _promote_args("floor_divide", x1, x2) dtype = _dtype(x1) if issubdtype(dtype, integer): quotient = lax.div(x1, x2) select = logical_and(lax.sign(x1) != lax.sign(x2), lax.rem(x1, x2) != 0) # TODO(mattjj): investigate why subtracting a scalar was causing promotion return where(select, quotient - onp.array(1, _dtype(quotient)), quotient) elif issubdtype(dtype, complexfloating): x1r = lax.real(x1) x1i = lax.imag(x1) x2r = lax.real(x2) x2i = lax.imag(x2) which = lax.ge(lax.abs(x2r), lax.abs(x2i)) rat1 = where(which, lax._const(x2i, 1), lax.div(x2r, x2i)) rat2 = where(which, lax.div(x2i, x2r), lax._const(x2i, 1)) out = lax.floor(lax.div(lax.add(lax.mul(x1r, rat1), lax.mul(x1i, rat2)), lax.add(lax.mul(x2r, rat1), lax.mul(x2i, rat2)))) return lax.convert_element_type(out, dtype) else: return _float_divmod(x1, x2)[0] @_wraps(onp.divmod) def divmod(x1, x2): x1, x2 = _promote_args("divmod", x1, x2) if issubdtype(_dtype(x1), onp.integer): return floor_divide(x1, x2), remainder(x1, x2) else: return _float_divmod(x1, x2) def _float_divmod(x1, x2): # see float_divmod in floatobject.c of CPython mod = lax.rem(x1, x2) div = lax.div(lax.sub(x1, mod), x2) ind = lax.bitwise_and(mod != 0, lax.sign(x2) != lax.sign(mod)) mod = lax.select(ind, mod + x2, mod) div = lax.select(ind, div - _constant_like(div, 1), div) return lax.round(div), mod @_wraps(onp.power) def power(x1, x2): x1 = asarray(x1) x2 = asarray(x2) x1, x2 = _promote_args_like(onp.power, x1, x2) dtype = _dtype(x1) if not issubdtype(dtype, integer): return lax.pow(x1, x2) # Integer power => use binary exponentiation. # TODO(phawkins): add integer pow support to XLA. bits = 6 # Anything more would overflow for any x1 > 1 acc = ones(shape(x1), dtype=dtype) for _ in xrange(bits): acc = where(lax.bitwise_and(x2, _constant_like(x2, 1)), lax.mul(acc, x1), acc) x1 = lax.mul(x1, x1) x2 = lax.shift_right_logical(x2, _constant_like(x2, 1)) return acc @_wraps(onp.logaddexp) def logaddexp(x1, x2): x1, x2 = _promote_shapes("logaddexp", *_promote_to_result_dtype(onp.logaddexp, x1, x2)) amax = lax.max(x1, x2) delta = lax.sub(x1, x2) return lax.select(isnan(delta), lax.add(x1, x2), # NaNs or infinities of the same sign. lax.add(amax, lax.log1p(lax.exp(-lax.abs(delta))))) @_wraps(onp.logaddexp2) def logaddexp2(x1, x2): x1, x2 = _promote_shapes("logaddexp2", *_promote_to_result_dtype(onp.logaddexp2, x1, x2)) amax = lax.max(x1, x2) delta = lax.sub(x1, x2) return lax.select(isnan(delta), lax.add(x1, x2), # NaNs or infinities of the same sign. lax.add(amax, lax.div(lax.log1p(exp2(-lax.abs(delta))), _constant_like(x1, onp.log(2))))) @_wraps(onp.log2) def log2(x): x, = _promote_to_result_dtype(onp.log2, x) return lax.div(lax.log(x), lax.log(_constant_like(x, 2))) @_wraps(onp.log10) def log10(x): x, = _promote_to_result_dtype(onp.log10, x) return lax.div(lax.log(x), lax.log(_constant_like(x, 10))) @_wraps(onp.exp2) def exp2(x): x, = _promote_to_result_dtype(onp.exp2, x) return lax.exp(lax.mul(lax.log(_constant_like(x, 2)), x)) @_wraps(onp.signbit) def signbit(x): x, = _promote_shapes("signbit", x) dtype = _dtype(x) if issubdtype(dtype, integer): return lax.lt(x, _constant_like(x, 0)) elif issubdtype(dtype, bool_): return full_like(x, False, dtype=bool_) elif not issubdtype(dtype, floating): raise ValueError( "jax.numpy.signbit is not well defined for %s" % dtype) # TPU supports BF16 but not S16 types, so as a workaround, convert BF16 to # F32. if dtype == bfloat16: dtype = float32 x = lax.convert_element_type(x, float32) info = finfo(dtype) if info.bits == 16: int_type = onp.int16 elif info.bits == 32: int_type = onp.int32 elif info.bits == 64: int_type = onp.int64 else: raise NotImplementedError( "jax.numpy.signbit only supports 16, 32, and 64-bit types.") x = lax.bitcast_convert_type(x, int_type) return lax.convert_element_type(x >> (info.nexp + info.nmant), onp.bool) @_wraps(onp.remainder) def remainder(x1, x2): x1, x2 = _promote_args("remainder", x1, x2) zero = _constant_like(x1, 0) trunc_mod = lax.rem(x1, x2) trunc_mod_not_zero = lax.ne(trunc_mod, zero) do_plus = lax.bitwise_and( lax.ne(lax.lt(trunc_mod, zero), lax.lt(x2, zero)), trunc_mod_not_zero) return lax.select(do_plus, lax.add(trunc_mod, x2), trunc_mod) mod = remainder fmod = _wraps(onp.fmod)(lambda x1, x2: lax.rem(x1, x2)) @_wraps(onp.cbrt) def cbrt(x): x, = _promote_to_result_dtype(onp.cbrt, x) return lax.sign(x) * power(lax.abs(x), _constant_like(x, 1. / 3.)) @_wraps(onp.square) def square(x): x, = _promote_to_result_dtype(onp.square, x) return x * x @_wraps(onp.deg2rad) def deg2rad(x): x, = _promote_to_result_dtype(onp.deg2rad, x) return lax.mul(x, lax._const(x, pi / 180)) @_wraps(onp.rad2deg) def rad2deg(x): x, = _promote_to_result_dtype(onp.rad2deg, x) return lax.mul(x, lax._const(x, 180 / pi)) degrees = rad2deg radians = deg2rad @_wraps(onp.heaviside) def heaviside(x1, x2): x1, x2 = _promote_to_result_dtype(onp.heaviside, x1, x2) zero = lax._const(x1, 0) return where(lax.lt(x1, zero), zero, where(lax.gt(x1, zero), lax._const(x1, 1), x2)) @_wraps(onp.hypot) def hypot(x1, x2): x1, x2 = _promote_to_result_dtype(onp.hypot, x1, x2) return lax.sqrt(x1*x1 + x2*x2) @_wraps(onp.reciprocal) def reciprocal(x): x, = _promote_to_result_dtype(onp.reciprocal, x) return lax.div(lax._const(x, 1), x) @_wraps(onp.sinc, update_doc=False) def sinc(x): x, = _promote_to_result_dtype(onp.sinc, x) eq_zero = lax.eq(x, lax._const(x, 0)) safe_x = where(eq_zero, lax._const(x, 0), x) pi_x = lax.mul(lax._const(x, pi), safe_x) return where(eq_zero, lax._const(x, 1), lax.div(lax.sin(pi_x), pi_x)) @_wraps(onp.arcsinh) @custom_transforms @jit @lax._upcast_fp16_for_computation def arcsinh(x): # asinh(x) = log(x + sqrt(x**2 + 1)) x, = _promote_to_result_dtype(onp.arcsinh, x) one = lax._const(x, 1) result = lax.log(x + lax.sqrt(x * x + one)) if issubdtype(_dtype(result), onp.complexfloating): return result a = abs(x) sqrt_max_value = onp.sqrt(finfo(_dtype(x)).max) log2 = lax._const(a, onp.log(2)) return lax.select(a < sqrt_max_value, result, lax.sign(x) * (lax.log(a) + log2)) defjvp(arcsinh, lambda g, ans, x: g / lax.sqrt(lax._const(x, 1) + square(x))) @_wraps(onp.arccosh) @jit @lax._upcast_fp16_for_computation def arccosh(x): # acosh(x) = log(x + sqrt((x + 1) * (x - 1))) if x < sqrt_max_value # log(x) + log(2) otherwise x, = _promote_to_result_dtype(onp.arccosh, x) one = lax._const(x, 1) result = lax.log(x + lax.sqrt((x + one) * (x - one))) if issubdtype(_dtype(result), onp.complexfloating): return result sqrt_max_value = onp.sqrt(finfo(_dtype(x)).max) log2 = lax._const(x, onp.log(2)) return lax.select(x < sqrt_max_value, result, lax.log(x) + log2) @_wraps(onp.arctanh) def arctanh(x): # atanh(x) = 0.5 * log((1 + x) / (1 - x)) x, = _promote_to_result_dtype(onp.arctanh, x) one = lax._const(x, 1) result = lax._const(x, 0.5) * lax.log((one + x) / (one - x)) if issubdtype(_dtype(result), onp.complexfloating): return result return lax.select(abs(x) <= 1, result, lax.full_like(x, onp.nan)) @_wraps(onp.transpose) def transpose(a, axes=None): axes = onp.arange(ndim(a))[::-1] if axes is None else axes return lax.transpose(a, axes) @_wraps(onp.rot90) def rot90(m, k=1, axes=(0, 1)): ax1, ax2 = axes ax1 = _canonicalize_axis(ax1, m.ndim) ax2 = _canonicalize_axis(ax2, m.ndim) if ax1 == ax2: raise ValueError("Axes must be different") # same as numpy error k = k % 4 if k == 0: return m elif k == 2: return flip(flip(m, ax1), ax2) else: perm = list(range(m.ndim)) perm[ax1], perm[ax2] = perm[ax2], perm[ax1] if k == 1: return transpose(flip(m, ax2), perm) else: return flip(transpose(m, perm), ax2) @_wraps(onp.flip) def flip(m, axis): return lax.rev(m, [_canonicalize_axis(axis, len(m.shape))]) @_wraps(onp.fliplr) def fliplr(m): return flip(m, 1) @_wraps(onp.flipud) def flipud(m): return flip(m, 0) @_wraps(onp.conjugate) def conjugate(x): return lax.conj(x) if iscomplexobj(x) else x conj = conjugate @_wraps(onp.imag) def imag(val): return lax.imag(val) if iscomplexobj(val) else zeros_like(val) @_wraps(onp.real) def real(val): return lax.real(val) if iscomplexobj(val) else val @_wraps(onp.iscomplex) def iscomplex(x): i = imag(x) return lax.ne(i, lax._const(i, 0)) @_wraps(onp.isreal) def isreal(x): i = imag(x) return lax.eq(i, lax._const(i, 0)) @_wraps(onp.angle) def angle(z): re = real(z) im = imag(z) dtype = _dtype(re) if not issubdtype(dtype, inexact) or ( issubdtype(_dtype(z), floating) and ndim(z) == 0): dtype = dtypes.canonicalize_dtype(float64) re = lax.convert_element_type(re, dtype) im = lax.convert_element_type(im, dtype) return lax.atan2(im, re) @_wraps(onp.diff) def diff(a, n=1, axis=-1,): if not isinstance(a, ndarray) or a.ndim == 0: return a if n == 0: return a if n < 0: raise ValueError( "order must be non-negative but got " + repr(n)) nd = a.ndim slice1 = [slice(None)] * nd slice2 = [slice(None)] * nd slice1[axis] = slice(1, None) slice2[axis] = slice(None, -1) slice1 = tuple(slice1) slice2 = tuple(slice2) op = not_equal if a.dtype == onp.bool_ else subtract for _ in range(n): a = op(a[slice1], a[slice2]) return a @_wraps(onp.isrealobj) def isrealobj(x): return not iscomplexobj(x) @_wraps(onp.reshape) def reshape(a, newshape, order="C"): try: return a.reshape(newshape, order=order) # forward to method for ndarrays except AttributeError: return _reshape(a, newshape, order=order) def _reshape(a, newshape, order="C"): dummy_val = onp.broadcast_to(0, shape(a)) # zero strides computed_newshape = onp.reshape(dummy_val, newshape).shape if order == "C": return lax.reshape(a, computed_newshape, None) elif order == "F": dims = onp.arange(ndim(a))[::-1] return lax.reshape(a, computed_newshape[::-1], dims).T elif order == "A": raise NotImplementedError("np.reshape order=A is not implemented.") else: raise ValueError("Unexpected value for 'order' argument: {}.".format(order)) def _reshape_method(a, *newshape, **kwargs): order = kwargs.pop("order", "C") if len(kwargs) == 1: invalid_kwarg, = kwargs msg = "'{}' is an invalid keyword argument for this function" raise TypeError(msg.format(invalid_kwarg)) # same as NumPy error elif kwargs: invalid_kwargs = "'{}'".format("'".join(kwargs)) msg = "{} are invalid keyword arguments for this function" raise TypeError(msg.format(invalid_kwargs)) # different from NumPy error if len(newshape) == 1 and not isinstance(newshape[0], int): newshape = newshape[0] return _reshape(a, newshape, order=order) @_wraps(onp.ravel) def ravel(a, order="C"): if order == "K": raise NotImplementedError("Ravel not implemented for order='K'.") return reshape(a, (size(a),), order) @_wraps(onp.squeeze) def squeeze(a, axis=None): if 1 not in shape(a): return a if axis is None: newshape = [d for d in shape(a) if d != 1] else: if isinstance(axis, int): axis = (axis,) axis = frozenset(_canonicalize_axis(i, ndim(a)) for i in axis) newshape = [d for i, d in enumerate(shape(a)) if d != 1 or i not in axis] return lax.reshape(a, newshape) @_wraps(onp.expand_dims) def expand_dims(a, axis): shape = _shape(a) axis = _canonicalize_axis(axis, ndim(a) + 1) return lax.reshape(a, shape[:axis] + (1,) + shape[axis:]) @_wraps(onp.swapaxes) def swapaxes(a, axis1, axis2): perm = onp.arange(ndim(a)) perm[axis1], perm[axis2] = perm[axis2], perm[axis1] return lax.transpose(a, perm) @_wraps(onp.moveaxis) def moveaxis(a, source, destination): if isinstance(source, int): source = (source,) if isinstance(destination, int): destination = (destination,) source = tuple(_canonicalize_axis(i, ndim(a)) for i in source) destination = tuple(_canonicalize_axis(i, ndim(a)) for i in destination) if len(source) != len(destination): raise ValueError("Inconsistent number of elements: {} vs {}" .format(len(source), len(destination))) perm = [i for i in range(ndim(a)) if i not in source] for dest, src in sorted(zip(destination, source)): perm.insert(dest, src) return lax.transpose(a, perm) @_wraps(onp.isclose) def isclose(a, b, rtol=1e-05, atol=1e-08): a, b = _promote_args("isclose", asarray(a), asarray(b)) dtype = _dtype(a) if issubdtype(dtype, inexact): if issubdtype(dtype, complexfloating): dtype = _result_dtype(real, a) rtol = lax.convert_element_type(rtol, dtype) atol = lax.convert_element_type(atol, dtype) out = lax.le( lax.abs(lax.sub(a, b)), lax.add(atol, lax.mul(rtol, lax.abs(b)))) return _maybe_numpy_1_13_isclose_behavior(a, out) else: return lax.eq(a, b) numpy_version = tuple(map(int, onp.version.version.split('.')[:2])) if numpy_version < (1, 14): # see discussion at https://github.com/numpy/numpy/pull/9720 def _maybe_numpy_1_13_isclose_behavior(a, out): if size(out) == 1 and issubdtype(_dtype(a), complexfloating): return lax.reshape(out, (1,)) else: return out else: def _maybe_numpy_1_13_isclose_behavior(a, out): return out # The `jit` on `where` exists to avoid materializing constants in cases like # `np.where(np.zeros(1000), 7, 4)`. In op-by-op mode, we don't want to # materialize the broadcast forms of scalar arguments. @_wraps(onp.where, update_doc=False) @jit def where(condition, x=None, y=None): if x is None or y is None: raise ValueError("Must use the three-argument form of where().") if not issubdtype(_dtype(condition), onp.bool_): condition = lax.ne(condition, zeros_like(condition)) condition, x, y = broadcast_arrays(condition, x, y) if not onp.size(x): empty, _ = _promote_dtypes(x, y) return empty else: return lax.select(condition, *_promote_dtypes(x, y)) @_wraps(onp.select) def select(condlist, choicelist, default=0): if len(condlist) != len(choicelist): msg = "condlist must have length equal to choicelist ({} vs {})" raise ValueError(msg.format(len(condlist), len(choicelist))) if len(condlist) == 0: raise ValueError("condlist must be non-empty") output = default for cond, choice in zip(condlist[::-1], choicelist[::-1]): output = where(cond, choice, output) return output def broadcast_arrays(*args): """Like Numpy's broadcast_arrays but doesn't return views.""" shapes = [shape(arg) for arg in args] if len(set(shapes)) == 1: return [arg if isinstance(arg, ndarray) or isscalar(arg) else array(arg) for arg in args] result_shape = lax.broadcast_shapes(*shapes) return [broadcast_to(arg, result_shape) for arg in args] def broadcast_to(arr, shape): """Like Numpy's broadcast_to but doesn't necessarily return views.""" arr = arr if isinstance(arr, ndarray) or isscalar(arr) else array(arr) shape = tuple(map(int, shape)) # check that shape is concrete arr_shape = _shape(arr) if arr_shape == shape: return arr else: nlead = len(shape) - len(arr_shape) compatible = onp.equal(arr_shape, shape[nlead:]) | onp.equal(arr_shape, 1) if nlead < 0 or not onp.all(compatible): msg = "Incompatible shapes for broadcasting: {} and requested shape {}" raise ValueError(msg.format(arr_shape, shape)) diff, = onp.where(onp.not_equal(shape[nlead:], arr_shape)) new_dims = tuple(range(nlead)) + tuple(nlead + diff) kept_dims = tuple(onp.delete(onp.arange(len(shape)), new_dims)) return lax.broadcast_in_dim(squeeze(arr, diff), shape, kept_dims) @_wraps(onp.split) def split(ary, indices_or_sections, axis=0): dummy_val = onp.broadcast_to(0, ary.shape) # zero strides subarrays = onp.split(dummy_val, indices_or_sections, axis) # shapes split_indices = onp.cumsum([0] + [onp.shape(sub)[axis] for sub in subarrays]) starts, ends = [0] * ndim(ary), shape(ary) _subval = lambda x, i, v: lax.subvals(x, [(i, v)]) return [lax.slice(ary, _subval(starts, axis, start), _subval(ends, axis, end)) for start, end in zip(split_indices[:-1], split_indices[1:])] def _split_on_axis(onp_fun, axis): @_wraps(onp_fun, update_doc=False) def f(ary, indices_or_sections): return split(ary, indices_or_sections, axis=axis) return f vsplit = _split_on_axis(onp.vsplit, axis=0) hsplit = _split_on_axis(onp.hsplit, axis=1) dsplit = _split_on_axis(onp.dsplit, axis=2) @_wraps(onp.clip) def clip(a, a_min=None, a_max=None): if a_min is None and a_max is None: raise "At most one of a_min and a_max may be None" if a_min is not None: if _dtype(a_min) != _dtype(a): a_min = lax.convert_element_type(a_min, _dtype(a)) a = maximum(a_min, a) if a_max is not None: if _dtype(a_max) != _dtype(a): a_max = lax.convert_element_type(a_max, _dtype(a)) a = minimum(a_max, a) return a def _dtype_info(dtype): """Helper function for to get dtype info needed for clipping.""" if issubdtype(dtype, onp.integer): return iinfo(dtype) return finfo(dtype) def _round_to_nearest_even(x): half = lax._const(x, 0.5) one = lax._const(x, 1) round_val = lax.floor(x) fraction = x - round_val nearest_even_int = lax.sub( round_val, lax.mul(lax._const(x, 2), lax.floor(lax.mul(half, x)))) is_odd = lax.eq(nearest_even_int, one) return lax.select( lax.bitwise_or(lax.gt(fraction, half), lax.bitwise_and(lax.eq(fraction, half), is_odd)), lax.add(round_val, one), round_val) @_wraps(onp.round, update_doc=False) def round(a, decimals=0): dtype = _dtype(a) if issubdtype(dtype, integer): if decimals < 0: raise NotImplementedError( "integer np.round not implemented for decimals < 0") return a # no-op on integer types def _round_float(x): if decimals == 0: return _round_to_nearest_even(x) # TODO(phawkins): the strategy of rescaling the value isn't necessarily a # good one since we may be left with an incorrectly rounded value at the # end due to precision problems. As a workaround for float16, convert to # float32, x = lax.convert_element_type(x, onp.float32) if dtype == onp.float16 else x factor = _constant_like(x, 10 ** decimals) out = lax.div(_round_to_nearest_even(lax.mul(x, factor)), factor) return lax.convert_element_type(out, dtype) if dtype == onp.float16 else out if issubdtype(dtype, complexfloating): return lax.complex(_round_float(lax.real(a)), _round_float(lax.imag(a))) else: return _round_float(a) around = round @_wraps(onp.fix) def fix(x, out=None): if out is not None: raise ValueError("fix does not support the `out` argument.") zero = lax._const(x, 0) return where(lax.ge(x, zero), lax.floor(x), lax.ceil(x)) @_wraps(onp.isfinite) def isfinite(x): dtype = _dtype(x) if issubdtype(dtype, floating): return lax.is_finite(x) elif issubdtype(dtype, complexfloating): return lax.bitwise_and(lax.is_finite(real(x)), lax.is_finite(imag(x))) else: return full_like(x, True, dtype=bool_) @_wraps(onp.isinf) def isinf(x): dtype = _dtype(x) if issubdtype(dtype, floating): return lax.eq(lax.abs(x), _constant_like(x, inf)) elif issubdtype(dtype, complexfloating): re = lax.real(x) im = lax.imag(x) return lax.bitwise_or(lax.eq(lax.abs(re), _constant_like(re, inf)), lax.eq(lax.abs(im), _constant_like(im, inf))) else: return full_like(x, False, dtype=bool_) def _isposneginf(infinity, x): dtype = _dtype(x) if issubdtype(dtype, floating): return lax.eq(x, _constant_like(x, infinity)) elif issubdtype(dtype, complexfloating): raise ValueError("isposinf/isneginf are not well defined for complex types") else: return full_like(x, False, dtype=bool_) isposinf = _wraps(onp.isposinf)(partial(_isposneginf, inf)) isneginf = _wraps(onp.isneginf)(partial(_isposneginf, -inf)) @_wraps(onp.isnan) def isnan(x): return lax.bitwise_and(lax.bitwise_not(isfinite(x)), lax.bitwise_not(isinf(x))) @_wraps(onp.nan_to_num) def nan_to_num(x, copy=True): del copy dtype = _dtype(x) if issubdtype(dtype, complexfloating): return lax.complex(nan_to_num(lax.real(x)), nan_to_num(lax.imag(x))) info = finfo(dtypes.canonicalize_dtype(dtype)) x = where(isnan(x), _constant_like(x, 0), x) x = where(isposinf(x), _constant_like(x, info.max), x) x = where(isneginf(x), _constant_like(x, info.min), x) return x ### Reducers def _make_reduction(np_fun, op, init_val, preproc=None, bool_op=None, upcast_f16_for_computation=False): """Creates reduction function given a binary operation and monoid identity.""" bool_op = bool_op or op @_wraps(np_fun) def reduction(a, axis=None, dtype=None, out=None, keepdims=False): if out is not None: raise ValueError("reduction does not support the `out` argument.") a = a if isinstance(a, ndarray) else asarray(a) a = preproc(a) if preproc else a dims = _reduction_dims(a, axis) result_dtype = dtype or _dtype(np_fun(onp.ones((), dtype=_dtype(a)))) if upcast_f16_for_computation and issubdtype(result_dtype, inexact): computation_dtype = promote_types(result_dtype, float32) else: computation_dtype = result_dtype a = lax.convert_element_type(a, computation_dtype) result = lax.reduce(a, _reduction_init_val(a, init_val), op if computation_dtype != bool_ else bool_op, dims) if keepdims: shape_with_singletons = lax.subvals(shape(a), zip(dims, (1,) * len(dims))) result = lax.reshape(result, shape_with_singletons) return lax.convert_element_type(result, dtype or result_dtype) return reduction def _reduction_dims(a, axis): if axis is None: return onp.arange(ndim(a)) elif isinstance(axis, (onp.ndarray, tuple, list)): return tuple(_canonicalize_axis(x, ndim(a)) for x in axis) elif isinstance(axis, int): return (_canonicalize_axis(axis, ndim(a)),) else: raise TypeError("Unexpected type of axis argument: {}".format(type(axis))) def _reduction_init_val(a, init_val): a_dtype = dtypes.canonicalize_dtype(_dtype(a)) if a_dtype == 'bool': return onp.array(init_val > 0, dtype=a_dtype) try: return onp.array(init_val, dtype=a_dtype) except OverflowError: assert issubdtype(a_dtype, onp.integer) sign, info = onp.sign(init_val), iinfo(a_dtype) return onp.array(info.min if sign < 0 else info.max, dtype=a_dtype) _cast_to_bool = partial(lax.convert_element_type, new_dtype=bool_) sum = _make_reduction(onp.sum, lax.add, 0, upcast_f16_for_computation=True, bool_op=lax.bitwise_or) product = prod = _make_reduction(onp.prod, lax.mul, 1, bool_op=lax.bitwise_and, upcast_f16_for_computation=True) amax = max = _make_reduction(onp.max, lax.max, -onp.inf) amin = min = _make_reduction(onp.min, lax.min, onp.inf) all = alltrue = _make_reduction(onp.all, lax.bitwise_and, True, _cast_to_bool) any = sometrue = _make_reduction(onp.any, lax.bitwise_or, False, _cast_to_bool) @_wraps(onp.mean) def mean(a, axis=None, dtype=None, out=None, keepdims=False): if out is not None: raise ValueError("mean does not support the `out` argument.") if axis is None: normalizer = size(a) else: normalizer = onp.prod(onp.take(shape(a), axis)) if dtype is None: if (issubdtype(_dtype(a), onp.bool_) or issubdtype(_dtype(a), onp.integer)): dtype = float_ else: dtype = _dtype(a) return lax.div( sum(a, axis, dtype=dtype, keepdims=keepdims), lax.convert_element_type(normalizer, dtype)) @_wraps(onp.average) def average(a, axis=None, weights=None, returned=False): a = asarray(a) if weights is None: # Treat all weights as 1 avg = mean(a, axis=axis) if axis is None: weights_sum = full((), size(a), dtype=avg.dtype) else: weights_sum = full_like(avg, a.shape[axis], dtype=avg.dtype) else: weights = asarray(weights) if issubdtype(a.dtype, integer) or issubdtype(a.dtype, bool_): out_dtype = dtypes.canonicalize_dtype(result_type(a.dtype, weights.dtype, floating)) else: out_dtype = dtypes.canonicalize_dtype(result_type(a.dtype, weights.dtype)) a_shape = shape(a) a_ndim = len(a_shape) weights_shape = shape(weights) axis = None if axis is None else _canonicalize_axis(axis, a_ndim) if a_shape != weights_shape: # Make sure the dimensions work out if axis is None: raise ValueError("Axis must be specified when shapes of a and " "weights differ.") if len(weights_shape) != 1: raise ValueError("1D weights expected when shapes of a and " "weights differ.") if weights_shape[0] != a_shape[axis]: raise ValueError("Length of weights not " "compatible with specified axis.") weights = broadcast_to(weights, (a_ndim - 1) * (1,) + weights_shape) weights = moveaxis(weights, -1, axis) weights_sum = sum(weights, axis=axis, dtype=out_dtype) avg = sum(multiply(a, weights), axis=axis, dtype=out_dtype) / weights_sum if returned: if avg.shape != weights_sum.shape: weights_sum = broadcast_to(weights_sum, avg.shape) return avg, weights_sum return avg _complex_basetype = lambda dtype: onp.abs(onp.zeros((), dtype)).dtype @_wraps(onp.var) def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): if out is not None: raise ValueError("var does not support the `out` argument.") a_dtype = _dtype(a) if dtype: a_dtype = promote_types(a_dtype, dtype) else: if not issubdtype(a_dtype, inexact): dtype = a_dtype = float_ else: dtype = _complex_basetype(a_dtype) a_dtype = promote_types(a_dtype, float32) a_mean = mean(a, axis, dtype=a_dtype, keepdims=True) centered = a - a_mean if issubdtype(centered.dtype, complexfloating): centered = lax.real(lax.mul(centered, lax.conj(centered))) else: centered = lax.square(centered) if axis is None: normalizer = size(a) else: normalizer = onp.prod(onp.take(shape(a), axis)) normalizer = normalizer - ddof result = sum(centered, axis, keepdims=keepdims) out = lax.div(result, lax.convert_element_type(normalizer, result.dtype)) return lax.convert_element_type(out, dtype) @_wraps(onp.std) def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): if out is not None: raise ValueError("std does not support the `out` argument.") return sqrt(var(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims)) @_wraps(onp.ptp) def ptp(a, axis=None, out=None, keepdims=False): if out is not None: raise ValueError("ptp does not support the `out` argument.") x = amax(a, axis=axis, keepdims=keepdims) y = amin(a, axis=axis, keepdims=keepdims) return lax.sub(x, y) @_wraps(onp.allclose) def allclose(a, b, rtol=1e-05, atol=1e-08): return all(isclose(a, b, rtol, atol)) @_wraps(onp.count_nonzero) def count_nonzero(a, axis=None): return sum(lax.ne(a, _constant_like(a, 0)), axis=axis, dtype=dtypes.canonicalize_dtype(onp.int_)) def _make_nan_reduction(onp_reduction, np_reduction, init_val, nan_if_all_nan): @_wraps(onp_reduction) def nan_reduction(a, axis=None, out=None, keepdims=False, **kwargs): out = np_reduction(where(isnan(a), _reduction_init_val(a, init_val), a), axis=axis, out=out, keepdims=keepdims, **kwargs) if nan_if_all_nan: return where(all(isnan(a), axis=axis, keepdims=keepdims), _constant_like(a, nan), out) else: return out return nan_reduction nanmin = _make_nan_reduction(onp.nanmin, min, inf, nan_if_all_nan=True) nanmax = _make_nan_reduction(onp.nanmax, max, -inf, nan_if_all_nan=True) nansum = _make_nan_reduction(onp.nansum, sum, 0, nan_if_all_nan=False) nanprod = _make_nan_reduction(onp.nanprod, prod, 1, nan_if_all_nan=False) @_wraps(onp.nanmean) def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): if out is not None: raise ValueError("nanmean does not support the `out` argument.") if (issubdtype(_dtype(a), onp.bool_) or issubdtype(_dtype(a), onp.integer)): return mean(a, axis, dtype, out, keepdims) if dtype is None: dtype = _dtype(a) nan_mask = logical_not(isnan(a)) normalizer = sum(nan_mask, axis=axis, dtype=int32, keepdims=keepdims) normalizer = lax.convert_element_type(normalizer, dtype) td = lax.div(nansum(a, axis, dtype=dtype, keepdims=keepdims), normalizer) return td def _make_cumulative_reduction(onp_reduction, window_reduce, init_val, squash_nan=False): # We want to allow XLA to fuse the pad and reduce-window operators to # avoid materializing the padded output. # Consider removing `jit` once again if reduce-window is generalized to # support arbitrary padding. @partial(jit, static_argnums=(1, 2)) def _cumulative_reduction(a, axis, dtype): if axis is None or isscalar(a): a = ravel(a) axis = 0 a_shape = list(shape(a)) num_dims = len(a_shape) if axis < 0: axis = axis + num_dims if axis < 0 or axis >= num_dims: raise ValueError( "axis {} is out of bounds for array of dimension {}".format( axis, num_dims)) if squash_nan: a = where(isnan(a), _constant_like(a, init_val), a) if dtype: a = lax.convert_element_type(a, dtype) if a_shape[axis] == 0: return a padding = [(0, 0, 0)] * num_dims padding[axis] = (a_shape[axis] - 1, 0, 0) a = lax.pad(a, _constant_like(a, init_val), padding) strides = [1] * num_dims window_dims = [1] * num_dims window_dims[axis] = a_shape[axis] return window_reduce( a, window_dims, strides, xla_client.PaddingType.VALID) @_wraps(onp_reduction) def cumulative_reduction(a, axis=None, dtype=None): # jit doesn't support kwargs as static_args. return _cumulative_reduction(a, axis, dtype) return cumulative_reduction cumsum = _make_cumulative_reduction( onp.cumsum, lax._reduce_window_sum, 0, squash_nan=False) cumprod = _make_cumulative_reduction( onp.cumprod, lax._reduce_window_prod, 1, squash_nan=False) cumproduct = cumprod nancumsum = _make_cumulative_reduction( onp.nancumsum, lax._reduce_window_sum, 0, squash_nan=True) nancumprod = _make_cumulative_reduction( onp.nancumprod, lax._reduce_window_prod, 1, squash_nan=True) ### Array-creation functions @partial(jit, static_argnums=(1, 2)) def _pad(array, pad_width, mode, constant_values): array = asarray(array) nd = ndim(array) pad_width = onp.broadcast_to(onp.asarray(pad_width), (nd, 2)) if any(pad_width < 0): raise ValueError("index can't contain negative values") if mode == "constant": constant_values = broadcast_to(asarray(constant_values), (nd, 2)) constant_values = lax.convert_element_type(constant_values, array.dtype) for i in xrange(nd): widths = [(0, 0, 0)] * nd widths[i] = (pad_width[i, 0], 0, 0) array = lax.pad(array, constant_values[i, 0], widths) widths[i] = (0, pad_width[i, 1], 0) array = lax.pad(array, constant_values[i, 1], widths) return array elif mode in ("symmetric", "reflect", "wrap"): for i in xrange(nd): if array.shape[i] == 0: if (pad_width[i, 0] > 0 or pad_width[i, 1] > 0): msg = "Cannot apply '{}' padding to empty axis" raise ValueError(msg.format(mode)) continue n = array.shape[i] rarray = lax.rev(array, dimensions=(i,)) offset = 1 if (mode == "reflect" and n > 1) else 0 wrap_mode = mode == "wrap" def build_padding(padding, forward): xs = [] delta = n - offset while padding > delta: padding -= delta p = array if forward else rarray xs.append(lax.slice_in_dim(p, offset, n, axis=i)) if not wrap_mode: forward = not forward if padding > 0: x = lax.slice_in_dim(array if forward else rarray, offset, padding + offset, axis=i) xs.append(x) return xs parts = reversed(build_padding(pad_width[i, 0], forward=not wrap_mode)) parts = [lax.rev(x, dimensions=(i,)) for x in parts] parts += [array] parts += build_padding(pad_width[i, 1], forward=wrap_mode) array = lax.concatenate(parts, dimension=i) return array else: msg = "Unimplemented padding mode '{}' for np.pad." raise NotImplementedError(msg.format(mode)) @_wraps(onp.pad) def pad(array, pad_width, mode='constant', constant_values=0): return _pad(array, pad_width, mode, constant_values) @_wraps(onp.stack) def stack(arrays, axis=0): if not len(arrays): raise ValueError("Need at least one array to stack.") shape0 = shape(arrays[0]) axis = _canonicalize_axis(axis, len(shape0) + 1) new_shape = list(shape0) new_shape.insert(axis, 1) new_arrays = [] for a in arrays: if shape(a) != shape0: raise ValueError("All input arrays must have the same shape.") new_arrays.append(reshape(a, new_shape)) return concatenate(new_arrays, axis=axis) @_wraps(onp.tile) def tile(a, reps): if isinstance(reps, int): reps = (reps,) a = reshape(a, (1,) * (len(reps) - ndim(a)) + shape(a)) reps = (1,) * (ndim(a) - len(reps)) + tuple(reps) for i, rep in enumerate(reps): a = concatenate([a] * int(rep), axis=i) return a @_wraps(onp.concatenate) def concatenate(arrays, axis=0): if not len(arrays): raise ValueError("Need at least one array to concatenate.") if ndim(arrays[0]) == 0: raise ValueError("Zero-dimensional arrays cannot be concatenated.") axis = _canonicalize_axis(axis, ndim(arrays[0])) arrays = _promote_dtypes(*arrays) # lax.concatenate can be slow to compile for wide concatenations, so form a # tree of concatenations as a workaround especially for op-by-op mode. # (https://github.com/google/jax/issues/653). k = 16 while len(arrays) > 1: arrays = [lax.concatenate(arrays[i:i+k], axis) for i in range(0, len(arrays), k)] return arrays[0] @_wraps(onp.vstack) def vstack(tup): return concatenate([atleast_2d(m) for m in tup], axis=0) row_stack = vstack @_wraps(onp.hstack) def hstack(tup): arrs = [atleast_1d(m) for m in tup] if arrs[0].ndim == 1: return concatenate(arrs, 0) return concatenate(arrs, 1) @_wraps(onp.dstack) def dstack(tup): return concatenate([atleast_3d(m) for m in tup], axis=2) @_wraps(onp.column_stack) def column_stack(tup): arrays = [] for v in tup: arr = array(v) if arr.ndim < 2: arr = arr.reshape((-1, 1)) arrays.append(arr) return concatenate(arrays, 1) @_wraps(onp.atleast_1d, update_doc=False) def atleast_1d(*arys): if len(arys) == 1: arr = array(arys[0]) return arr if ndim(arr) >= 1 else reshape(arr, -1) else: return [atleast_1d(arr) for arr in arys] @_wraps(onp.atleast_2d, update_doc=False) def atleast_2d(*arys): if len(arys) == 1: arr = array(arys[0]) return arr if ndim(arr) >= 2 else reshape(arr, (1, -1)) else: return [atleast_2d(arr) for arr in arys] @_wraps(onp.atleast_3d, update_doc=False) def atleast_3d(*arys): if len(arys) == 1: arr = array(arys[0]) if ndim(arr) <= 1: arr = reshape(arr, (1, -1, 1)) elif ndim(arr) == 2: arr = reshape(arr, shape(arr) + (1,)) return arr else: return [atleast_3d(arr) for arr in arys] @_wraps(onp.array) def array(object, dtype=None, copy=True, order="K", ndmin=0): if order is not None and order != "K": raise NotImplementedError("Only implemented for order='K'") lax._check_user_dtype_supported(dtype, "array") if isinstance(object, ndarray): if dtype and _dtype(object) != dtypes.canonicalize_dtype(dtype): out = lax.convert_element_type(object, dtype) else: out = device_put(object) elif isscalar(object): out = lax.reshape(object, ()) if dtype and _dtype(out) != dtypes.canonicalize_dtype(dtype): out = lax.convert_element_type(out, dtype) elif hasattr(object, '__array__'): # this case is for duck-typed handling of objects that implement `__array__` out = array(object.__array__(), dtype and dtypes.canonicalize_dtype(dtype)) elif isinstance(object, (list, tuple)): if object: out = stack([array(elt, dtype=dtype) for elt in object]) else: out = onp.array([], dtype) else: try: view = memoryview(object) except TypeError: pass # `object` does not support the buffer interface. else: return array(onp.asarray(view), dtype, copy) raise TypeError("Unexpected input type for array: {}".format(type(object))) if ndmin > ndim(out): out = lax.reshape(out, (1,) * (ndmin - ndim(out)) + shape(out)) return out @_wraps(onp.asarray) def asarray(a, dtype=None, order=None): lax._check_user_dtype_supported(dtype, "asarray") return array(a, dtype=dtype, copy=False, order=order) @_wraps(onp.zeros_like) def zeros_like(x, dtype=None): lax._check_user_dtype_supported(dtype, "zeros_like") return lax.full_like(x, 0, dtype) @_wraps(onp.ones_like) def ones_like(x, dtype=None): lax._check_user_dtype_supported(dtype, "ones_like") return lax.full_like(x, 1, dtype) @_wraps(onp.full) def full(shape, fill_value, dtype=None): lax._check_user_dtype_supported(dtype, "full") return lax.full(shape, fill_value, dtype) @_wraps(onp.full_like) def full_like(a, fill_value, dtype=None): lax._check_user_dtype_supported(dtype, "full_like") return lax.full_like(a, fill_value, dtype) @_wraps(onp.zeros) def zeros(shape, dtype=None): if isinstance(shape, types.GeneratorType): raise TypeError("expected sequence object with len >= 0 or a single integer") lax._check_user_dtype_supported(dtype, "zeros") dtype = float_ if dtype is None else dtype shape = (shape,) if isscalar(shape) else shape return lax.full(shape, 0, dtype) @_wraps(onp.ones) def ones(shape, dtype=None): if isinstance(shape, types.GeneratorType): raise TypeError("expected sequence object with len >= 0 or a single integer") lax._check_user_dtype_supported(dtype, "ones") dtype = float_ if dtype is None else dtype shape = (shape,) if isscalar(shape) else shape return lax.full(shape, 1, dtype) @_wraps(onp.array_equal) def array_equal(a1, a2): try: a1, a2 = asarray(a1), asarray(a2) except Exception: return False return shape(a1) == shape(a2) and all(asarray(a1 == a2)) # We can't create uninitialized arrays in XLA; use zeros for empty. empty_like = zeros_like empty = zeros @_wraps(onp.eye) def eye(N, M=None, k=None, dtype=None): lax._check_user_dtype_supported(dtype, "eye") dtype = float_ if dtype is None else dtype M = N if M is None else M if N < 0 or M < 0: msg = "negative dimensions are not allowed, got {} and {}" raise ValueError(msg.format(N, M)) if k is None: return lax.broadcasted_eye(dtype, (N, M), (0, 1)) else: k_dtype = _dtype(k) if not issubdtype(k_dtype, onp.integer): msg = "eye argument `k` must be of integer dtype, got {}" raise TypeError(msg.format(k_dtype)) rows = k + lax.broadcasted_iota(k_dtype, (N, M), 0) cols = lax.broadcasted_iota(k_dtype, (N, M), 1) return lax.convert_element_type(lax.eq(rows, cols), dtype) @_wraps(onp.identity) def identity(n, dtype=None): lax._check_user_dtype_supported(dtype, "identity") return eye(n, dtype=dtype) @_wraps(onp.arange) def arange(start, stop=None, step=None, dtype=None): lax._check_user_dtype_supported(dtype, "arange") # If called like np.arange(N), we create a lazy lax._IotaConstant. if stop is None and step is None: dtype = dtype or _dtype(start) if issubdtype(dtype, onp.integer): return lax.iota(dtype, start) # avoids materializing # Fall back to instantiating an ndarray in host memory dtype = dtype or result_type( *(x for x in (start, stop, step) if x is not None)) return onp.arange(start, stop=stop, step=step, dtype=dtype) def _wrap_numpy_nullary_function(f): """Adapts `f` to return a DeviceArray instead of an onp.ndarray. `f` cannot have any non-static array arguments. """ @_wraps(f, update_doc=False) def wrapper(*args, **kwargs): return asarray(f(*args, **kwargs)) return wrapper @_wraps(onp.linspace) def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0): """Implementation of linspace differentiable in start and stop args.""" lax._check_user_dtype_supported(dtype, "linspace") if num < 0: raise ValueError("Number of samples, %s, must be non-negative." % num) dtype = dtype or result_type(start, stop, float(num)) bounds_shape = list(lax.broadcast_shapes(shape(start), shape(stop))) broadcast_start = broadcast_to(start, bounds_shape) axis = len(bounds_shape) + axis + 1 if axis < 0 else axis bounds_shape.insert(axis, 1) iota_shape = [1,] * len(bounds_shape) iota_shape[axis] = num div = (num - 1) if endpoint else num if num > 1: delta = (stop - start) / div out = (reshape(broadcast_start, bounds_shape) + reshape(lax.iota(dtype, num), iota_shape) * reshape(delta, bounds_shape)) elif num == 1: delta = nan out = reshape(broadcast_start, bounds_shape) else: # num == 0 degenerate case, match onp behavior empty_shape = list(lax.broadcast_shapes(shape(start), shape(stop))) empty_shape.insert(axis, 0) delta = nan out = reshape(array([]), empty_shape).astype(dtype) if retstep: return lax.convert_element_type(out, dtype), delta else: return lax.convert_element_type(out, dtype) @_wraps(onp.logspace) def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, axis=0): """Implementation of logspace differentiable in start and stop args.""" dtype = dtype or result_type(start, stop, float_) computation_dtype = promote_types(dtype, float_) start = asarray(start, dtype=computation_dtype) stop = asarray(stop, dtype=computation_dtype) lin = linspace(start, stop, num, endpoint=endpoint, retstep=False, dtype=None, axis=axis) return lax.convert_element_type(power(base, lin), dtype) @_wraps(onp.geomspace) def geomspace(start, stop, num=50, endpoint=True, dtype=None, axis=0): """Implementation of geomspace differentiable in start and stop args.""" dtype = dtype or result_type(start, stop, float(num), zeros((), dtype)) computation_dtype = promote_types(dtype, float32) start = asarray(start, dtype=computation_dtype) stop = asarray(stop, dtype=computation_dtype) # follow the numpy geomspace convention for negative and complex endpoints signflip = 1 - (1 - sign(real(start))) * (1 - sign(real(stop))) // 2 res = signflip * logspace(log10(signflip * start), log10(signflip * stop), num, endpoint=endpoint, base=10.0, dtype=computation_dtype, axis=0) if axis != 0: res = moveaxis(res, 0, axis) return lax.convert_element_type(res, dtype) @_wraps(onp.meshgrid) def meshgrid(*args, **kwargs): indexing = kwargs.get("indexing", "xy") sparse = kwargs.get("sparse", False) copy = kwargs.get("copy", True) if not copy: raise ValueError("jax.numpy.meshgrid only supports copy=True") args = list(args) if indexing == "xy": if len(args) >= 2: args[0], args[1] = args[1], args[0] elif indexing != "ij": raise ValueError("Valid values for indexing are 'xy' and 'ij', got {}" .format(indexing)) shape = [] for i, a in enumerate(args): args[i] = a = asarray(a) if len(a.shape) != 1: msg = "Arguments to jax.numpy.meshgrid must be 1D, got shape {}" raise ValueError(msg.format(a.shape)) shape.append(1 if sparse else a.shape[0]) output = [] for i, a in enumerate(args): a = asarray(a) s = shape if sparse: s = list(s) s[i] = a.shape[0] output.append(lax.broadcast_in_dim(a, s, (i,))) if indexing == "xy" and len(args) >= 2: output[0], output[1] = output[1], output[0] return output @_wraps(onp.ix_) def ix_(*args): n = len(args) output = [] for i, a in enumerate(args): a = asarray(a) if len(a.shape) != 1: msg = "Arguments to jax.numpy.ix_ must be 1-dimensional, got shape {}" raise ValueError(msg.format(a.shape)) if _dtype(a) == bool_: raise NotImplementedError( "Boolean arguments to jax.numpy.ix_ are not implemented") shape = [1] * n shape[i] = a.shape[0] if a.size == 0: # Numpy uses an integer index type for empty arrays. output.append(lax.full(shape, onp.zeros((), onp.intp))) else: output.append(lax.reshape(a, shape)) return tuple(output) def _repeat_scalar(a, repeats, axis=None): if not isscalar(repeats): raise NotImplementedError( "_repeat_scalar implementation only supports scalar repeats") if axis is None or isscalar(a): a = ravel(a) axis = 0 a_shape = list(shape(a)) num_dims = len(a_shape) if axis < 0: axis = axis + num_dims if axis < 0 or axis >= num_dims: raise ValueError( "axis {} is out of bounds for array of dimension {}".format( axis, num_dims)) # Broadcasts to [..., X, repeats, ...] and reshapes to [..., X * repeats, ...] broadcast_shape = list(a_shape) broadcast_shape.insert(axis + 1, repeats) broadcast_dims = onp.concatenate((onp.arange(0, axis + 1), onp.arange(axis + 2, num_dims + 1))) a_shape[axis] *= repeats return lax.reshape( lax.broadcast_in_dim(a, broadcast_shape, broadcast_dims), a_shape) @_wraps(onp.repeat) def repeat(a, repeats, axis=None): ''' :param repeats: int or array of ints ''' # use `_repeat_scalar` when possible if isscalar(repeats): return _repeat_scalar(a, repeats, axis) repeats_raveled = ravel(array(repeats)) # make sure it's jax's array type if size(repeats_raveled) == 1: return _repeat_scalar(a, list(repeats_raveled)[0], axis) if axis is None or isscalar(a): a = ravel(a) axis = 0 # repeats must match the dimension along the requested axis a_shape = list(a.shape) n = a_shape[axis] if size(repeats_raveled) != n: raise ValueError("repeats shape {} does not match the dimension on axis {}".format( repeats_raveled.shape, n )) # calculating the new shape total = sum(repeats_raveled) new_shape = a_shape[:] new_shape[axis] = total a_flattened = ravel(a) ''' main algorithm: first break down raveled input array into list of chunks; each chunk is the unit of repeat then tile the repeats to have same length as the list of chunks finally repeat each unit x number of times according to the tiled repeat list ''' chunks = product(a_shape[:axis+1]).item() a_splitted = split(a_flattened, chunks) repeats_tiled = tile(repeats_raveled, chunks // len(repeats_raveled)) ret = array([], dtype=a.dtype) for i, repeat in enumerate(repeats_tiled): if not isinstance(repeat, int): repeat = repeat.item() if repeat != 0: ret = concatenate((ret, tile(a_splitted[i], repeat))) return reshape(ret, new_shape) @_wraps(onp.tri) def tri(N, M=None, k=0, dtype=None): lax._check_user_dtype_supported(dtype, "tri") M = M if M is not None else N dtype = dtype or float32 x = arange(N, dtype=int32) y = arange(M, dtype=int32) mask = lax.ge( (lax.broadcast_in_dim(x, shape=(N, M), broadcast_dimensions=(0,)) + int32(k)), lax.broadcast(y, [N])) return lax.convert_element_type(mask, dtype) @_wraps(onp.tril) def tril(m, k=0): m_shape = shape(m) if len(m_shape) < 2: raise ValueError("Argument to jax.numpy.tril must be at least 2D") mask = tri(*m_shape[-2:], k=k, dtype=bool) return lax.select(lax.broadcast(mask, m_shape[:-2]), m, zeros_like(m)) @_wraps(onp.triu, update_doc=False) def triu(m, k=0): m_shape = shape(m) if len(m_shape) < 2: raise ValueError("Argument to jax.numpy.triu must be at least 2D") mask = tri(*m_shape[-2:], k=k - 1, dtype=bool) return lax.select(lax.broadcast(mask, m_shape[:-2]), zeros_like(m), m) @_wraps(onp.trace) def trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None): if out: raise NotImplementedError("The 'out' argument to trace is not supported.") lax._check_user_dtype_supported(dtype, "trace") axis1 = _canonicalize_axis(axis1, ndim(a)) axis2 = _canonicalize_axis(axis2, ndim(a)) a_shape = shape(a) if dtype is None: dtype = _dtype(a) if issubdtype(dtype, integer): default_int = dtypes.canonicalize_dtype(onp.int_) if iinfo(dtype).bits < iinfo(default_int).bits: dtype = default_int # Move the axis? dimensions to the end. perm = [i for i in range(len(a_shape)) if i != axis1 and i != axis2] perm = perm + [axis1, axis2] a = lax.transpose(a, perm) # Mask out the diagonal and reduce. a = where(eye(a_shape[axis1], a_shape[axis2], k=offset, dtype=bool), a, zeros_like(a)) return sum(a, axis=(-2, -1), dtype=dtype) def _wrap_indices_function(f): @_wraps(f, update_doc=False) def wrapper(*args, **kwargs): return tuple(asarray(x) for x in f(*args, **kwargs)) return wrapper diag_indices = _wrap_indices_function(onp.diag_indices) tril_indices = _wrap_indices_function(onp.tril_indices) triu_indices = _wrap_indices_function(onp.triu_indices) mask_indices = _wrap_indices_function(onp.mask_indices) @_wraps(onp.diagonal) def diagonal(a, offset=0, axis1=0, axis2=1): a_shape = shape(a) a_ndims = len(a_shape) # Move the two dimensions to the end. axis1 = _canonicalize_axis(axis1, a_ndims) axis2 = _canonicalize_axis(axis2, a_ndims) perm = [i for i in range(a_ndims) if i != axis1 and i != axis2] perm = perm + [axis1, axis2] a = lax.transpose(a, perm) # Mask out the diagonal and reduce over one of the axes a = where(eye(a_shape[axis1], a_shape[axis2], k=offset, dtype=bool), a, zeros_like(a)) reduce_axis = -2 if offset < 0 else -1 d = sum(a, axis=reduce_axis, dtype=_dtype(a)) # Slice out the correct diagonal size. diag_size = _max(0, _min(a_shape[axis1] + _min(offset, 0), a_shape[axis2] - _max(offset, 0))) return lax.slice_in_dim(d, 0, diag_size, axis=-1) @_wraps(onp.diag) def diag(v, k=0): v_shape = shape(v) if len(v_shape) == 1: zero = lambda x: lax.full_like(x, shape=(), fill_value=0) n = v_shape[0] + _abs(k) v = lax.pad(v, zero(v), ((_max(0, k), _max(0, -k), 0),)) return where(eye(n, k=k, dtype=bool), v, zeros_like(v)) elif len(v_shape) == 2: return diagonal(v, offset=k) else: raise ValueError("diag input must be 1d or 2d") @_wraps(onp.polyval) def polyval(p, x): if isinstance(p, onp.poly1d): p = onp.asarray(p) if isinstance(x, onp.poly1d): y = 0 else: y = zeros_like(x) for i in range(len(p)): y = y * x + p[i] return y @_wraps(onp.append) def append(arr, values, axis=None): if axis is None: return concatenate([ravel(arr), ravel(values)], 0) else: return concatenate([arr, values], axis=axis) ### Tensor contraction operations _PRECISION_DOC = """\ In addition to the original NumPy arguments listed below, also supports ``precision`` for extra control over matrix-multiplication precision on supported devices. See :py:func:`jax.lax.dot` for details. """ @_wraps(onp.dot, lax_description=_PRECISION_DOC) def dot(a, b, precision=None): # pylint: disable=missing-docstring _check_arraylike("dot", a, b) a, b = _promote_dtypes(a, b) a_ndim, b_ndim = ndim(a), ndim(b) if a_ndim == 0 or b_ndim == 0: return lax.mul(a, b) if _max(a_ndim, b_ndim) <= 2: return lax.dot(a, b, precision=precision) if b_ndim == 1: contract_dims = ((a_ndim - 1,), (0,)) else: contract_dims = ((a_ndim - 1,), (b_ndim - 2,)) batch_dims = ((), ()) return lax.dot_general(a, b, (contract_dims, batch_dims), precision) @_wraps(onp.matmul, lax_description=_PRECISION_DOC) def matmul(a, b, precision=None): # pylint: disable=missing-docstring _check_arraylike("matmul", a, b) a_is_vec, b_is_vec = (ndim(a) == 1), (ndim(b) == 1) a = lax.reshape(a, (1,) + shape(a)) if a_is_vec else a b = lax.reshape(b, shape(b) + (1,)) if b_is_vec else b a, b = _promote_dtypes(a, b) batch_shape = lax.broadcast_shapes(shape(a)[:-2], shape(b)[:-2]) a = broadcast_to(a, batch_shape + shape(a)[-2:]) b = broadcast_to(b, batch_shape + shape(b)[-2:]) batch_dims = tuple(range(len(batch_shape))) dim_numbers = (((ndim(a) - 1,), (ndim(b) - 2,)), (batch_dims, batch_dims)) result = lax.dot_general(a, b, dim_numbers, precision) if a_is_vec or b_is_vec: m, n = shape(result)[-2:] new_m = () if a_is_vec else (m,) new_n = () if b_is_vec else (n,) return lax.reshape(result, batch_shape + new_m + new_n) else: return result @_wraps(onp.vdot, lax_description=_PRECISION_DOC) def vdot(a, b, precision=None): if issubdtype(_dtype(a), onp.complexfloating): a = conj(a) return dot(a.ravel(), b.ravel(), precision=precision) @_wraps(onp.tensordot, lax_description=_PRECISION_DOC) def tensordot(a, b, axes=2, precision=None): _check_arraylike("tensordot", a, b) if not (ndim(a) >= 1 and ndim(b) >= 1): msg = "tensordot requires a.ndim and b.dim to be at least 1, got {} and {}." raise TypeError(msg.format(ndim(a), ndim(b))) if type(axes) is int: if axes == 0: a, b = _promote_dtypes(a, b) return lax.mul(lax.reshape(a, shape(a) + (1,) * ndim(b)), lax.reshape(b, (1,) * ndim(a) + shape(b))) else: a, b = _promote_dtypes(a, b) a_reshape = lax.reshape(a, (_prod(a.shape[:-axes]), _prod(a.shape[-axes:]))) b_reshape = lax.reshape(b, (_prod(b.shape[:axes]), _prod(b.shape[axes:]))) out_reshape = lax.dot(a_reshape, b_reshape, precision=precision) return lax.reshape(out_reshape, a.shape[:-axes] + b.shape[axes:]) elif type(axes) in (list, tuple) and len(axes) == 2: ax1, ax2 = axes if type(ax1) == type(ax2) == int: a_transposed = moveaxis(a, ax1, -1) if ax1 != a.ndim - 1 else a b_transposed = moveaxis(b, ax2, 0) if ax2 != 0 else b return tensordot(a_transposed, b_transposed, 1, precision) elif type(ax1) in (list, tuple) and type(ax2) in (list, tuple): if len(ax1) != len(ax2): msg = "tensordot requires axes lists to have equal length, got {} and {}." raise TypeError(msg.format(ax1, ax2)) num_axes = len(ax1) a_transposed = moveaxis(a, ax1, tuple(range(a.ndim - num_axes, a.ndim))) b_transposed = moveaxis(b, ax2, tuple(range(num_axes))) return tensordot(a_transposed, b_transposed, num_axes, precision) msg = ("tensordot axes argument must be an int, a pair of ints, or a pair of " "lists/tuples of ints.") raise TypeError(msg) @_wraps(onp.einsum, lax_description=_PRECISION_DOC) def einsum(*operands, **kwargs): optimize = kwargs.pop('optimize', 'auto') optimize = 'greedy' if optimize is True else optimize precision = kwargs.pop('precision', None) if kwargs: msg = 'invalid keyword arguments for einsum: {}' raise TypeError(msg.format(', '.join(kwargs))) # using einsum_call=True here is an internal api for opt_einsum operands, contractions = opt_einsum.contract_path( *operands, einsum_call=True, use_blas=True, optimize=optimize) contractions = tuple(data[:3] for data in contractions) return _einsum(operands, contractions, precision) @_wraps(onp.einsum_path) def einsum_path(subscripts, *operands, **kwargs): optimize = kwargs.pop('optimize', 'greedy') # using einsum_call=True here is an internal api for opt_einsum return opt_einsum.contract_path(subscripts, *operands, optimize=optimize) @partial(jit, static_argnums=(1, 2)) def _einsum(operands, contractions, precision): operands = list(_promote_dtypes(*operands)) sum = lambda x, axes: lax.reduce(x, onp.array(0, x.dtype), lax.add, axes) def sum_uniques(operand, names, uniques): if uniques: axes = [names.index(name) for name in uniques] operand = sum(operand, axes) names = removechars(names, uniques) return operand, names def sum_repeats(operand, names, counts, keep_names): for name, count in counts.items(): if count > 1: axes = [i for i, n in enumerate(names) if n == name] eye = lax.broadcasted_eye(operand.dtype, operand.shape, axes) if name not in keep_names: operand = sum(operand * eye, axes) names = names.replace(name, '') else: operand = sum(operand * eye, axes[:-1]) names = names.replace(name, '', count - 1) return operand, names for operand_indices, contracted_names, einstr in contractions: input_str, result_names = einstr.split('->') input_names = input_str.split(',') # switch on the number of operands to be processed in this loop iteration. # every case here sets 'operand' and 'names'. if len(operand_indices) == 1: operand = operands.pop(operand_indices[0]) names, = input_names counts = collections.Counter(names) # sum out unique contracted indices with a single reduce-sum uniques = [name for name in contracted_names if counts[name] == 1] operand, names = sum_uniques(operand, names, uniques) # for every repeated index, do a contraction against an identity matrix operand, names = sum_repeats(operand, names, counts, result_names) elif len(operand_indices) == 2: lhs, rhs = map(operands.pop, operand_indices) lhs_counts, rhs_counts = map(collections.Counter, input_names) lhs_names, rhs_names = input_names # sum out unique contracted indices in lhs and rhs lhs_uniques = [name for name in contracted_names if lhs_counts[name] == 1 and rhs_counts[name] == 0] lhs, lhs_names = sum_uniques(lhs, lhs_names, lhs_uniques) rhs_uniques = [name for name in contracted_names if rhs_counts[name] == 1 and lhs_counts[name] == 0] rhs, rhs_names = sum_uniques(rhs, rhs_names, rhs_uniques) # for every repeated index, contract against an identity matrix lhs, lhs_names = sum_repeats(lhs, lhs_names, lhs_counts, result_names + rhs_names) rhs, rhs_names = sum_repeats(rhs, rhs_names, rhs_counts, result_names + lhs_names) contracted_names = contracted_names & (set(lhs_names) | set(rhs_names)) batch_names = (set(lhs_names) & set(rhs_names)) - contracted_names lhs_batch, rhs_batch = unzip2((lhs_names.find(n), rhs_names.find(n)) for n in batch_names) # NOTE(mattjj): this can fail non-deterministically in python3, maybe # due to opt_einsum assert _all(name in lhs_names and name in rhs_names and lhs.shape[lhs_names.index(name)] == rhs.shape[rhs_names.index(name)] for name in contracted_names) # move batch dims to the front (required by lax.dot_general, and easier) batch_dims = tuple(range(len(batch_names))) if lhs_batch != rhs_batch or set(lhs_batch) != set(batch_dims): lhs = moveaxis(lhs, lhs_batch, batch_dims) lhs_names = _movechars(lhs_names, lhs_batch, batch_dims) rhs = moveaxis(rhs, rhs_batch, batch_dims) rhs_names = _movechars(rhs_names, rhs_batch, batch_dims) batch_names = ''.join(batch_names) else: batch_dims = tuple(lhs_batch) batch_names = ''.join(lhs_names[i] for i in range(len(lhs_names)) if i in batch_dims) if contracted_names: # contract using lax.dot_general lhs_cont, rhs_cont = unzip2((lhs_names.index(n), rhs_names.index(n)) for n in contracted_names) operand = _dot_general(lhs, rhs, lhs_cont, rhs_cont, len(batch_dims), precision) deleted_names = batch_names + ''.join(contracted_names) names = (batch_names + removechars(lhs_names, deleted_names) + removechars(rhs_names, deleted_names)) else: # no contraction, just a tensor product nbatch = len(batch_names) assert lhs.shape[:nbatch] == rhs.shape[:nbatch] names = batch_names + lhs_names[nbatch:] + rhs_names[nbatch:] lhs_shape = lhs.shape + (1,) * (rhs.ndim - nbatch) rhs_shape = rhs.shape[:nbatch] + (1,) * (lhs.ndim - nbatch) + rhs.shape[nbatch:] operand = lax.reshape(lhs, lhs_shape) * lax.reshape(rhs, rhs_shape) else: raise NotImplementedError # if this is actually reachable, open an issue! # the resulting 'operand' with axis labels 'names' should be a permutation # of the desired result assert len(names) == len(result_names) == len(set(names)) assert set(names) == set(result_names) if names != result_names: perm = tuple([names.index(name) for name in result_names]) operand = lax.transpose(operand, perm) operands.append(operand) # used in next iteration return operands[0] def _dot_general(lhs, rhs, lhs_cont, rhs_cont, nbatch, precision): """Helper for einsum contractions.""" # lax.dot_general has some tight constraints on dimension_numbers that this # wrapper loosens via transposes and reshapes assert len(lhs_cont) == len(rhs_cont) > 0 ncont = len(lhs_cont) lhs_ntensor = lhs.ndim - nbatch - ncont rhs_ntensor = rhs.ndim - nbatch - ncont batch_dims = tuple(range(nbatch)) if ncont == 1 and 0 <= lhs_ntensor <= 1 and 0 <= rhs_ntensor <= 1: dimension_numbers = [(lhs_cont, rhs_cont), (batch_dims, batch_dims)] return lax.dot_general(lhs, rhs, dimension_numbers, precision) else: # move contracting dimensions to the end. lax.dot_general only allows one # contracting dimension, so if there's more than one we collapse them. if ncont > 1: lhs_cdims = tuple(range(lhs.ndim - ncont, lhs.ndim)) lhs = moveaxis(lhs, lhs_cont, lhs_cdims) lhs = lhs.reshape(lhs.shape[:-ncont] + (-1,)) rhs_cdims = tuple(range(rhs.ndim - ncont, rhs.ndim)) rhs = moveaxis(rhs, rhs_cont, rhs_cdims) rhs = rhs.reshape(rhs.shape[:-ncont] + (-1,)) else: lhs = moveaxis(lhs, lhs_cont[0], -1) rhs = moveaxis(rhs, rhs_cont[0], -1) # lax.dot_general only allows zero or one tensor product dims per operand, # so if there's more than one we collapse them. result_shape = lhs.shape[:nbatch] + lhs.shape[nbatch:-1] + rhs.shape[nbatch:-1] if lhs_ntensor > 1: lhs = lhs.reshape(lhs.shape[:nbatch] + (-1,) + lhs.shape[-1:]) if rhs_ntensor > 1: rhs = rhs.reshape(rhs.shape[:nbatch] + (-1,) + rhs.shape[-1:]) lhs_cont, rhs_cont = [lhs.ndim - 1], [rhs.ndim - 1] dimension_numbers = [(lhs_cont, rhs_cont), (batch_dims, batch_dims)] result = lax.dot_general(lhs, rhs, dimension_numbers, precision) return lax.reshape(result, result_shape) def _movechars(s, src, dst): """Helper for einsum string munging, like moveaxis on identifier strings.""" chars = [c for i, c in enumerate(s) if i not in src] for i, j in sorted(zip(dst, src)): chars.insert(i, s[j]) return ''.join(chars) @_wraps(onp.inner, lax_description=_PRECISION_DOC) def inner(a, b, precision=None): if ndim(a) == 0 or ndim(b) == 0: return a * b return tensordot(a, b, (-1, -1), precision=precision) @_wraps(onp.outer) def outer(a, b, out=None): if out: raise NotImplementedError("The 'out' argument to outer is not supported.") return ravel(a)[:, None] * ravel(b) @_wraps(onp.cross) def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None): if axis is not None: axisa = axis axisb = axis axisc = axis a_ndims = len(shape(a)) b_ndims = len(shape(b)) axisa = _canonicalize_axis(axisa, a_ndims) axisb = _canonicalize_axis(axisb, b_ndims) a = moveaxis(a, axisa, -1) b = moveaxis(b, axisb, -1) a_shape = shape(a) b_shape = shape(b) if a_shape[-1] not in (2, 3) or b_shape[-1] not in (2, 3): raise ValueError("Dimension must be either 2 or 3 for cross product") if a_shape[-1] == 2 and b_shape[-1] == 2: return a[..., 0] * b[..., 1] - a[..., 1] * b[..., 0] if a_shape[-1] == 2: a = concatenate((a, zeros(a_shape[:-1] + (1,), dtype=a.dtype)), axis=-1) elif b_shape[-1] == 2: b = concatenate((b, zeros(b_shape[:-1] + (1,), dtype=b.dtype)), axis=-1) a0 = a[..., 0] a1 = a[..., 1] a2 = a[..., 2] b0 = b[..., 0] b1 = b[..., 1] b2 = b[..., 2] c = array([a1 * b2 - a2 * b1, a2 * b0 - a0 * b2, a0 * b1 - a1 * b0]) c_ndims = len(shape(c)) axisc = _canonicalize_axis(axisc, c_ndims) return moveaxis(c, 0, axisc) @_wraps(onp.kron) def kron(a, b): a, b = _promote_dtypes(a, b) if ndim(a) < ndim(b): a = reshape(a, (1,) * (ndim(b) - ndim(a)) + shape(a)) elif ndim(b) < ndim(a): b = reshape(b, (1,) * (ndim(a) - ndim(b)) + shape(b)) a_reshaped = reshape(a, [i for d in shape(a) for i in (d, 1)]) b_reshaped = reshape(b, [i for d in shape(b) for i in (1, d)]) out_shape = tuple(onp.multiply(shape(a), shape(b))) return reshape(lax.mul(a_reshaped, b_reshaped), out_shape) @_wraps(onp.vander) def vander(x, N=None, increasing=False): x = asarray(x) dtype = _dtype(x) if ndim(x) != 1: raise ValueError("x must be a one-dimensional array") x_shape = shape(x) N = N or x_shape[0] if N < 0: raise ValueError("N must be nonnegative") iota = lax.iota(dtype, N) if not increasing: iota = lax.sub(lax._const(iota, N - 1), iota) return power(x[..., None], iota) ### Misc @_wraps(onp.argmax) def argmax(a, axis=None): if axis is None: a = ravel(a) axis = 0 return _argminmax(max, a, axis) @_wraps(onp.argmin) def argmin(a, axis=None): if axis is None: a = ravel(a) axis = 0 return _argminmax(min, a, axis) # TODO(mattjj): redo this lowering with a call to variadic lax.reduce def _argminmax(op, a, axis): shape = [1] * a.ndim shape[axis] = a.shape[axis] idxs = lax.tie_in(a, arange(a.shape[axis])).reshape(shape) maxval = iinfo(dtypes.canonicalize_dtype(idxs.dtype)).max maxval = lax.tie_in(a, maxval) mask_idxs = where(lax._eq_meet(a, op(a, axis, keepdims=True)), idxs, maxval) return min(mask_idxs, axis) @_wraps(onp.sort) def sort(a, axis=-1, kind='quicksort', order=None): if kind != 'quicksort': warnings.warn("'kind' argument to sort is ignored.") if order is not None: raise ValueError("'order' argument to sort is not supported.") if axis is None: return lax.sort(a.ravel(), 0) else: return lax.sort(a, _canonicalize_axis(axis, ndim(a))) @_wraps(onp.argsort) def argsort(a, axis=-1, kind='quicksort', order=None): if kind != 'quicksort': warnings.warn("'kind' argument to argsort is ignored.") if order is not None: raise ValueError("'order' argument to argsort is not supported.") if axis is None: return argsort(a.ravel(), 0) else: axis = _canonicalize_axis(axis, ndim(a)) iota = lax.broadcasted_iota(onp.int64, shape(a), axis) _, perm = lax.sort_key_val(a, iota, dimension=axis) return perm @_wraps(onp.roll) def roll(a, shift, axis=None): a = asarray(a) a_shape = shape(a) if axis is None: return lax.reshape(roll(ravel(a), shift, axis=0), a_shape) a_ndim = len(a_shape) shift = asarray(shift) axis = onp.asarray(axis) b_shape = lax.broadcast_shapes(shift.shape, axis.shape, (1,)) if len(b_shape) != 1: msg = "'shift' and 'axis' arguments to roll must be scalars or 1D arrays" raise ValueError(msg) if b_shape[0] > a_ndim: raise ValueError("More shifts/axes than dimensions of input to roll.") for x, i in zip(broadcast_to(shift, b_shape), onp.broadcast_to(axis, b_shape)): i = _canonicalize_axis(i, a_ndim) x = remainder(x, (a_shape[i] or 1)) a = lax.concatenate((a, a), i) a = lax.dynamic_slice_in_dim(a, a_shape[i] - x, a_shape[i], axis=i) return a @_wraps(onp.take) def take(a, indices, axis=None, out=None, mode=None): if out: raise NotImplementedError("The 'out' argument to np.take is not supported.") a = asarray(a) indices = asarray(indices) if axis is None: a = ravel(a) axis = 0 axis = _canonicalize_axis(axis, ndim(a)) if mode == "raise": # TODO(phawkins): we have no way to report out of bounds errors yet. raise NotImplementedError("The 'raise' mode to np.take is not supported.") elif mode == "wrap": indices = mod(indices, _constant_like(indices, a.shape[axis])) elif mode != "clip" and mode is not None: raise ValueError("Invalid mode '{}' for np.take".format(mode)) index_dims = len(shape(indices)) slice_sizes = list(shape(a)) slice_sizes[axis] = 1 dnums = lax.GatherDimensionNumbers( offset_dims=tuple( list(range(axis)) + list(range(axis + index_dims, len(a.shape) + index_dims - 1))), collapsed_slice_dims=(axis,), start_index_map=(axis,)) return lax.gather(a, indices[..., None], dimension_numbers=dnums, slice_sizes=tuple(slice_sizes)) def _normalize_index(index, axis_size): """Normalizes an index value in the range [-N, N) to the range [0, N).""" return lax.select( lax.lt(index, _constant_like(index, 0)), lax.add(index, _constant_like(index, axis_size)), index) @partial(jit, static_argnums=(2,)) def _take_along_axis(arr, indices, axis): if axis is None: if ndim(indices) != 1: msg = "take_along_axis indices must be 1D if axis=None, got shape {}" raise ValueError(msg.format(indices.shape)) return take_along_axis(arr.ravel(), indices, 0) rank = ndim(arr) if rank != ndim(indices): msg = "indices and arr must have the same number of dimensions; {} vs. {}" raise ValueError(msg.format(ndim(indices), ndim(arr))) axis = _canonicalize_axis(axis, rank) def replace(tup, val): lst = list(tup) lst[axis] = val return tuple(lst) bcast_shape = lax.broadcast_shapes(replace(arr.shape, 1), replace(indices.shape, 1)) indices = broadcast_to(indices, replace(bcast_shape, indices.shape[axis])) arr = broadcast_to(arr, replace(bcast_shape, arr.shape[axis])) axis_size = arr.shape[axis] arr_shape = replace(arr.shape, 1) idx_shape = indices.shape out_shape = lax.broadcast_shapes(idx_shape, arr_shape) index_dims = [i for i, idx in enumerate(idx_shape) if i == axis or idx != 1] gather_index_shape = tuple(onp.array(out_shape)[index_dims]) + (1,) gather_indices = [] slice_sizes = [] offset_dims = [] start_index_map = [] collapsed_slice_dims = [] j = 0 for i in range(rank): if i == axis: indices = _normalize_index(indices, axis_size) gather_indices.append(lax.reshape(indices, gather_index_shape)) slice_sizes.append(1) start_index_map.append(i) collapsed_slice_dims.append(i) j += 1 elif idx_shape[i] != 1: iota = lax.iota(_dtype(indices), out_shape[i]) iota = lax.tie_in(arr, iota) iota = lax.broadcast_in_dim(iota, gather_index_shape, (j,)) gather_indices.append(iota) slice_sizes.append(1) start_index_map.append(i) collapsed_slice_dims.append(i) j += 1 else: # If idx_shape[i] == 1, we can just take the entirety of the arr's axis # and avoid forming an iota index. offset_dims.append(i) slice_sizes.append(arr_shape[i]) gather_indices = lax.concatenate(gather_indices, dimension=j) dnums = lax.GatherDimensionNumbers( offset_dims=tuple(offset_dims), collapsed_slice_dims=tuple(collapsed_slice_dims), start_index_map=tuple(start_index_map)) return lax.gather(arr, gather_indices, dnums, tuple(slice_sizes)) @_wraps(getattr(onp, "take_along_axis", None), update_doc=False) def take_along_axis(arr, indices, axis): return _take_along_axis(arr, indices, axis) ### Indexing def _rewriting_take(arr, idx): # Computes arr[idx]. # All supported cases of indexing can be implemented as an XLA gather, # followed by an optional reverse and a reshape. arr = asarray(arr) treedef, static_idx, dynamic_idx = _split_index_for_jit(idx) return _gather(arr, treedef, static_idx, dynamic_idx) # TODO(phawkins): re-enable jit after fixing excessive recompilation for # slice indexes (e.g., slice(0, 5, None), slice(10, 15, None), etc.). # @partial(jit, static_argnums=(1, 2)) def _gather(arr, treedef, static_idx, dynamic_idx): idx = _merge_static_and_dynamic_indices(treedef, static_idx, dynamic_idx) indexer = _index_to_gather(shape(arr), idx) # shared with _scatter_update y = arr # We avoid generating a gather when indexer.gather_indices.size is empty # unless indexer.slice_shape also corresponds to an empty array. if indexer.gather_indices.size or not _prod(indexer.slice_shape): y = lax.gather(y, indexer.gather_indices, indexer.dnums, indexer.gather_slice_shape) # Reverses axes with negative strides. if indexer.reversed_y_dims: y = lax.rev(y, indexer.reversed_y_dims) # This adds np.newaxis/None dimensions. return lax.reshape(y, indexer.slice_shape) _Indexer = collections.namedtuple("_Indexer", [ # The expected shape of the slice output. "slice_shape", # The slice shape to pass to lax.gather(). "gather_slice_shape", # The gather indices to use. "gather_indices", # A GatherDimensionNumbers object describing the gather to perform. "dnums", # Slice dimensions that have negative strides, and so must be reversed after # the gather. "reversed_y_dims", # For scatters, we must eliminate any axes created by `newaxis`, which # are the following dimensions, which must be of size 1. For gathers, we # simply reshape to `slice_shape` to introduce the new axes. "newaxis_dims", ]) def _split_index_for_jit(idx): """Splits indices into necessarily-static and dynamic parts. Used to pass indices into `jit`-ted function. """ # Convert list indices to tuples in cases (deprecated by NumPy.) idx = _eliminate_deprecated_list_indexing(idx) # Expand any (concrete) boolean indices. We can then use advanced integer # indexing logic to handle them. idx = _expand_bool_indices(idx) leaves, treedef = pytree.flatten(idx) dynamic = [None] * len(leaves) static = [None] * len(leaves) for i, x in enumerate(leaves): if x is Ellipsis: static[i] = x elif isinstance(x, slice): # slice objects aren't hashable. static[i] = (x.start, x.stop, x.step) else: dynamic[i] = x return treedef, tuple(static), dynamic def _merge_static_and_dynamic_indices(treedef, static_idx, dynamic_idx): """Recombines indices that were split by _split_index_for_jit.""" idx = [] for s, d in zip(static_idx, dynamic_idx): if d is not None: idx.append(d) elif isinstance(s, tuple): idx.append(slice(s[0], s[1], s[2])) else: idx.append(s) return treedef.unflatten(idx) def _int(aval): return not aval.shape and issubdtype(aval.dtype, onp.integer) def _index_to_gather(x_shape, idx): # Remove ellipses and add trailing slice(None)s. idx = _canonicalize_tuple_index(len(x_shape), idx) # Check for advanced indexing: # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing # Do the advanced indexing axes appear contiguously? If not, NumPy semantics # move the advanced axes to the front. advanced_axes_are_contiguous = False advanced_indexes = None # The positions of the advanced indexing axes in `idx`. idx_advanced_axes = [] # The positions of the advanced indexes in x's shape. # collapsed, after None axes have been removed. See below. x_advanced_axes = None if _is_advanced_int_indexer(idx): idx_no_nones = [(i, d) for i, d in enumerate(idx) if d is not None] advanced_pairs = ( (asarray(e), i, j) for j, (i, e) in enumerate(idx_no_nones) if (isinstance(e, Sequence) or isinstance(e, ndarray))) advanced_pairs = ((_normalize_index(e, x_shape[j]), i, j) for e, i, j in advanced_pairs) advanced_indexes, idx_advanced_axes, x_advanced_axes = zip(*advanced_pairs) advanced_axes_are_contiguous = onp.all(onp.diff(idx_advanced_axes) == 1) x_axis = 0 # Current axis in x. y_axis = 0 # Current axis in y, before collapsing. See below. collapsed_y_axis = 0 # Current axis in y, after collapsing. # Scatter dimension numbers. offset_dims = [] collapsed_slice_dims = [] start_index_map = [] gather_indices = zeros((0,), dtype=int32) # We perform three transformations to y before the scatter op, in order: # First, y is broadcast to slice_shape. In general `y` only need broadcast to # the right shape. slice_shape = [] # Next, y is squeezed to remove newaxis_dims. This removes np.newaxis/`None` # indices, which the scatter cannot remove itself. newaxis_dims = [] # Finally, we reverse reversed_y_dims to handle slices with negative strides. reversed_y_dims = [] gather_slice_shape = [] for idx_pos, i in enumerate(idx): # Handle the advanced indices here if: # * the advanced indices were not contiguous and we are the start. # * we are at the position of the first advanced index. if (advanced_indexes is not None and (advanced_axes_are_contiguous and idx_pos == idx_advanced_axes[0] or not advanced_axes_are_contiguous and idx_pos == 0)): advanced_indexes = broadcast_arrays(*advanced_indexes) shape = advanced_indexes[0].shape ndim = len(shape) advanced_indexes = [ lax.convert_element_type(lax.reshape(a, shape + (1,)), int32) for a in advanced_indexes] # Broadcast gather_indices from [..., k] to [..., 1, 1, ..., 1, k]. gather_indices = lax.broadcast_in_dim( gather_indices, onp.insert(gather_indices.shape, -1, shape), tuple(range(gather_indices.ndim - 1)) + (gather_indices.ndim + ndim - 1,)) gather_indices = concatenate([gather_indices] + advanced_indexes, -1) start_index_map.extend(x_advanced_axes) collapsed_slice_dims.extend(x_advanced_axes) slice_shape.extend(shape) y_axis += ndim collapsed_y_axis += ndim # Per-index bookkeeping for advanced indexes. if idx_pos in idx_advanced_axes: x_axis += 1 gather_slice_shape.append(1) continue try: abstract_i = core.get_aval(i) except TypeError: abstract_i = None # Handle basic int indexes. if (isinstance(abstract_i, ConcreteArray) or isinstance(abstract_i, ShapedArray)) and _int(abstract_i): i = _normalize_index(i, x_shape[x_axis]) i = lax.convert_element_type(i, int32) i = broadcast_to(i, tuple(gather_indices.shape[:-1]) + (1,)) gather_indices = concatenate((gather_indices, i), -1) collapsed_slice_dims.append(x_axis) gather_slice_shape.append(1) start_index_map.append(x_axis) x_axis += 1 # Handle np.newaxis (None) elif i is None: slice_shape.append(1) newaxis_dims.append(y_axis) y_axis += 1 # Handle slice(None) elif _is_slice_none(i): slice_shape.append(x_shape[x_axis]) gather_slice_shape.append(x_shape[x_axis]) offset_dims.append(collapsed_y_axis) collapsed_y_axis += 1 y_axis += 1 x_axis += 1 # Handle slice index (only static, otherwise an error is raised) elif isinstance(i, slice): if not _all(elt is None or type(core.get_aval(elt)) is ConcreteArray for elt in (i.start, i.stop, i.step)): msg = ("Array slice indices must have static start/stop/step to be used " "with Numpy indexing syntax. Try lax.dynamic_slice/" "dynamic_update_slice instead.") raise IndexError(msg) start, limit, stride, needs_rev = _static_idx(i, x_shape[x_axis]) if needs_rev: reversed_y_dims.append(collapsed_y_axis) if stride == 1: i = lax.convert_element_type(start, int32) i = broadcast_to(i, tuple(gather_indices.shape[:-1]) + (1,)) gather_indices = concatenate((gather_indices, i), -1) slice_shape.append(limit - start) gather_slice_shape.append(limit - start) offset_dims.append(collapsed_y_axis) start_index_map.append(x_axis) else: i = arange(start, limit, stride, dtype=int32) size = i.shape[0] slice_shape.append(size) gather_slice_shape.append(1) gather_indices_shape = tuple(gather_indices.shape[:-1]) + (size,) i = lax.broadcast_in_dim( i, shape=gather_indices_shape + (1,), broadcast_dimensions=(len(gather_indices_shape) - 1,)) gather_indices = lax.broadcast_in_dim( gather_indices, shape=gather_indices_shape + (len(start_index_map),), broadcast_dimensions=( tuple(range(len(gather_indices_shape) - 1)) + (len(gather_indices_shape),))) gather_indices = concatenate( (gather_indices, i), len(gather_indices_shape)) start_index_map.append(x_axis) collapsed_slice_dims.append(x_axis) collapsed_y_axis += 1 y_axis += 1 x_axis += 1 else: if abstract_i and not (issubdtype(abstract_i.dtype, integer) or issubdtype(abstract_i.dtype, bool_)): msg = ("Indexer must have integer or boolean type, got indexer " "with type {} at position {}, indexer value {}") raise TypeError(msg.format(abstract_i.dtype.name, idx_pos, i)) msg = "Indexing mode not yet supported. Open a feature request!\n{}" raise IndexError(msg.format(idx)) dnums = lax.GatherDimensionNumbers( offset_dims = tuple(offset_dims), collapsed_slice_dims = tuple(sorted(collapsed_slice_dims)), start_index_map = tuple(start_index_map) ) return _Indexer( slice_shape=slice_shape, newaxis_dims=tuple(newaxis_dims), gather_slice_shape=gather_slice_shape, reversed_y_dims=reversed_y_dims, dnums=dnums, gather_indices=gather_indices) def _should_unpack_list_index(x): """Helper for _eliminate_deprecated_list_indexing.""" return (isinstance(x, ndarray) and onp.ndim(x) != 0 or isinstance(x, Sequence) or isinstance(x, slice) or x is Ellipsis or x is None) def _eliminate_deprecated_list_indexing(idx): # "Basic slicing is initiated if the selection object is a non-array, # non-tuple sequence containing slice objects, [Ellipses, or newaxis # objects]". Detects this case and canonicalizes to a tuple. This case is # deprecated by NumPy and exists for backward compatibility. if not isinstance(idx, tuple): if isinstance(idx, Sequence) and not isinstance(idx, ndarray): if _any(_should_unpack_list_index(i) for i in idx): idx = tuple(idx) else: idx = (idx,) else: idx = (idx,) return idx def _expand_bool_indices(idx): """Converts concrete bool indexes into advanced integer indexes.""" out = [] for i in idx: try: abstract_i = core.get_aval(i) except TypeError: abstract_i = None if (isinstance(abstract_i, ShapedArray) and issubdtype(abstract_i.dtype, onp.bool_) or isinstance(i, list) and _all(not _shape(e) and issubdtype(_dtype(e), onp.bool_) for e in i)): if isinstance(i, list): i = array(i) abstract_i = core.get_aval(i) if not type(abstract_i) is ConcreteArray: msg = ("Array boolean indices must be static (e.g. no dependence on an " "argument to a jit or vmap function).") raise IndexError(msg) else: out.extend(onp.where(i)) else: out.append(i) return tuple(out) def _is_slice_none(idx): """Return True if idx is equal to slice(None), False otherwise.""" if isinstance(idx, slice): return idx.start is None and idx.stop is None and idx.step is None # TODO(mattjj): clean up this logic def _is_advanced_int_indexer(idx): """Returns True if idx should trigger int array indexing, False otherwise.""" # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing assert isinstance(idx, tuple) if _all(onp.ndim(elt) == 0 for elt in idx): return False return _all(e is None or e is Ellipsis or isinstance(e, slice) or _is_int_arraylike(e) for e in idx) def _is_int_arraylike(x): """Returns True if x is array-like with integer dtype, False otherwise.""" return (isinstance(x, int) and not isinstance(x, bool) or issubdtype(getattr(x, "dtype", None), onp.integer) or isinstance(x, (list, tuple)) and _all(_is_int_arraylike(e) for e in x)) def _canonicalize_tuple_index(arr_ndim, idx): """Helper to remove Ellipsis and add in the implicit trailing slice(None).""" len_without_none = _sum(1 for e in idx if e is not None and e is not Ellipsis) if len_without_none > arr_ndim: msg = "Too many indices for array: {} non-None/Ellipsis indices for dim {}." raise IndexError(msg.format(len_without_none, arr_ndim)) ellipses = (i for i, elt in enumerate(idx) if elt is Ellipsis) ellipsis_index = next(ellipses, None) if ellipsis_index is not None: if next(ellipses, None) is not None: msg = "Multiple ellipses (...) not supported: {}." raise IndexError(msg.format(list(map(type, idx)))) colons = (slice(None),) * (arr_ndim - len_without_none) idx = idx[:ellipsis_index] + colons + idx[ellipsis_index + 1:] elif len_without_none < arr_ndim: colons = (slice(None),) * (arr_ndim - len_without_none) idx = tuple(idx) + colons return idx def _static_idx(idx, size): """Helper function to compute the static slice start/limit/stride values.""" assert isinstance(idx, slice) start, stop, step = idx.indices(size) if (step < 0 and stop >= start) or (step > 0 and start >= stop): return 0, 0, 1, False # sliced to size zero if step > 0: return start, stop, step, False else: k = (start - stop - 1) % (-step) return stop + k + 1, start + 1, -step, True blackman = _wrap_numpy_nullary_function(onp.blackman) bartlett = _wrap_numpy_nullary_function(onp.bartlett) hamming = _wrap_numpy_nullary_function(onp.hamming) hanning = _wrap_numpy_nullary_function(onp.hanning) # TODO: lower `kaiser` via lax to allow non-constant beta values. kaiser = _wrap_numpy_nullary_function(onp.kaiser) def _gcd_cond_fn(xs): x1, x2 = xs return any(x2 != 0) def _gcd_body_fn(xs): x1, x2 = xs x1, x2 = (where(x2 != 0, x2, x1), where(x2 != 0, lax.rem(x1, x2), lax._const(x2, 0))) return (where(x1 < x2, x2, x1), where(x1 < x2, x1, x2)) @_wraps(getattr(onp, "gcd", None)) def gcd(x1, x2): if (not issubdtype(_dtype(x1), integer) or not issubdtype(_dtype(x2), integer)): raise ValueError("Arguments to gcd must be integers.") x1, x2 = _promote_dtypes(lax.abs(x1), lax.abs(x2)) x1, x2 = broadcast_arrays(x1, x2) gcd, _ = lax.while_loop(_gcd_cond_fn, _gcd_body_fn, (x1, x2)) return gcd @_wraps(getattr(onp, "lcm", None)) def lcm(x1, x2): x1, x2 = _promote_dtypes(x1, x2) d = gcd(x1, x2) return where(d == 0, lax._const(d, 0), lax.div(lax.abs(multiply(x1, x2)), d)) @_wraps(onp.cov) def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None): msg = ("jax.numpy.cov not implemented for nontrivial {}. " "Open a feature request at https://github.com/google/jax/issues !") if y is not None: raise NotImplementedError(msg.format('y')) # These next two are actually implemented, just not tested. if fweights is not None: raise NotImplementedError(msg.format('fweights')) if aweights is not None: raise NotImplementedError(msg.format('aweights')) if m.ndim > 2: raise ValueError("m has more than 2 dimensions") # same as numpy error X = array(m, ndmin=2, dtype=dtypes.canonicalize_dtype( result_type(m, onp.float64)), copy=False) if not rowvar and X.shape[0] != 1: X = X.T if X.shape[0] == 0: return onp.array([]).reshape(0, 0) if ddof is None: ddof = 1 if bias == 0 else 0 w = None if fweights is not None: if onp.ndim(fweights) > 1: raise RuntimeError("cannot handle multidimensional fweights") if onp.shape(fweights)[0] != X.shape[1]: raise RuntimeError("incompatible numbers of samples and fweights") w = asarray(fweights) if aweights is not None: if onp.ndim(aweights) > 1: raise RuntimeError("cannot handle multidimensional aweights") if onp.shape(aweights)[0] != X.shape[1]: raise RuntimeError("incompatible numbers of samples and aweights") w = aweights if w is None else w * aweights avg, w_sum = average(X, axis=1, weights=w, returned=True) w_sum = w_sum[0] if w is None: f = X.shape[1] - ddof elif ddof == 0: f = w_sum elif aweights is None: f = w_sum - ddof else: f = w_sum - ddof * sum(w * aweights) / w_sum X = X - avg[:, None] X_T = X.T if w is None else (X * w).T return true_divide(dot(X, X_T.conj()), f).squeeze() @_wraps(onp.corrcoef) def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None): c = cov(x, y, rowvar) if len(shape(c)) == 0: # scalar - this should yield nan for values (nan/nan, inf/inf, 0/0), 1 otherwise return divide(c, c) d = diag(c) stddev = sqrt(real(d)) c = divide(c, stddev[:,None]) c = divide(c, stddev[None,:]) real_part = clip(real(c), -1, 1) if iscomplexobj(c): complex_part = clip(imag(c), -1, 1) c = lax.complex(real_part, complex_part) else: c = real_part return c @_wraps(getattr(onp, "quantile", None)) def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation="linear", keepdims=False): if overwrite_input or out is not None: msg = ("jax.numpy.quantile does not support overwrite_input=True or " "out != None") raise ValueError(msg) if interpolation != "linear": raise NotImplementedError("Only interpolation='linear' is implemented") return _quantile(a, q, axis, keepdims) @partial(jit, static_argnums=(2, 3)) def _quantile(a, q, axis, keepdims): a = asarray(a) if axis is None: a = ravel(a) axis = 0 elif isinstance(axis, tuple): raise NotImplementedError("Tuple values for axis are not implemented") else: axis = _canonicalize_axis(axis, ndim(a)) q_ndim = ndim(q) if q_ndim > 1: raise ValueError("q must be have rank <= 1, got shape {}".format(shape(q))) q = asarray(q) if not issubdtype(a.dtype, floating) or not issubdtype(q.dtype, floating): msg = "q and a arguments to quantile must be of float type, got {} and {}" raise TypeError(msg.format(a.dtype, q.dtype)) # Promote q to at least float32 for precise interpolation. q = lax.convert_element_type(q, promote_types(q.dtype, float32)) a_shape = shape(a) a = lax.sort(a, dimension=axis) n = a_shape[axis] q = lax.mul(q, _constant_like(q, n - 1)) low = lax.floor(q) high = lax.add(low, _constant_like(low, 1)) high_weight = lax.sub(q, low) low_weight = lax.sub(_constant_like(high_weight, 1), high_weight) low = lax.clamp(_constant_like(low, 0), low, _constant_like(low, n - 1)) high = lax.clamp(_constant_like(high, 0), high, _constant_like(high, n - 1)) low = lax.convert_element_type(low, int64) high = lax.convert_element_type(high, int64) slice_sizes = list(a_shape) slice_sizes[axis] = 1 dnums = lax.GatherDimensionNumbers( offset_dims=tuple(range( q_ndim, len(a_shape) + q_ndim if keepdims else len(a_shape) + q_ndim - 1)), collapsed_slice_dims=() if keepdims else (axis,), start_index_map=(axis,)) low = low[..., None] high = high[..., None] low_value = lax.gather(a, low, dimension_numbers=dnums, slice_sizes=slice_sizes) high_value = lax.gather(a, high, dimension_numbers=dnums, slice_sizes=slice_sizes) if q_ndim == 1: low_weight = lax.broadcast_in_dim(low_weight, low_value.shape, broadcast_dimensions=(0,)) high_weight = lax.broadcast_in_dim(high_weight, high_value.shape, broadcast_dimensions=(0,)) return lax.convert_element_type( lax.add(lax.mul(low_value.astype(q.dtype), low_weight), lax.mul(high_value.astype(q.dtype), high_weight)), a.dtype) @_wraps(onp.percentile) def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation="linear", keepdims=False): q = true_divide(asarray(q), float32(100.0)) return quantile(a, q, axis=axis, out=out, overwrite_input=overwrite_input, interpolation=interpolation, keepdims=keepdims) @_wraps(onp.median) def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): q = 0.5 return quantile(a, q, axis=axis, out=out, overwrite_input=overwrite_input, keepdims=keepdims) def _astype(arr, dtype): lax._check_user_dtype_supported(dtype, "astype") return lax.convert_element_type(arr, dtype) ### track unimplemented functions def _not_implemented(fun): @_wraps(fun) def wrapped(*args, **kwargs): msg = "Numpy function {} not yet implemented" raise NotImplementedError(msg.format(fun)) return wrapped # Build a set of all unimplemented NumPy functions. for func in get_module_functions(onp): if func.__name__ not in globals(): globals()[func.__name__] = _not_implemented(func) ### add method and operator overloads to arraylike classes # We add operator overloads to DeviceArray and ShapedArray. These method and # operator overloads mainly just forward calls to the corresponding lax_numpy # functions, which can themselves handle instances from any of these classes. def _swap_args(f): return lambda x, y: f(y, x) def _unimplemented_setitem(self, i, x): msg = ("'{}' object does not support item assignment. JAX arrays are " "immutable; perhaps you want jax.ops.index_update or " "jax.ops.index_add instead?") raise TypeError(msg.format(type(self))) _operators = { "getitem": _rewriting_take, "setitem": _unimplemented_setitem, "neg": negative, "pos": positive, "eq": equal, "ne": not_equal, "lt": less, "le": less_equal, "gt": greater, "ge": greater_equal, "abs": abs, "add": add, "radd": add, "sub": subtract, "rsub": _swap_args(subtract), "mul": multiply, "rmul": multiply, "div": divide, "rdiv": _swap_args(divide), "truediv": true_divide, "rtruediv": _swap_args(true_divide), "floordiv": floor_divide, "rfloordiv": _swap_args(floor_divide), "divmod": divmod, "rdivmod": _swap_args(divmod), "mod": mod, "rmod": _swap_args(mod), "pow": power, "rpow": _swap_args(power), "matmul": matmul, "rmatmul": _swap_args(matmul), "and": bitwise_and, "rand": bitwise_and, "or": bitwise_or, "ror": bitwise_or, "xor": bitwise_xor, "rxor": bitwise_xor, "invert": bitwise_not, "lshift": left_shift, "rshift": right_shift, } # These numpy.ndarray methods are just refs to an equivalent numpy function _nondiff_methods = ["all", "any", "argmax", "argmin", "argpartition", "argsort", "nonzero", "searchsorted", "round"] _diff_methods = ["clip", "compress", "conj", "conjugate", "cumprod", "cumsum", "diagonal", "dot", "max", "mean", "min", "prod", "ptp", "ravel", "repeat", "sort", "squeeze", "std", "sum", "swapaxes", "take", "tile", "trace", "transpose", "var"] # Set up operator, method, and property forwarding on Tracer instances containing # ShapedArray avals by following the forwarding conventions for Tracer. # Forward operators using a single-underscore-prefix naming convention: for operator_name, function in _operators.items(): setattr(ShapedArray, "_{}".format(operator_name), staticmethod(function)) # Forward methods and properties using core.aval_method and core.aval_property: for method_name in _nondiff_methods + _diff_methods: setattr(ShapedArray, method_name, core.aval_method(globals()[method_name])) setattr(ShapedArray, "reshape", core.aval_method(_reshape_method)) setattr(ShapedArray, "flatten", core.aval_method(ravel)) setattr(ShapedArray, "T", core.aval_property(transpose)) setattr(ShapedArray, "real", core.aval_property(real)) setattr(ShapedArray, "imag", core.aval_property(imag)) setattr(ShapedArray, "astype", core.aval_method(_astype)) # Forward operators, methods, and properties on DeviceArray to lax_numpy # functions (with no Tracers involved; this forwarding is direct) for operator_name, function in _operators.items(): setattr(DeviceArray, "__{}__".format(operator_name), function) for method_name in _nondiff_methods + _diff_methods: setattr(DeviceArray, method_name, globals()[method_name]) setattr(DeviceArray, "reshape", _reshape_method) setattr(DeviceArray, "flatten", ravel) setattr(DeviceArray, "T", property(transpose)) setattr(DeviceArray, "real", property(real)) setattr(DeviceArray, "imag", property(imag)) setattr(DeviceArray, "astype", _astype) # Extra methods that are handy setattr(ShapedArray, "broadcast", core.aval_method(lax.broadcast)) setattr(ShapedArray, "broadcast_in_dim", core.aval_method(lax.broadcast_in_dim)) setattr(ShapedArray, "split", core.aval_method(split)) setattr(DeviceArray, "broadcast", lax.broadcast) setattr(DeviceArray, "broadcast_in_dim", lax.broadcast_in_dim) setattr(DeviceArray, "split", split) @jit def _unstack(x): if x.ndim == 0: raise ValueError("Argument to _unstack must be non-scalar") return [lax.index_in_dim(x, i, keepdims=False) for i in range(x.shape[0])] setattr(DeviceArray, "_unstack", _unstack)