Merge pull request #27389 from vfdev-5:add-tsan-ft-ci-job-314

PiperOrigin-RevId: 743171768
This commit is contained in:
jax authors 2025-04-02 09:40:31 -07:00
commit c281907fd8
9 changed files with 296 additions and 30 deletions

View File

@ -21,6 +21,10 @@ race:_PyUnicode_InternImmortal
# Fixed in Python 3.14, but not backported to 3.13.
race_top:PyMember_GetOne
# https://github.com/python/cpython/issues/131680
# Fixed in Python 3.14, but not backported to 3.13.
# Keep the pattern directly after the colon: a space there becomes part of the
# pattern and the entry would no longer match the symbol name.
race_top:new_reference
# https://github.com/python/cpython/issues/129748
race:mi_block_set_nextx

View File

@ -0,0 +1,26 @@
# False positive: libgcc_s is not TSAN-instrumented. Multiple threads race on a
# call to __register_frame_info(), but that function appears to be correctly
# locked internally.
race:llvm::RuntimeDyldELF::registerEHFrames
# https://github.com/openxla/xla/issues/20686
race:dnnl_sgemm
# https://github.com/python/cpython/issues/128050
race:partial_vectorcall_fallback
# Likely only happens when the process is crashing.
race:dump_traceback
# https://github.com/python/cpython/issues/129748
race:mi_block_set_nextx
# https://github.com/python/cpython/issues/128130
race_top:run_eval_code_obj
# Races reported because the LAPACK and BLAS used by our scipy build are not
# TSAN-instrumented.
race:heevd_ffi
race:gesdd_ffi
race:dscal_k_
race:scal_k_
race:gemm_beta
race:gemm_oncopy

View File

@ -22,6 +22,16 @@ jobs:
image: index.docker.io/library/ubuntu@sha256:b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b # ratchet:ubuntu:24.04
strategy:
# Do not cancel the other matrix entry when one of them fails.
fail-fast: false
matrix:
# One entry per CPython version under test. Fields used by later steps:
#   python-version         - selects version-specific steps, cache keys and the
#                            "<version>-ft" value passed to build.py.
#   github_branch          - python/cpython ref that gets checked out and built.
#   requirements_lock_name - base name of the lock file under build/ (and, for
#                            3.13, of the matching .patch file).
include:
- name-prefix: "with 3.13"
python-version: "3.13"
github_branch: "3.13"
requirements_lock_name: "requirements_lock_3_13_ft"
- name-prefix: "with 3.14"
python-version: "3.14"
github_branch: "main"
requirements_lock_name: "requirements_lock_3_14_ft"
defaults:
run:
shell: bash -l {0}
@ -44,22 +54,33 @@ jobs:
with:
repository: python/cpython
path: cpython
ref: "3.13"
ref: ${{ matrix.github_branch }}
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
repository: numpy/numpy
path: numpy
submodules: true
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
if: ${{ matrix.python-version == '3.14' }}
with:
repository: scipy/scipy
path: scipy
submodules: true
- name: Restore cached TSAN CPython
- name: Get year & week number
id: get-date
run: echo "date=$(/bin/date "+%Y-%U")" >> $GITHUB_OUTPUT
shell: bash -l {0}
- name: Restore cached TSAN CPython ${{ matrix.python-version }}
id: cache-cpython-tsan-restore
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: |
./python-tsan.tgz
key: ${{ runner.os }}-cpython-tsan-${{ hashFiles('cpython/configure.ac') }}
key: ${{ runner.os }}-cpython-tsan-${{ matrix.python-version }}-${{ steps.get-date.outputs.date }}
- name: Build CPython with enabled TSAN
- name: Build TSAN CPython ${{ matrix.python-version }}
if: steps.cache-cpython-tsan-restore.outputs.cache-hit != 'true'
run: |
cd cpython
@ -73,19 +94,14 @@ jobs:
# Create archive to be used with bazel as hermetic python:
cd ${GITHUB_WORKSPACE} && tar -czpf python-tsan.tgz cpython-tsan
- name: Save TSAN CPython
- name: Save TSAN CPython ${{ matrix.python-version }}
id: cache-cpython-tsan-save
if: steps.cache-cpython-tsan-restore.outputs.cache-hit != 'true'
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: |
./python-tsan.tgz
key: ${{ runner.os }}-cpython-tsan-${{ hashFiles('cpython/configure.ac') }}
- name: Get year & week number
id: get-date
run: echo "date=$(/bin/date "+%Y-%U")" >> $GITHUB_OUTPUT
shell: bash -l {0}
key: ${{ runner.os }}-cpython-tsan-${{ matrix.python-version }}-${{ steps.get-date.outputs.date }}
- name: Restore cached TSAN Numpy
id: cache-numpy-tsan-restore
@ -93,7 +109,7 @@ jobs:
with:
path: |
./wheelhouse
key: ${{ runner.os }}-numpy-tsan-${{ hashFiles('numpy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
key: ${{ runner.os }}-numpy-tsan-${{ matrix.python-version }}-${{ hashFiles('numpy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
- name: Build TSAN Numpy wheel
if: steps.cache-numpy-tsan-restore.outputs.cache-hit != 'true'
@ -114,7 +130,8 @@ jobs:
python3 -m pip install uv~=0.5.30
# Make sure to install a compatible Cython version (master branch is best for now)
python3 -m uv pip install -r requirements/build_requirements.txt -U git+https://github.com/cython/cython
NO_CYTHON_COMPILE=true python3 -m uv pip install -U git+https://github.com/cython/cython
python3 -m uv pip install -r requirements/build_requirements.txt
CC=clang-18 CXX=clang++-18 python3 -m pip wheel --wheel-dir dist -v . --no-build-isolation -Csetup-args=-Db_sanitize=thread -Csetup-args=-Dbuildtype=debugoptimized
@ -147,7 +164,83 @@ jobs:
with:
path: |
./wheelhouse
key: ${{ runner.os }}-numpy-tsan-${{ hashFiles('numpy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
key: ${{ runner.os }}-numpy-tsan-${{ matrix.python-version }}-${{ hashFiles('numpy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
# Scipy is only checked out and built from source for the 3.14 matrix entry,
# so this restore is skipped for every other entry.
# The cache key combines the Python version, the hash of scipy/pyproject.toml
# and the year-week stamp produced by the get-date step.
- name: Restore cached Scipy
if: ${{ matrix.python-version == '3.14' }}
id: cache-scipy-restore
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: |
./wheelhouse
key: ${{ runner.os }}-scipy-${{ matrix.python-version }}-${{ hashFiles('scipy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
# Builds a Scipy wheel with the TSAN CPython interpreter and publishes it in the
# local wheelhouse index next to the Numpy wheel. Runs only for the 3.14 matrix
# entry and only when the Scipy cache was not restored.
- name: Build Scipy wheel
  if: ${{ steps.cache-scipy-restore.outputs.cache-hit != 'true' && matrix.python-version == '3.14' }}
  run: |
    # Install scipy dependencies:
    apt-get install -y gfortran libopenblas-dev liblapack-dev pkg-config --no-install-recommends

    cd scipy

    # If we restored cpython from cache, we need to get python interpreter from python-tsan.tgz
    if [ ! -d ${GITHUB_WORKSPACE}/cpython-tsan/bin/ ]; then
      echo "Extract cpython from python-tsan.tgz"
      pushd .
      ls ${GITHUB_WORKSPACE}/python-tsan.tgz
      cd ${GITHUB_WORKSPACE} && tar -xzf python-tsan.tgz
      ls ${GITHUB_WORKSPACE}/cpython-tsan/bin/
      popd
    fi

    export PATH=${GITHUB_WORKSPACE}/cpython-tsan/bin/:$PATH

    python3 -m pip install uv~=0.5.30
    # Make sure to install a compatible Cython version (master branch is best for now)
    NO_CYTHON_COMPILE=true python3 -m uv pip install -U git+https://github.com/cython/cython
    # Numpy comes from the local wheelhouse index
    python3 -m uv pip install -U --pre numpy --extra-index-url file://${GITHUB_WORKSPACE}/wheelhouse/
    python3 -m uv pip install pythran pybind11 meson-python ninja

    python3 -m uv pip list | grep -E "(numpy|pythran|cython|pybind11)"

    export CC=clang-18
    export CXX=clang++-18
    python3 -m pip wheel --wheel-dir dist -vvv . --no-build-isolation --no-deps -Csetup-args=-Dbuildtype=debugoptimized

    python3 -m uv pip list | grep -E "(numpy|pythran|cython|pybind11)"

    # Create simple index and copy the wheel
    mkdir -p ${GITHUB_WORKSPACE}/wheelhouse/scipy

    # Array assignment: "${scipy_whl_name}" expands to the first matching wheel
    scipy_whl_name=($(cd dist && ls scipy*.whl))
    if [ -z "${scipy_whl_name}" ]; then exit 1; fi

    echo "Built TSAN Scipy wheel: ${scipy_whl_name}"

    cp dist/${scipy_whl_name} ${GITHUB_WORKSPACE}/wheelhouse/scipy

    # Recreate wheelhouse index with Numpy and Scipy
    cat << EOF > ${GITHUB_WORKSPACE}/wheelhouse/index.html
    <!DOCTYPE html><html><body>
    <a href="numpy">numpy</a><br/>
    <a href="scipy">scipy</a><br/>
    </body></html>
    EOF

    cat << EOF > ${GITHUB_WORKSPACE}/wheelhouse/scipy/index.html
    <!DOCTYPE html><html><body>
    <a href="${scipy_whl_name}">${scipy_whl_name}</a><br/>
    </body></html>
    EOF
# Saves ./wheelhouse (which by now also contains the Scipy wheel copied in by
# the build step) under the same key the restore step looked up. Skipped when
# the cache was hit or when the matrix entry is not 3.14.
- name: Save Scipy wheel
id: cache-scipy-save
if: ${{ steps.cache-scipy-restore.outputs.cache-hit != 'true' && matrix.python-version == '3.14' }}
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: |
./wheelhouse
key: ${{ runner.os }}-scipy-${{ matrix.python-version }}-${{ hashFiles('scipy/pyproject.toml') }}-${{ steps.get-date.outputs.date }}
- name: Build Jax and run tests
timeout-minutes: 120
@ -164,7 +257,7 @@ jobs:
python3 -VV
python3 build/build.py build --configure_only \
--python_version=3.13-ft \
--python_version=${{ matrix.python-version }}-ft \
--bazel_options=--repo_env=HERMETIC_PYTHON_URL="file://${GITHUB_WORKSPACE}/python-tsan.tgz" \
--bazel_options=--repo_env=HERMETIC_PYTHON_SHA256=${PYTHON_SHA256} \
--bazel_options=--repo_env=HERMETIC_PYTHON_PREFIX="cpython-tsan/" \
@ -174,18 +267,32 @@ jobs:
--bazel_options=--copt=-g \
--clang_path=/usr/bin/clang-18
# Patch build/requirements_lock_3_13_ft.txt to use TSAN instrumented NumPy
sed -i "s|+--extra-index-url.*|+--extra-index-url file://${GITHUB_WORKSPACE}/wheelhouse/|" .github/workflows/requirements_lock_3_13_ft.patch
cat .github/workflows/requirements_lock_3_13_ft.patch
git apply .github/workflows/requirements_lock_3_13_ft.patch || exit 1
if [ "${{ matrix.python-version }}" == "3.13" ]; then
# Patch build/requirements_lock_3_13_ft.txt to use TSAN instrumented NumPy
# Display the content for debugging in logs
cat build/requirements_lock_3_13_ft.txt | head -15
# Check the patch
cat build/requirements_lock_3_13_ft.txt | head -15 | grep -E "(--pre|.*${GITHUB_WORKSPACE}/wheelhouse/|numpy)"
if [ "$?" == "1" ]; then echo "Could not find the patch in the requirements_lock_3_13_ft.txt"; exit 1; fi
# A match means the pinned "numpy==" requirement is still in the lock file,
# i.e. the patch did not replace it: report that and fail the step.
cat build/requirements_lock_3_13_ft.txt | grep -E "(numpy==)"
if [ "$?" == "0" ]; then echo "Found original numpy dependency in the requirements_lock_3_13_ft.txt"; exit 1; fi
sed -i "s|+--extra-index-url.*|+--extra-index-url file://${GITHUB_WORKSPACE}/wheelhouse/|" .github/workflows/${{ matrix.requirements_lock_name }}.patch
cat .github/workflows/${{ matrix.requirements_lock_name }}.patch
git apply .github/workflows/${{ matrix.requirements_lock_name }}.patch || exit 1
# Display the content for debugging in logs
cat build/${{ matrix.requirements_lock_name }}.txt | head -15
# Check the patch
cat build/${{ matrix.requirements_lock_name }}.txt | head -15 | grep -E "(--pre|.*${GITHUB_WORKSPACE}/wheelhouse/|numpy)"
if [ "$?" == "1" ]; then echo "Could not find the patch in the ${{ matrix.requirements_lock_name }}.txt"; exit 1; fi
# A match means the pinned "numpy==" requirement is still in the lock file,
# i.e. the patch did not replace it: report that and fail the step.
cat build/${{ matrix.requirements_lock_name }}.txt | grep -E "(numpy==)"
if [ "$?" == "0" ]; then echo "Found original numpy dependency in the ${{ matrix.requirements_lock_name }}.txt"; exit 1; fi
else
# Patch build/requirements_lock_3_14_ft.txt to use TSAN instrumented NumPy and Scipy
sed -i "s|--extra-index-url.*|--extra-index-url file://${GITHUB_WORKSPACE}/wheelhouse/|" build/${{ matrix.requirements_lock_name }}.txt
# We should install jpeg dev package to be able to build Pillow from source:
apt-get install -y libjpeg-dev --no-install-recommends
# Install scipy runtime dependencies (in case we restore scipy wheel from cache):
apt-get install -y libopenblas-dev liblapack-dev --no-install-recommends
fi
echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
@ -201,13 +308,18 @@ jobs:
# Check numpy version
./bazel cquery @pypi_numpy//:* | grep whl
if [ "${{ matrix.python-version }}" == "3.14" ]; then
# Check scipy version
./bazel cquery @pypi_scipy//:* | grep whl
fi
# Build JAX and run tests
./bazel test \
--test_env=JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES \
--test_env=JAX_ENABLE_X64=$JAX_ENABLE_X64 \
--test_env=JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS \
--test_env=PYTHON_GIL=0 \
--test_env=TSAN_OPTIONS=halt_on_error=1,suppressions=$PWD/.github/workflows/tsan-suppressions.txt \
--test_env=TSAN_OPTIONS=halt_on_error=1,suppressions=$PWD/.github/workflows/tsan-suppressions_${{ matrix.python-version }}.txt \
--test_env=JAX_TEST_NUM_THREADS=8 \
--test_output=errors \
--local_test_jobs=32 \

View File

@ -14,6 +14,7 @@ python_init_repositories(
"3.12": "//build:requirements_lock_3_12.txt",
"3.13": "//build:requirements_lock_3_13.txt",
"3.13-ft": "//build:requirements_lock_3_13_ft.txt",
"3.14-ft": "//build:requirements_lock_3_14_ft.txt",
},
local_wheel_inclusion_list = [
"jax-*",

View File

@ -496,6 +496,7 @@ async def main():
if args.use_clang:
clang_path = args.clang_path or utils.get_clang_path_or_exit()
clang_major_version = utils.get_clang_major_version(clang_path)
clangpp_path = utils.get_clangpp_path(clang_path)
logging.debug(
"Using Clang as the compiler, clang path: %s, clang version: %s",
clang_path,
@ -505,6 +506,7 @@ async def main():
# Use double quotes around clang path to avoid path issues on Windows.
wheel_build_command_base.append(f"--action_env=CLANG_COMPILER_PATH=\"{clang_path}\"")
wheel_build_command_base.append(f"--repo_env=CC=\"{clang_path}\"")
wheel_build_command_base.append(f"--repo_env=CXX=\"{clangpp_path}\"")
wheel_build_command_base.append(f"--repo_env=BAZEL_COMPILER=\"{clang_path}\"")
if clang_major_version >= 16:

View File

@ -658,7 +658,7 @@ zipp==3.21.0 \
--hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \
--hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931
# via etils
# python 3.13t can compile 0.23.0
# python 3.13t can't compile 0.23.0
# due to https://github.com/indygreg/python-zstandard/issues/231
# zstandard==0.23.0 \
# --hash=sha256:034b88913ecc1b097f528e42b539453fa82c3557e414b3de9d5632c80439a473 \

View File

@ -0,0 +1,107 @@
--pre
--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
numpy
--pre
--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
scipy
absl-py==2.1.0
attrs==24.3.0
auditwheel==6.2.0
build==1.2.2.post1
cloudpickle==3.1.1 # version 3.1.0 leads to recursion error
colorama==0.4.6
contourpy==1.3.1
cycler==0.12.1
etils[epath,epy]==1.11.0
execnet==2.1.1
filelock==3.16.1
flatbuffers==24.12.23
fonttools==4.56.0
fsspec==2024.12.0
hypothesis==6.123.9
importlib-resources==6.5.2
iniconfig==2.0.0
kiwisolver==1.4.8
markdown-it-py==3.0.0
matplotlib==3.10.1
mdurl==0.1.2
ml-dtypes==0.5.1
mpmath==1.3.0
nvidia-cublas-cu12==12.8.3.14 ; sys_platform == "linux"
nvidia-cuda-cupti-cu12==12.8.57 ; sys_platform == "linux"
nvidia-cuda-nvcc-cu12==12.8.61 ; sys_platform == "linux"
nvidia-cuda-runtime-cu12==12.8.57 ; sys_platform == "linux"
nvidia-cudnn-cu12==9.7.1.26 ; sys_platform == "linux"
nvidia-cufft-cu12==11.3.3.41 ; sys_platform == "linux"
nvidia-cusolver-cu12==11.7.2.55 ; sys_platform == "linux"
nvidia-cusparse-cu12==12.5.7.53 ; sys_platform == "linux"
nvidia-nccl-cu12==2.25.1 ; sys_platform == "linux"
nvidia-nvjitlink-cu12==12.8.61 ; sys_platform == "linux"
opt-einsum==3.4.0
packaging==24.2
pillow==11.1.0
pluggy==1.5.0
portpicker==1.6.0
psutil==6.1.1
pyelftools==0.31
pygments==2.19.1
pyparsing==3.2.2 # version 3.2.1 fails with SyntaxError(originally SyntaxWarning): 'return' in a 'finally' block in pyparsing/core.py", line 5716
pyproject-hooks==1.2.0
pytest==8.3.4
pytest-xdist==3.6.1
python-dateutil==2.9.0.post0
rich==13.9.4
six==1.17.0
sortedcontainers==2.4.0
typing-extensions==4.12.2
wheel==0.45.1
zipp==3.21.0
# python 3.14t can't compile 0.23.0
# due to https://github.com/indygreg/python-zstandard/issues/231
# zstandard==0.23.0
setuptools==70.3.0

View File

@ -202,6 +202,20 @@ def get_clang_major_version(clang_path):
return major_version
def get_clangpp_path(clang_path):
  """Returns the path of the clang++ binary located next to `clang_path`.

  The C++ driver name is derived from the file name of `clang_path` by
  replacing "clang" with "clang++" (e.g. "clang-18" -> "clang++-18"). A file
  name that already contains "clang++" is used unchanged.

  Args:
    clang_path: path (string or path-like) of the clang executable.

  Returns:
    The clang++ path as a string.

  Raises:
    FileNotFoundError: if the derived clang++ path does not exist.
  """
  clang_path = pathlib.Path(clang_path)
  # Use the full file name, not `.stem`: `.stem` drops everything after the
  # last dot, which breaks names such as "clang.exe" or "clang-18.1".
  clang_exec_name = clang_path.name
  clangpp_exec_name = clang_exec_name
  if "clang++" not in clang_exec_name:
    clangpp_exec_name = clang_exec_name.replace("clang", "clang++")
  clangpp_path = clang_path.parent / clangpp_exec_name
  if not clangpp_path.exists():
    raise FileNotFoundError(
        f"Failed to get clang++ path from clang path: '{clang_path!s}'. "
        f"Tried the path: '{clangpp_path!s}'."
    )
  return str(clangpp_path)
def get_gcc_major_version(gcc_path: str):
gcc_version_proc = subprocess.run(
[gcc_path, "-dumpversion"],

View File

@ -76,9 +76,9 @@ _CPU_PYPI_WHEEL_DEPS = [
"@pypi_jaxlib//:pkg",
]
# TODO(vam): remove this once zstandard builds against Python 3.13
# TODO(vam): remove this once zstandard builds against Python >3.13
def get_zstandard():
if HERMETIC_PYTHON_VERSION == "3.13" or HERMETIC_PYTHON_VERSION == "3.13-ft":
if HERMETIC_PYTHON_VERSION in ("3.13", "3.13-ft", "3.14", "3.14-ft"):
return []
return ["@pypi_zstandard//:pkg"]