mirror of
https://github.com/ROCm/jax.git
synced 2025-04-18 12:56:07 +00:00
[ROCM]: Generating pytest html logs from unit-tests.
This commit is contained in:
parent
a63197fed8
commit
ef7694f26a
@ -32,6 +32,6 @@ RUN git clone https://github.com/pyenv/pyenv.git /pyenv
|
||||
ENV PYENV_ROOT /pyenv
|
||||
ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
|
||||
RUN pyenv install $PYTHON_VERSION
|
||||
RUN eval "$(pyenv init -)" && pyenv local ${PYTHON_VERSION} && pip3 install --upgrade --force-reinstall setuptools pip && pip install numpy setuptools build wheel six auditwheel scipy pytest pytest-rerunfailures matplotlib absl-py flatbuffers hypothesis
|
||||
RUN eval "$(pyenv init -)" && pyenv local ${PYTHON_VERSION} && pip3 install --upgrade --force-reinstall setuptools pip && pip install numpy setuptools build wheel six auditwheel scipy pytest pytest-html pytest_html_merger pytest-rerunfailures matplotlib absl-py flatbuffers hypothesis
|
||||
|
||||
|
||||
|
@ -13,20 +13,39 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -eux
|
||||
# run test module with multi-gpu requirements. We currently do not have a way to filter tests.
|
||||
# this issue is also tracked in https://github.com/google/jax/issues/7323
|
||||
cmd=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
|
||||
echo $cmd
|
||||
set -eu
|
||||
|
||||
if [[ $cmd -gt 8 ]]; then
|
||||
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 && python3 -m pytest --reruns 3 -x tests/pmap_test.py
|
||||
elif [[ $cmd -gt 4 ]]; then
|
||||
export HIP_VISIBLE_DEVICES=0,1,2,3 && python3 -m pytest --reruns 3 -x tests/pmap_test.py
|
||||
elif [[ $cmd -gt 2 ]]; then
|
||||
export HIP_VISIBLE_DEVICES=0,1 && python3 -m pytest --reruns 3 -x tests/pmap_test.py
|
||||
else
|
||||
export HIP_VISIBLE_DEVICES=0 && python3 -m pytest --reruns 3 -x tests/pmap_test.py
|
||||
# Run the multi-GPU test modules on the requested devices, writing one pytest
# HTML report per module into ./logs, then merge the reports into a single file.
#   $1 - comma-separated device indices exported as HIP_VISIBLE_DEVICES
run_tests() {
  local visible_gpus="$1"
  local report_dir=./logs
  local module

  export HIP_VISIBLE_DEVICES="$visible_gpus"

  # Same order as before: pmap tests first, then the multi-device tests.
  for module in pmap_test multi_device_test; do
    python3 -m pytest --html="$report_dir/multi_gpu_${module}_log.html" --reruns 3 -x "tests/${module}.py"
  done

  # Combine every report found in the log directory into one HTML file.
  python3 -m pytest_html_merger -i "$report_dir/" -o "$report_dir/final_compiled_report.html"
}
|
||||
|
||||
# Check for required commands: lspci is needed by the GPU detection below.
command -v lspci > /dev/null 2>&1 || {
  echo "lspci command not found, aborting."
  exit 1
}
|
||||
|
||||
python3 -m pytest --reruns 3 -x tests/multi_device_test.py
|
||||
# The tests are launched through python3, so stop right away when it is missing.
command -v python3 > /dev/null 2>&1 || {
  echo "Python3 is not available, aborting."
  exit 1
}
|
||||
|
||||
# GPU detection and test execution.
# grep -c prints "0" but exits with status 1 when no line matches. Under
# `set -e` that status would abort the script on this assignment, before the
# count is even echoed, so it is neutralised with `|| true` and a zero count
# falls through to the single-device branch below.
gpu_count=$(lspci | grep -c 'controller.*AMD/ATI' || true)
echo "Number of AMD/ATI GPUs detected: $gpu_count"

# NOTE(review): the comparisons are strict, so e.g. exactly 8 matches selects
# only 4 devices. Presumably the lspci pattern matches more entries than there
# are usable GPUs -- confirm before changing the thresholds.
if [[ $gpu_count -gt 8 ]]; then
  run_tests "0,1,2,3,4,5,6,7"
elif [[ $gpu_count -gt 4 ]]; then
  run_tests "0,1,2,3"
elif [[ $gpu_count -gt 2 ]]; then
  run_tests "0,1"
else
  run_tests "0"
fi
|
||||
|
@ -22,6 +22,26 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
GPU_LOCK = threading.Lock()
|
||||
LAST_CODE = 0
|
||||
base_dir="./logs"
|
||||
|
||||
def extract_filename(path):
    """Return the final component of *path* with its last extension removed.

    For example ``tests/pmap_test.py`` yields ``pmap_test``.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))
    return stem
|
||||
|
||||
def generate_final_report(shell=False, env_vars=None):
    """Merge the per-module pytest HTML logs into one combined report.

    Runs ``pytest_html_merger`` with ``base_dir`` as the input directory and
    ``<base_dir>/final_compiled_report.html`` as the output file. A non-zero
    exit status of the merger is printed but does not terminate the process;
    the status is handed back to the caller instead.

    Args:
        shell: Forwarded to ``subprocess.run``.
        env_vars: Optional mapping of extra environment variables layered on
            top of the current process environment for the child process.

    Returns:
        A ``(returncode, stderr, stdout)`` tuple, with both streams decoded
        to ``str``.
    """
    # ``None`` replaces the former ``{}`` default so no dict object is shared
    # between calls; the merged mapping below is always a fresh copy.
    env = {**os.environ, **(env_vars or {})}
    cmd = [
        "pytest_html_merger",
        "-i", "{}".format(base_dir),
        "-o", "{}/final_compiled_report.html".format(base_dir),
    ]
    result = subprocess.run(cmd, shell=shell, capture_output=True, env=env)
    stderr = result.stderr.decode()
    if result.returncode != 0:
        # Report the failure only: a broken merge must not hide the outcome
        # of the test run itself.
        print("FAILED - {}".format(" ".join(cmd)))
        print(stderr)
    return result.returncode, stderr, result.stdout.decode()
|
||||
|
||||
|
||||
def run_shell_command(cmd, shell=False, env_vars={}):
|
||||
@ -69,7 +89,8 @@ def run_test(testmodule, gpu_tokens):
|
||||
"HIP_VISIBLE_DEVICES": str(target_gpu),
|
||||
"XLA_PYTHON_CLIENT_ALLOCATOR": "default",
|
||||
}
|
||||
cmd = ["python3", "-m", "pytest", "--reruns", "3", "-x", testmodule]
|
||||
testfile = extract_filename(testmodule)
|
||||
cmd = ["python3", "-m", "pytest", '--html={}/{}_log.html'.format(base_dir, testfile), "--reruns", "3", "-x", testmodule]
|
||||
return_code, stderr, stdout = run_shell_command(cmd, env_vars=env_vars)
|
||||
with GPU_LOCK:
|
||||
gpu_tokens.append(target_gpu)
|
||||
@ -102,6 +123,7 @@ def find_num_gpus():
|
||||
def main(args):
    """Run every collected test module, build the merged report, then exit.

    Args:
        args: Parsed command-line arguments; ``args.parallel`` is passed to
            ``run_parallel`` together with the collected test modules.

    The process exits with the module-level ``LAST_CODE`` value
    (presumably updated by the test runs -- verify against ``run_test``).
    """
    # Imported locally so this function does not depend on a module-level import.
    import sys

    all_testmodules = collect_testmodules()
    run_parallel(all_testmodules, args.parallel)
    generate_final_report()
    # sys.exit rather than the bare ``exit`` helper, which is injected by the
    # ``site`` module for interactive use and is absent under ``python -S``.
    sys.exit(LAST_CODE)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user