mirror of
https://github.com/ROCm/jax.git
synced 2025-04-14 10:56:06 +00:00

Move the common jaxlib/gpu_plugin_extension into jaxlib/gpu/. Cleanup only, no functional changes intended. PiperOrigin-RevId: 738183402
90 lines
2.9 KiB
C++
90 lines
2.9 KiB
C++
/* Copyright 2024 The JAX Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
|
|
|
|
#include <cstdint>
|
|
#include <string>
|
|
|
|
#include "nanobind/nanobind.h"
|
|
#include "absl/log/log.h"
|
|
#include "absl/strings/str_cat.h"
|
|
#include "rocm/include/hip/hip_runtime.h"
|
|
#include "jaxlib/gpu/gpu_plugin_extension.h"
|
|
|
|
// Short alias for the nanobind namespace; used below as `nb::arg`.
namespace nb = nanobind;
|
|
|
|
namespace xla {
|
|
namespace {
|
|
// Returns a human-readable name for a HIP runtime error code.
//
// Each code listed in the switch maps to a fixed string. Any other value is
// rendered as "hipError_t(<numeric value>)".
std::string ToString(hipError_t result) {
// Expands to a `case` label for the enumerator hipError<name> that returns the
// string literal "HIP_ERROR_<name>". The macro is local to this function and
// is undefined again before the function ends.
#define OSTREAM_ROCM_ERROR(name) \
  case hipError##name:           \
    return "HIP_ERROR_" #name;

  switch (result) {
    OSTREAM_ROCM_ERROR(InvalidValue)
    OSTREAM_ROCM_ERROR(OutOfMemory)
    OSTREAM_ROCM_ERROR(NotInitialized)
    OSTREAM_ROCM_ERROR(Deinitialized)
    OSTREAM_ROCM_ERROR(NoDevice)
    OSTREAM_ROCM_ERROR(InvalidDevice)
    OSTREAM_ROCM_ERROR(InvalidImage)
    OSTREAM_ROCM_ERROR(InvalidContext)
    OSTREAM_ROCM_ERROR(InvalidHandle)
    OSTREAM_ROCM_ERROR(NotFound)
    OSTREAM_ROCM_ERROR(NotReady)
    OSTREAM_ROCM_ERROR(NoBinaryForGpu)

    // Encountered an uncorrectable ECC error during execution.
    OSTREAM_ROCM_ERROR(ECCNotCorrectable)

    // NOTE(review): the next two codes are matched by raw numeric value and
    // use a "ROCM_ERROR_" prefix instead of "HIP_ERROR_". Both are kept
    // exactly as they were, since log consumers may depend on the text.

    // Load/store on an invalid address. Must reboot all context.
    case 700:
      return "ROCM_ERROR_ILLEGAL_ADDRESS";
    // Passed too many / wrong arguments, too many threads for register count.
    case 701:
      return "ROCM_ERROR_LAUNCH_OUT_OF_RESOURCES";

    OSTREAM_ROCM_ERROR(ContextAlreadyInUse)
    OSTREAM_ROCM_ERROR(PeerAccessUnsupported)
    OSTREAM_ROCM_ERROR(Unknown)  // Unknown internal error to ROCM.
    default:
      // Fallback for codes without a dedicated label: show the integer value.
      return absl::StrCat("hipError_t(", static_cast<int>(result), ")");
  }

#undef OSTREAM_ROCM_ERROR
}
|
|
} // namespace
|
|
|
|
// Defines the `rocm_plugin_extension` Python module.
//
// The module first receives whatever BuildGpuPluginExtension(m) registers
// (that function is declared outside this file), and then gains one
// ROCm-specific function, `get_device_ordinal`.
NB_MODULE(rocm_plugin_extension, m) {
  BuildGpuPluginExtension(m);

  // get_device_ordinal(data_value) -> int
  //
  // `data_value` is a pointer passed as an integer. A value of 0 yields
  // ordinal 0 without querying HIP. Otherwise the ordinal is obtained from
  // hipPointerGetAttribute; if that call does not return hipSuccess, a FATAL
  // log message containing the pointer and the error name is emitted.
  m.def(
      "get_device_ordinal",
      [](std::intptr_t data_value) -> int {
        // Null pointer: report device 0 and skip the HIP query.
        if (data_value == 0) {
          return 0;
        }

        void* const raw_ptr = reinterpret_cast<void*>(data_value);
        int ordinal = 0;
        const hipError_t status = hipPointerGetAttribute(
            static_cast<void*>(&ordinal), HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
            reinterpret_cast<hipDeviceptr_t>(raw_ptr));

        if (status != hipSuccess) {
          LOG(FATAL) << "Not able to get the device_ordinal for ptr: "
                     << raw_ptr << ". Error: " << ToString(status);
        }
        return ordinal;
      },
      nb::arg("data_value"));
}
|
|
} // namespace xla
|