mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 06:26:06 +00:00
[Libomptarget] Replace std::vector with llvm::SmallVector
The runtime makes some use of `std::vector` data structures. We should be able to replace these trivially with `llvm::SmallVector` instead. This should allow us to avoid heap allocations in the majority of cases now. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D130927
This commit is contained in:
parent
f9b05e6dad
commit
51bda3a0e7
@ -22,11 +22,11 @@
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "ExclusiveAccess.h"
|
||||
#include "omptarget.h"
|
||||
#include "rtl.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
||||
// Forward declarations.
|
||||
struct RTLInfoTy;
|
||||
@ -484,14 +484,14 @@ struct PluginManager {
|
||||
std::list<std::pair<__tgt_device_image, __tgt_image_info>> Images;
|
||||
|
||||
/// Devices associated with RTLs
|
||||
std::vector<std::unique_ptr<DeviceTy>> Devices;
|
||||
llvm::SmallVector<std::unique_ptr<DeviceTy>> Devices;
|
||||
std::mutex RTLsMtx; ///< For RTLs and Devices
|
||||
|
||||
/// Translation table retreived from the binary
|
||||
HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable;
|
||||
std::mutex TrlTblMtx; ///< For Translation Table
|
||||
/// Host offload entries in order of image registration
|
||||
std::vector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
|
||||
llvm::SmallVector<__tgt_offload_entry *> HostEntriesBeginRegistrationOrder;
|
||||
|
||||
/// Map from ptrs on the host to an entry in the Translation Table
|
||||
HostPtrToTableMapTy HostPtrToTableMap;
|
||||
|
@ -14,11 +14,12 @@
|
||||
#define _OMPTARGET_RTL_H
|
||||
|
||||
#include "omptarget.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Forward declarations.
|
||||
struct DeviceTy;
|
||||
@ -137,7 +138,7 @@ struct RTLsTy {
|
||||
|
||||
// Array of pointers to the detected runtime libraries that have compatible
|
||||
// binaries.
|
||||
std::vector<RTLInfoTy *> UsedRTLs;
|
||||
llvm::SmallVector<RTLInfoTy *> UsedRTLs;
|
||||
|
||||
int64_t RequiresFlags = OMP_REQ_UNDEFINED;
|
||||
|
||||
@ -172,10 +173,12 @@ struct TranslationTable {
|
||||
__tgt_target_table HostTable;
|
||||
|
||||
// Image assigned to a given device.
|
||||
std::vector<__tgt_device_image *> TargetsImages; // One image per device ID.
|
||||
llvm::SmallVector<__tgt_device_image *>
|
||||
TargetsImages; // One image per device ID.
|
||||
|
||||
// Table of entry points or NULL if it was not already computed.
|
||||
std::vector<__tgt_target_table *> TargetsTable; // One table per device ID.
|
||||
llvm::SmallVector<__tgt_target_table *>
|
||||
TargetsTable; // One table per device ID.
|
||||
};
|
||||
typedef std::map<__tgt_offload_entry *, TranslationTable>
|
||||
HostEntriesBeginToTransTableTy;
|
||||
|
@ -173,8 +173,8 @@ EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
|
||||
DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId,
|
||||
LoopTripcount);
|
||||
PM->TblMapMtx.lock();
|
||||
PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
|
||||
LoopTripcount);
|
||||
PM->Devices[DeviceId]->LoopTripCnt[__kmpc_global_thread_num(NULL)] =
|
||||
LoopTripcount;
|
||||
PM->TblMapMtx.unlock();
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,8 @@
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
using llvm::SmallVector;
|
||||
|
||||
int AsyncInfoTy::synchronize() {
|
||||
int Result = OFFLOAD_SUCCESS;
|
||||
if (AsyncInfo.Queue) {
|
||||
@ -384,11 +386,11 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
|
||||
// Construct new arrays for args_base, args, arg_sizes and arg_types
|
||||
// using the information in MapperComponents and call the corresponding
|
||||
// targetData* function using these new arrays.
|
||||
std::vector<void *> MapperArgsBase(MapperComponents.Components.size());
|
||||
std::vector<void *> MapperArgs(MapperComponents.Components.size());
|
||||
std::vector<int64_t> MapperArgSizes(MapperComponents.Components.size());
|
||||
std::vector<int64_t> MapperArgTypes(MapperComponents.Components.size());
|
||||
std::vector<void *> MapperArgNames(MapperComponents.Components.size());
|
||||
SmallVector<void *> MapperArgsBase(MapperComponents.Components.size());
|
||||
SmallVector<void *> MapperArgs(MapperComponents.Components.size());
|
||||
SmallVector<int64_t> MapperArgSizes(MapperComponents.Components.size());
|
||||
SmallVector<int64_t> MapperArgTypes(MapperComponents.Components.size());
|
||||
SmallVector<void *> MapperArgNames(MapperComponents.Components.size());
|
||||
|
||||
for (unsigned I = 0, E = MapperComponents.Components.size(); I < E; ++I) {
|
||||
auto &C = MapperComponents.Components[I];
|
||||
@ -679,7 +681,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
|
||||
int Ret;
|
||||
std::vector<PostProcessingInfo> PostProcessingPtrs;
|
||||
SmallVector<PostProcessingInfo> PostProcessingPtrs;
|
||||
void *FromMapperBase = nullptr;
|
||||
// process each input.
|
||||
for (int32_t I = ArgNum - 1; I >= 0; --I) {
|
||||
@ -883,7 +885,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// If the struct is to be deallocated, remove the shadow entry.
|
||||
if (Info.DelEntry) {
|
||||
DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first));
|
||||
Itr = Device.ShadowPtrMap.erase(Itr);
|
||||
auto OldItr = Itr;
|
||||
Itr++;
|
||||
Device.ShadowPtrMap.erase(OldItr);
|
||||
} else {
|
||||
++Itr;
|
||||
}
|
||||
@ -1171,12 +1175,12 @@ class PrivateArgumentManagerTy {
|
||||
};
|
||||
|
||||
/// A vector of target pointers for all private arguments
|
||||
std::vector<void *> TgtPtrs;
|
||||
SmallVector<void *> TgtPtrs;
|
||||
|
||||
/// A vector of information of all first-private arguments to be packed
|
||||
std::vector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
|
||||
SmallVector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
|
||||
/// Host buffer for all arguments to be packed
|
||||
std::vector<char> FirstPrivateArgBuffer;
|
||||
SmallVector<char> FirstPrivateArgBuffer;
|
||||
/// The total size of all arguments to be packed
|
||||
int64_t FirstPrivateArgSize = 0;
|
||||
|
||||
@ -1255,7 +1259,7 @@ public:
|
||||
|
||||
/// Pack first-private arguments, replace place holder pointers in \p TgtArgs,
|
||||
/// and start the transfer.
|
||||
int packAndTransfer(std::vector<void *> &TgtArgs) {
|
||||
int packAndTransfer(SmallVector<void *> &TgtArgs) {
|
||||
if (!FirstPrivateArgInfo.empty()) {
|
||||
assert(FirstPrivateArgSize != 0 &&
|
||||
"FirstPrivateArgSize is 0 but FirstPrivateArgInfo is empty");
|
||||
@ -1323,8 +1327,8 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
int32_t ArgNum, void **ArgBases, void **Args,
|
||||
int64_t *ArgSizes, int64_t *ArgTypes,
|
||||
map_var_info_t *ArgNames, void **ArgMappers,
|
||||
std::vector<void *> &TgtArgs,
|
||||
std::vector<ptrdiff_t> &TgtOffsets,
|
||||
SmallVector<void *> &TgtArgs,
|
||||
SmallVector<ptrdiff_t> &TgtOffsets,
|
||||
PrivateArgumentManagerTy &PrivateArgumentManager,
|
||||
AsyncInfoTy &AsyncInfo) {
|
||||
TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc);
|
||||
@ -1337,7 +1341,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
}
|
||||
|
||||
// List of (first-)private arrays allocated for this target region
|
||||
std::vector<int> TgtArgsPositions(ArgNum, -1);
|
||||
SmallVector<int> TgtArgsPositions(ArgNum, -1);
|
||||
|
||||
for (int32_t I = 0; I < ArgNum; ++I) {
|
||||
if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
|
||||
@ -1521,8 +1525,8 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
||||
// begin addresses, not bases. That's why we pass args and offsets as two
|
||||
// separate entities so that each plugin can do what it needs. This behavior
|
||||
// was introdued via https://reviews.llvm.org/D33028 and commit 1546d319244c.
|
||||
std::vector<void *> TgtArgs;
|
||||
std::vector<ptrdiff_t> TgtOffsets;
|
||||
SmallVector<void *> TgtArgs;
|
||||
SmallVector<ptrdiff_t> TgtOffsets;
|
||||
|
||||
PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
|
||||
|
||||
@ -1547,11 +1551,11 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
|
||||
TIMESCOPE_WITH_NAME_AND_IDENT(
|
||||
IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", Loc);
|
||||
if (IsTeamConstruct)
|
||||
Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
|
||||
Ret = Device.runTeamRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
|
||||
TgtArgs.size(), TeamNum, ThreadLimit,
|
||||
Tripcount, AsyncInfo);
|
||||
else
|
||||
Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0],
|
||||
Ret = Device.runRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
|
||||
TgtArgs.size(), AsyncInfo);
|
||||
}
|
||||
|
||||
|
@ -67,7 +67,7 @@ struct MapComponentInfoTy {
|
||||
// components are dynamically decided, so we utilize C++ STL vector
|
||||
// implementation here.
|
||||
struct MapperComponentsTy {
|
||||
std::vector<MapComponentInfoTy> Components;
|
||||
llvm::SmallVector<MapComponentInfoTy> Components;
|
||||
int32_t size() { return Components.size(); }
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user