[OpenMP] Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY

* Add KMP_CPU_EQUAL and KMP_CPU_ISEMPTY to affinity mask API

* Add printout of leader to hardware thread dump

* Allow OMP_PLACES to restrict fullMask

This change fixes an issue with the OMP_PLACES=resource(#) syntax.
Before this change, specifying the number of resources did NOT change
the default number of threads created by the runtime; e.g.,
OMP_PLACES=cores(2) would still create __kmp_avail_proc threads. After
this change, the fullMask and __kmp_avail_proc are modified if
necessary so that the final place list dictates which resources are
available and, thus, how many threads are created by default.
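
As a quick illustration, here is a minimal sketch (not part of this
commit; assumes a hypothetical 8-core machine without SMT). Run under
OMP_PLACES='cores(2)', it should now report a default team of 2
threads where it previously reported 8; the place count is 2 in both
cases:

  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    // The place list now restricts the full affinity mask, so the
    // default team size reflects the 2 selected cores.
    printf("places: %d\n", omp_get_num_places());
    #pragma omp parallel
    #pragma omp single
    printf("default threads: %d\n", omp_get_num_threads());
    return 0;
  }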

* Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY

For OMP_PLACES, two new features are added:
  1) OMP_PLACES=cores:<attribute> where <attribute> is either
     intel_atom, intel_core, or eff# where # is a number from 0 to
     (number of core efficiencies - 1). This syntax also supports the
     optional (#) selection of the number of resources.
  2) OMP_PLACES=core_types|core_effs (or core_efficiencies), which
     creates one place per core type (or core efficiency) available
     on the machine; see the sketch after this list.
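
A few illustrative settings enabled by this syntax, with a minimal
sketch (not part of this commit) for inspecting the resulting place
list; intel_atom and intel_core assume an x86 hybrid CPU:

  // Illustrative settings (hypothetical hybrid machine):
  //   OMP_PLACES='cores:intel_atom'  one place per efficiency core
  //   OMP_PLACES='cores:eff0(4)'     four cores of efficiency 0
  //   OMP_PLACES=core_types          one place per core type
  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    int n = omp_get_num_places();
    printf("num places: %d\n", n);
    for (int p = 0; p < n; ++p)
      printf("place %d: %d procs\n", p, omp_get_place_num_procs(p));
    return 0;
  }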

For KMP_AFFINITY, the granularity setting is expanded with two new
keywords: core_type and core_eff (or core_efficiency). These set the
granularity to include all cores with a particular core type (or
efficiency); e.g., KMP_AFFINITY=granularity=core_type,compact creates
threads that can float among all the cores of a single core type.
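
As a sanity check, the following Linux-only sketch (not part of this
commit; sched_getaffinity is a glibc extension) can be run with
KMP_AFFINITY=granularity=core_type,compact; each thread should then
report a mask spanning the cores of exactly one core type:

  #define _GNU_SOURCE // for sched_getaffinity (glibc)
  #include <omp.h>
  #include <sched.h>
  #include <stdio.h>

  int main(void) {
    #pragma omp parallel
    {
      cpu_set_t set;
      CPU_ZERO(&set);
      if (sched_getaffinity(0, sizeof(set), &set) == 0) {
        #pragma omp critical
        {
          printf("thread %d may run on CPUs:", omp_get_thread_num());
          for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu)
            if (CPU_ISSET(cpu, &set))
              printf(" %d", cpu);
          printf("\n");
        }
      }
    }
    return 0;
  }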

Differential Revision: https://reviews.llvm.org/D154547
Author: Jonathan Peyton
Date:   2023-07-05 12:35:57 -05:00
Parent: 4e429fd2a7
Commit: b34c7d8c8e

7 changed files with 473 additions and 128 deletions

@@ -480,6 +480,8 @@ AffHWSubsetAllFiltered "KMP_HW_SUBSET ignored: all hardware resources woul
AffHWSubsetAttrsNonHybrid "KMP_HW_SUBSET ignored: Too many attributes specified. This machine is not a hybrid architecture."
AffHWSubsetIgnoringAttr "KMP_HW_SUBSET: ignoring %1$s attribute. This machine is not a hybrid architecture."
TargetMemNotAvailable "Target memory not available, will use default allocator."
AffIgnoringNonHybrid "%1$s ignored: This machine is not a hybrid architecture. Using \"%2$s\" instead."
AffIgnoringNotAvailable "%1$s ignored: %2$s is not available. Using \"%3$s\" instead."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-

@@ -690,10 +690,12 @@ extern size_t __kmp_affin_mask_size;
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
#define KMP_CPU_ISEMPTY(mask) (mask)->empty()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
#define KMP_CPU_EQUAL(dest, src) (dest)->is_equal(src)
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
@@ -730,6 +732,8 @@ public:
virtual void clear(int i) {}
// Zero out entire mask
virtual void zero() {}
// Check whether mask is empty
virtual bool empty() const { return true; }
// Copy src into this mask
virtual void copy(const Mask *src) {}
// this &= rhs
@@ -738,6 +742,8 @@ public:
virtual void bitwise_or(const Mask *rhs) {}
// this = ~this
virtual void bitwise_not() {}
// this == rhs
virtual bool is_equal(const Mask *rhs) const { return false; }
// API for iterating over an affinity mask
// for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
virtual int begin() const { return 0; }
@@ -866,7 +872,10 @@ typedef struct kmp_affinity_flags_t {
unsigned respect : 2;
unsigned reset : 1;
unsigned initialized : 1;
unsigned reserved : 25;
unsigned core_types_gran : 1;
unsigned core_effs_gran : 1;
unsigned omp_places : 1;
unsigned reserved : 22;
} kmp_affinity_flags_t;
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
@@ -895,6 +904,7 @@ typedef struct kmp_affinity_t {
enum affinity_type type;
kmp_hw_t gran;
int gran_levels;
kmp_affinity_attrs_t core_attr_gran;
int compact;
int offset;
kmp_affinity_flags_t flags;
@@ -909,9 +919,11 @@ typedef struct kmp_affinity_t {
#define KMP_AFFINITY_INIT(env) \
{ \
nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
{TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
nullptr, nullptr, nullptr, 0, nullptr, env \
nullptr, affinity_default, KMP_HW_UNKNOWN, -1, KMP_AFFINITY_ATTRS_UNKNOWN, \
0, 0, \
{TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE, \
FALSE, FALSE, FALSE}, \
0, nullptr, nullptr, nullptr, 0, nullptr, env \
}
extern enum affinity_top_method __kmp_affinity_top_method;

@@ -38,6 +38,43 @@ static hierarchy_info machine_hierarchy;
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
#if KMP_AFFINITY_SUPPORTED
// Helper class to see if place lists further restrict the fullMask
class kmp_full_mask_modifier_t {
kmp_affin_mask_t *mask;
public:
kmp_full_mask_modifier_t() {
KMP_CPU_ALLOC(mask);
KMP_CPU_ZERO(mask);
}
~kmp_full_mask_modifier_t() {
KMP_CPU_FREE(mask);
mask = nullptr;
}
void include(const kmp_affin_mask_t *other) { KMP_CPU_UNION(mask, other); }
// If the new full mask is different from the current full mask,
// then switch them. Returns true if full mask was affected, false otherwise.
bool restrict_to_mask() {
// See if the new mask further restricts or changes the full mask
if (KMP_CPU_EQUAL(__kmp_affin_fullMask, mask) || KMP_CPU_ISEMPTY(mask))
return false;
return __kmp_topology->restrict_to_mask(mask);
}
};
static inline const char *
__kmp_get_affinity_env_var(const kmp_affinity_t &affinity,
bool for_binding = false) {
if (affinity.flags.omp_places) {
if (for_binding)
return "OMP_PROC_BIND";
return "OMP_PLACES";
}
return affinity.env_var;
}
#endif // KMP_AFFINITY_SUPPORTED
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
kmp_uint32 depth;
// The test below is true if affinity is available, but set to "none". Need to
@@ -207,6 +244,8 @@ void kmp_hw_thread_t::print() const {
if (attrs.is_core_eff_valid())
printf(" (eff=%d)", attrs.get_core_eff());
}
if (leader)
printf(" (leader)");
printf("\n");
}
@@ -797,7 +836,40 @@ void kmp_topology_t::print(const char *env_var) const {
#if KMP_AFFINITY_SUPPORTED
void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
const char *env_var = affinity.env_var;
const char *env_var = __kmp_get_affinity_env_var(affinity);
// If requested hybrid CPU attributes for granularity (either OMP_PLACES or
// KMP_AFFINITY), but none exist, then reset granularity and have below method
// select a granularity and warn user.
if (!__kmp_is_hybrid_cpu()) {
if (affinity.core_attr_gran.valid) {
// OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores
// instead
KMP_AFF_WARNING(
affinity, AffIgnoringNonHybrid, env_var,
__kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
affinity.gran = KMP_HW_CORE;
affinity.gran_levels = -1;
affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
} else if (affinity.flags.core_types_gran ||
affinity.flags.core_effs_gran) {
// OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead
if (affinity.flags.omp_places) {
KMP_AFF_WARNING(
affinity, AffIgnoringNonHybrid, env_var,
__kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
} else {
// KMP_AFFINITY=granularity=core_type|core_eff,...
KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
"Intel(R) Hybrid Technology core attribute",
__kmp_hw_get_catalog_string(KMP_HW_CORE));
}
affinity.gran = KMP_HW_CORE;
affinity.gran_levels = -1;
affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
}
}
// Set the number of affinity granularity levels
if (affinity.gran_levels < 0) {
kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
@@ -937,6 +1009,7 @@ public:
}
};
#if KMP_AFFINITY_SUPPORTED
static kmp_str_buf_t *
__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
bool plural) {
@@ -952,6 +1025,41 @@ __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
return buf;
}
bool kmp_topology_t::restrict_to_mask(const kmp_affin_mask_t *mask) {
// Apply the filter
bool affected;
int new_index = 0;
for (int i = 0; i < num_hw_threads; ++i) {
int os_id = hw_threads[i].os_id;
if (KMP_CPU_ISSET(os_id, mask)) {
if (i != new_index)
hw_threads[new_index] = hw_threads[i];
new_index++;
} else {
KMP_CPU_CLR(os_id, __kmp_affin_fullMask);
__kmp_avail_proc--;
}
}
KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
affected = (num_hw_threads != new_index);
num_hw_threads = new_index;
// Post hardware subset canonicalization
if (affected) {
_gather_enumeration_information();
_discover_uniformity();
_set_globals();
_set_last_level_cache();
#if KMP_OS_WINDOWS
// Copy filtered full mask if topology has single processor group
if (__kmp_num_proc_groups <= 1)
#endif
__kmp_affin_origMask->copy(__kmp_affin_fullMask);
}
return affected;
}
// Apply the KMP_HW_SUBSET envirable to the topology
// Returns true if KMP_HW_SUBSET filtered any processors
// otherwise, returns false
@@ -1156,7 +1264,9 @@ bool kmp_topology_t::filter_hw_subset() {
// Determine which hardware threads should be filtered.
int num_filtered = 0;
bool *filtered = (bool *)__kmp_allocate(sizeof(bool) * num_hw_threads);
kmp_affin_mask_t *filtered_mask;
KMP_CPU_ALLOC(filtered_mask);
KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask);
for (int i = 0; i < num_hw_threads; ++i) {
kmp_hw_thread_t &hw_thread = hw_threads[i];
// Update type_sub_id
@@ -1218,51 +1328,35 @@ bool kmp_topology_t::filter_hw_subset() {
}
}
// Collect filtering information
filtered[i] = should_be_filtered;
if (should_be_filtered)
if (should_be_filtered) {
KMP_CPU_CLR(hw_thread.os_id, filtered_mask);
num_filtered++;
}
}
// One last check that we shouldn't allow filtering entire machine
if (num_filtered == num_hw_threads) {
KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered);
__kmp_free(filtered);
return false;
}
// Apply the filter
int new_index = 0;
for (int i = 0; i < num_hw_threads; ++i) {
if (!filtered[i]) {
if (i != new_index)
hw_threads[new_index] = hw_threads[i];
new_index++;
} else {
#if KMP_AFFINITY_SUPPORTED
KMP_CPU_CLR(hw_threads[i].os_id, __kmp_affin_fullMask);
#endif
__kmp_avail_proc--;
}
}
KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
num_hw_threads = new_index;
// Post hardware subset canonicalization
_gather_enumeration_information();
_discover_uniformity();
_set_globals();
_set_last_level_cache();
__kmp_free(filtered);
restrict_to_mask(filtered_mask);
return true;
}
bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
bool kmp_topology_t::is_close(int hwt1, int hwt2,
const kmp_affinity_t &stgs) const {
int hw_level = stgs.gran_levels;
if (hw_level >= depth)
return true;
bool retval = true;
const kmp_hw_thread_t &t1 = hw_threads[hwt1];
const kmp_hw_thread_t &t2 = hw_threads[hwt2];
if (stgs.flags.core_types_gran)
return t1.attrs.get_core_type() == t2.attrs.get_core_type();
if (stgs.flags.core_effs_gran)
return t1.attrs.get_core_eff() == t2.attrs.get_core_eff();
for (int i = 0; i < (depth - hw_level); ++i) {
if (t1.ids[i] != t2.ids[i])
return false;
@@ -1272,8 +1366,6 @@ bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
////////////////////////////////////////////////////////////////////////////////
#if KMP_AFFINITY_SUPPORTED
bool KMPAffinity::picked_api = false;
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
@@ -3353,17 +3445,25 @@ restart_radix_check:
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
template <typename FindNextFunctionType>
static void __kmp_create_os_id_masks(unsigned *numUnique,
kmp_affinity_t &affinity) {
kmp_affinity_t &affinity,
FindNextFunctionType find_next) {
// First form a table of affinity masks in order of OS thread id.
int maxOsId;
int i;
int numAddrs = __kmp_topology->get_num_hw_threads();
int depth = __kmp_topology->get_depth();
const char *env_var = affinity.env_var;
const char *env_var = __kmp_get_affinity_env_var(affinity);
KMP_ASSERT(numAddrs);
KMP_ASSERT(depth);
i = find_next(-1);
// If could not find HW thread location with attributes, then return and
// fallback to increment find_next and disregard core attributes.
if (i >= numAddrs)
return;
maxOsId = 0;
for (i = numAddrs - 1;; --i) {
int osId = __kmp_topology->at(i).os_id;
@@ -3393,19 +3493,22 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
kmp_affin_mask_t *sum;
KMP_CPU_ALLOC_ON_STACK(sum);
KMP_CPU_ZERO(sum);
KMP_CPU_SET(__kmp_topology->at(0).os_id, sum);
for (i = 1; i < numAddrs; i++) {
i = j = leader = find_next(-1);
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
kmp_full_mask_modifier_t full_mask;
for (i = find_next(i); i < numAddrs; i = find_next(i)) {
// If this thread is sufficiently close to the leader (within the
// granularity setting), then set the bit for this os thread in the
// affinity mask for this group, and go on to the next thread.
if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) {
if (__kmp_topology->is_close(leader, i, affinity)) {
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
continue;
}
// For every thread in this group, copy the mask to the thread's entry in
// the OS Id mask table. Mark the first address as a leader.
for (; j < i; j++) {
for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
@@ -3416,22 +3519,29 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
// Start a new mask.
leader = i;
full_mask.include(sum);
KMP_CPU_ZERO(sum);
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
}
// For every thread in last group, copy the mask to the thread's
// entry in the OS Id mask table.
for (; j < i; j++) {
for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
KMP_CPU_COPY(mask, sum);
__kmp_topology->at(j).leader = (j == leader);
}
full_mask.include(sum);
unique++;
KMP_CPU_FREE_FROM_STACK(sum);
// See if the OS Id mask table further restricts or changes the full mask
if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
__kmp_topology->print(env_var);
}
*numUnique = unique;
}
@@ -4134,8 +4244,11 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
// Create the OS proc to hardware thread map
for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread)
__kmp_osid_to_hwthread_map[__kmp_topology->at(hw_thread).os_id] = hw_thread;
for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) {
int os_id = __kmp_topology->at(hw_thread).os_id;
if (KMP_CPU_ISSET(os_id, __kmp_affin_fullMask))
__kmp_osid_to_hwthread_map[os_id] = hw_thread;
}
for (unsigned i = 0; i < affinity.num_masks; ++i) {
kmp_affinity_ids_t &ids = affinity.ids[i];
@@ -4145,16 +4258,26 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
}
// Called when __kmp_topology is ready
static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
// Initialize data dependent on __kmp_topology
if (__kmp_topology) {
machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
__kmp_affinity_get_topology_info(affinity);
}
}
// Create a one element mask array (set of places) which only contains the
// initial process's affinity mask
static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
KMP_ASSERT(affinity.type == affinity_none);
KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
affinity.num_masks = 1;
KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
KMP_CPU_COPY(dest, __kmp_affin_fullMask);
__kmp_affinity_get_topology_info(affinity);
__kmp_aux_affinity_initialize_other_data(affinity);
}
static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
@@ -4383,13 +4506,6 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
if (verbose)
__kmp_topology->print(env_var);
bool filtered = __kmp_topology->filter_hw_subset();
if (filtered) {
#if KMP_OS_WINDOWS
// Copy filtered full mask if topology has single processor group
if (__kmp_num_proc_groups <= 1)
#endif
__kmp_affin_origMask->copy(__kmp_affin_fullMask);
}
if (filtered && verbose)
__kmp_topology->print("KMP_HW_SUBSET");
return success;
@@ -4398,7 +4514,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
bool is_regular_affinity = (&affinity == &__kmp_affinity);
bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
const char *env_var = affinity.env_var;
const char *env_var = __kmp_get_affinity_env_var(affinity);
if (affinity.flags.initialized) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
@@ -4437,7 +4553,36 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
// Create the table of masks, indexed by thread Id.
unsigned numUnique;
__kmp_create_os_id_masks(&numUnique, affinity);
int numAddrs = __kmp_topology->get_num_hw_threads();
// If OMP_PLACES=cores:<attribute> specified, then attempt
// to make OS Id mask table using those attributes
if (affinity.core_attr_gran.valid) {
__kmp_create_os_id_masks(&numUnique, affinity, [&](int idx) {
KMP_ASSERT(idx >= -1);
for (int i = idx + 1; i < numAddrs; ++i)
if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran))
return i;
return numAddrs;
});
if (!affinity.os_id_masks) {
const char *core_attribute;
if (affinity.core_attr_gran.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF)
core_attribute = "core_efficiency";
else
core_attribute = "core_type";
KMP_AFF_WARNING(affinity, AffIgnoringNotAvailable, env_var,
core_attribute,
__kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true))
}
}
// If core attributes did not work, or none were specified,
// then make OS Id mask table using typical incremental way.
if (!affinity.os_id_masks) {
__kmp_create_os_id_masks(&numUnique, affinity, [](int idx) {
KMP_ASSERT(idx >= -1);
return idx + 1;
});
}
if (affinity.gran_levels == 0) {
KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
}
@@ -4578,6 +4723,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
int i;
unsigned j;
int num_hw_threads = __kmp_topology->get_num_hw_threads();
kmp_full_mask_modifier_t full_mask;
for (i = 0, j = 0; i < num_hw_threads; i++) {
if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
continue;
@@ -4588,11 +4734,16 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j);
KMP_ASSERT(KMP_CPU_ISSET(osId, src));
KMP_CPU_COPY(dest, src);
full_mask.include(src);
if (++j >= affinity.num_masks) {
break;
}
}
KMP_DEBUG_ASSERT(j == affinity.num_masks);
// See if the places list further restricts or changes the full mask
if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
__kmp_topology->print(env_var);
}
}
// Sort the topology back using ids
__kmp_topology->sort_ids();
@@ -4601,7 +4752,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
default:
KMP_ASSERT2(0, "Unexpected affinity setting");
}
__kmp_affinity_get_topology_info(affinity);
__kmp_aux_affinity_initialize_other_data(affinity);
affinity.flags.initialized = TRUE;
}
@@ -4722,7 +4873,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
affinity = &__kmp_hh_affinity;
else
affinity = &__kmp_affinity;
env_var = affinity->env_var;
env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
if ((affinity->type == affinity_none) ||

@@ -34,6 +34,7 @@ public:
bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
void clear(int i) override { hwloc_bitmap_clr(mask, i); }
void zero() override { hwloc_bitmap_zero(mask); }
bool empty() const override { return hwloc_bitmap_iszero(mask); }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
hwloc_bitmap_copy(mask, convert->mask);
@@ -47,6 +48,10 @@ public:
hwloc_bitmap_or(mask, mask, convert->mask);
}
void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
bool is_equal(const KMPAffinity::Mask *rhs) const override {
const Mask *convert = static_cast<const Mask *>(rhs);
return hwloc_bitmap_isequal(mask, convert->mask);
}
int begin() const override { return hwloc_bitmap_first(mask); }
int end() const override { return -1; }
int next(int previous) const override {
@@ -319,6 +324,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = (mask_t)0;
}
bool empty() const override {
mask_size_type e = get_num_mask_types();
for (mask_size_type i = 0; i < e; ++i)
if (mask[i] != (mask_t)0)
return false;
return true;
}
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
mask_size_type e = get_num_mask_types();
@@ -342,6 +354,14 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = ~(mask[i]);
}
bool is_equal(const KMPAffinity::Mask *rhs) const override {
const Mask *convert = static_cast<const Mask *>(rhs);
mask_size_type e = get_num_mask_types();
for (mask_size_type i = 0; i < e; ++i)
if (mask[i] != convert->mask[i])
return false;
return true;
}
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -459,6 +479,12 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = 0;
}
bool empty() const override {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
if (mask[i])
return false;
return true;
}
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
for (int i = 0; i < __kmp_num_proc_groups; ++i)
@@ -478,6 +504,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = ~(mask[i]);
}
bool is_equal(const KMPAffinity::Mask *rhs) const override {
const Mask *convert = static_cast<const Mask *>(rhs);
for (int i = 0; i < __kmp_num_proc_groups; ++i)
if (mask[i] != convert->mask[i])
return false;
return true;
}
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -679,6 +712,21 @@ struct kmp_hw_attr_t {
}
return false;
}
#if KMP_AFFINITY_SUPPORTED
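// Check whether this attribute satisfies a requested affinity
// attribute: both unset is a trivial match; when both are valid, the
// requested core type (or efficiency) must match; a validity mismatch
// is no match.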
bool contains(const kmp_affinity_attrs_t &attr) const {
if (!valid && !attr.valid)
return true;
if (valid && attr.valid) {
if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
return (is_core_type_valid() &&
(get_core_type() == (kmp_hw_core_type_t)attr.core_type));
if (attr.core_eff != UNKNOWN_CORE_EFF)
return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
return true;
}
return false;
}
#endif // KMP_AFFINITY_SUPPORTED
bool operator==(const kmp_hw_attr_t &rhs) const {
return (rhs.valid == valid && rhs.core_eff == core_eff &&
rhs.core_type == core_type);
@@ -834,13 +882,18 @@ public:
#if KMP_AFFINITY_SUPPORTED
// Set the granularity for affinity settings
void set_granularity(kmp_affinity_t &stgs) const;
#endif
bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
bool restrict_to_mask(const kmp_affin_mask_t *mask);
bool filter_hw_subset();
bool is_close(int hwt1, int hwt2, int level) const;
#endif
bool is_uniform() const { return flags.uniform; }
// Tell whether a type is a valid type in the topology
// returns KMP_HW_UNKNOWN when there is no equivalent type
kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
if (type == KMP_HW_UNKNOWN)
return KMP_HW_UNKNOWN;
return equivalent[type];
}
// Set type1 = type2
void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);

@@ -2005,6 +2005,21 @@ static void __kmp_stg_print_foreign_threads_threadprivate(kmp_str_buf_t *buffer,
// -----------------------------------------------------------------------------
// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD
static inline const char *
__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
switch (type) {
case KMP_HW_CORE_TYPE_UNKNOWN:
return "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
case KMP_HW_CORE_TYPE_ATOM:
return "intel_atom";
case KMP_HW_CORE_TYPE_CORE:
return "intel_core";
#endif
}
return "unknown";
}
#if KMP_AFFINITY_SUPPORTED
// Parse the proc id list. Return TRUE if successful, FALSE otherwise.
static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env,
@@ -2359,14 +2374,32 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
buf = next;
// Try any hardware topology type for granularity
KMP_FOREACH_HW_TYPE(type) {
const char *name = __kmp_hw_get_keyword(type);
if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
set_gran(type, -1);
buf = next;
set = true;
break;
// Have to try core_type and core_efficiency matches first since "core"
// will register as core granularity with "extra chars"
if (__kmp_match_str("core_type", buf, CCAST(const char **, &next))) {
set_gran(KMP_HW_CORE, -1);
out_affinity->flags.core_types_gran = 1;
buf = next;
set = true;
} else if (__kmp_match_str("core_efficiency", buf,
CCAST(const char **, &next)) ||
__kmp_match_str("core_eff", buf,
CCAST(const char **, &next))) {
set_gran(KMP_HW_CORE, -1);
out_affinity->flags.core_effs_gran = 1;
buf = next;
set = true;
}
if (!set) {
// Try any hardware topology type for granularity
KMP_FOREACH_HW_TYPE(type) {
const char *name = __kmp_hw_get_keyword(type);
if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
set_gran(type, -1);
buf = next;
set = true;
break;
}
}
}
if (!set) {
@@ -2626,8 +2659,15 @@ static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
__kmp_str_buf_print(buffer, "%s,", "noreset");
}
}
__kmp_str_buf_print(buffer, "granularity=%s,",
__kmp_hw_get_keyword(affinity.gran, false));
__kmp_str_buf_print(buffer, "granularity=");
if (affinity.flags.core_types_gran)
__kmp_str_buf_print(buffer, "core_type,");
else if (affinity.flags.core_effs_gran) {
__kmp_str_buf_print(buffer, "core_eff,");
} else {
__kmp_str_buf_print(
buffer, "%s,", __kmp_hw_get_keyword(affinity.gran, /*plural=*/false));
}
}
if (!KMP_AFFINITY_CAPABLE()) {
__kmp_str_buf_print(buffer, "%s", "disabled");
@@ -2745,11 +2785,7 @@ signed := + signed
signed := - signed
-----------------------------------------------------------------------------*/
// Warning to issue for syntax error during parsing of OMP_PLACES
static inline void __kmp_omp_places_syntax_warn(const char *var) {
KMP_WARNING(SyntaxErrorUsing, var, "\"cores\"");
}
// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_subplace_list(const char *var, const char **scan) {
const char *next;
@@ -2761,7 +2797,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
//
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2780,7 +2815,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2788,7 +2822,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
// Read count parameter
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2807,7 +2840,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2829,7 +2861,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
}
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2848,13 +2879,12 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
(*scan)++; // skip ','
continue;
}
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place(const char *var, const char **scan) {
const char *next;
@@ -2866,7 +2896,6 @@ static int __kmp_parse_place(const char *var, const char **scan) {
return FALSE;
}
if (**scan != '}') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip '}'
@@ -2880,12 +2909,12 @@ static int __kmp_parse_place(const char *var, const char **scan) {
KMP_ASSERT(proc >= 0);
*scan = next;
} else {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place_list(const char *var, const char *env,
char **place_list) {
const char *scan = env;
@@ -2908,7 +2937,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2916,7 +2944,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
// Read count parameter
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2935,7 +2962,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2957,7 +2983,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
}
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2977,7 +3002,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
@@ -2991,6 +3015,22 @@ static int __kmp_parse_place_list(const char *var, const char *env,
return TRUE;
}
static inline void __kmp_places_set(enum affinity_type type, kmp_hw_t kind) {
__kmp_affinity.type = type;
__kmp_affinity.gran = kind;
__kmp_affinity.flags.dups = FALSE;
__kmp_affinity.flags.omp_places = TRUE;
}
static void __kmp_places_syntax_error_fallback(char const *name,
kmp_hw_t kind) {
const char *str = __kmp_hw_get_catalog_string(kind, /*plural=*/true);
KMP_WARNING(SyntaxErrorUsing, name, str);
__kmp_places_set(affinity_compact, kind);
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
}
static void __kmp_stg_parse_places(char const *name, char const *value,
void *data) {
struct kmp_place_t {
@@ -3001,7 +3041,6 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
bool set = false;
const char *scan = value;
const char *next = scan;
const char *kind = "\"threads\"";
kmp_place_t std_places[] = {{"threads", KMP_HW_THREAD},
{"cores", KMP_HW_CORE},
{"numa_domains", KMP_HW_NUMA},
@@ -3020,10 +3059,54 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
const kmp_place_t &place = std_places[i];
if (__kmp_match_str(place.name, scan, &next)) {
scan = next;
__kmp_affinity.type = affinity_compact;
__kmp_affinity.gran = place.type;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_set(affinity_compact, place.type);
set = true;
// Parse core attribute if it exists
if (KMP_HW_MAX_NUM_CORE_TYPES > 1) {
SKIP_WS(scan);
if (*scan == ':') {
if (place.type != KMP_HW_CORE) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
scan++; // skip ':'
SKIP_WS(scan);
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
if (__kmp_match_str("intel_core", scan, &next)) {
__kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_CORE;
__kmp_affinity.core_attr_gran.valid = 1;
scan = next;
} else if (__kmp_match_str("intel_atom", scan, &next)) {
__kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_ATOM;
__kmp_affinity.core_attr_gran.valid = 1;
scan = next;
} else
#endif
if (__kmp_match_str("eff", scan, &next)) {
int eff;
if (!isdigit(*next)) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
scan = next;
SKIP_DIGITS(next);
eff = __kmp_str_to_int(scan, *next);
if (eff < 0) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
if (eff >= KMP_HW_MAX_NUM_CORE_EFFS)
eff = KMP_HW_MAX_NUM_CORE_EFFS - 1;
__kmp_affinity.core_attr_gran.core_eff = eff;
__kmp_affinity.core_attr_gran.valid = 1;
scan = next;
}
if (!__kmp_affinity.core_attr_gran.valid) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
}
}
break;
}
}
@@ -3035,36 +3118,56 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
continue;
if (__kmp_match_str(name, scan, &next)) {
scan = next;
__kmp_affinity.type = affinity_compact;
__kmp_affinity.gran = type;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_set(affinity_compact, type);
set = true;
break;
}
}
}
// Implementation choices for OMP_PLACES based on core attributes
if (!set) {
if (__kmp_match_str("core_types", scan, &next)) {
scan = next;
if (*scan != '\0') {
KMP_WARNING(ParseExtraCharsWarn, name, scan);
}
__kmp_places_set(affinity_compact, KMP_HW_CORE);
__kmp_affinity.flags.core_types_gran = 1;
set = true;
} else if (__kmp_match_str("core_effs", scan, &next) ||
__kmp_match_str("core_efficiencies", scan, &next)) {
scan = next;
if (*scan != '\0') {
KMP_WARNING(ParseExtraCharsWarn, name, scan);
}
__kmp_places_set(affinity_compact, KMP_HW_CORE);
__kmp_affinity.flags.core_effs_gran = 1;
set = true;
}
}
// Explicit place list
if (!set) {
if (__kmp_affinity.proclist != NULL) {
KMP_INTERNAL_FREE((void *)__kmp_affinity.proclist);
__kmp_affinity.proclist = NULL;
}
if (__kmp_parse_place_list(name, value, &__kmp_affinity.proclist)) {
__kmp_affinity.type = affinity_explicit;
__kmp_affinity.gran = KMP_HW_THREAD;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_set(affinity_explicit, KMP_HW_THREAD);
} else {
// Syntax error fallback
__kmp_affinity.type = affinity_compact;
__kmp_affinity.gran = KMP_HW_CORE;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_syntax_error_fallback(name, KMP_HW_CORE);
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
}
return;
}
kmp_hw_t gran = __kmp_affinity.gran;
if (__kmp_affinity.gran != KMP_HW_UNKNOWN) {
kind = __kmp_hw_get_keyword(__kmp_affinity.gran);
gran = __kmp_affinity.gran;
} else {
gran = KMP_HW_CORE;
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
@@ -3078,7 +3181,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
// Parse option count parameter in parentheses
if (*scan != '(') {
KMP_WARNING(SyntaxErrorUsing, name, kind);
__kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip '('
@@ -3092,7 +3195,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
SKIP_WS(scan);
if (*scan != ')') {
KMP_WARNING(SyntaxErrorUsing, name, kind);
__kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip ')'
@@ -3135,12 +3238,37 @@ static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name,
num = 0;
}
if (gran != KMP_HW_UNKNOWN) {
const char *name = __kmp_hw_get_keyword(gran, true);
if (num > 0) {
__kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num);
} else {
__kmp_str_buf_print(buffer, "='%s'\n", name);
// If core_types or core_effs, just print and return
if (__kmp_affinity.flags.core_types_gran) {
__kmp_str_buf_print(buffer, "='%s'\n", "core_types");
return;
}
if (__kmp_affinity.flags.core_effs_gran) {
__kmp_str_buf_print(buffer, "='%s'\n", "core_effs");
return;
}
// threads, cores, sockets, cores:<attribute>, etc.
const char *name = __kmp_hw_get_keyword(gran, true);
__kmp_str_buf_print(buffer, "='%s", name);
// Add core attributes if it exists
if (__kmp_affinity.core_attr_gran.valid) {
kmp_hw_core_type_t ct =
(kmp_hw_core_type_t)__kmp_affinity.core_attr_gran.core_type;
int eff = __kmp_affinity.core_attr_gran.core_eff;
if (ct != KMP_HW_CORE_TYPE_UNKNOWN) {
const char *ct_name = __kmp_hw_get_core_type_keyword(ct);
__kmp_str_buf_print(buffer, ":%s", name, ct_name);
} else if (eff >= 0 && eff < KMP_HW_MAX_NUM_CORE_EFFS) {
__kmp_str_buf_print(buffer, ":eff%d", name, eff);
}
}
// Add the '(#)' part if it exists
if (num > 0)
__kmp_str_buf_print(buffer, "(%d)", num);
__kmp_str_buf_print(buffer, "'\n");
} else {
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
}
@@ -5139,21 +5267,6 @@ err:
return;
}
static inline const char *
__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
switch (type) {
case KMP_HW_CORE_TYPE_UNKNOWN:
return "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
case KMP_HW_CORE_TYPE_ATOM:
return "intel_atom";
case KMP_HW_CORE_TYPE_CORE:
return "intel_core";
#endif
}
return "unknown";
}
static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
void *data) {
kmp_str_buf_t buf;

@@ -1242,6 +1242,7 @@ static void __kmp_atfork_child(void) {
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
__kmp_affin_fullMask = nullptr;
__kmp_affin_origMask = nullptr;
__kmp_topology = nullptr;
#endif // KMP_AFFINITY_SUPPORTED
#if KMP_USE_MONITOR

@@ -1,7 +1,20 @@
// RUN: %libomp-compile && env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck %s
// CHECK-DAG: Effective settings
// CHECK: OMP_PLACES=
// CHECK-SAME: cores
// RUN: %libomp-compile
// RUN: env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck --check-prefix=INVALID %s
// RUN: env KMP_SETTINGS=1 OMP_PLACES='sockets(' %libomp-run 2>&1 | FileCheck --check-prefix=SOCKETS %s
// RUN: env KMP_SETTINGS=1 OMP_PLACES='threads()' %libomp-run 2>&1 | FileCheck --check-prefix=THREADS %s
//
// INVALID-DAG: Effective settings
// INVALID: OMP_PLACES=
// INVALID-SAME: cores
//
// SOCKETS-DAG: Effective settings
// SOCKETS: OMP_PLACES=
// SOCKETS-SAME: sockets
//
// THREADS-DAG: Effective settings
// THREADS: OMP_PLACES=
// THREADS-SAME: threads
//
// REQUIRES: affinity
#include "omp_testsuite.h"