[OpenMP] Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY

* Add KMP_CPU_EQUAL and KMP_CPU_ISEMPTY to affinity mask API

* Add printout of leader to hardware thread dump

* Allow OMP_PLACES to restrict fullMask

This change fixes an issue with the OMP_PLACES=resource(#) syntax.
Before this change, specifying the number of resources did NOT change
the default number of threads created by the runtime; e.g.,
OMP_PLACES=cores(2) would still create __kmp_avail_proc threads. After
this change, the fullMask and __kmp_avail_proc are modified if
necessary so that the final place list dictates which resources are
available and, thus, how many threads are created by default.
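
As a quick illustration, here is a minimal sketch (not part of this
commit; assumes a hypothetical 8-core machine without SMT). Run under
OMP_PLACES='cores(2)', it should now report a default team of 2
threads where it previously reported 8; the place count is 2 in both
cases:

  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    // The place list now restricts the full affinity mask, so the
    // default team size reflects the 2 selected cores.
    printf("places: %d\n", omp_get_num_places());
    #pragma omp parallel
    #pragma omp single
    printf("default threads: %d\n", omp_get_num_threads());
    return 0;
  }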

* Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY

For OMP_PLACES, two new features are added:
  1) OMP_PLACES=cores:<attribute> where <attribute> is either
     intel_atom, intel_core, or eff# where # is a number from 0 to
     (number of core efficiencies - 1). This syntax also supports the
     optional (#) selection of the number of resources.
  2) OMP_PLACES=core_types|core_effs (or core_efficiencies), which
     creates one place per core type (or core efficiency) available
     on the machine; see the sketch after this list.
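
A few illustrative settings enabled by this syntax, with a minimal
sketch (not part of this commit) for inspecting the resulting place
list; intel_atom and intel_core assume an x86 hybrid CPU:

  // Illustrative settings (hypothetical hybrid machine):
  //   OMP_PLACES='cores:intel_atom'  one place per efficiency core
  //   OMP_PLACES='cores:eff0(4)'     four cores of efficiency 0
  //   OMP_PLACES=core_types          one place per core type
  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    int n = omp_get_num_places();
    printf("num places: %d\n", n);
    for (int p = 0; p < n; ++p)
      printf("place %d: %d procs\n", p, omp_get_place_num_procs(p));
    return 0;
  }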

For KMP_AFFINITY, the granularity setting is expanded with two new
keywords: core_type and core_eff (or core_efficiency). These set the
granularity to include all cores with a particular core type (or
efficiency); e.g., KMP_AFFINITY=granularity=core_type,compact creates
threads that can float among all the cores of a single core type.
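
As a sanity check, the following Linux-only sketch (not part of this
commit; sched_getaffinity is a glibc extension) can be run with
KMP_AFFINITY=granularity=core_type,compact; each thread should then
report a mask spanning the cores of exactly one core type:

  #define _GNU_SOURCE // for sched_getaffinity (glibc)
  #include <omp.h>
  #include <sched.h>
  #include <stdio.h>

  int main(void) {
    #pragma omp parallel
    {
      cpu_set_t set;
      CPU_ZERO(&set);
      if (sched_getaffinity(0, sizeof(set), &set) == 0) {
        #pragma omp critical
        {
          printf("thread %d may run on CPUs:", omp_get_thread_num());
          for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu)
            if (CPU_ISSET(cpu, &set))
              printf(" %d", cpu);
          printf("\n");
        }
      }
    }
    return 0;
  }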

Differential Revision: https://reviews.llvm.org/D154547
Author: Jonathan Peyton
Date:   2023-07-05 12:35:57 -05:00
Parent: 4e429fd2a7
Commit: b34c7d8c8e

7 changed files with 473 additions and 128 deletions

@@ -480,6 +480,8 @@ AffHWSubsetAllFiltered "KMP_HW_SUBSET ignored: all hardware resources woul
AffHWSubsetAttrsNonHybrid "KMP_HW_SUBSET ignored: Too many attributes specified. This machine is not a hybrid architecture."
AffHWSubsetIgnoringAttr "KMP_HW_SUBSET: ignoring %1$s attribute. This machine is not a hybrid architecture."
TargetMemNotAvailable "Target memory not available, will use default allocator."
AffIgnoringNonHybrid "%1$s ignored: This machine is not a hybrid architecture. Using \"%2$s\" instead."
AffIgnoringNotAvailable "%1$s ignored: %2$s is not available. Using \"%3$s\" instead."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-

@@ -690,10 +690,12 @@ extern size_t __kmp_affin_mask_size;
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
#define KMP_CPU_ISEMPTY(mask) (mask)->empty()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
#define KMP_CPU_EQUAL(dest, src) (dest)->is_equal(src)
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
@@ -730,6 +732,8 @@ public:
virtual void clear(int i) {}
// Zero out entire mask
virtual void zero() {}
// Check whether mask is empty
virtual bool empty() const { return true; }
// Copy src into this mask
virtual void copy(const Mask *src) {}
// this &= rhs
@@ -738,6 +742,8 @@ public:
virtual void bitwise_or(const Mask *rhs) {}
// this = ~this
virtual void bitwise_not() {}
// this == rhs
virtual bool is_equal(const Mask *rhs) const { return false; }
// API for iterating over an affinity mask
// for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
virtual int begin() const { return 0; }
@@ -866,7 +872,10 @@ typedef struct kmp_affinity_flags_t {
unsigned respect : 2;
unsigned reset : 1;
unsigned initialized : 1;
unsigned reserved : 25;
unsigned core_types_gran : 1;
unsigned core_effs_gran : 1;
unsigned omp_places : 1;
unsigned reserved : 22;
} kmp_affinity_flags_t;
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
@@ -895,6 +904,7 @@ typedef struct kmp_affinity_t {
enum affinity_type type;
kmp_hw_t gran;
int gran_levels;
kmp_affinity_attrs_t core_attr_gran;
int compact;
int offset;
kmp_affinity_flags_t flags;
@@ -909,9 +919,11 @@ typedef struct kmp_affinity_t {
#define KMP_AFFINITY_INIT(env) \
{ \
nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
{TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
nullptr, nullptr, nullptr, 0, nullptr, env \
nullptr, affinity_default, KMP_HW_UNKNOWN, -1, KMP_AFFINITY_ATTRS_UNKNOWN, \
0, 0, \
{TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE, \
FALSE, FALSE, FALSE}, \
0, nullptr, nullptr, nullptr, 0, nullptr, env \
}
extern enum affinity_top_method __kmp_affinity_top_method;

@@ -38,6 +38,43 @@ static hierarchy_info machine_hierarchy;
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
#if KMP_AFFINITY_SUPPORTED
// Helper class to see if place lists further restrict the fullMask
class kmp_full_mask_modifier_t {
kmp_affin_mask_t *mask;
public:
kmp_full_mask_modifier_t() {
KMP_CPU_ALLOC(mask);
KMP_CPU_ZERO(mask);
}
~kmp_full_mask_modifier_t() {
KMP_CPU_FREE(mask);
mask = nullptr;
}
void include(const kmp_affin_mask_t *other) { KMP_CPU_UNION(mask, other); }
// If the new full mask is different from the current full mask,
// then switch them. Returns true if full mask was affected, false otherwise.
bool restrict_to_mask() {
// See if the new mask further restricts or changes the full mask
if (KMP_CPU_EQUAL(__kmp_affin_fullMask, mask) || KMP_CPU_ISEMPTY(mask))
return false;
return __kmp_topology->restrict_to_mask(mask);
}
};
static inline const char *
__kmp_get_affinity_env_var(const kmp_affinity_t &affinity,
bool for_binding = false) {
if (affinity.flags.omp_places) {
if (for_binding)
return "OMP_PROC_BIND";
return "OMP_PLACES";
}
return affinity.env_var;
}
#endif // KMP_AFFINITY_SUPPORTED
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
kmp_uint32 depth;
// The test below is true if affinity is available, but set to "none". Need to
@@ -207,6 +244,8 @@ void kmp_hw_thread_t::print() const {
if (attrs.is_core_eff_valid())
printf(" (eff=%d)", attrs.get_core_eff());
}
if (leader)
printf(" (leader)");
printf("\n");
}
@@ -797,7 +836,40 @@ void kmp_topology_t::print(const char *env_var) const {
#if KMP_AFFINITY_SUPPORTED
void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
const char *env_var = affinity.env_var;
const char *env_var = __kmp_get_affinity_env_var(affinity);
// If requested hybrid CPU attributes for granularity (either OMP_PLACES or
// KMP_AFFINITY), but none exist, then reset granularity and have below method
// select a granularity and warn user.
if (!__kmp_is_hybrid_cpu()) {
if (affinity.core_attr_gran.valid) {
// OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores
// instead
KMP_AFF_WARNING(
affinity, AffIgnoringNonHybrid, env_var,
__kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
affinity.gran = KMP_HW_CORE;
affinity.gran_levels = -1;
affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
} else if (affinity.flags.core_types_gran ||
affinity.flags.core_effs_gran) {
// OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead
if (affinity.flags.omp_places) {
KMP_AFF_WARNING(
affinity, AffIgnoringNonHybrid, env_var,
__kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
} else {
// KMP_AFFINITY=granularity=core_type|core_eff,...
KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
"Intel(R) Hybrid Technology core attribute",
__kmp_hw_get_catalog_string(KMP_HW_CORE));
}
affinity.gran = KMP_HW_CORE;
affinity.gran_levels = -1;
affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
}
}
// Set the number of affinity granularity levels
if (affinity.gran_levels < 0) {
kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
@@ -937,6 +1009,7 @@ public:
}
};
#if KMP_AFFINITY_SUPPORTED
static kmp_str_buf_t *
__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
bool plural) {
@@ -952,6 +1025,41 @@ __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
return buf;
}
bool kmp_topology_t::restrict_to_mask(const kmp_affin_mask_t *mask) {
// Apply the filter
bool affected;
int new_index = 0;
for (int i = 0; i < num_hw_threads; ++i) {
int os_id = hw_threads[i].os_id;
if (KMP_CPU_ISSET(os_id, mask)) {
if (i != new_index)
hw_threads[new_index] = hw_threads[i];
new_index++;
} else {
KMP_CPU_CLR(os_id, __kmp_affin_fullMask);
__kmp_avail_proc--;
}
}
KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
affected = (num_hw_threads != new_index);
num_hw_threads = new_index;
// Post hardware subset canonicalization
if (affected) {
_gather_enumeration_information();
_discover_uniformity();
_set_globals();
_set_last_level_cache();
#if KMP_OS_WINDOWS
// Copy filtered full mask if topology has single processor group
if (__kmp_num_proc_groups <= 1)
#endif
__kmp_affin_origMask->copy(__kmp_affin_fullMask);
}
return affected;
}
// Apply the KMP_HW_SUBSET envirable to the topology
// Returns true if KMP_HW_SUBSET filtered any processors
// otherwise, returns false
@@ -1156,7 +1264,9 @@ bool kmp_topology_t::filter_hw_subset() {
// Determine which hardware threads should be filtered.
int num_filtered = 0;
bool *filtered = (bool *)__kmp_allocate(sizeof(bool) * num_hw_threads);
kmp_affin_mask_t *filtered_mask;
KMP_CPU_ALLOC(filtered_mask);
KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask);
for (int i = 0; i < num_hw_threads; ++i) {
kmp_hw_thread_t &hw_thread = hw_threads[i];
// Update type_sub_id
@@ -1218,51 +1328,35 @@ bool kmp_topology_t::filter_hw_subset() {
}
}
// Collect filtering information
filtered[i] = should_be_filtered;
if (should_be_filtered)
if (should_be_filtered) {
KMP_CPU_CLR(hw_thread.os_id, filtered_mask);
num_filtered++;
}
}
// One last check that we shouldn't allow filtering entire machine
if (num_filtered == num_hw_threads) {
KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered);
__kmp_free(filtered);
return false;
}
// Apply the filter
int new_index = 0;
for (int i = 0; i < num_hw_threads; ++i) {
if (!filtered[i]) {
if (i != new_index)
hw_threads[new_index] = hw_threads[i];
new_index++;
} else {
#if KMP_AFFINITY_SUPPORTED
KMP_CPU_CLR(hw_threads[i].os_id, __kmp_affin_fullMask);
#endif
__kmp_avail_proc--;
}
}
KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
num_hw_threads = new_index;
// Post hardware subset canonicalization
_gather_enumeration_information();
_discover_uniformity();
_set_globals();
_set_last_level_cache();
__kmp_free(filtered);
restrict_to_mask(filtered_mask);
return true;
}
bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
bool kmp_topology_t::is_close(int hwt1, int hwt2,
const kmp_affinity_t &stgs) const {
int hw_level = stgs.gran_levels;
if (hw_level >= depth)
return true;
bool retval = true;
const kmp_hw_thread_t &t1 = hw_threads[hwt1];
const kmp_hw_thread_t &t2 = hw_threads[hwt2];
if (stgs.flags.core_types_gran)
return t1.attrs.get_core_type() == t2.attrs.get_core_type();
if (stgs.flags.core_effs_gran)
return t1.attrs.get_core_eff() == t2.attrs.get_core_eff();
for (int i = 0; i < (depth - hw_level); ++i) {
if (t1.ids[i] != t2.ids[i])
return false;
@@ -1272,8 +1366,6 @@ bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
////////////////////////////////////////////////////////////////////////////////
#if KMP_AFFINITY_SUPPORTED
bool KMPAffinity::picked_api = false;
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
@@ -3353,17 +3445,25 @@ restart_radix_check:
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
template <typename FindNextFunctionType>
static void __kmp_create_os_id_masks(unsigned *numUnique,
kmp_affinity_t &affinity) {
kmp_affinity_t &affinity,
FindNextFunctionType find_next) {
// First form a table of affinity masks in order of OS thread id.
int maxOsId;
int i;
int numAddrs = __kmp_topology->get_num_hw_threads();
int depth = __kmp_topology->get_depth();
const char *env_var = affinity.env_var;
const char *env_var = __kmp_get_affinity_env_var(affinity);
KMP_ASSERT(numAddrs);
KMP_ASSERT(depth);
i = find_next(-1);
// If could not find HW thread location with attributes, then return and
// fallback to increment find_next and disregard core attributes.
if (i >= numAddrs)
return;
maxOsId = 0;
for (i = numAddrs - 1;; --i) {
int osId = __kmp_topology->at(i).os_id;
@@ -3393,19 +3493,22 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
kmp_affin_mask_t *sum;
KMP_CPU_ALLOC_ON_STACK(sum);
KMP_CPU_ZERO(sum);
KMP_CPU_SET(__kmp_topology->at(0).os_id, sum);
for (i = 1; i < numAddrs; i++) {
i = j = leader = find_next(-1);
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
kmp_full_mask_modifier_t full_mask;
for (i = find_next(i); i < numAddrs; i = find_next(i)) {
// If this thread is sufficiently close to the leader (within the
// granularity setting), then set the bit for this os thread in the
// affinity mask for this group, and go on to the next thread.
if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) {
if (__kmp_topology->is_close(leader, i, affinity)) {
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
continue;
}
// For every thread in this group, copy the mask to the thread's entry in
// the OS Id mask table. Mark the first address as a leader.
for (; j < i; j++) {
for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
@@ -3416,22 +3519,29 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
// Start a new mask.
leader = i;
full_mask.include(sum);
KMP_CPU_ZERO(sum);
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
}
// For every thread in last group, copy the mask to the thread's
// entry in the OS Id mask table.
for (; j < i; j++) {
for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
KMP_CPU_COPY(mask, sum);
__kmp_topology->at(j).leader = (j == leader);
}
full_mask.include(sum);
unique++;
KMP_CPU_FREE_FROM_STACK(sum);
// See if the OS Id mask table further restricts or changes the full mask
if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
__kmp_topology->print(env_var);
}
*numUnique = unique;
}
@@ -4134,8 +4244,11 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
// Create the OS proc to hardware thread map
for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread)
__kmp_osid_to_hwthread_map[__kmp_topology->at(hw_thread).os_id] = hw_thread;
for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) {
int os_id = __kmp_topology->at(hw_thread).os_id;
if (KMP_CPU_ISSET(os_id, __kmp_affin_fullMask))
__kmp_osid_to_hwthread_map[os_id] = hw_thread;
}
for (unsigned i = 0; i < affinity.num_masks; ++i) {
kmp_affinity_ids_t &ids = affinity.ids[i];
@@ -4145,16 +4258,26 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
}
// Called when __kmp_topology is ready
static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
// Initialize data dependent on __kmp_topology
if (__kmp_topology) {
machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
__kmp_affinity_get_topology_info(affinity);
}
}
// Create a one element mask array (set of places) which only contains the
// initial process's affinity mask
static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
KMP_ASSERT(affinity.type == affinity_none);
KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
affinity.num_masks = 1;
KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
KMP_CPU_COPY(dest, __kmp_affin_fullMask);
__kmp_affinity_get_topology_info(affinity);
__kmp_aux_affinity_initialize_other_data(affinity);
}
static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
@@ -4383,13 +4506,6 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
if (verbose)
__kmp_topology->print(env_var);
bool filtered = __kmp_topology->filter_hw_subset();
if (filtered) {
#if KMP_OS_WINDOWS
// Copy filtered full mask if topology has single processor group
if (__kmp_num_proc_groups <= 1)
#endif
__kmp_affin_origMask->copy(__kmp_affin_fullMask);
}
if (filtered && verbose)
__kmp_topology->print("KMP_HW_SUBSET");
return success;
@@ -4398,7 +4514,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
bool is_regular_affinity = (&affinity == &__kmp_affinity);
bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
const char *env_var = affinity.env_var;
const char *env_var = __kmp_get_affinity_env_var(affinity);
if (affinity.flags.initialized) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
@@ -4437,7 +4553,36 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
// Create the table of masks, indexed by thread Id.
unsigned numUnique;
__kmp_create_os_id_masks(&numUnique, affinity);
int numAddrs = __kmp_topology->get_num_hw_threads();
// If OMP_PLACES=cores:<attribute> specified, then attempt
// to make OS Id mask table using those attributes
if (affinity.core_attr_gran.valid) {
__kmp_create_os_id_masks(&numUnique, affinity, [&](int idx) {
KMP_ASSERT(idx >= -1);
for (int i = idx + 1; i < numAddrs; ++i)
if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran))
return i;
return numAddrs;
});
if (!affinity.os_id_masks) {
const char *core_attribute;
if (affinity.core_attr_gran.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF)
core_attribute = "core_efficiency";
else
core_attribute = "core_type";
KMP_AFF_WARNING(affinity, AffIgnoringNotAvailable, env_var,
core_attribute,
__kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true))
}
}
// If core attributes did not work, or none were specified,
// then make OS Id mask table using typical incremental way.
if (!affinity.os_id_masks) {
__kmp_create_os_id_masks(&numUnique, affinity, [](int idx) {
KMP_ASSERT(idx >= -1);
return idx + 1;
});
}
if (affinity.gran_levels == 0) {
KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
}
@@ -4578,6 +4723,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
int i;
unsigned j;
int num_hw_threads = __kmp_topology->get_num_hw_threads();
kmp_full_mask_modifier_t full_mask;
for (i = 0, j = 0; i < num_hw_threads; i++) {
if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
continue;
@@ -4588,11 +4734,16 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j);
KMP_ASSERT(KMP_CPU_ISSET(osId, src));
KMP_CPU_COPY(dest, src);
full_mask.include(src);
if (++j >= affinity.num_masks) {
break;
}
}
KMP_DEBUG_ASSERT(j == affinity.num_masks);
// See if the places list further restricts or changes the full mask
if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
__kmp_topology->print(env_var);
}
}
// Sort the topology back using ids
__kmp_topology->sort_ids();
@@ -4601,7 +4752,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
default:
KMP_ASSERT2(0, "Unexpected affinity setting");
}
__kmp_affinity_get_topology_info(affinity);
__kmp_aux_affinity_initialize_other_data(affinity);
affinity.flags.initialized = TRUE;
}
@@ -4722,7 +4873,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
affinity = &__kmp_hh_affinity;
else
affinity = &__kmp_affinity;
env_var = affinity->env_var;
env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
if ((affinity->type == affinity_none) ||

@@ -34,6 +34,7 @@ public:
bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
void clear(int i) override { hwloc_bitmap_clr(mask, i); }
void zero() override { hwloc_bitmap_zero(mask); }
bool empty() const override { return hwloc_bitmap_iszero(mask); }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
hwloc_bitmap_copy(mask, convert->mask);
@@ -47,6 +48,10 @@ public:
hwloc_bitmap_or(mask, mask, convert->mask);
}
void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
bool is_equal(const KMPAffinity::Mask *rhs) const override {
const Mask *convert = static_cast<const Mask *>(rhs);
return hwloc_bitmap_isequal(mask, convert->mask);
}
int begin() const override { return hwloc_bitmap_first(mask); }
int end() const override { return -1; }
int next(int previous) const override {
@@ -319,6 +324,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = (mask_t)0;
}
bool empty() const override {
mask_size_type e = get_num_mask_types();
for (mask_size_type i = 0; i < e; ++i)
if (mask[i] != (mask_t)0)
return false;
return true;
}
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
mask_size_type e = get_num_mask_types();
@@ -342,6 +354,14 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = ~(mask[i]);
}
bool is_equal(const KMPAffinity::Mask *rhs) const override {
const Mask *convert = static_cast<const Mask *>(rhs);
mask_size_type e = get_num_mask_types();
for (mask_size_type i = 0; i < e; ++i)
if (mask[i] != convert->mask[i])
return false;
return true;
}
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -459,6 +479,12 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = 0;
}
bool empty() const override {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
if (mask[i])
return false;
return true;
}
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
for (int i = 0; i < __kmp_num_proc_groups; ++i)
@@ -478,6 +504,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = ~(mask[i]);
}
bool is_equal(const KMPAffinity::Mask *rhs) const override {
const Mask *convert = static_cast<const Mask *>(rhs);
for (int i = 0; i < __kmp_num_proc_groups; ++i)
if (mask[i] != convert->mask[i])
return false;
return true;
}
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -679,6 +712,21 @@ struct kmp_hw_attr_t {
}
return false;
}
#if KMP_AFFINITY_SUPPORTED
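// Check whether this attribute satisfies a requested affinity
// attribute: both unset is a trivial match; when both are valid, the
// requested core type (or efficiency) must match; a validity mismatch
// is no match.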
bool contains(const kmp_affinity_attrs_t &attr) const {
if (!valid && !attr.valid)
return true;
if (valid && attr.valid) {
if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
return (is_core_type_valid() &&
(get_core_type() == (kmp_hw_core_type_t)attr.core_type));
if (attr.core_eff != UNKNOWN_CORE_EFF)
return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
return true;
}
return false;
}
#endif // KMP_AFFINITY_SUPPORTED
bool operator==(const kmp_hw_attr_t &rhs) const {
return (rhs.valid == valid && rhs.core_eff == core_eff &&
rhs.core_type == core_type);
@@ -834,13 +882,18 @@ public:
#if KMP_AFFINITY_SUPPORTED
// Set the granularity for affinity settings
void set_granularity(kmp_affinity_t &stgs) const;
#endif
bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
bool restrict_to_mask(const kmp_affin_mask_t *mask);
bool filter_hw_subset();
bool is_close(int hwt1, int hwt2, int level) const;
#endif
bool is_uniform() const { return flags.uniform; }
// Tell whether a type is a valid type in the topology
// returns KMP_HW_UNKNOWN when there is no equivalent type
kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
if (type == KMP_HW_UNKNOWN)
return KMP_HW_UNKNOWN;
return equivalent[type];
}
// Set type1 = type2
void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);

@@ -2005,6 +2005,21 @@ static void __kmp_stg_print_foreign_threads_threadprivate(kmp_str_buf_t *buffer,
// -----------------------------------------------------------------------------
// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD
static inline const char *
__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
switch (type) {
case KMP_HW_CORE_TYPE_UNKNOWN:
return "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
case KMP_HW_CORE_TYPE_ATOM:
return "intel_atom";
case KMP_HW_CORE_TYPE_CORE:
return "intel_core";
#endif
}
return "unknown";
}
#if KMP_AFFINITY_SUPPORTED
// Parse the proc id list. Return TRUE if successful, FALSE otherwise.
static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env,
@@ -2359,14 +2374,32 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
buf = next;
// Try any hardware topology type for granularity
KMP_FOREACH_HW_TYPE(type) {
const char *name = __kmp_hw_get_keyword(type);
if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
set_gran(type, -1);
buf = next;
set = true;
break;
// Have to try core_type and core_efficiency matches first since "core"
// will register as core granularity with "extra chars"
if (__kmp_match_str("core_type", buf, CCAST(const char **, &next))) {
set_gran(KMP_HW_CORE, -1);
out_affinity->flags.core_types_gran = 1;
buf = next;
set = true;
} else if (__kmp_match_str("core_efficiency", buf,
CCAST(const char **, &next)) ||
__kmp_match_str("core_eff", buf,
CCAST(const char **, &next))) {
set_gran(KMP_HW_CORE, -1);
out_affinity->flags.core_effs_gran = 1;
buf = next;
set = true;
}
if (!set) {
// Try any hardware topology type for granularity
KMP_FOREACH_HW_TYPE(type) {
const char *name = __kmp_hw_get_keyword(type);
if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
set_gran(type, -1);
buf = next;
set = true;
break;
}
}
}
if (!set) {
@@ -2626,8 +2659,15 @@ static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
__kmp_str_buf_print(buffer, "%s,", "noreset");
}
}
__kmp_str_buf_print(buffer, "granularity=%s,",
__kmp_hw_get_keyword(affinity.gran, false));
__kmp_str_buf_print(buffer, "granularity=");
if (affinity.flags.core_types_gran)
__kmp_str_buf_print(buffer, "core_type,");
else if (affinity.flags.core_effs_gran) {
__kmp_str_buf_print(buffer, "core_eff,");
} else {
__kmp_str_buf_print(
buffer, "%s,", __kmp_hw_get_keyword(affinity.gran, /*plural=*/false));
}
}
if (!KMP_AFFINITY_CAPABLE()) {
__kmp_str_buf_print(buffer, "%s", "disabled");
@@ -2745,11 +2785,7 @@ signed := + signed
signed := - signed
-----------------------------------------------------------------------------*/
// Warning to issue for syntax error during parsing of OMP_PLACES
static inline void __kmp_omp_places_syntax_warn(const char *var) {
KMP_WARNING(SyntaxErrorUsing, var, "\"cores\"");
}
// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_subplace_list(const char *var, const char **scan) {
const char *next;
@@ -2761,7 +2797,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
//
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2780,7 +2815,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2788,7 +2822,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
// Read count parameter
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2807,7 +2840,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2829,7 +2861,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
}
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2848,13 +2879,12 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
(*scan)++; // skip ','
continue;
}
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place(const char *var, const char **scan) {
const char *next;
@@ -2866,7 +2896,6 @@ static int __kmp_parse_place(const char *var, const char **scan) {
return FALSE;
}
if (**scan != '}') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip '}'
@@ -2880,12 +2909,12 @@ static int __kmp_parse_place(const char *var, const char **scan) {
KMP_ASSERT(proc >= 0);
*scan = next;
} else {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place_list(const char *var, const char *env,
char **place_list) {
const char *scan = env;
@@ -2908,7 +2937,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2916,7 +2944,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
// Read count parameter
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2935,7 +2962,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2957,7 +2983,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
}
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2977,7 +3002,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
__kmp_omp_places_syntax_warn(var);
return FALSE;
}
@@ -2991,6 +3015,22 @@ static int __kmp_parse_place_list(const char *var, const char *env,
return TRUE;
}
static inline void __kmp_places_set(enum affinity_type type, kmp_hw_t kind) {
__kmp_affinity.type = type;
__kmp_affinity.gran = kind;
__kmp_affinity.flags.dups = FALSE;
__kmp_affinity.flags.omp_places = TRUE;
}
static void __kmp_places_syntax_error_fallback(char const *name,
kmp_hw_t kind) {
const char *str = __kmp_hw_get_catalog_string(kind, /*plural=*/true);
KMP_WARNING(SyntaxErrorUsing, name, str);
__kmp_places_set(affinity_compact, kind);
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
}
static void __kmp_stg_parse_places(char const *name, char const *value,
void *data) {
struct kmp_place_t {
@@ -3001,7 +3041,6 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
bool set = false;
const char *scan = value;
const char *next = scan;
const char *kind = "\"threads\"";
kmp_place_t std_places[] = {{"threads", KMP_HW_THREAD},
{"cores", KMP_HW_CORE},
{"numa_domains", KMP_HW_NUMA},
@@ -3020,10 +3059,54 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
const kmp_place_t &place = std_places[i];
if (__kmp_match_str(place.name, scan, &next)) {
scan = next;
__kmp_affinity.type = affinity_compact;
__kmp_affinity.gran = place.type;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_set(affinity_compact, place.type);
set = true;
// Parse core attribute if it exists
if (KMP_HW_MAX_NUM_CORE_TYPES > 1) {
SKIP_WS(scan);
if (*scan == ':') {
if (place.type != KMP_HW_CORE) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
scan++; // skip ':'
SKIP_WS(scan);
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
if (__kmp_match_str("intel_core", scan, &next)) {
__kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_CORE;
__kmp_affinity.core_attr_gran.valid = 1;
scan = next;
} else if (__kmp_match_str("intel_atom", scan, &next)) {
__kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_ATOM;
__kmp_affinity.core_attr_gran.valid = 1;
scan = next;
} else
#endif
if (__kmp_match_str("eff", scan, &next)) {
int eff;
if (!isdigit(*next)) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
scan = next;
SKIP_DIGITS(next);
eff = __kmp_str_to_int(scan, *next);
if (eff < 0) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
if (eff >= KMP_HW_MAX_NUM_CORE_EFFS)
eff = KMP_HW_MAX_NUM_CORE_EFFS - 1;
__kmp_affinity.core_attr_gran.core_eff = eff;
__kmp_affinity.core_attr_gran.valid = 1;
scan = next;
}
if (!__kmp_affinity.core_attr_gran.valid) {
__kmp_places_syntax_error_fallback(name, place.type);
return;
}
}
}
break;
}
}
@@ -3035,36 +3118,56 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
continue;
if (__kmp_match_str(name, scan, &next)) {
scan = next;
__kmp_affinity.type = affinity_compact;
__kmp_affinity.gran = type;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_set(affinity_compact, type);
set = true;
break;
}
}
}
// Implementation choices for OMP_PLACES based on core attributes
if (!set) {
if (__kmp_match_str("core_types", scan, &next)) {
scan = next;
if (*scan != '\0') {
KMP_WARNING(ParseExtraCharsWarn, name, scan);
}
__kmp_places_set(affinity_compact, KMP_HW_CORE);
__kmp_affinity.flags.core_types_gran = 1;
set = true;
} else if (__kmp_match_str("core_effs", scan, &next) ||
__kmp_match_str("core_efficiencies", scan, &next)) {
scan = next;
if (*scan != '\0') {
KMP_WARNING(ParseExtraCharsWarn, name, scan);
}
__kmp_places_set(affinity_compact, KMP_HW_CORE);
__kmp_affinity.flags.core_effs_gran = 1;
set = true;
}
}
// Explicit place list
if (!set) {
if (__kmp_affinity.proclist != NULL) {
KMP_INTERNAL_FREE((void *)__kmp_affinity.proclist);
__kmp_affinity.proclist = NULL;
}
if (__kmp_parse_place_list(name, value, &__kmp_affinity.proclist)) {
__kmp_affinity.type = affinity_explicit;
__kmp_affinity.gran = KMP_HW_THREAD;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_set(affinity_explicit, KMP_HW_THREAD);
} else {
// Syntax error fallback
__kmp_affinity.type = affinity_compact;
__kmp_affinity.gran = KMP_HW_CORE;
__kmp_affinity.flags.dups = FALSE;
__kmp_places_syntax_error_fallback(name, KMP_HW_CORE);
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
}
return;
}
kmp_hw_t gran = __kmp_affinity.gran;
if (__kmp_affinity.gran != KMP_HW_UNKNOWN) {
kind = __kmp_hw_get_keyword(__kmp_affinity.gran);
gran = __kmp_affinity.gran;
} else {
gran = KMP_HW_CORE;
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
@@ -3078,7 +3181,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
// Parse option count parameter in parentheses
if (*scan != '(') {
KMP_WARNING(SyntaxErrorUsing, name, kind);
__kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip '('
@@ -3092,7 +3195,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
SKIP_WS(scan);
if (*scan != ')') {
KMP_WARNING(SyntaxErrorUsing, name, kind);
__kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip ')'
@@ -3135,12 +3238,37 @@ static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name,
num = 0;
}
if (gran != KMP_HW_UNKNOWN) {
const char *name = __kmp_hw_get_keyword(gran, true);
if (num > 0) {
__kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num);
} else {
__kmp_str_buf_print(buffer, "='%s'\n", name);
// If core_types or core_effs, just print and return
if (__kmp_affinity.flags.core_types_gran) {
__kmp_str_buf_print(buffer, "='%s'\n", "core_types");
return;
}
if (__kmp_affinity.flags.core_effs_gran) {
__kmp_str_buf_print(buffer, "='%s'\n", "core_effs");
return;
}
// threads, cores, sockets, cores:<attribute>, etc.
const char *name = __kmp_hw_get_keyword(gran, true);
__kmp_str_buf_print(buffer, "='%s", name);
// Add core attributes if it exists
if (__kmp_affinity.core_attr_gran.valid) {
kmp_hw_core_type_t ct =
(kmp_hw_core_type_t)__kmp_affinity.core_attr_gran.core_type;
int eff = __kmp_affinity.core_attr_gran.core_eff;
if (ct != KMP_HW_CORE_TYPE_UNKNOWN) {
const char *ct_name = __kmp_hw_get_core_type_keyword(ct);
__kmp_str_buf_print(buffer, ":%s", name, ct_name);
} else if (eff >= 0 && eff < KMP_HW_MAX_NUM_CORE_EFFS) {
__kmp_str_buf_print(buffer, ":eff%d", name, eff);
}
}
// Add the '(#)' part if it exists
if (num > 0)
__kmp_str_buf_print(buffer, "(%d)", num);
__kmp_str_buf_print(buffer, "'\n");
} else {
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
}
@@ -5139,21 +5267,6 @@ err:
return;
}
static inline const char *
__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
switch (type) {
case KMP_HW_CORE_TYPE_UNKNOWN:
return "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
case KMP_HW_CORE_TYPE_ATOM:
return "intel_atom";
case KMP_HW_CORE_TYPE_CORE:
return "intel_core";
#endif
}
return "unknown";
}
static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
void *data) {
kmp_str_buf_t buf;

@@ -1242,6 +1242,7 @@ static void __kmp_atfork_child(void) {
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
__kmp_affin_fullMask = nullptr;
__kmp_affin_origMask = nullptr;
__kmp_topology = nullptr;
#endif // KMP_AFFINITY_SUPPORTED
#if KMP_USE_MONITOR

@@ -1,7 +1,20 @@
// RUN: %libomp-compile && env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck %s
// CHECK-DAG: Effective settings
// CHECK: OMP_PLACES=
// CHECK-SAME: cores
// RUN: %libomp-compile
// RUN: env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck --check-prefix=INVALID %s
// RUN: env KMP_SETTINGS=1 OMP_PLACES='sockets(' %libomp-run 2>&1 | FileCheck --check-prefix=SOCKETS %s
// RUN: env KMP_SETTINGS=1 OMP_PLACES='threads()' %libomp-run 2>&1 | FileCheck --check-prefix=THREADS %s
//
// INVALID-DAG: Effective settings
// INVALID: OMP_PLACES=
// INVALID-SAME: cores
//
// SOCKETS-DAG: Effective settings
// SOCKETS: OMP_PLACES=
// SOCKETS-SAME: sockets
//
// THREADS-DAG: Effective settings
// THREADS: OMP_PLACES=
// THREADS-SAME: threads
//
// REQUIRES: affinity
#include "omp_testsuite.h"