[OpenMP] Let primary thread gather topology info for each worker thread

This change has the primary thread create each worker thread's initial
affinity mask and topology information so that they are available
immediately after forking. The setting of mask/topology information is
thereby decoupled from the actual binding. Also set the topology
information inside the __kmp_partition_places mechanism used for
OMP_PLACES+OMP_PROC_BIND.

Without this change, there could be a timing window, after the primary
thread signals the workers to fork, in which a worker thread has not yet
established its affinity mask or topology information.

Each worker thread then binds to the location the primary thread has
already set for it.

Differential Revision: https://reviews.llvm.org/D156727
Jonathan Peyton 2023-07-31 13:46:44 -05:00
parent 3a4f471b11
commit 99f5969565
6 changed files with 56 additions and 63 deletions
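
Why the ordering matters can be illustrated outside the runtime. The sketch below is not LLVM code: ThreadInfo, set_init_mask(), and bind_init_mask() are hypothetical stand-ins that mirror the commit's split between publishing a thread's place (done by the primary thread, a plain memory write) and applying the binding (done later by the worker thread itself).

// Standalone illustration only -- ThreadInfo, set_init_mask(), and
// bind_init_mask() are invented names, not the runtime's API.
#include <cstdio>
#include <thread>
#include <vector>

struct ThreadInfo {
  int place = -1; // stands in for th_affin_mask / th_topology_ids
};

static std::vector<ThreadInfo> g_threads;

// "Set": primary thread computes and publishes the info; no OS call involved.
static void set_init_mask(int gtid) { g_threads[gtid].place = gtid % 4; }

// "Bind": worker applies the already-published info to itself.
static void bind_init_mask(int gtid) {
  // A real runtime would make an OS affinity call here.
  std::printf("T#%d binds to place %d\n", gtid, g_threads[gtid].place);
}

int main() {
  const int n = 4;
  g_threads.resize(n);
  for (int gtid = 0; gtid < n; ++gtid) // 1) primary sets everything up front
    set_init_mask(gtid);
  std::vector<std::thread> pool; // 2) workers fork and only bind
  for (int gtid = 0; gtid < n; ++gtid)
    pool.emplace_back(bind_init_mask, gtid);
  for (auto &t : pool)
    t.join();
  return 0;
}

Because step 1 completes before any worker is forked, a worker is never observed with an uninitialized place, which is exactly the timing window the commit message describes; the OS-level bind can then happen on the worker itself.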

openmp/runtime/src/kmp.h

@@ -3795,7 +3795,8 @@ extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
 extern void __kmp_affinity_uninitialize(void);
 extern void __kmp_affinity_set_init_mask(
     int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
-extern void __kmp_affinity_set_place(int gtid);
+void __kmp_affinity_bind_init_mask(int gtid);
+extern void __kmp_affinity_bind_place(int gtid);
 extern void __kmp_affinity_determine_capable(const char *env_var);
 extern int __kmp_aux_set_affinity(void **mask);
 extern int __kmp_aux_get_affinity(void **mask);
@@ -3811,7 +3812,8 @@ static inline void __kmp_assign_root_init_mask() {
   int gtid = __kmp_entry_gtid();
   kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
   if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
-    __kmp_affinity_set_init_mask(gtid, TRUE);
+    __kmp_affinity_set_init_mask(gtid, /*isa_root=*/TRUE);
+    __kmp_affinity_bind_init_mask(gtid);
     r->r.r_affinity_assigned = TRUE;
   }
 }

openmp/runtime/src/kmp_affinity.cpp

@@ -4260,8 +4260,8 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
 // Called when __kmp_topology is ready
 static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
-  // Initialize data dependent on __kmp_topology
-  if (__kmp_topology) {
+  // Initialize other data structures which depend on the topology
+  if (__kmp_topology && __kmp_topology->get_num_hw_threads()) {
+    machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
     __kmp_affinity_get_topology_info(affinity);
   }
@@ -4527,8 +4527,6 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
   if (is_regular_affinity && !__kmp_topology) {
     bool success = __kmp_aux_affinity_initialize_topology(affinity);
     if (success) {
-      // Initialize other data structures which depend on the topology
-      machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
       KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
     } else {
       affinity.type = affinity_none;
@@ -4866,14 +4864,12 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   kmp_affin_mask_t *mask;
   int i;
   const kmp_affinity_t *affinity;
-  const char *env_var;
   bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
   if (is_hidden_helper)
     affinity = &__kmp_hh_affinity;
   else
     affinity = &__kmp_affinity;
-  env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
 
   if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
     if ((affinity->type == affinity_none) ||
@@ -4923,19 +4919,34 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   }
 
   if (i == KMP_PLACE_ALL) {
-    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
+    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to all places\n",
                    gtid));
   } else {
-    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
+    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to place %d\n",
                    gtid, i));
   }
 
   KMP_CPU_COPY(th->th.th_affin_mask, mask);
+}
+
+void __kmp_affinity_bind_init_mask(int gtid) {
+  if (!KMP_AFFINITY_CAPABLE()) {
+    return;
+  }
+  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+  const kmp_affinity_t *affinity;
+  const char *env_var;
+  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
+  if (is_hidden_helper)
+    affinity = &__kmp_hh_affinity;
+  else
+    affinity = &__kmp_affinity;
+  env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
 
   /* to avoid duplicate printing (will be correctly printed on barrier) */
-  if (affinity->flags.verbose &&
-      (affinity->type == affinity_none ||
-       (i != KMP_PLACE_ALL && affinity->type != affinity_balanced)) &&
+  if (affinity->flags.verbose && (affinity->type == affinity_none ||
+                                  (th->th.th_current_place != KMP_PLACE_ALL &&
+                                   affinity->type != affinity_balanced)) &&
       !KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
     char buf[KMP_AFFIN_MASK_PRINT_LEN];
     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
@@ -4955,7 +4966,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
 }
 
-void __kmp_affinity_set_place(int gtid) {
+void __kmp_affinity_bind_place(int gtid) {
   // Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
   if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
     return;
@@ -4963,7 +4974,7 @@ void __kmp_affinity_set_place(int gtid) {
   kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
 
-  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
+  KA_TRACE(100, ("__kmp_affinity_bind_place: binding T#%d to place %d (current "
                  "place = %d)\n",
                  gtid, th->th.th_new_place, th->th.th_current_place));
@@ -4985,9 +4996,6 @@ void __kmp_affinity_set_place(int gtid) {
       KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
   KMP_CPU_COPY(th->th.th_affin_mask, mask);
   th->th.th_current_place = th->th.th_new_place;
-  // Copy topology information associated with the place
-  th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
-  th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
 
   if (__kmp_affinity.flags.verbose) {
     char buf[KMP_AFFIN_MASK_PRINT_LEN];

openmp/runtime/src/kmp_barrier.cpp

@@ -2591,7 +2591,7 @@ void __kmp_fork_barrier(int gtid, int tid) {
                  __kmp_gtid_from_thread(this_thr),
                  this_thr->th.th_current_place));
     } else {
-      __kmp_affinity_set_place(gtid);
+      __kmp_affinity_bind_place(gtid);
     }
   }
 #endif // KMP_AFFINITY_SUPPORTED

openmp/runtime/src/kmp_runtime.cpp

@@ -4671,6 +4671,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
   }
 #endif /* KMP_ADJUST_BLOCKTIME */
 
+#if KMP_AFFINITY_SUPPORTED
+  // Set the affinity and topology information for new thread
+  __kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE);
+#endif
+
   /* actually fork it and create the new worker thread */
   KF_TRACE(
       10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
@@ -4764,6 +4769,19 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
 }
 
 #if KMP_AFFINITY_SUPPORTED
+static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
+                                          int first, int last, int newp) {
+  th->th.th_first_place = first;
+  th->th.th_last_place = last;
+  th->th.th_new_place = newp;
+  if (newp != th->th.th_current_place) {
+    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
+      team->t.t_display_affinity = 1;
+    // Copy topology information associated with the new place
+    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
+    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
+  }
+}
+
 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
 // It calculates the worker + primary thread's partition based upon the parent
@@ -4803,13 +4821,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
     for (f = 1; f < n_th; f++) {
       kmp_info_t *th = team->t.t_threads[f];
       KMP_DEBUG_ASSERT(th != NULL);
-      th->th.th_first_place = first_place;
-      th->th.th_last_place = last_place;
-      th->th.th_new_place = masters_place;
-      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
-          team->t.t_display_affinity != 1) {
-        team->t.t_display_affinity = 1;
-      }
+      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
 
       KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                      "partition = [%d,%d]\n",
@@ -4840,13 +4852,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
       } else {
         place++;
       }
-      th->th.th_first_place = first_place;
-      th->th.th_last_place = last_place;
-      th->th.th_new_place = place;
-      if (__kmp_display_affinity && place != th->th.th_current_place &&
-          team->t.t_display_affinity != 1) {
-        team->t.t_display_affinity = 1;
-      }
+      __kmp_set_thread_place(team, th, first_place, last_place, place);
 
       KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                      "partition = [%d,%d]\n",
@@ -4865,13 +4871,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
         kmp_info_t *th = team->t.t_threads[f];
         KMP_DEBUG_ASSERT(th != NULL);
-        th->th.th_first_place = first_place;
-        th->th.th_last_place = last_place;
-        th->th.th_new_place = place;
-        if (__kmp_display_affinity && place != th->th.th_current_place &&
-            team->t.t_display_affinity != 1) {
-          team->t.t_display_affinity = 1;
-        }
+        __kmp_set_thread_place(team, th, first_place, last_place, place);
         s_count++;
 
         if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -4938,12 +4938,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
       kmp_info_t *th = team->t.t_threads[f];
       KMP_DEBUG_ASSERT(th != NULL);
-      th->th.th_first_place = place;
-      th->th.th_new_place = place;
-      if (__kmp_display_affinity && place != th->th.th_current_place &&
-          team->t.t_display_affinity != 1) {
-        team->t.t_display_affinity = 1;
-      }
+      int fplace = place, nplace = place;
      s_count = 1;
      while (s_count < S) {
        if (place == last_place) {
@@ -4966,7 +4961,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
         rem--;
         gap_ct = 0;
       }
-      th->th.th_last_place = place;
+      __kmp_set_thread_place(team, th, fplace, place, nplace);
       gap_ct++;
 
       if (place == last_place) {
@@ -5032,13 +5027,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
         KMP_DEBUG_ASSERT(last_place >= first_place);
         th = team->t.t_threads[f];
         KMP_DEBUG_ASSERT(th);
-        th->th.th_first_place = first;
-        th->th.th_new_place = place;
-        th->th.th_last_place = last;
-        if (__kmp_display_affinity && place != th->th.th_current_place &&
-            team->t.t_display_affinity != 1) {
-          team->t.t_display_affinity = 1;
-        }
+        __kmp_set_thread_place(team, th, first, last, place);
 
         KA_TRACE(100,
                  ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                   "partition = [%d,%d], spacing = %.4f\n",
@@ -5064,13 +5053,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
       kmp_info_t *th = team->t.t_threads[f];
       KMP_DEBUG_ASSERT(th != NULL);
-      th->th.th_first_place = place;
-      th->th.th_last_place = place;
-      th->th.th_new_place = place;
-      if (__kmp_display_affinity && place != th->th.th_current_place &&
-          team->t.t_display_affinity != 1) {
-        team->t.t_display_affinity = 1;
-      }
+      __kmp_set_thread_place(team, th, place, place, place);
       s_count++;
 
       if ((s_count == S) && rem && (gap_ct == gap)) {

openmp/runtime/src/z_Linux_util.cpp

@@ -486,7 +486,7 @@ static void *__kmp_launch_worker(void *thr) {
 #endif /* USE_ITT_BUILD */
 
 #if KMP_AFFINITY_SUPPORTED
-  __kmp_affinity_set_init_mask(gtid, FALSE);
+  __kmp_affinity_bind_init_mask(gtid);
 #endif
 
 #ifdef KMP_CANCEL_THREADS

openmp/runtime/src/z_Windows_NT_util.cpp

@@ -1006,7 +1006,7 @@ extern "C" void *__stdcall __kmp_launch_worker(void *arg) {
   __kmp_itt_thread_name(gtid);
 #endif /* USE_ITT_BUILD */
 
-  __kmp_affinity_set_init_mask(gtid, FALSE);
+  __kmp_affinity_bind_init_mask(gtid);
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
   // Set FP control regs to be a copy of the parallel initialization thread's.