Mirror of https://github.com/llvm/llvm-project.git
[OpenMP] Let primary thread gather topology info for each worker thread
This change has the primary thread create each worker thread's initial affinity mask and topology information, so that both are available immediately after forking. Setting the mask and topology information is decoupled from the actual binding. The same topology bookkeeping is also added to the __kmp_partition_places mechanism used for OMP_PLACES + OMP_PROC_BIND. Without this change, there is a timing window after the primary thread signals the workers to fork in which a worker thread may not yet have established its affinity mask or topology information. Each worker thread now binds to the location the primary thread has already set for it.

Differential Revision: https://reviews.llvm.org/D156727
parent 3a4f471b11
commit 99f5969565
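The core of the change is a two-phase hand-off. Below is a minimal sketch in plain C++ threads (illustrative only: the names set_init_info/bind_init_info, ThreadInfo, and the pool scaffolding are invented for this sketch, not the actual kmp_* API). The primary thread publishes each worker's place before starting it; the worker's only startup job is to bind to what was published.

#include <cstdio>
#include <thread>
#include <vector>

// Stand-in for th_new_place / th_topology_ids; purely illustrative.
struct ThreadInfo {
  int place = -1;
  bool bound = false;
};

static std::vector<ThreadInfo> g_threads;

// Phase 1 -- run by the primary thread BEFORE the worker exists, so the
// information is valid the moment the worker is forked (cf.
// __kmp_allocate_thread calling __kmp_affinity_set_init_mask with
// isa_root=FALSE in this commit).
void set_init_info(int gtid, int place) { g_threads[gtid].place = place; }

// Phase 2 -- run by the worker itself; it computes nothing and only binds
// to what the primary stored (cf. __kmp_affinity_bind_init_mask in the
// launch routines). A real runtime would call sched_setaffinity() or
// SetThreadGroupAffinity() here.
void bind_init_info(int gtid) {
  g_threads[gtid].bound = true;
  std::printf("T#%d bound to place %d\n", gtid, g_threads[gtid].place);
}

int main() {
  const int nthreads = 4;
  g_threads.resize(nthreads);
  std::vector<std::thread> pool;
  for (int gtid = 0; gtid < nthreads; ++gtid) {
    set_init_info(gtid, gtid % 2);           // primary sets the info first...
    pool.emplace_back(bind_init_info, gtid); // ...then forks the worker
  }
  for (auto &t : pool)
    t.join();
  return 0;
}

Because phase 1 happens-before the worker starts, the worker can never observe an unset place; that is exactly the timing window the commit message describes.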
openmp/runtime/src/kmp.h
@@ -3795,7 +3795,8 @@ extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
 extern void __kmp_affinity_uninitialize(void);
 extern void __kmp_affinity_set_init_mask(
     int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
-extern void __kmp_affinity_set_place(int gtid);
+void __kmp_affinity_bind_init_mask(int gtid);
+extern void __kmp_affinity_bind_place(int gtid);
 extern void __kmp_affinity_determine_capable(const char *env_var);
 extern int __kmp_aux_set_affinity(void **mask);
 extern int __kmp_aux_get_affinity(void **mask);
@@ -3811,7 +3812,8 @@ static inline void __kmp_assign_root_init_mask() {
   int gtid = __kmp_entry_gtid();
   kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
   if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
-    __kmp_affinity_set_init_mask(gtid, TRUE);
+    __kmp_affinity_set_init_mask(gtid, /*isa_root=*/TRUE);
+    __kmp_affinity_bind_init_mask(gtid);
     r->r.r_affinity_assigned = TRUE;
   }
 }
openmp/runtime/src/kmp_affinity.cpp
@@ -4260,8 +4260,8 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
 
 // Called when __kmp_topology is ready
 static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
-  // Initialize data dependent on __kmp_topology
-  if (__kmp_topology) {
+  // Initialize other data structures which depend on the topology
+  if (__kmp_topology && __kmp_topology->get_num_hw_threads()) {
     machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
     __kmp_affinity_get_topology_info(affinity);
   }
@@ -4527,8 +4527,6 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
   if (is_regular_affinity && !__kmp_topology) {
     bool success = __kmp_aux_affinity_initialize_topology(affinity);
     if (success) {
-      // Initialize other data structures which depend on the topology
-      machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
       KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
     } else {
       affinity.type = affinity_none;
@@ -4866,14 +4864,12 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   kmp_affin_mask_t *mask;
   int i;
   const kmp_affinity_t *affinity;
-  const char *env_var;
   bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
 
   if (is_hidden_helper)
     affinity = &__kmp_hh_affinity;
   else
     affinity = &__kmp_affinity;
-  env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
 
   if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
     if ((affinity->type == affinity_none) ||
@@ -4923,19 +4919,34 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   }
 
   if (i == KMP_PLACE_ALL) {
-    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
+    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to all places\n",
                    gtid));
   } else {
-    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
+    KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to place %d\n",
                    gtid, i));
   }
 
   KMP_CPU_COPY(th->th.th_affin_mask, mask);
+}
+
+void __kmp_affinity_bind_init_mask(int gtid) {
+  if (!KMP_AFFINITY_CAPABLE()) {
+    return;
+  }
+  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+  const kmp_affinity_t *affinity;
+  const char *env_var;
+  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
+
+  if (is_hidden_helper)
+    affinity = &__kmp_hh_affinity;
+  else
+    affinity = &__kmp_affinity;
+  env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
 
-  if (affinity->flags.verbose &&
-      (affinity->type == affinity_none ||
-       (i != KMP_PLACE_ALL && affinity->type != affinity_balanced)) &&
+  /* to avoid duplicate printing (will be correctly printed on barrier) */
+  if (affinity->flags.verbose && (affinity->type == affinity_none ||
+                                  (th->th.th_current_place != KMP_PLACE_ALL &&
+                                   affinity->type != affinity_balanced)) &&
       !KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
     char buf[KMP_AFFIN_MASK_PRINT_LEN];
     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
@@ -4955,7 +4966,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
 }
 
-void __kmp_affinity_set_place(int gtid) {
+void __kmp_affinity_bind_place(int gtid) {
   // Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
   if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
     return;
@@ -4963,7 +4974,7 @@ void __kmp_affinity_set_place(int gtid) {
 
   kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
 
-  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
+  KA_TRACE(100, ("__kmp_affinity_bind_place: binding T#%d to place %d (current "
                  "place = %d)\n",
                  gtid, th->th.th_new_place, th->th.th_current_place));
 
@@ -4985,9 +4996,6 @@ void __kmp_affinity_set_place(int gtid) {
       KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
   KMP_CPU_COPY(th->th.th_affin_mask, mask);
   th->th.th_current_place = th->th.th_new_place;
-  // Copy topology information associated with the place
-  th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
-  th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
 
   if (__kmp_affinity.flags.verbose) {
     char buf[KMP_AFFIN_MASK_PRINT_LEN];
openmp/runtime/src/kmp_barrier.cpp
@@ -2591,7 +2591,7 @@ void __kmp_fork_barrier(int gtid, int tid) {
                      __kmp_gtid_from_thread(this_thr),
                      this_thr->th.th_current_place));
     } else {
-      __kmp_affinity_set_place(gtid);
+      __kmp_affinity_bind_place(gtid);
     }
   }
 #endif // KMP_AFFINITY_SUPPORTED
openmp/runtime/src/kmp_runtime.cpp
@@ -4671,6 +4671,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
   }
 #endif /* KMP_ADJUST_BLOCKTIME */
 
+#if KMP_AFFINITY_SUPPORTED
+  // Set the affinity and topology information for new thread
+  __kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE);
+#endif
+
   /* actually fork it and create the new worker thread */
   KF_TRACE(
       10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
@@ -4764,6 +4769,19 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
 }
 
 #if KMP_AFFINITY_SUPPORTED
+static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
+                                          int first, int last, int newp) {
+  th->th.th_first_place = first;
+  th->th.th_last_place = last;
+  th->th.th_new_place = newp;
+  if (newp != th->th.th_current_place) {
+    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
+      team->t.t_display_affinity = 1;
+    // Copy topology information associated with the new place
+    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
+    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
+  }
+}
+
 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
 // It calculates the worker + primary thread's partition based upon the parent
@@ -4803,13 +4821,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
     for (f = 1; f < n_th; f++) {
       kmp_info_t *th = team->t.t_threads[f];
       KMP_DEBUG_ASSERT(th != NULL);
-      th->th.th_first_place = first_place;
-      th->th.th_last_place = last_place;
-      th->th.th_new_place = masters_place;
-      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
-          team->t.t_display_affinity != 1) {
-        team->t.t_display_affinity = 1;
-      }
+      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);
 
       KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                      "partition = [%d,%d]\n",
@@ -4840,13 +4852,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
         } else {
          place++;
         }
-        th->th.th_first_place = first_place;
-        th->th.th_last_place = last_place;
-        th->th.th_new_place = place;
-        if (__kmp_display_affinity && place != th->th.th_current_place &&
-            team->t.t_display_affinity != 1) {
-          team->t.t_display_affinity = 1;
-        }
+        __kmp_set_thread_place(team, th, first_place, last_place, place);
 
         KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                        "partition = [%d,%d]\n",
@@ -4865,13 +4871,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);
 
-        th->th.th_first_place = first_place;
-        th->th.th_last_place = last_place;
-        th->th.th_new_place = place;
-        if (__kmp_display_affinity && place != th->th.th_current_place &&
-            team->t.t_display_affinity != 1) {
-          team->t.t_display_affinity = 1;
-        }
+        __kmp_set_thread_place(team, th, first_place, last_place, place);
        s_count++;
 
        if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -4938,12 +4938,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);
 
-        th->th.th_first_place = place;
-        th->th.th_new_place = place;
-        if (__kmp_display_affinity && place != th->th.th_current_place &&
-            team->t.t_display_affinity != 1) {
-          team->t.t_display_affinity = 1;
-        }
+        int fplace = place, nplace = place;
        s_count = 1;
        while (s_count < S) {
          if (place == last_place) {
@@ -4966,7 +4961,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
          rem--;
          gap_ct = 0;
        }
-        th->th.th_last_place = place;
+        __kmp_set_thread_place(team, th, fplace, place, nplace);
        gap_ct++;
 
        if (place == last_place) {
@@ -5032,13 +5027,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
        KMP_DEBUG_ASSERT(last_place >= first_place);
        th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th);
-        th->th.th_first_place = first;
-        th->th.th_new_place = place;
-        th->th.th_last_place = last;
-        if (__kmp_display_affinity && place != th->th.th_current_place &&
-            team->t.t_display_affinity != 1) {
-          team->t.t_display_affinity = 1;
-        }
+        __kmp_set_thread_place(team, th, first, last, place);
        KA_TRACE(100,
                 ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                  "partition = [%d,%d], spacing = %.4f\n",
@@ -5064,13 +5053,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);
 
-        th->th.th_first_place = place;
-        th->th.th_last_place = place;
-        th->th.th_new_place = place;
-        if (__kmp_display_affinity && place != th->th.th_current_place &&
-            team->t.t_display_affinity != 1) {
-          team->t.t_display_affinity = 1;
-        }
+        __kmp_set_thread_place(team, th, place, place, place);
        s_count++;
 
        if ((s_count == S) && rem && (gap_ct == gap)) {
openmp/runtime/src/z_Linux_util.cpp
@@ -486,7 +486,7 @@ static void *__kmp_launch_worker(void *thr) {
 #endif /* USE_ITT_BUILD */
 
 #if KMP_AFFINITY_SUPPORTED
-  __kmp_affinity_set_init_mask(gtid, FALSE);
+  __kmp_affinity_bind_init_mask(gtid);
 #endif
 
 #ifdef KMP_CANCEL_THREADS
openmp/runtime/src/z_Windows_NT_util.cpp
@@ -1006,7 +1006,7 @@ extern "C" void *__stdcall __kmp_launch_worker(void *arg) {
   __kmp_itt_thread_name(gtid);
 #endif /* USE_ITT_BUILD */
 
-  __kmp_affinity_set_init_mask(gtid, FALSE);
+  __kmp_affinity_bind_init_mask(gtid);
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
   // Set FP control regs to be a copy of the parallel initialization thread's.