mirror of
https://github.com/ROCm/jax.git
synced 2025-04-19 05:16:06 +00:00
Log the key used for barrier
PiperOrigin-RevId: 463741926
This commit is contained in:
parent
27655af6b9
commit
0dbf492cec
@@ -346,13 +346,16 @@ class AsyncManager:
|
||||
|
||||
# All processes will wait at the barrier. When all processes are at the
|
||||
# barrier, the barrier will be satisfied. If not, then it will time out.
|
||||
self._client.wait_at_barrier(_get_key(self._count), self._timeout_in_ms)
|
||||
key_for_barrier = _get_key(self._count)
|
||||
logging.info('Key used for barrier is %s for process %s',
|
||||
key_for_barrier, current_process)
|
||||
self._client.wait_at_barrier(key_for_barrier, self._timeout_in_ms)
|
||||
logging.info('Finished waiting at barrier for process %s',
|
||||
current_process)
|
||||
|
||||
if current_process == 0:
|
||||
self._on_commit_callback()
|
||||
self._client.key_value_set(_get_key(self._count), _CHECKPOINT_SUCCESS)
|
||||
self._client.key_value_set(key_for_barrier, _CHECKPOINT_SUCCESS)
|
||||
|
||||
except Exception as e:
|
||||
self._exception = e
|
||||
|
Loading…
x
Reference in New Issue
Block a user