From 1282878c52cb116093c6e3e8cde0c2623b624802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Thu, 13 Mar 2025 10:18:02 +0100 Subject: [PATCH] [CI] Fix bad timestamps being reported (#130941) Yesterday, the monitoring reported a job queued for 23h59. After some checks, it appears no such job existed: the age of the workflows on completion was at most 5 hours during the last 48 hours. After some digging, I found out GitHub could return a job with a start date slightly before the creation date, or completion date before start date. This would cause python to compute a negative timedelta, which would then be reported in grafana as a full 24h delta due to the conversions. Adding code to ignore negative delta, but logging them. --- .ci/metrics/metrics.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py index e1712bd01515..3878dce342fb 100644 --- a/.ci/metrics/metrics.py +++ b/.ci/metrics/metrics.py @@ -168,8 +168,24 @@ def github_get_metrics( created_at = job.created_at started_at = job.started_at completed_at = job.completed_at - queue_time = started_at - created_at - run_time = completed_at - started_at + + # GitHub API can return results where the started_at is slightly + # later then the created_at (or completed earlier than started). + # This would cause a -23h59mn delta, which will show up as +24h + # queue/run time on grafana. + if started_at < created_at: + logging.info( + "Workflow {} started before being created.".format(task.id) + ) + queue_time = datetime.timedelta(seconds=0) + else: + queue_time = started_at - created_at + if completed_at < started_at: + logging.info("Workflow {} finished before starting.".format(task.id)) + run_time = datetime.timedelta(seconds=0) + else: + run_time = completed_at - started_at + if run_time.seconds == 0: continue