mirror of
https://github.com/ROCm/jax.git
synced 2025-04-17 12:26:07 +00:00
nightly multiprocess test: create issue on failure
This commit is contained in:
parent
169345311a
commit
7ec6acd981
45
.github/workflows/cat_slurm_logs.py
vendored
Normal file
45
.github/workflows/cat_slurm_logs.py
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
# Copyright 2022 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Script used in the nightly-ci-multiprocess-gpu workflow to process logs."""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
# Markdown snippet emitted once per log file: a collapsible <details> section
# whose summary carries the log's file name and whose body is the raw log text
# inside a fenced code block.
ISSUE_FORMAT = """\
<details><summary>Failure summary {name}</summary>

```
{content}
```

</details>
"""


def main(logfiles: List[str], outfile: str):
  """Concatenates log files into one Markdown file of collapsible sections.

  Each input file is rendered through ISSUE_FORMAT (keyed by its base name)
  and appended to `outfile` in the order given.

  Args:
    logfiles: Paths of the log files to read.
    outfile: Path of the file to create (an existing file is overwritten).

  Raises:
    OSError: If an input file cannot be read or the output cannot be written.
  """
  print(f"extracting content of {logfiles}")
  print(f"and writing to {outfile}")
  # Explicit UTF-8 keeps the output independent of the runner's locale.
  with open(outfile, 'w', encoding='utf-8') as f:
    for logfile in logfiles:
      # Close every input promptly instead of leaking the handle, and replace
      # undecodable bytes so one corrupt log cannot abort the whole report.
      with open(logfile, encoding='utf-8', errors='replace') as log:
        content = log.read()
      f.write(ISSUE_FORMAT.format(name=os.path.basename(logfile), content=content))
|
||||
|
||||
|
||||
if __name__ == '__main__':
  # Command-line entry point: one or more log file paths are required as
  # positional arguments; --outfile optionally names the file to create.
  parser = argparse.ArgumentParser()
  parser.add_argument(
      "logfiles",
      nargs="+",
      help="The path to the input logfiles",
  )
  parser.add_argument(
      "--outfile",
      default="parsed_logs.txt",
      help="The path to the parsed output file to be created.",
  )
  args = parser.parse_args()
  main(logfiles=args.logfiles, outfile=args.outfile)
|
@ -51,4 +51,76 @@ jobs:
|
||||
if: always()
|
||||
with:
|
||||
name: output-from-nodes
|
||||
path: "outputs/*.txt"
|
||||
path: "outputs/*.txt"
|
||||
|
||||
report:
  # Files (or refreshes) a tracking issue containing the node logs when the
  # build job fails.
  name: report
  needs: build
  # Run only when an upstream job failed and the triggering event is 'schedule'.
  if: |
    failure()
    && github.event_name == 'schedule'
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  steps:
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v4
      with:
        python-version: "3.x"
    # The logs are read below from the output-from-nodes/ subdirectory of this path.
    - uses: actions/download-artifact@v3
      with:
        path: /tmp/workspace/logs
    - name: Parse log output
      run: |
        ls /tmp/workspace/logs/output-from-nodes/
        python .github/workflows/cat_slurm_logs.py /tmp/workspace/logs/output-from-nodes/*.txt --outfile=parsed-logs.txt
    - name: Report failures
      # NOTE(review): creating/updating issues needs a token with issues write
      # access - confirm the workflow's permissions allow it.
      uses: actions/github-script@v6
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        script: |
          const fs = require('fs');
          // Markdown produced by the "Parse log output" step.
          const parsed_logs = fs.readFileSync('parsed-logs.txt', 'utf8');
          const title = "⚠️ Nightly GPU Multiprocess CI failed ⚠️"
          const workflow_url = `https://github.com/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
          const issue_body = `[Workflow Run URL](${workflow_url})\n${parsed_logs}`
          // Run GraphQL query against GitHub API to find the most recent open issue used for reporting failures
          const query = `query($owner:String!, $name:String!, $creator:String!, $label:String!){
            repository(owner: $owner, name: $name) {
              issues(first: 1, states: OPEN, filterBy: {createdBy: $creator, labels: [$label]}, orderBy: {field: CREATED_AT, direction: DESC}) {
                edges {
                  node {
                    body
                    id
                    number
                  }
                }
              }
            }
          }`;
          const variables = {
            owner: context.repo.owner,
            name: context.repo.repo,
            label: 'Nightly-CI',
            creator: "github-actions[bot]"
          }
          const result = await github.graphql(query, variables)
          // If no issue is open, create a new issue,
          // else update the body of the existing issue.
          // Both requests are awaited so the step waits for the API call to
          // finish and fails visibly if GitHub rejects it.
          if (result.repository.issues.edges.length === 0) {
            await github.rest.issues.create({
              owner: variables.owner,
              repo: variables.name,
              body: issue_body,
              title: title,
              labels: [variables.label]
            })
          } else {
            await github.rest.issues.update({
              owner: variables.owner,
              repo: variables.name,
              issue_number: result.repository.issues.edges[0].node.number,
              body: issue_body
            })
          }
|
||||
|
Loading…
x
Reference in New Issue
Block a user