Merge pull request #11818 from google:nightly-issue

PiperOrigin-RevId: 466717610
This commit is contained in:
jax authors 2022-08-10 10:08:35 -07:00
commit 9922308342
2 changed files with 118 additions and 1 deletions

45
.github/workflows/cat_slurm_logs.py vendored Normal file
View File

@ -0,0 +1,45 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script used in the nightly-ci-multiprocess-gpu workflow to process logs."""
import argparse
import os
from typing import List
# Markdown template for one log file in the reported issue body: a collapsible
# <details> section whose summary line shows {name} and whose fenced code block
# holds {content}.
# The blank line after </summary> matters: without it the whole section is
# treated as one raw HTML block and the ``` fence is not parsed as Markdown.
ISSUE_FORMAT = """\
<details><summary>Failure summary {name}</summary>

```
{content}
```

</details>
"""
def main(logfiles: List[str], outfile: str):
    """Wrap every log file in ``ISSUE_FORMAT`` and concatenate them into ``outfile``.

    Args:
        logfiles: Paths of the log files to read; they are emitted in this order.
        outfile: Path of the file to create (or overwrite) with the result.

    Raises:
        OSError: If a log file cannot be opened or ``outfile`` cannot be written.
    """
    print(f"extracting content of {logfiles}")
    print(f"and writing to {outfile}")
    # Write UTF-8 explicitly so the result does not depend on the runner's locale.
    with open(outfile, 'w', encoding='utf-8') as f:
        for logfile in logfiles:
            # Close each log as soon as it has been read instead of leaking the
            # handle.  Undecodable bytes are replaced rather than raising, so a
            # single corrupt log cannot prevent the summary from being written.
            with open(logfile, encoding='utf-8', errors='replace') as log:
                content = log.read()
            f.write(ISSUE_FORMAT.format(name=os.path.basename(logfile), content=content))
if __name__ == "__main__":
    # Command-line entry point: one or more log paths, plus an optional
    # destination for the combined output.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "logfiles",
        nargs="+",
        help="The path to the input logfiles",
    )
    cli.add_argument(
        "--outfile",
        default="parsed_logs.txt",
        help="The path to the parsed output file to be created.",
    )
    parsed = cli.parse_args()
    main(logfiles=parsed.logfiles, outfile=parsed.outfile)

View File

@ -51,4 +51,76 @@ jobs:
if: always()
with:
name: output-from-nodes
path: "outputs/*.txt"
path: "outputs/*.txt"
report:
  # Opens or refreshes a tracking issue with the node logs when the scheduled
  # build job fails.
  name: report
  needs: build
  if: |
    failure()
    && github.event_name == 'schedule'
  runs-on: ubuntu-latest
  defaults:
    run:
      shell: bash
  steps:
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v4
      with:
        python-version: "3.x"
    - uses: actions/download-artifact@v3
      with:
        path: /tmp/workspace/logs
    - name: Parse log output
      run: |
        ls /tmp/workspace/logs/output-from-nodes/
        python .github/workflows/cat_slurm_logs.py /tmp/workspace/logs/output-from-nodes/*.txt --outfile=parsed-logs.txt
    - name: Report failures
      uses: actions/github-script@v6
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        script: |
          const fs = require('fs');
          // File written by the "Parse log output" step above.
          const parsed_logs = fs.readFileSync('parsed-logs.txt', 'utf8');
          const title = "⚠️ Nightly GPU Multiprocess CI failed ⚠️"
          const workflow_url = `https://github.com/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
          const issue_body = `[Workflow Run URL](${workflow_url})\n${parsed_logs}`
          // Run GraphQL query against GitHub API to find the most recent open issue used for reporting failures
          const query = `query($owner:String!, $name:String!, $creator:String!, $label:String!){
            repository(owner: $owner, name: $name) {
              issues(first: 1, states: OPEN, filterBy: {createdBy: $creator, labels: [$label]}, orderBy: {field: CREATED_AT, direction: DESC}) {
                edges {
                  node {
                    body
                    id
                    number
                  }
                }
              }
            }
          }`;
          const variables = {
            owner: context.repo.owner,
            name: context.repo.repo,
            label: 'Nightly-CI',
            creator: "github-actions[bot]"
          }
          const result = await github.graphql(query, variables)
          // If no issue is open, create a new issue,
          // else update the body of the existing issue.
          // Both requests are awaited so that a failed API call rejects this
          // script instead of being left behind as a dangling promise.
          if (result.repository.issues.edges.length === 0) {
            await github.rest.issues.create({
              owner: variables.owner,
              repo: variables.name,
              body: issue_body,
              title: title,
              labels: [variables.label]
            })
          } else {
            await github.rest.issues.update({
              owner: variables.owner,
              repo: variables.name,
              issue_number: result.repository.issues.edges[0].node.number,
              body: issue_body
            })
          }