[libc++] Augment Github-CSV synchronization script to auto-create new Github issues (#118139)

This makes it easier to create the Github issues and add them to the
libc++ conformance project after a WG21 meeting.
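Concretely, the intended post-meeting workflow becomes something like the
following (the script path is assumed here):

    python3 libcxx/utils/synchronize_csv_status_files.py --validate-only   # check the CSV files only, no Github access
    python3 libcxx/utils/synchronize_csv_status_files.py --create-new      # sync and prompt to create any missing issues

The --create-new run prompts before creating each issue and needs an
authenticated `gh` CLI; --load-github-from can point at a previously saved
`gh project item-list ... --format json` dump to avoid re-querying the API.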
Louis Dionne 2024-12-02 13:47:47 -05:00 committed by GitHub
parent 57452bb3a9
commit 1b03747ed8


@@ -281,12 +281,64 @@ def write_csv(output: pathlib.Path, rows: List[Tuple]):
        for row in rows:
            writer.writerow(row)
def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
def create_github_issue(paper: PaperInfo, labels: List[str]) -> None:
    """
    Create a new Github issue representing the given PaperInfo.
    """
    paper_name = paper.paper_name.replace('``', '`').replace('\\', '')

    create_cli = ['gh', 'issue', 'create', '--repo', 'llvm/llvm-project',
                  '--title', f'{paper.paper_number}: {paper_name}',
                  '--body', f'**Link:** https://wg21.link/{paper.paper_number}',
                  '--project', 'libc++ Standards Conformance',
                  '--label', 'libc++']
    for label in labels:
        create_cli += ['--label', label]

    print("Do you want to create the following issue?")
    print(create_cli)
    answer = input("y/n: ")
    if answer == 'n':
        print("Not creating issue")
        return
    elif answer != 'y':
        print(f"Invalid answer {answer}, skipping")
        return

    print("Creating issue")
    issue_link = subprocess.check_output(create_cli).decode().strip()
    print(f"Created tracking issue for {paper.paper_number}: {issue_link}")

    # Retrieve the "Github project item ID" by re-adding the issue to the project again,
    # even though we created it inside the project in the first place.
    item_add_cli = ['gh', 'project', 'item-add', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--url', issue_link, '--format', 'json']
    item = json.loads(subprocess.check_output(item_add_cli).decode().strip())

    # Then, adjust the 'Meeting Voted' field of that item.
    meeting_voted_cli = ['gh', 'project', 'item-edit',
                         '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                         '--field-id', 'PVTF_lADOAQWwKc4AlOgtzgdUEXI', '--text', paper.meeting,
                         '--id', item['id']]
    subprocess.check_call(meeting_voted_cli)

    # And also adjust the 'Status' field of the item to 'To Do'.
    status_cli = ['gh', 'project', 'item-edit',
                  '--project-id', 'PVT_kwDOAQWwKc4AlOgt',
                  '--field-id', 'PVTSSF_lADOAQWwKc4AlOgtzgdUBak', '--single-select-option-id', 'f75ad846',
                  '--id', item['id']]
    subprocess.check_call(status_cli)

def sync_csv(rows: List[Tuple], from_github: List[PaperInfo], create_new: bool, labels: List[str] = None) -> List[Tuple]:
    """
    Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
    up-to-date (but potentially incomplete) tracking information from Github, this function returns the
    new CSV rows synchronized with the up-to-date information.

    If `create_new` is True and a paper from the CSV file is not tracked on Github yet, this also prompts
    to create a new issue on Github for tracking it. In that case the created issue is tagged with the
    provided labels.

    Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
    PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
    it is an error (i.e. the result is not a CSV row where the paper is *not* implemented).
@@ -305,16 +357,20 @@ def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
        # issue tracking it, which we validate below.
        tracking = [gh for gh in from_github if paper.paper_number == gh.paper_number]

        # If there is no tracking issue for that row in the CSV, this is an error since we're
        # missing a Github issue.
        if len(tracking) == 0:
            print(f"Can't find any Github issue for CSV row: {row}")

        # If there's more than one tracking issue, something is weird.
        if len(tracking) > 1:
            print(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")
            results.append(row)
            continue

        # If there's more than one tracking issue, something is weird too.
        if len(tracking) > 1:
            print(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")

        # If there is no tracking issue for that row and we are creating new issues, do that.
        # Otherwise just log that we're missing an issue.
        if len(tracking) == 0:
            if create_new:
                assert labels is not None, "Missing labels when creating new Github issues"
                create_github_issue(paper, labels=labels)
            else:
                print(f"Can't find any Github issue for CSV row: {row}")
            results.append(row)
            continue
@@ -322,32 +378,61 @@ def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
    return results

CSV_FILES_TO_SYNC = [
    'Cxx17Issues.csv',
    'Cxx17Papers.csv',
    'Cxx20Issues.csv',
    'Cxx20Papers.csv',
    'Cxx23Issues.csv',
    'Cxx23Papers.csv',
    'Cxx2cIssues.csv',
    'Cxx2cPapers.csv',
]
CSV_FILES_TO_SYNC = {
    'Cxx17Issues.csv': ['c++17', 'lwg-issue'],
    'Cxx17Papers.csv': ['c++17', 'wg21 paper'],
    'Cxx20Issues.csv': ['c++20', 'lwg-issue'],
    'Cxx20Papers.csv': ['c++20', 'wg21 paper'],
    'Cxx23Issues.csv': ['c++23', 'lwg-issue'],
    'Cxx23Papers.csv': ['c++23', 'wg21 paper'],
    'Cxx2cIssues.csv': ['c++26', 'lwg-issue'],
    'Cxx2cPapers.csv': ['c++26', 'wg21 paper'],
}

def main(argv):
    import argparse
    parser = argparse.ArgumentParser(prog='synchronize-status-files',
                                     description='Synchronize the libc++ conformance status files with Github issues')
    parser.add_argument('--validate-only', action='store_true',
                        help="Only perform the data validation of CSV files.")
    parser.add_argument('--create-new', action='store_true',
                        help="Create new Github issues for CSV rows that do not correspond to any existing Github issue.")
    parser.add_argument('--load-github-from', type=str,
                        help="A json file to load the Github project information from instead of querying the API. This is useful for testing to avoid rate limiting.")
    args = parser.parse_args(argv)
def main():
    libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

    # Extract the list of PaperInfos from issues we're tracking on Github.
    print("Loading all issues from Github")
    gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999']
    project_info = json.loads(subprocess.check_output(gh_command_line))

    # Perform data validation for all the CSV files.
    print("Performing data validation of the CSV files")
    for filename in CSV_FILES_TO_SYNC:
        csv = load_csv(libcxx_root / 'docs' / 'Status' / filename)
        for row in csv[1:]: # Skip the header
            if row[0] != "": # Skip separator rows
                PaperInfo.from_csv_row(row)

    if args.validate_only:
        return

    # Load all the Github issues tracking papers from Github.
    if args.load_github_from:
        print(f"Loading all issues from {args.load_github_from}")
        with open(args.load_github_from, 'r') as f:
            project_info = json.load(f)
    else:
        print("Loading all issues from Github")
        gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999']
        project_info = json.loads(subprocess.check_output(gh_command_line))
    from_github = [PaperInfo.from_github_issue(i) for i in project_info['items']]

    for filename in CSV_FILES_TO_SYNC:
    # Synchronize CSV files with the Github issues.
    for (filename, labels) in CSV_FILES_TO_SYNC.items():
        print(f"Synchronizing {filename} with Github issues")
        file = libcxx_root / 'docs' / 'Status' / filename
        csv = load_csv(file)
        synced = sync_csv(csv, from_github)
        synced = sync_csv(csv, from_github, create_new=args.create_new, labels=labels)
        write_csv(file, synced)

if __name__ == '__main__':
    main()
    import sys
    main(sys.argv[1:])
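
The patch hard-codes the conformance project's node ID ('PVT_...'), the
'Meeting Voted' and 'Status' field IDs ('PVTF_...'/'PVTSSF_...'), and the
'To Do' option ID ('f75ad846'). For reference, this is a minimal sketch (not
part of the change above) of how those values can be re-derived should they
ever change. It assumes the `gh project view` and `gh project field-list`
subcommands with `--format json` from a recent gh release, and the exact JSON
layout may need adjusting:

import json
import subprocess

def dump_project_ids(project_number: str, owner: str = 'llvm') -> None:
    # Node ID of the project itself, usable as '--project-id' with 'gh project item-edit'.
    view = json.loads(subprocess.check_output(
        ['gh', 'project', 'view', project_number, '--owner', owner, '--format', 'json']))
    print(f"project id: {view['id']}")

    # Field IDs (usable as '--field-id') and, for single-select fields such as
    # 'Status', the option IDs (usable as '--single-select-option-id').
    fields = json.loads(subprocess.check_output(
        ['gh', 'project', 'field-list', project_number, '--owner', owner, '--format', 'json']))
    for field in fields['fields']:
        print(f"{field['name']}: {field['id']}")
        for option in field.get('options', []):
            print(f"    {option['name']}: {option['id']}")

if __name__ == '__main__':
    import sys
    # Pass the project number, e.g. the value of LIBCXX_CONFORMANCE_PROJECT used by the script.
    dump_project_ids(sys.argv[1])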