In [2]:
import requests
import subprocess
import json
import re
import shutil
import numpy as np
import datetime
import openai
from dotenv import load_dotenv
import os

from generate_release_objects import ReleaseURL, PR
from generate_release_objects import setup_openai_api, get_release_date, write_prs_to_file

In [3]:
# Set up the OpenAI API key from the .env file
# (setup_openai_api is imported from generate_release_objects; presumably it has to run
# before the cells that call pr.edit_text_with_openai / pr.clean_title — confirm there).
setup_openai_api()

In [4]:

# Maps a PR label name to the Markdown heading text associated with it.
label_to_category = dict(
    highlight="## Release highlights",
    enhancement="## Enhancements",
    deprecation="## Deprecations",
    bug="## Bug fixes",
    documentation="## Documentation",
)

# One empty list per known label, in the same key order as label_to_category.
categories = {label: [] for label in label_to_category}

# Order in which labels are checked when a PR carries several of them:
# the first label in this list that the PR has decides its category.
label_hierarchy = [
    "highlight",
    "deprecation",
    "bug",
    "enhancement",
    "documentation",
]

In [5]:
def collect_github_urls():
    """Collects release URLs from user.

    Prompts repeatedly until an empty (or whitespace-only) line is entered.
    Surrounding whitespace is stripped from every answer before it is wrapped
    in a ReleaseURL.

    Returns:
        List[ReleaseURL]: A list of ReleaseURL objects, in the order entered

    Exits:
        If the user presses enter and no URLs were entered (SystemExit with code 1)
    """
    urls = []
    while True:
        url = input("Enter a full GitHub release URL (leave empty to finish): ").strip()
        if url:
            urls.append(ReleaseURL(url))
            continue
        if not urls:  # Finished without entering a single URL
            print("Error: You must specify at least one full GitHub release URL.")
            # Raise SystemExit directly: the `exit` helper is injected by `site`/IPython
            # and is not a reliable way to stop with an error code everywhere.
            raise SystemExit(1)
        return urls

# Prompt for the release URLs once; the later cells all iterate over this list.
github_urls = collect_github_urls() # the only big global variable

In [6]:

# Resolve the release date once; both renderings below are derived from it.
# (assumes get_release_date() returns a datetime/date-like object — it is used with
# .strftime here and in update_quarto_yaml)
release_datetime = get_release_date()

# Lower-cased "YYYY-mon-DD" slug, used for the release directory name.
formatted_release_date = release_datetime.strftime("%Y-%b-%d").lower()

# Human-readable date such as "March 5, 2024", used as the page title.
# The day number comes from `.day` instead of "%-d": the "-" (no padding) flag is a
# glibc extension that is not supported by every platform's strftime (e.g. Windows).
original_release_date = (
    f"{release_datetime.strftime('%B')} {release_datetime.day}, {release_datetime.strftime('%Y')}"
)

In [7]:

# Every release gets its own folder named after the date slug.
directory_path = "../site/releases/" + formatted_release_date + "/"
os.makedirs(directory_path, exist_ok=True)

# directory_path already ends with "/", so plain concatenation yields the file path.
output_file = directory_path + "release-notes.qmd"

In [8]:

print("Generating & editing release notes ...")

# Start the release notes file from scratch ("w" truncates any previous content) with a
# YAML front-matter block whose title is the human-readable release date.
# The encoding is set explicitly so the file does not depend on the platform's locale default.
with open(output_file, "w", encoding="utf-8") as file:
    file.write(f"---\ntitle: \"{original_release_date}\"\n---\n\n")


Generating & editing release notes ...


In [9]:
# Collects the PR details per category; filled by the categorisation cell further down.
# Every key gets its own fresh list. Copying `categories` with dict.update() would share
# the very same list objects, so appending here would also fill `categories`, and
# re-running this cell could never reset the collected PRs.
release_components = {label: [] for label in categories}

In [10]:
# Have every ReleaseURL work out its repository and tag name. notebook_extract_prs reads
# url.repo_name and url.tag_name afterwards (presumably set by this call — confirm in
# generate_release_objects).
for url in github_urls:
    url.set_repo_and_tag_name() 

In [11]:
# Matches ANSI escape sequences so they can be stripped from `gh` output before JSON parsing.
ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')

# Captures the PR number of a GitHub pull-request URL. The owner and repository segments
# exclude "/" and whitespace so that several URLs on one line are matched one by one
# (a greedy ".+" would swallow everything up to the last "/pull/" on the line).
_pr_url_pattern = re.compile(r"https://github\.com/[^/\s]+/[^/\s]+/pull/(\d+)")

def notebook_extract_prs(self):
    """Extracts PRs from the release URL.

    Runs `gh api repos/<repo_name>/releases/tags/<tag_name>`, stores the parsed JSON
    on the object and appends one PR object per distinct pull-request URL found in
    the release body. Every failure is reported with print() and leaves self.prs
    untouched.

    Modifies:
        self.prs
        self.data_json (only when the release data parses as JSON)
    """
    cmd_release = ['gh', 'api', f'repos/{self.repo_name}/releases/tags/{self.tag_name}']
    result_release = subprocess.run(cmd_release, capture_output=True, text=True)

    if result_release.returncode != 0:
        print(f"Error: Command failed with return code {result_release.returncode}")
        print(f"stderr: {result_release.stderr}")
        return

    output_release_clean = ansi_escape.sub('', result_release.stdout.strip())

    try:
        release_data = json.loads(output_release_clean)
    except json.JSONDecodeError:
        print(f"Error: Unable to parse release data for URL '{self.url}'.")
        # Stop here: there is no release data to look at for this URL.
        return
    self.data_json = release_data

    if 'body' not in self.data_json:
        print(f"Error: No body found in release data for URL '{self.url}'.")
        return

    # A release without a description can carry a null body; treat it as empty text.
    body = self.data_json['body'] or ""

    # dict.fromkeys drops repeated PR numbers while keeping first-seen order, so a PR
    # that is linked twice in the body still yields a single PR object.
    for pr_number in dict.fromkeys(_pr_url_pattern.findall(body)):
        self.prs.append(PR(self.repo_name, pr_number))

# Replace ReleaseURL.extract_prs with the notebook-local implementation defined above.
ReleaseURL.extract_prs = notebook_extract_prs

for url in github_urls:
    url.extract_prs() # initializes PR objects into a list for each URL

In [12]:
def notebook_load_data_json(self):
    """Loads the JSON data from a PR to self.data_json.

    Runs `gh pr view <pr_number> --json title,body,url,labels --repo <repo_name>`.
    self.data_json ends up as the parsed JSON, or as None when the command fails,
    when its output is not valid JSON, or when any label is 'internal'. Callers can
    therefore rely on `if pr.data_json:` to skip PRs without usable data.

    Modifies:
        self.data_json
    """
    # str() keeps the command valid even if the PR number was stored as an int.
    cmd = ['gh', 'pr', 'view', str(self.pr_number), '--json', 'title,body,url,labels', '--repo', self.repo_name]
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        print(f"Error: Command failed with return code {result.returncode}")
        print(f"stderr: {result.stderr}")
        self.data_json = None
        return None

    output_clean = ansi_escape.sub('', result.stdout.strip())

    try:
        pr_data = json.loads(output_clean)
    except json.JSONDecodeError:
        print(f"Error: Unable to parse PR data for PR number {self.pr_number} in repository {self.repo_name}.")
        self.data_json = None
        return None

    labels = pr_data.get('labels') or []
    if any(label['name'] == 'internal' for label in labels):
        pr_data = None  # Ignore PRs with the 'internal' label

    self.data_json = pr_data

# Replace PR.load_data_json with the notebook-local implementation defined above.
PR.load_data_json = notebook_load_data_json

for url in github_urls:
    for pr in url.prs:
        pr.load_data_json() # loads json file into object


In [13]:
# Style guidelines handed to pr.edit_text_with_openai for the PR notes text.
editing_instructions_body = """
    Please edit the provided technical content according to the following guidelines:

    - Use simple and neutral language in the active voice.
    - Address users directly in the second person with "you".
    - Use present tense by avoiding the use of "will".
    - Apply sentence-style capitalization to text
    - Always capitalize the first letter of text on each line.
    - Rewrite sentences that are longer than 25 words as multiple sentences.
    - Only split text across multiple lines if the text contains more than three sentences.
    - Avoid handwaving references to "it" or "this" by including the text referred to. 
    - Treat short text of less than ten words without a period at the end as a heading. 
    - Enclose any words joined by underscores in backticks (`) if they aren't already.
    - Remove exclamation marks from text.
    - Remove quotes around non-code words.
    - Remove the text "feat:" from the output
    - Maintain existing punctuation at the end of sentences.
    - Maintain all original hyperlinks for reference.
    - Preserve all comments in the format <!--- COMMENT ---> as they appear in the text.
    """

for url in github_urls:
    for pr in url.prs:
        # PRs without loaded JSON data are skipped entirely.
        if not pr.data_json:
            continue
        # Only edit when extract_external_release_notes() reports a truthy result.
        if pr.extract_external_release_notes():
            pr.edit_text_with_openai(False, editing_instructions_body)


In [14]:
# Style guidelines handed to pr.clean_title for the PR titles.
editing_instructions_title = """
    Please edit the provided technical content according to the following guidelines:

    - Use simple and neutral language in the active voice.
    - Address users directly in the second person with "you".
    - Use present tense by avoiding the use of "will".
    - Apply sentence-style capitalization to text
    - Always capitalize the first letter of text on each line.
    - Rewrite sentences that are longer than 25 words as multiple sentences.
    - Only split text across multiple lines if the text contains more than three sentences.
    - Avoid handwaving references to "it" or "this" by including the text referred to. 
    - Treat short text of less than ten words without a period at the end as a heading. 
    - Enclose any words joined by underscores in backticks (`) if they aren't already.
    - Remove exclamation marks from text.
    - Remove quotes around non-code words.
    - Remove the text "feat:" from the output
    - Maintain existing punctuation at the end of sentences.
    - Maintain all original hyperlinks for reference.
    - Preserve all comments in the format <!--- COMMENT ---> as they appear in the text.
    """

for url in github_urls:
    for pr in url.prs:
        # PRs without loaded JSON data are skipped entirely.
        if not pr.data_json:
            continue
        # Seed the title from the raw PR title, then let clean_title rewrite it.
        pr.title = pr.data_json['title']
        pr.clean_title(editing_instructions_title)


"Model Workflows" section in Guides
"Model Workflows" section in Guides
Model workflows section in guides
Create training collateral
Create training collateral
Create training collateral
Clarify what review means
Clarify what review means
Clarify what review means


Updated test descriptions 
Updated test descriptions
Updated test descriptions


In [15]:

# Reduce each PR's label objects to a plain list of label names.
for url in github_urls:
    for pr in url.prs:
        if not pr.data_json:
            continue
        pr.labels = [entry['name'] for entry in pr.data_json['labels']]


In [16]:

# Bundle everything known about a PR into one dict (pr.pr_details).
for url in github_urls:
    for pr in url.prs:
        if not pr.data_json:
            continue
        details = {}
        details['pr_number'] = pr.pr_number
        details['title'] = pr.cleaned_title
        details['full_title'] = pr.data_json['title']
        details['url'] = pr.data_json['url']
        details['labels'] = ", ".join(pr.labels)
        details['notes'] = pr.edited_text
        pr.pr_details = details


In [17]:

# File every PR under the first label of label_hierarchy that it carries;
# PRs matching none of those labels are collected under 'other'.
for url in github_urls:
    for pr in url.prs:
        if not pr.data_json:
            continue
        for priority_label in label_hierarchy:
            if priority_label in pr.labels:
                release_components[priority_label].append(pr.pr_details)
                break
        else:
            # The loop ended without a break: no hierarchy label matched.
            release_components.setdefault('other', []).append(pr.pr_details)

In [18]:
# Write categorized PRs to the file
# Append mode keeps the header written earlier. PR titles and notes are free-form text
# that may contain non-ASCII characters, so the encoding is fixed to UTF-8 instead of
# depending on the platform's locale default.
with open(output_file, "a", encoding="utf-8") as file:
    write_prs_to_file(file, release_components, label_to_category)


In [19]:

def update_quarto_yaml(release_date, yaml_filename="../site/_quarto.yml"):
    """Updates the _quarto.yml file to include the release notes file so it can be accessed on the website.

    An entry for `releases/<date>/release-notes.qmd` is inserted directly after every
    line that consists of the marker comment `# MAKE-RELEASE-NOTES-EMBED-MARKER`.
    The file is left untouched when the entry is already listed (so the function
    can be re-run safely) or when no marker line exists; both cases are reported
    with print().

    Params:
        release_date - release notes use the release date as the file name.
            Must provide strftime(); formatted as lower-cased "%Y-%b-%d".
        yaml_filename - path of the YAML file to update; defaults to the site's _quarto.yml.

    Modifies:
        _quarto.yml file
    """
    marker = "# MAKE-RELEASE-NOTES-EMBED-MARKER"

    # Format the release date for insertion into the YAML file
    formatted_release_date = release_date.strftime("%Y-%b-%d").lower()
    entry = f'        - releases/{formatted_release_date}/release-notes.qmd\n'

    with open(yaml_filename, 'r') as file:
        lines = file.readlines()

    # Re-running the notebook for the same date must not list the file twice.
    if any(line.strip() == entry.strip() for line in lines):
        print(f"Release notes for {formatted_release_date} are already listed in _quarto.yml, nothing added")
        return

    updated_lines = []
    inserted_line_number = None
    for line in lines:
        is_marker = line.strip() == marker
        if is_marker and not line.endswith("\n"):
            # Marker on an unterminated last line: end it so the entry starts on its own line.
            line += "\n"
        updated_lines.append(line)
        if is_marker:
            updated_lines.append(entry)
            inserted_line_number = len(updated_lines)  # 1-based line of the new entry

    if inserted_line_number is None:
        print(f"Error: Marker '{marker}' not found in _quarto.yml, release notes were not added")
        return

    # Write the new content next to the original and swap it in with os.replace, so a
    # failure while writing cannot leave a truncated _quarto.yml behind.
    root, extension = os.path.splitext(yaml_filename)
    temp_yaml_filename = f"{root}_temp{extension}"
    with open(temp_yaml_filename, 'w') as file:
        file.writelines(updated_lines)
    shutil.copymode(yaml_filename, temp_yaml_filename)  # keep the original permission bits
    os.replace(temp_yaml_filename, yaml_filename)

    print(f"Added release notes to _quarto.yml, line {inserted_line_number}")

# Register the release notes for the release date resolved earlier in the notebook.
update_quarto_yaml(release_datetime)

Added release notes to _quarto.yml, line 106


In [20]:
# Final step: list what git reports as modified (' M'), untracked ('??') or added ('A ').
try:
    result = subprocess.run(["git", "status", "--short"], check=True, text=True, capture_output=True)
except subprocess.CalledProcessError as e:
    print("Failed to run git status:", e)
else:
    lines = result.stdout.split('\n')
    print("Files to commit:")
    # Every wanted status code is exactly two characters long, so comparing the first
    # two characters is the same check as startswith() with those prefixes.
    for status_line in lines:
        if status_line[:2] in (' M', '??', 'A '):
            print(status_line)

Files to commit:
 M ../site/Makefile
 M ../site/_quarto.yml
 M ../site/_site/about/contributing/style-guide/conventions.html
 M ../site/_site/about/contributing/style-guide/style-guide.html
 M ../site/_site/about/contributing/style-guide/voice-and-tone.html
 M ../site/_site/about/fine-print/data-privacy-policy.html
 M ../site/_site/about/overview-model-documentation.html
 M ../site/_site/about/overview-model-risk-management.html
 M ../site/_site/about/overview.html
 M ../site/_site/developer/get-started-developer-framework.html
 M ../site/_site/developer/model-documentation/install-and-initialize-developer-framework.html
 M ../site/_site/developer/model-documentation/store-credentials-in-env-file.html
 M ../site/_site/developer/model-documentation/supported-models.html
 M ../site/_site/developer/model-testing/test-descriptions.html
 M ../site/_site/developer/model-testing/testing-overview.html
 M ../site/_site/developer/samples-jupyter-notebooks.html
 M ../site/_site/get-started/develo