# Generate release notes

## Import necessary libraries
In order to run this notebook, check that the kernel is _____ and _____.

Now run the following code cell to import necessary libraries.

In [None]:
import requests
import subprocess
import json
import re
import shutil
import numpy as np
import datetime
import openai
from dotenv import load_dotenv
import os
from functools import reduce

## Setup for OpenAI API

Replace "YOUR_OPENAI_API_KEY" with your API key. 

TODO: Check for a `.env` file first and read the API key from there, falling back to the value set in the cell below only if no `.env` key is found.

In [None]:
def setup_openai_api(api_key):
    """Register the OpenAI API key on the `openai` module.

    Args:
        api_key: The OpenAI API key as a string.

    Raises:
        EnvironmentError: If the key is missing, blank, or still the
            "YOUR_OPENAI_API_KEY" placeholder shipped with this notebook.
    """
    # Reject the unreplaced placeholder as well: it is a non-empty string, so a
    # plain truthiness check would accept it and fail later on the first request.
    if not api_key or not api_key.strip() or api_key.strip() == "YOUR_OPENAI_API_KEY":
        raise EnvironmentError("OpenAI API key is not set.")

    openai.api_key = api_key

# Prefer a key from the OPENAI_API_KEY environment variable (load_dotenv() also
# picks it up from a local .env file), so the secret never has to be saved in
# the notebook. Only if none is found, fall back to the value typed in below.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY") or "YOUR_OPENAI_API_KEY"
setup_openai_api(api_key)

## Add GitHub release URLs
Add release URLs from the documentation, backend, and frontend repos into the cell below.

In [None]:
github_urls = [
    "https://github.com/your_repo/releases/tag/your_tag",
    # Add more URLs here. Every entry must end with a comma: without it, Python
    # joins adjacent string literals into a single (invalid) URL.
    "Paste documentation repo - delete this line if you don't need, or add more lines if there are more",
    "Paste frontend repo",
    "Paste backend repo",
    "Paste developer framework repo",
]

## Get PR numbers from URLs
Running this cell defines the function that reads each release URL and collects the pull request numbers linked in that release's notes. It also checks that the URLs from above are in the right format.

In [None]:
def get_pr_numbers_from_url(release_url):
    """Collect the pull request numbers linked from a GitHub release body.

    The release is fetched with the GitHub CLI (`gh api`), and every
    `https://github.com/<owner>/<repo>/pull/<number>` link found in its body
    contributes one PR number.

    Args:
        release_url: A URL of the form
            `https://github.com/<owner>/<repo>/releases/tag/<tag>`.

    Returns:
        A `(repo_name, pr_numbers)` pair. On success `pr_numbers` is a set of
        PR numbers as strings. If the URL does not match the expected format
        the result is `([], [])`; if the release cannot be fetched or has no
        body the result is `(repo_name, [])`. An error is printed in each of
        those cases.
    """
    # Strip stray whitespace so it does not end up inside the tag name.
    match = re.search(r"github\.com/(.+)/releases/tag/(.+)$", release_url.strip())
    if not match:
        print(f"Error: Invalid URL format '{release_url}'.")
        return [], []

    repo_name, tag_name = match.groups()
    cmd_release = ['gh', 'api', f'repos/{repo_name}/releases/tags/{tag_name}']
    try:
        result_release = subprocess.run(cmd_release, capture_output=True, text=True)
    except OSError as error:
        # Raised when the `gh` executable is missing or cannot be started.
        print(f"Error: Unable to run the GitHub CLI for URL '{release_url}': {error}")
        return repo_name, []

    try:
        release_data = json.loads(result_release.stdout.strip())
    except json.JSONDecodeError:
        print(f"Error: Unable to parse release data for URL '{release_url}'.")
        return repo_name, []

    # A release without a description can carry a null body; treat that the
    # same as a missing body instead of handing None to the regex.
    body = release_data.get('body') if isinstance(release_data, dict) else None
    if not isinstance(body, str):
        print(f"Error: No body found in release data for URL '{release_url}'.")
        return repo_name, []

    # Match owner and repo as single path segments. A greedy ".+" here would
    # swallow earlier links on the same line and keep only the last PR number.
    pr_numbers = re.findall(r"https://github\.com/[^/\s]+/[^/\s]+/pull/(\d+)", body)
    return repo_name, set(pr_numbers)

# Example usage:
# repo_name, pr_numbers = get_pr_numbers_from_url(github_urls[0])
# print(repo_name, pr_numbers)

## Get PR data
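This cell defines `get_pr_data`, which uses the GitHub CLI (`gh pr view`) to fetch the title, body, URL, and labels of a pull request. PRs with the `internal` label are skipped.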

In [None]:
def get_pr_data(repo_name, pr_number):
    """Fetch title, body, URL, and labels for one pull request via the GitHub CLI.

    Args:
        repo_name: Repository in `<owner>/<repo>` form, passed to `gh --repo`.
        pr_number: The PR number, as a string or an int.

    Returns:
        The parsed JSON object printed by `gh pr view`, or None when the CLI
        cannot be run, its output is not valid JSON, or the PR carries the
        'internal' label.
    """
    # subprocess only accepts string arguments, so normalise int PR numbers.
    cmd = ['gh', 'pr', 'view', str(pr_number), '--json', 'title,body,url,labels', '--repo', repo_name]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as error:
        # Raised when the `gh` executable is missing or cannot be started.
        print(f"Error: Unable to run the GitHub CLI for PR number {pr_number} in repository {repo_name}: {error}")
        return None

    try:
        pr_data = json.loads(result.stdout.strip())
    except json.JSONDecodeError:
        print(f"Error: Unable to parse PR data for PR number {pr_number} in repository {repo_name}.")
        return None

    # Tolerate a missing or null label list rather than raising KeyError.
    labels = pr_data.get('labels') or []
    if any(label['name'] == 'internal' for label in labels):
        return None  # Ignore PRs with the 'internal' label
    return pr_data

# Example usage:
# pr_data = get_pr_data(repo_name, pr_numbers.pop())
# print(pr_data)

## Extract external release notes
This function looks through our release notes and takes the data that we marked external, or put under **External Release Notes**.

In [None]:
def extract_external_release_notes(pr_body):
    """Return the edited text found under "## External Release Notes" in a PR body.

    Everything after the "## External Release Notes" heading is taken, lines
    that start with "###" get one extra "#" prepended, and the result is
    passed through `edit_text_with_openai`.

    Args:
        pr_body: The PR description, or None/empty when the PR has none.

    Returns:
        The edited release notes, or None when the body is missing, has no
        "## External Release Notes" heading, or the section is blank.
    """
    if not pr_body:
        return None

    match = re.search(r"## External Release Notes(.+)", pr_body, re.DOTALL)
    if not match:
        return None

    extracted_text = match.group(1).strip()
    if not extracted_text:
        # A heading with nothing under it: skip the model call entirely.
        return None

    modified_text = '\n'.join(
        '#' + line if line.lstrip().startswith('###') else line
        for line in extracted_text.split('\n')
    )
    return edit_text_with_openai(modified_text)

# Example usage:
# if pr_data:
#     release_notes = extract_external_release_notes(pr_data['body'])
#     print(release_notes)


## Generate the title
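This function turns a PR title into a release-note heading. It removes bracketed tags such as `[...]`, keeps only the text after the last `/` (capitalizing its first letter), sends the result to OpenAI for editing, and strips any trailing period.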

In [None]:
def clean_title(title):
    """Turn a raw PR title into an edited heading without a trailing period.

    Bracketed tags ("[...]") are removed. If the remaining text contains a
    "/", only the part after the last slash is kept and its first letter is
    upper-cased. The result is passed through `edit_text_with_openai`, and
    trailing periods are stripped from what comes back.
    """
    without_tags = re.sub(r"\[.*?\]", "", title)
    segments = without_tags.split('/')

    if len(segments) > 1:
        candidate = segments[-1].strip()
        if candidate and candidate[0].islower():
            candidate = candidate[0].upper() + candidate[1:]
    else:
        candidate = without_tags

    return edit_text_with_openai(candidate.strip()).rstrip('.')

# Example usage:
# if pr_data:
#     cleaned_title = clean_title(pr_data['title'])
#     print(cleaned_title)


## Feed data into ChatGPT to generate release notes
The text in the triple-quotes (""") is fed into ChatGPT to generate release notes out of our data. If you find that the output isn't how you like it, for example it's too informal, you can modify the prompt below.

TODO: Editing instructions in a separate cell with the text from above

In [None]:
# Cell 8: Edit text with OpenAI
def edit_text_with_openai(lines):
    """Rewrite text with the OpenAI chat API according to the editing guidelines below.

    Args:
        lines: The text to edit, either as a single string or as an iterable
            of strings that are joined with newlines before being sent.

    Returns:
        The edited text as a string. If the input is empty or None, or the
        request fails for any reason, the original `lines` value is returned
        unchanged.
    """
    if not lines:
        # Nothing to edit; also avoids joining None.
        return lines

    # The callers in this notebook pass one string. Joining a str with "\n"
    # would place every character on its own line, so only join real sequences.
    if isinstance(lines, str):
        original_text = lines
    else:
        original_text = "\n".join(lines)

    editing_instructions = """
    Please edit the provided technical content according to the following guidelines:
    - Use simple and neutral language in the active voice.
    - Address users directly in the second person with "you".
    - Use present tense by avoiding the use of "will".
    - Apply sentence-style capitalization to text
    - Always capitalize the first letter of text on each line.
    - Rewrite sentences that are longer than 25 words as multiple sentences.
    - Only split text across multiple lines if the text contains more than three sentences.
    - Avoid handwaving references to "it" or "this" by including the text referred to. 
    - Treat short text of less than ten words without a period at the end as a heading. 
    - Enclose any words joined by underscores in backticks (`) if they aren't already.
    - Remove exclamation marks from text.
    - Remove quotes around non-code words.
    - Remove the text "feat:" from the output
    - Maintain existing punctuation at the end of sentences.
    - Maintain all original hyperlinks for reference.
    - Preserve all comments in the format <!--- COMMENT ---> as they appear in the text.
    """

    try:
        # Build the client inside the try block so a missing key falls back to
        # the original text like any other failure. Pass the key registered on
        # the openai module explicitly; when it is None the client falls back
        # to the OPENAI_API_KEY environment variable.
        client = openai.OpenAI(api_key=getattr(openai, "api_key", None))
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {
                    "role": "system",
                    "content": editing_instructions
                },
                {
                    "role": "user",
                    "content": original_text
                }
            ],
            max_tokens=4096,  # Adjust the token limit as needed
            frequency_penalty=0.5,  # Modify repetition tendencies
            presence_penalty=0.5  # Encourage diversity in responses
        )
        edited_text = response.choices[0].message.content
        # The message content can be None; keep the original text in that case.
        return edited_text if edited_text is not None else lines

    except Exception as e:
        print(f"\nFailed to edit text with OpenAI: {str(e)}")
        print(f"\n{lines}\n")
        return lines  # Return the original lines if the edit fails

# Example usage:
# edited_text = edit_text_with_openai(["Example text to edit."])
# print(edited_text)


## Set the release date
This function sets the release date as three business days from today, because _____. If you would like to change that, _____.

TODO: Set `date_input` in the notebook instead of calling `input()`.

In [None]:
def get_release_date(date_input=None):
    """Return the release date as a `datetime.datetime`.

    Args:
        date_input: Optional date string in "Month Day, Year" form (for
            example "January 1, 2020"). When given, it is parsed directly and
            no prompt is shown, so the notebook can run unattended. When
            omitted, the user is prompted; pressing Enter accepts the default
            of three business days from today.

    Returns:
        The parsed date as a `datetime.datetime` (time set to midnight).

    Raises:
        ValueError: If `date_input` is given but is not in "Month Day, Year"
            form. Interactive input is re-prompted instead of raising.
    """
    date_format = "%B %d, %Y"
    invalid_message = "Invalid date format. Please try again using the format Month Day, Year (e.g., January 1, 2020)."

    if date_input is not None:
        try:
            return datetime.datetime.strptime(date_input.strip(), date_format)
        except ValueError:
            raise ValueError(invalid_message) from None

    today = datetime.datetime.now().date()
    np_today = np.datetime64(today, 'D')
    three_business_days = np.busday_offset(np_today, 3, roll='forward')
    # A day-resolution datetime64 converts to a plain datetime.date object.
    three_business_days = three_business_days.astype('datetime64[D]').astype(object)
    default_date = three_business_days.strftime(date_format)

    # Loop rather than recurse so repeated typos cannot grow the call stack.
    while True:
        entered = input(f"Enter the release date (Month Day, Year) [{default_date}]: ").strip() or default_date
        try:
            return datetime.datetime.strptime(entered, date_format)
        except ValueError:
            print(invalid_message)

# Example usage:
# release_datetime = get_release_date()
# formatted_release_date = release_datetime.strftime("%Y-%b-%d").lower()
# original_release_date = release_datetime.strftime("%B %-d, %Y")
# print(formatted_release_date, original_release_date)


## Update the Quarto file
In order for our newly generated release notes to appear on our doc site, we need to update the `_quarto.yml` file to include them. This function adds our file info into the file so it will appear on the sidebar next to our other release notes.

In [None]:
def update_quarto_yaml(output_file, release_date, yaml_filename="_quarto.yml"):
    """Insert the release notes entry into the Quarto config after the embed marker.

    A line of the form `        - releases/<yyyy-mon-dd>/release-notes.qmd` is
    written directly below every line whose stripped text equals
    "# MAKE-RELEASE-NOTES-EMBED-MARKER".

    Args:
        output_file: Path of the generated release notes file. Not used by
            this function; kept so existing calls keep working.
        release_date: A date/datetime object; formatted with "%Y-%b-%d" and
            lower-cased to build the entry path.
        yaml_filename: Path of the Quarto YAML file to update.

    The file is left untouched (and a message is printed) when the entry is
    already listed or when the marker line is not found.
    """
    marker = "# MAKE-RELEASE-NOTES-EMBED-MARKER"
    formatted_release_date = release_date.strftime("%Y-%b-%d").lower()
    entry = f'        - releases/{formatted_release_date}/release-notes.qmd\n'

    with open(yaml_filename, 'r') as file:
        lines = file.readlines()

    # Re-running the notebook for the same date must not add a duplicate entry.
    if any(line.strip() == entry.strip() for line in lines):
        print(f"Release notes for {formatted_release_date} are already listed in {yaml_filename}; nothing to add.")
        return

    updated_lines = []
    inserted_at = None
    for line in lines:
        is_marker = line.strip() == marker
        if is_marker and not line.endswith('\n'):
            # Marker on an unterminated last line: end it so the entry that
            # follows starts on a line of its own.
            line += '\n'
        updated_lines.append(line)
        if is_marker:
            updated_lines.append(entry)
            inserted_at = len(updated_lines)  # 1-based line number of the new entry

    if inserted_at is None:
        print(f"Warning: marker '{marker}' not found in {yaml_filename}; no release notes entry was added.")
        return

    # Write only after the new content is fully built, so a failure above
    # cannot leave a truncated file behind.
    with open(yaml_filename, 'w') as file:
        file.writelines(updated_lines)

    print(f"Added release notes to {yaml_filename}, line {inserted_at}")

# Example usage:
# update_quarto_yaml(output_file, release_datetime)

## Write PRs to file
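This function writes the collected PRs to the release notes file, one category at a time. Each category with at least one PR gets its heading, followed by an HTML comment (PR number, full title, URL, labels), a `###` title, and the edited release notes for every PR in that category.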

In [None]:
def write_prs_to_file(file, categories, label_to_category):
    """Write every non-empty category of PRs to an open file.

    For each label in `categories` that has PRs, the category heading (looked
    up in `label_to_category`, defaulting to '## Other') is written, followed
    by one entry per PR: an HTML comment with the PR number, full title, URL
    and labels, a '###' title line, and the PR's notes when it has any.

    Args:
        file: A writable text file object (anything with `writelines`).
        categories: Mapping of label -> list of PR detail dicts with the keys
            'pr_number', 'full_title', 'url', 'labels', 'title', 'notes'.
        label_to_category: Mapping of label -> heading text.
    """
    for label, pr_list in categories.items():
        if not pr_list:
            continue

        heading = label_to_category.get(label, '## Other')
        chunks = [f"{heading}\n\n"]
        previous_blank = False

        for pr in pr_list:
            entry = [
                f"<!---\nPR #{pr['pr_number']}: {pr['full_title']}\n",
                f"URL: {pr['url']}\n",
                f"Labels: {pr['labels']}\n",
                f"--->\n### {pr['title']}\n\n",
            ]
            if pr['notes']:
                entry.append(f"{pr['notes']}\n\n")

            for chunk in entry:
                is_blank = not chunk.strip()
                # Drop a whitespace-only chunk that directly follows another one.
                if is_blank and previous_blank:
                    continue
                previous_blank = is_blank
                chunks.append(chunk)

        file.writelines(chunks)

# Example usage:
# with open(output_file, "a") as file:
#     write_prs_to_file(file, categories, label_to_category)


## Generate the release notes
This cell does the actual build of the release notes to fit our format. TODO: Split this code block into smaller cells, like the ones above, so the work gets done a little at a time.

In [None]:

# Heading written to the release notes file for each PR label.
label_to_category = {
    "highlight": "## Release highlights",
    "enhancement": "## Enhancements",
    "deprecation": "## Deprecations",
    "bug": "## Bug fixes",
    "documentation": "## Documentation"
}
# PRs collected per label; the insertion order here is the order in which the
# sections are written to the file.
categories = {
    "highlight": [],
    "enhancement": [],
    "deprecation": [],
    "bug": [],
    "documentation": []
}
# When a PR has several labels, the first match in this list decides its section.
label_hierarchy = ["highlight", "deprecation", "bug", "enhancement", "documentation"]

release_datetime = get_release_date()
formatted_release_date = release_datetime.strftime("%Y-%b-%d").lower()
# "%-d" (day without zero padding) is a platform-specific strftime extension
# that is rejected on Windows, so build the title from the integer day instead.
original_release_date = f"{release_datetime.strftime('%B')} {release_datetime.day}, {release_datetime.strftime('%Y')}"

directory_path = f"releases/{formatted_release_date}/"
os.makedirs(directory_path, exist_ok=True)
output_file = f"{directory_path}release-notes.qmd"

print("Generating & editing release notes ...")

# Start the file fresh with the Quarto front matter.
with open(output_file, "w") as file:
    file.write(f"---\ntitle: \"{original_release_date}\"\n---\n\n")

for url in github_urls:
    repo_name, pr_numbers = get_pr_numbers_from_url(url)
    if pr_numbers:
        for pr_number in pr_numbers:
            pr_data = get_pr_data(repo_name, pr_number)
            print(f"  Processing {repo_name}/#{pr_number} ...")
            if pr_data:
                release_notes = extract_external_release_notes(pr_data['body'])
                cleaned_title = clean_title(pr_data['title'])
                labels = [label['name'] for label in pr_data['labels']]
                pr_details = {
                    'pr_number': pr_number,
                    'title': cleaned_title,
                    'full_title': pr_data['title'],
                    'url': pr_data['url'],
                    'labels': ", ".join(labels),
                    'notes': release_notes
                }

                # File the PR under its highest-priority label, or 'other'.
                assigned = False
                for priority_label in label_hierarchy:
                    if priority_label in labels:
                        categories[priority_label].append(pr_details)
                        assigned = True
                        break
                if not assigned:
                    categories.setdefault('other', []).append(pr_details)

with open(output_file, "a") as file:
    write_prs_to_file(file, categories, label_to_category)

update_quarto_yaml(output_file, release_datetime)

# List modified, untracked, and newly added files as a reminder of what to commit.
try:
    result = subprocess.run(["git", "status", "--short"], check=True, text=True, capture_output=True)
    lines = result.stdout.split('\n')
    print("Files to commit:")
    for line in lines:
        if line.startswith((' M', '??', 'A ')):
            print(line)
except (subprocess.CalledProcessError, FileNotFoundError) as e:
    # FileNotFoundError covers a missing git executable.
    print("Failed to run git status:", e)


In [None]:

# Extract the external release notes for every non-internal PR in each release.
for url in github_urls:
    repo_name, pr_numbers = get_pr_numbers_from_url(url)
    for pr_number in pr_numbers:
        pr_data = get_pr_data(repo_name, pr_number)
        if pr_data:
            # extract_external_release_notes() already passes the extracted
            # text through edit_text_with_openai() and returns None when the
            # PR has no external notes. Editing again here would send every
            # note through the model twice, so the result is used as is.
            release_notes = extract_external_release_notes(pr_data['body'])



In [None]:

# Build the cleaned, edited title for every non-internal PR in each release.
for url in github_urls:
    repo_name, pr_numbers = get_pr_numbers_from_url(url)
    for pr_number in pr_numbers:
        pr_data = get_pr_data(repo_name, pr_number)
        if not pr_data:
            # No data was returned for this PR; nothing to clean.
            continue
        cleaned_title = clean_title(pr_data['title'])
            



In [None]:

# Collect the label names of every non-internal PR in each release.
for url in github_urls:
    repo_name, pr_numbers = get_pr_numbers_from_url(url)
    for pr_number in pr_numbers:
        pr_data = get_pr_data(repo_name, pr_number)
        if not pr_data:
            # No data was returned for this PR; nothing to read labels from.
            continue
        labels = [entry['name'] for entry in pr_data['labels']]



In [None]:

for url in github_urls:
    repo_name, pr_numbers = get_pr_numbers_from_url(url)
    for pr_number in pr_numbers:
        pr_data = get_pr_data(repo_name, pr_number)
        if pr_data:
            # TODO: this step is not implemented yet. The placeholder keeps
            # the cell valid Python so the notebook can still be run top to bottom.
            pass



In [None]:

for url in github_urls:
    repo_name, pr_numbers = get_pr_numbers_from_url(url)
    for pr_number in pr_numbers:
        pr_data = get_pr_data(repo_name, pr_number)
        if pr_data:
            # TODO: this step is not implemented yet. The placeholder keeps
            # the cell valid Python so the notebook can still be run top to bottom.
            pass



In [None]:

for url in github_urls:
    repo_name, pr_numbers = get_pr_numbers_from_url(url)
    for pr_number in pr_numbers:
        pr_data = get_pr_data(repo_name, pr_number)
        if pr_data:
            # TODO: this step is not implemented yet. The placeholder keeps
            # the cell valid Python so the notebook can still be run top to bottom.
            pass

