## Adds docstrings/comments to code and generates code summary

### Model Usage  

- **Open Source Models:**

  - Deployed via Endpoint: Hosted on a server and accessed remotely (CodeQwen1.5-7B-Chat)
  - Run Locally on Machine: Executed directly on a local device (Ollama running Llama 3.2-1B)

- **Closed Source Models:**  
  - Accessed through API key authentication: (OpenAI, Anthropic).  


In [69]:
# imports

import os
import io
import sys
import json
import requests
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai
import anthropic
import ollama
from IPython.display import Markdown, display, update_display
import gradio as gr
from huggingface_hub import login, InferenceClient
from transformers import AutoTokenizer, pipeline

In [75]:
# environment

load_dotenv()

# Every variable below must be present before the rest of the notebook runs.
# os.environ only accepts string values, so presence is checked explicitly
# (copying os.getenv() results back into os.environ fails with an opaque
# "str expected, not NoneType" TypeError when a variable is unset).
_REQUIRED_ENV_VARS = (
    'OPENAI_API_KEY',
    'ANTHROPIC_API_KEY',
    'CODE_QWEN_URL',
    'BIGBIRD_PEGASUS_URL',
    'HF_TOKEN',
)
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if os.getenv(name) is None]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_env_vars)
    )

# Endpoint URLs and the Hugging Face token used by the cells below.
CODE_QWEN_URL = os.environ['CODE_QWEN_URL']
BIGBIRD_PEGASUS_URL = os.environ['BIGBIRD_PEGASUS_URL']
HF_TOKEN = os.environ['HF_TOKEN']

In [4]:
# Shell command: download the llama3.2:1b model with the Ollama CLI.
!ollama pull llama3.2:1b

pulling manifest 
pulling 74701a8c35f6... 100% ▕████████████████▏ 1.3 GB                         
pulling 966de95ca8a6... 100% ▕████████████████▏ 1.4 KB                         
pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB                         
pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB                         
pulling 4f659a1e86d7... 100% ▕████████████████▏  485 B            

In [5]:
# API clients for the closed-source models, constructed with default settings
# (no explicit key or URL is passed here).
openai = OpenAI()
claude = anthropic.Anthropic()

In [72]:
# Model identifiers passed to the OpenAI, Anthropic and Ollama calls below.
OPENAI_MODEL = "gpt-4o-mini"
CLAUDE_MODEL = "claude-3-haiku-20240307"
LLAMA_MODEL = "llama3.2:1b"

In [73]:
# Hugging Face model ids. code_qwen is used to load the tokenizer in
# call_codeqwen; bigbird_pegasus is not referenced in the cells shown here.
code_qwen = "Qwen/CodeQwen1.5-7B-Chat"
bigbird_pegasus = "google/bigbird-pegasus-large-arxiv"
# Authenticate with the Hugging Face Hub using the token read from the environment.
login(HF_TOKEN, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [67]:
# System prompt for the "add docstrings and comments" task.
# The sentences are joined with a single space so they never run together
# in the text sent to the model.
system_message_comments = " ".join([
    "You are an assistant designed to add docstrings and helpful comments to code for documentation purposes.",
    "Respond back with properly formatted code, including docstrings and comments. Keep comments concise.",
    "Do not respond with greetings, or any such extra output",
])

In [171]:
# System prompt for the "summarise this code" task.
# The sentences are joined with a single space so they never run together
# in the text sent to the model.
system_message_summary = " ".join([
    "You are an assistant designed to summarise code for documentation purposes. You are not to display code again.",
    "Respond back with a properly crafted summary, mentioning key details regarding to the code, such as workflow, code language.",
    "Do not respond with greetings, or any such extra output. Do not respond in Markdown. Be thorough, keep explanation level at undergraduate level.",
])

In [17]:
def user_prompt_for(code):
    """Build the user prompt that asks a model to document ``code``.

    Args:
        code: Source code text, appended verbatim after the instructions.

    Returns:
        The fixed instruction text followed by ``code``.
    """
    instructions = (
        "Rewrite this code to include helpful comments and docstrings. "
        "Respond only with code.\n"
    )
    return instructions + code

In [172]:
def user_prompt_for_summary(code):
    """Build the user prompt that asks a model to summarise ``code``.

    Args:
        code: Source code text, appended verbatim after the request line.

    Returns:
        The one-line request followed by ``code``.
    """
    request_line = "Return the summary of the code.\n"
    return request_line + code

In [33]:
def messages_for(code):
    """Return the chat messages for the commenting task.

    Args:
        code: Source code text to be documented.

    Returns:
        A two-item list: the system message carrying
        ``system_message_comments`` and the user message built by
        ``user_prompt_for``.
    """
    system_turn = {"role": "system", "content": system_message_comments}
    user_turn = {"role": "user", "content": user_prompt_for(code)}
    return [system_turn, user_turn]

In [34]:
def messages_for_summary(code):
    """Return the chat messages for the summarising task.

    Args:
        code: Source code text to be summarised.

    Returns:
        A two-item list: the system message carrying
        ``system_message_summary`` and the user message built by
        ``user_prompt_for_summary``.
    """
    system_turn = {"role": "system", "content": system_message_summary}
    user_turn = {"role": "user", "content": user_prompt_for_summary(code)}
    return [system_turn, user_turn]

In [19]:
# Sample program used as the default content of the input textbox and for
# quick manual tests. It sums an alternating series in a loop, multiplies the
# result by 4, and prints the value together with the elapsed time.
func = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [160]:
def call_llama(code):
    """Document and summarise ``code`` with the local Ollama model.

    Two chat requests are sent in order: one with the commenting messages,
    then one with the summary messages.

    Args:
        code: Source code text to process.

    Returns:
        A ``(commented_code, summary)`` tuple holding the message content of
        each reply.
    """
    builders = (messages_for, messages_for_summary)
    replies = [
        ollama.chat(model=LLAMA_MODEL, messages=build(code))
        for build in builders
    ]
    comment_reply, summary_reply = replies
    return comment_reply['message']['content'], summary_reply['message']['content']

In [168]:
def call_claude(code):
    """Document and summarise ``code`` with the Anthropic model.

    Two requests are sent in order: the commenting request, then the summary
    request. Each reply is capped at 500 tokens.

    Args:
        code: Source code text to process.

    Returns:
        A ``(commented_code, summary)`` tuple holding the text of the first
        content item of each reply.
    """
    def ask(system_prompt, user_prompt):
        # One single-turn request; only the prompts differ between the two tasks.
        return claude.messages.create(
            model=CLAUDE_MODEL,
            system=system_prompt,
            messages=[{"role": "user", "content": user_prompt}],
            max_tokens=500,
        )

    comment_reply = ask(system_message_comments, user_prompt_for(code))
    summary_reply = ask(system_message_summary, user_prompt_for_summary(code))

    return comment_reply.content[0].text, summary_reply.content[0].text

In [115]:
def call_gpt(code):
    """Document and summarise ``code`` with the OpenAI model.

    Two chat completions are requested in order: one with the commenting
    messages, then one with the summary messages.

    Args:
        code: Source code text to process.

    Returns:
        A ``(commented_code, summary)`` tuple holding the message content of
        the first choice of each completion.
    """
    def complete(messages):
        # Same model for both tasks; only the message list changes.
        return openai.chat.completions.create(
            model=OPENAI_MODEL,
            messages=messages,
        )

    comment_completion = complete(messages_for(code))
    summary_completion = complete(messages_for_summary(code))

    commented_code = comment_completion.choices[0].message.content
    summary = summary_completion.choices[0].message.content
    return commented_code, summary

In [178]:
def call_codeqwen(code):
    """Document and summarise ``code`` with the CodeQwen inference endpoint.

    The tokenizer and the inference client are created once and shared by
    both generations instead of being rebuilt for each task.

    Args:
        code: Source code text to process.

    Returns:
        A ``(commented_code, summary)`` tuple holding the generated text of
        each request.
    """
    tokenizer = AutoTokenizer.from_pretrained(code_qwen)
    client = InferenceClient(CODE_QWEN_URL, token=HF_TOKEN)

    def generate(messages):
        # Render the chat messages into one prompt string using the model's
        # chat template, then request up to 1000 new tokens from the endpoint.
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        response = client.text_generation(text, details=True, max_new_tokens=1000)
        return response.generated_text

    # commented code first, then summary (same order as the other backends)
    commented_code = generate(messages_for(code))
    summary = generate(messages_for_summary(code))

    return commented_code, summary

In [179]:
def create_docs(code, model):
    """Route ``code`` to the backend selected by ``model``.

    Args:
        code: Source code text to process.
        model: One of "Llama", "Claude", "GPT" or "CodeQwen".

    Returns:
        A ``(comments, summary)`` tuple produced by the chosen backend.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # Pick the backend first, then make a single call below.
    if model == "Llama":
        backend = call_llama
    elif model == "Claude":
        backend = call_claude
    elif model == "GPT":
        backend = call_gpt
    elif model == "CodeQwen":
        backend = call_codeqwen
    else:
        raise ValueError("Unknown Model")

    comments, summary = backend(code)
    return comments, summary

In [180]:
# Background colours for the two output textboxes; the class names match the
# elem_classes values given to those textboxes in the UI cell.
css = """
.comments {background-color: #00599C;}
.summary {background-color: #008B8B;}
"""

In [181]:
# Gradio front end: code input, model selector, a trigger button, and two
# side-by-side output boxes for the commented code and the summary.
with gr.Blocks(css=css) as ui:
    gr.Markdown("### Code Documentation and Formatting")
    with gr.Row():
        # Pre-filled with the sample program so the app can be tried immediately.
        code = gr.Textbox(label="Input Code: ", value=func, lines=10)
    with gr.Row():
        # The choices must match the model names handled by create_docs.
        model = gr.Dropdown(["GPT", "Claude", "Llama", "CodeQwen"], label="Select model", value="GPT")
    with gr.Row():
        docs = gr.Button("Add Comments and Summarise Code")
    with gr.Row():
        commented_code = gr.Textbox(label="Formatted Code", lines=10, elem_classes=["comments"])
        code_summary = gr.Textbox(label="Code Summary", lines=10, elem_classes=["summary"])
    # create_docs returns (comments, summary), mapped onto the two outputs in order.
    docs.click(create_docs, inputs=[code, model], outputs=[commented_code, code_summary])

In [182]:
# Start the Gradio app; inbrowser=True asks Gradio to open it in a browser tab.
ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7880

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\92310\anaconda3\envs\llms\Lib\site-packages\huggingface_hub\utils\_http.py", line 409, in hf_raise_for_status
    response.raise_for_status()
  File "C:\Users\92310\anaconda3\envs\llms\Lib\site-packages\requests\models.py", line 1024, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 400 Client Error: Bad Request for url: https://ihnm5eu3qkozcoio.us-east-1.aws.endpoints.huggingface.cloud/

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\92310\anaconda3\envs\llms\Lib\site-packages\gradio\queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\92310\anaconda3\envs\llms\Lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^

In [13]:
# Manual check of the local Llama backend on the sample program; prints the
# (commented code, summary) tuple returned by call_llama.
print(call_llama(func))

```python
import time

def calculate(iterations, param1, param2):
    """
    Calculate the average reciprocal of (i * param1 - param2) / j for all i from 1 to iterations.

    Args:
        iterations (int): The number of iterations.
        param1 (float): The first parameter for the calculation.
        param2 (float): The second parameter for the calculation.

    Returns:
        float: The calculated result.
    """
    # Initialize the result variable with 1.0
    result = 1.0
    
    # Iterate over all i from 1 to iterations
    for i in range(1, iterations+1):
        # Calculate j as (i * param1 - param2)
        j = i * param1 - param2
        
        # Add the reciprocal of (j / result) to the total result
        result += (1/j)
        
        # Subtract the reciprocal of (i * param1 + param2) from the total result
        result -= (1/(i * param1 + param2))
    
    return result

# Record the start time
start_time = time.time()

# Calculate 4 times the initial result