<a href="https://colab.research.google.com/github/vibhavparekh05-afk/IHWP_PROJECT/blob/main/RESEARCH_PROTOTYPE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import io
import os
import textwrap

import google.generativeai as genai
import numpy as np
import pandas as pd
import ruptures as rpt
from IPython.display import Markdown

# A helper function to make the output look nice.
def to_markdown(text):
    """Render *text* as a Markdown blockquote for notebook display.

    Bullet characters (U+2022) are rewritten as Markdown list markers, and
    every line -- blank ones included -- is prefixed with "> ".

    Args:
        text: The string to format (e.g. the text of a model response).

    Returns:
        An ``IPython.display.Markdown`` object wrapping the quoted text.
    """
    # The bullet is spelled as an escape so the literal survives any encoding
    # round-trip of this file; a mis-decoded literal (UTF-8 bytes read as
    # cp1252) becomes a three-character sequence that never matches a bullet.
    text = text.replace('\u2022', '  *')
    # predicate=lambda _: True forces the prefix onto empty lines as well, so
    # the blockquote is not broken by blank lines.
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

# --------------------------------------------------------
# SECURITY: an API key must never be committed to source control. The key is
# read from the GOOGLE_API_KEY environment variable; any key that was ever
# hard-coded in this file must be treated as leaked and revoked.
# Falls back to an empty string so a missing key surfaces when the API is
# configured/called rather than while this cell is being defined.
API_KEY = os.environ.get("GOOGLE_API_KEY", "")
# --------------------------------------------------------


# --- Main execution block ---
# Pipeline: synthesize a CPU-usage series with a known step change, locate the
# change with PELT, then ask Gemini to classify the regression. Any failure is
# reported as a one-line message instead of a traceback.
try:
    # STEP 4: Generate a sample dataset in memory
    print("Generating a sample performance dataset...")
    n_points = 100
    split_at = 50  # index at which the simulated regression starts
    timestamps = np.arange(n_points)
    # Baseline of 20 before the change and 22 after it (the larger regression),
    # each sample perturbed by Gaussian noise with sigma = 0.1.
    baseline = np.where(timestamps < split_at, 20.0, 22.0)
    noise = np.random.normal(0, 0.1, size=n_points)
    # Samples are kept at 4-decimal precision.
    df = pd.DataFrame({
        "timestamp": timestamps,
        "cpu_usage": np.round(baseline + noise, 4),
    })
    print(f"...Dataset created with {len(df)} data points.\n")


    # STEP 5: Perform Changepoint Detection
    print("Detecting changepoint in the data...")
    points = df['cpu_usage'].values
    algo = rpt.Pelt(model="l2").fit(points)
    # A low penalty makes the detection more sensitive.
    result = algo.predict(pen=3)

    # NOTE(review): ruptures is documented to end the breakpoint list with the
    # series length, so a single entry means no interior changepoint was found.
    if not result or len(result) <= 1:
        raise ValueError("Could not detect a clear changepoint in the data.")

    # Only the first detected changepoint is used; everything after it is
    # treated as a single "after" segment.
    changepoint_index = result[0]
    print(f"...Changepoint detected at timestamp index: {changepoint_index}\n")


    # STEP 6: Split the data based on the detected changepoint
    print("Splitting data into 'before' and 'after' segments...")
    before_series = df['cpu_usage'].iloc[:changepoint_index]
    after_series = df['cpu_usage'].iloc[changepoint_index:]
    before_data_rounded = [round(x, 2) for x in before_series.tolist()]
    after_data_rounded = [round(x, 2) for x in after_series.tolist()]
    # Whole-segment statistics: the prompt asks for a judgement on the overall
    # data and on the change in mean, which a 10-value preview cannot support.
    before_n, after_n = len(before_series), len(after_series)
    before_mean, after_mean = before_series.mean(), after_series.mean()
    before_std, after_std = before_series.std(), after_series.std()
    print("...Data successfully split.\n")


    # STEP 7: Configure and call the Gemini API
    if not API_KEY:
        # Fail with a clear message instead of an opaque API-side error.
        raise ValueError("No API key configured; set API_KEY before running the analysis.")
    genai.configure(api_key=API_KEY)
    model = genai.GenerativeModel('gemini-2.5-pro')

    prompt = f"""
    Analyze the following software performance data to determine if a performance regression occurred.
    The data represents CPU usage over time, split by an automatically detected 'changepoint' where a new software version was likely released.

    Data before the change (first 10 values): {before_data_rounded[:10]}...
    Data after the change (first 10 values): {after_data_rounded[:10]}...

    Summary before the change: n={before_n}, mean={before_mean:.3f}, std={before_std:.3f}
    Summary after the change: n={after_n}, mean={after_mean:.3f}, std={after_std:.3f}

    Based on the overall data, was there a statistically significant performance regression?
    Categorize the impact as HIGH_IMPACT, LOW_IMPACT, or NO_IMPACT.
    Provide a one-sentence justification for your analysis based on the change in mean CPU usage.

    Format your response as:
    IMPACT: [Your Category]
    JUSTIFICATION: [Your Sentence]
    """

    print("Sending data to the Gemini API for final analysis...")
    response = model.generate_content(prompt)
    print("...Analysis complete!\n")


    # STEP 8: Display the final result
    print("--- Prototype Complete: Gemini Model Analysis ---")
    # `display` is not imported in this file; it relies on the notebook
    # runtime providing it.
    display(to_markdown(response.text))
    print("-------------------------------------------------")

except Exception as e:
    # Top-level boundary of the notebook cell: report the failure and stop.
    print(f"\nAN UNEXPECTED ERROR OCCURRED: {e}")

Generating a sample performance dataset...
...Dataset created with 100 data points.

Detecting changepoint in the data...
...Changepoint detected at timestamp index: 50

Splitting data into 'before' and 'after' segments...
...Data successfully split.

Sending data to the Gemini API for final analysis...
...Analysis complete!

--- Prototype Complete: Gemini Model Analysis ---


> IMPACT: HIGH_IMPACT
> JUSTIFICATION: Mean CPU usage increased by nearly 10% from ~20.0 to ~21.9 after the changepoint, indicating a significant performance degradation.

-------------------------------------------------
