<a href="https://colab.research.google.com/github/vishnusnath447/BYOB/blob/main/bottest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torch python-docx scipy accelerate requests

In [6]:
from transformers import AutoTokenizer, AutoModel
import torch
from docx import Document
from scipy.spatial.distance import cosine

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")

# Function to extract text from a .docx file, including tables
def extract_text_from_docx(file_path):
    doc = Document(file_path)
    data_rows = []

    # Extract text from tables
    for table in doc.tables:
        for row in table.rows:
            row_text = [cell.text.strip() for cell in row.cells if cell.text.strip()]
            if row_text:
                data_rows.append(" | ".join(row_text))  # Store rows as strings

    return data_rows

# Read document and extract rows
file_path = "alerts.docx"
table_rows = extract_text_from_docx(file_path)

# Function to get embedding
def get_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()  # Mean pooling
    return embedding

# Embed each table row
row_embeddings = {row: get_embedding(row) for row in table_rows}

# Embed the query "High CPU Usage"
query_text = "Kubernetes Pod CrashLoop"
query_embedding = get_embedding(query_text)

# Find the most similar row using cosine similarity
best_match = min(row_embeddings.items(), key=lambda x: cosine(query_embedding, x[1]))

# Print the most relevant row
# print("Best match for 'Disk Space Low':")
# print(best_match[0])


# Simple Formating using Rule-based

In [7]:
import re

def format_resolution_steps(text):
    # Extract only the resolution steps part
    if "|" in text:
        resolution_part = text.split("|")[-1].strip()  # Get the last part after '|'
    else:
        resolution_part = text.strip()

    # Split steps using numbers like "1.", "2.", etc.
    steps = re.split(r'\d+\.\s*', resolution_part)
    steps = [step.strip() for step in steps if step.strip()]  # Remove empty elements

    # Format output
    formatted_output = "Resolution steps:\n"
    for i, step in enumerate(steps, start=1):
        formatted_output += f"Step {i}: {step}\n"

    return formatted_output.strip()

# Example output from retrieval
retrieved_text = best_match[0]

# Format the retrieved output
formatted_result = format_resolution_steps(retrieved_text)
print(formatted_result)


Resolution steps:
Step 1: Check pod logs for errors.
Step 2: Restart the pod manually.
Step 3: Investigate root cause and fix.


# Fomating using downloaded model ( no use )

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load a small model (TinyLlama-1.1B)
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")

# Function to format alert resolution steps
def format_alert(prompt):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    output = model.generate(input_ids, max_new_tokens=100)
    result = tokenizer.decode(output[0], skip_special_tokens=True)

    # Trim output to remove unwanted text
    result = result.strip().split("\n")
    result = [line for line in result if line.startswith("-") or line.startswith("*")]  # Keep only bullet points
    return "\n".join(result)

# Example input (retrieved alert data)
retrieved_text = best_match[0]

# Improved prompt
prompt = f"""
Extract only the resolution steps from the following alert and format them as bullet points. Do not add any extra text.

Alert: {retrieved_text}
"""

# Get formatted response
formatted_output = format_alert(prompt)
print(formatted_output)





# Using Model to format text , Together API

In [11]:
import requests

TOGETHER_API_KEY = "c05c3f10cb1601a44b6761a7cc69899c69f8728b4e0ea4c70e4fb407957f0a51"  # Get from https://www.together.ai/

def format_alert_together(alert_text):
    url = "https://api.together.xyz/v1/completions"
    headers = {"Authorization": f"Bearer {TOGETHER_API_KEY}", "Content-Type": "application/json"}

    prompt = f"""
    Extract only the resolution steps from the following alert and format them as bullet points. Do not add any extra text.

    Alert: {alert_text}
    """

    payload = {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.2
    }

    response = requests.post(url, headers=headers, json=payload)
    response_json = response.json()

    #Print the full response for debugging
    print("API Response:", response_json)

    # Check if 'choices' exists
    if "choices" in response_json:
        return response_json["choices"][0]["text"].strip()
    else:
        return f"Error: {response_json}"  # Return the error message

# Example Input
retrieved_text = best_match[0]

formatted_output = format_alert_together(retrieved_text)
print(formatted_output)



API Response: {'id': '914445c36bc22b2a', 'object': 'text.completion', 'created': 1739947300, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'prompt': [], 'choices': [{'text': ' becomes:\n    * Check pod logs for errors.\n    * Restart the pod manually.\n    * Investigate root cause and fix.', 'finish_reason': 'eos', 'seed': 8123650474972279000, 'logprobs': None, 'index': 0}], 'usage': {'prompt_tokens': 80, 'completion_tokens': 27, 'total_tokens': 107}}
becomes:
    * Check pod logs for errors.
    * Restart the pod manually.
    * Investigate root cause and fix.
