<a href="https://colab.research.google.com/github/vishnusnath447/BYOB/blob/main/bottest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torch python-docx scipy

In [9]:
from transformers import AutoTokenizer, AutoModel
import torch
from docx import Document
from scipy.spatial.distance import cosine

# Load the tokenizer and encoder from the same pretrained checkpoint
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

# Function to collect the table rows of a .docx file as strings
def extract_text_from_docx(file_path):
    """Return every non-empty table row of the document as one string.

    Only the document's tables are read; text outside tables is not
    collected. Within a row, each cell's text is stripped, empty cells are
    dropped, and the remaining cells are joined with " | ". Rows without
    any non-empty cell are skipped.

    Args:
        file_path: Location of the .docx file, passed to ``Document``.

    Returns:
        A list of row strings, in table order then row order.
    """
    document = Document(file_path)
    rows_as_text = []

    for table in document.tables:
        for row in table.rows:
            cell_values = []
            for cell in row.cells:
                content = cell.text.strip()
                if content:
                    cell_values.append(content)

            # A row made up solely of blank cells contributes nothing
            if not cell_values:
                continue
            rows_as_text.append(" | ".join(cell_values))

    return rows_as_text

# Read the alerts document and collect its table rows as " | "-joined strings.
# The path is relative, so the file must sit in the current working directory.
file_path = "alerts.docx"
table_rows = extract_text_from_docx(file_path)

# Function to get an embedding using attention-mask-aware mean pooling
def get_embedding(text):
    """Embed ``text`` by averaging the model's token states over real tokens.

    The attention mask returned by the tokenizer is used as the pooling
    weight, so padding positions (which appear when several texts of
    different lengths are tokenized together) do not dilute the average.
    For a single string nothing is padded and the result equals a plain
    mean over the token axis.

    Args:
        text: The text to embed. Inputs longer than 512 tokens are truncated
            by the tokenizer.

    Returns:
        A NumPy array holding the pooled embedding. ``squeeze()`` removes
        size-1 dimensions, so a single string presumably yields a 1-D
        vector -- confirm against the model's hidden size if it matters.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
        token_states = outputs.last_hidden_state
        # Broadcast the mask over the hidden dimension: 1 for real tokens, 0 for padding
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_states.dtype)
        summed = (token_states * mask).sum(dim=1)
        # clamp guards against division by zero should a sequence have no real tokens
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        embedding = (summed / token_counts).squeeze().numpy()  # Masked mean pooling
    return embedding

# Embed each table row (keyed by row text, so identical rows share one entry)
row_embeddings = {row: get_embedding(row) for row in table_rows}

# Without any rows, min() below would fail with an opaque "empty sequence"
# error; raise the same exception type with a message that names the cause.
if not row_embeddings:
    raise ValueError("No table rows were extracted from the document; nothing to match against.")

# Embed the query
query_text = "Database Connection Fail"
query_embedding = get_embedding(query_text)

# scipy's cosine() is a distance (1 - cosine similarity), so the smallest
# value identifies the most similar row
best_match = min(row_embeddings.items(), key=lambda item: cosine(query_embedding, item[1]))

# best_match is a (row_text, embedding) pair; best_match[0] is the matched row
# print(f"Best match for {query_text!r}:")
# print(best_match[0])


In [10]:
import re

# A step marker is a number followed by a dot ("1.", "2.", ...). The
# lookarounds stop decimals, version numbers and IP addresses such as "2.5"
# or "10.0.0.1" from being mistaken for markers: the number must not be the
# continuation of another number ("<digit>" or "<digit>." right before it)
# and the dot must not be followed by a digit.
_STEP_MARKER = re.compile(r'(?<!\d)(?<!\d\.)\d+\.(?!\d)\s*')


def format_resolution_steps(text):
    """Turn a numbered resolution list into "Step N: ..." lines.

    Args:
        text: Either a full row whose fields are separated by "|" (only the
            text after the last "|" is used) or the resolution text itself.

    Returns:
        A string starting with "Resolution steps:" followed by one
        "Step N: <text>" line per step, renumbered from 1. When no step text
        is found, only the "Resolution steps:" header is returned.
    """
    # Keep only the text after the last '|' (the whole text when there is none)
    resolution_part = text.rsplit("|", 1)[-1].strip()

    # Split on the step markers and drop the empty pieces the split leaves behind
    pieces = (piece.strip() for piece in _STEP_MARKER.split(resolution_part))
    steps = [piece for piece in pieces if piece]

    lines = ["Resolution steps:"]
    lines.extend(f"Step {i}: {step}" for i, step in enumerate(steps, start=1))
    return "\n".join(lines)

# Take the row text (first element) of the best-matching (row, embedding) pair
retrieved_text = best_match[0]

# Keep only the resolution part of the row and print it as numbered steps
formatted_result = format_resolution_steps(retrieved_text)
print(formatted_result)


Resolution steps:
Step 1: Verify database availability.
Step 2: Check network connectivity.
Step 3: Restart database service if needed.
