In [1]:
pip install sentence-transformers pandas numpy


Collecting sentence-transformers
  Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting numpy
  Using cached numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
Collecting tqdm (from sentence-transformers)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.5.1-cp312-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting scikit-learn (from sentence-transformers)
  Using cached scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl.metadata (13 kB)
Collecting scipy (from sentence-transformers)
  Using cached scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)


In [11]:
import pandas as pd


from pathlib import Path

# Locate the CSV under the current user's Downloads folder rather than
# hardcoding one machine's home directory. Replace with the path to your dataset
# if your copy lives elsewhere.
file_path = Path.home() / 'Downloads' / 'helpdesk_customer_tickets.csv'
data = pd.read_csv(file_path)

print("Dataset loaded successfully.")
print(data.head())


Dataset loaded successfully.
    id                                            subject  \
0   36  Anfrage zu den Spezifikationen und Anpassungso...   
1   39               Déconnexions fréquentes et plantages   
2  243                        Problema de sonido Dell XPS   
3  381  Assistance requise pour la configuration du ta...   
4  663  Urgente: Assistência Imediata Necessária para ...   

                                                body  \
0  Sehr geehrtes Support-Team des Tech Online Sto...   
1  Le client signale des déconnexions fréquentes ...   
2  Problema con el sonido, manejando como devoluc...   
3  Cher support client,\n\nNotre client, <name>, ...   
4  Caro Suporte ao Cliente da Firma de Consultori...   

                                              answer      type  \
0  Sehr geehrter <name>,\n\nvielen Dank für Ihr I...   Request   
1  Nous allons enquêter sur le problème avec Zoom...  Incident   
2  Gracias por su correo electrónico. Procesaremo...   Problem   
3  

In [3]:

# Keep only the three text columns used downstream and drop every row that is
# missing any of them. The trailing .copy() makes `data_filtered` an independent
# frame, so later cells can add columns to it without risking pandas'
# SettingWithCopyWarning and without touching `data`.
data_filtered = data[['subject', 'body', 'answer']].dropna().copy()

print("Filtered relevant columns: 'subject', 'body', 'answer'.")
print(f"Total rows after filtering: {len(data_filtered)}")


Filtered relevant columns: 'subject', 'body', 'answer'.
Total rows after filtering: 599


In [4]:
from sentence_transformers import SentenceTransformer


# Announce the load first: instantiating the model can take a while.
print("Loading SentenceTransformer model...")

# Instantiate the encoder by name; the same model name is used again by the
# query cell further down, so both sides of the comparison share one encoder.
model = SentenceTransformer(model_name_or_path='all-mpnet-base-v2')

print("Model loaded successfully.")


  from .autonotebook import tqdm as notebook_tqdm


Loading SentenceTransformer model...
Model loaded successfully.


In [5]:

# Build the text that gets embedded: each ticket's subject followed by its body,
# separated by a single space.
subject_col = data_filtered['subject']
body_col = data_filtered['body']
data_filtered['combined_text'] = subject_col.str.cat(body_col, sep=" ")

print("Combined 'subject' and 'body' into a single text field.")
# Preview the first rows of the new column only.
combined_preview = data_filtered[['combined_text']].head()
print(combined_preview)



Combined 'subject' and 'body' into a single text field.
                                       combined_text
0  Anfrage zu den Spezifikationen und Anpassungso...
1  Déconnexions fréquentes et plantages Le client...
2  Problema de sonido Dell XPS Problema con el so...
3  Assistance requise pour la configuration du ta...
4  Urgente: Assistência Imediata Necessária para ...


In [6]:
import numpy as np


print("Generating embeddings for the dataset... This may take some time.")

# Encode every combined ticket text in a single call. convert_to_tensor=True
# requests a tensor result; the next cell moves it to the CPU and converts it
# to NumPy.
ticket_texts = data_filtered['combined_text'].tolist()
embeddings = model.encode(ticket_texts, convert_to_tensor=True)

print("Embeddings generated successfully.")


Generating embeddings for the dataset... This may take some time.
Embeddings generated successfully.


In [7]:

print("Converting embeddings to NumPy format...")

# Move the whole embedding tensor to host memory once, instead of calling
# .cpu() separately for every row, then split it into one 1-D array per row so
# each DataFrame cell holds a single embedding vector.
embedding_matrix = embeddings.cpu().numpy()
data_filtered['embedding'] = list(embedding_matrix)

print("Embeddings stored in the dataset.")


Converting embeddings to NumPy format...
Embeddings stored in the dataset.


In [8]:

# Persist the filtered frame, including the 'combined_text' and 'embedding'
# columns, so later cells can reload it instead of re-encoding the dataset.
pd.to_pickle(data_filtered, 'preprocessed_data.pkl')

print("Preprocessed data saved to 'preprocessed_data.pkl'.")


Preprocessed data saved to 'preprocessed_data.pkl'.


In [9]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util


print("Loading preprocessed data...")

# Reload the frame written by the save cell.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# you produced yourself.
data_filtered = pd.read_pickle(filepath_or_buffer='preprocessed_data.pkl')

print("Preprocessed data loaded successfully.")


Loading preprocessed data...
Preprocessed data loaded successfully.


In [14]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch


import numpy as np

# This cell reloads both the preprocessed frame and the model, so it does not
# rely on variables created by earlier cells. Note that it rebinds `data`
# (previously the raw CSV frame) to the preprocessed frame.
data = pd.read_pickle('preprocessed_data.pkl')
model = SentenceTransformer('all-mpnet-base-v2')

# Embed the free-text query with the same model that produced the stored embeddings.
query = "Issue with Dell XPS 13 9310 After Update"
query_embedding = model.encode(query, convert_to_tensor=True)

# Stack the per-row vectors into one 2-D array before handing it to torch.
# Building a tensor from a Python list of ndarrays is a slow path that PyTorch
# warns about; np.stack + from_numpy yields the same values and dtype.
embedding_matrix = np.stack(data['embedding'].to_numpy())
precomputed_embeddings = torch.from_numpy(embedding_matrix).to(query_embedding.device)

# Cosine similarity of the query against every stored ticket. squeeze(0) drops
# the leading query axis so there is one score per row.
similarities = util.cos_sim(query_embedding, precomputed_embeddings).squeeze(0).cpu().numpy()

# argmax is a position, not an index label, hence .iloc.
most_similar_idx = similarities.argmax()
best_match = data.iloc[most_similar_idx]
print(f"Most similar entry found: \nSubject: {best_match['subject']}\nBody: {best_match['body']}\nAnswer: {best_match['answer']}")



Most similar entry found: 
Subject: Issue with Dell XPS 13 9310 After Update
Body: Dear Tech Online Store customer support,

I would like to report an issue regarding my Dell XPS 13 9310. After a recent Windows update, I have been experiencing screen flickering. This issue has made my laptop hard to use. Please advise on the next steps to resolve this.

Thank you,
<name>
Answer: Dear <name>,

Sorry to hear about the screen flickering on your Dell XPS 13 9310. We recommend updating your graphics drivers and checking for any additional Windows updates. If the issue persists, please contact our technical support for further assistance.

Best regards,
Tech Online Store Customer Support
