# Tests

In [5]:
from sentence_transformers import SentenceTransformer, util

# Load the checkpoint published under the name "multi-qa-MiniLM-L6-cos-v1".
model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")

# Embed one query string and a two-item batch of candidate passages
# (one Spanish sentence, one unrelated English sentence).
query_embedding = model.encode("How big is London")
passage_embedding = model.encode(
    [
        "Es una ciudad muy grande",
        "My name is Vitor",
    ]
)

# Score the query against each passage with util.dot_score and show the result.
print(
    "Similarity:",
    util.dot_score(query_embedding, passage_embedding),
)

In [6]:
# Bare expression as the cell's last line: the notebook displays the repr of
# whatever `model` is currently bound to.
model

In [7]:
# Load the checkpoint named "multi-qa-mpnet-base-cos-v1" under its own variable,
# leaving whatever `model` refers to untouched.
model2 = SentenceTransformer("multi-qa-mpnet-base-cos-v1")

In [8]:
# Embed the query with model2. Note this rebinds `query_embedding` and
# `passage_embedding`, so their values depend on which cell ran last.
query_embedding = model2.encode("How big is London")

# Candidate passages: a Spanish sentence, an unrelated English sentence, and an
# English sentence that mentions London directly.
passage_embedding = model2.encode(
    [
        "Es una ciudad muy grande",
        "My name is Vitor",
        "London is a big city",
    ]
)

# Score the query against each passage with util.dot_score and show the result.
print(
    "Similarity:",
    util.dot_score(query_embedding, passage_embedding),
)

In [9]:
# Display the shape of the current `query_embedding` (whichever encode() call
# assigned it most recently in this kernel session).
query_embedding.shape

In [10]:
# Embed a single free-text incident report with model2. The input is a
# one-element list, so the incident itself is read back as index 0.
incidencias_embedding = model2.encode(
    ["PANTALLA TACTIL NO VA BIEN.  PANTALLA TACTIL NO FUNCIONA CORRECTAMENTE. AZC3BLUEFECOSB"]
)

# Embed the three candidate error descriptions the incident is compared against:
# communications failure, display failure, and touch failure.
errors_embedding = model2.encode(
    [
        # Communications failure
        "Fallo de comunicaciones El termostato no comunica con la central o no se puede asociar.",
        # Display failure
        "Fallo de pantalla Hay un fallo estético en la pantalla, no se representan bien las imágenes.",
        # Touch input failure
        "No funciona el táctil El termostato representa imágenes y reacciona cuando se quita y pone alimentación, pero las pulsaciones funcionan mal o no funcionan",
    ]
)

In [11]:
def calculate_cosine_similarity(embeddings):
    """Return the cosine similarity of the first two entries of ``embeddings``.

    Args:
        embeddings: Any object indexable at positions 0 and 1. Only those two
            entries are read; anything beyond them is ignored.

    Returns:
        The value produced by ``util.pytorch_cos_sim`` for that pair.
    """
    first = embeddings[0]
    second = embeddings[1]
    return util.pytorch_cos_sim(first, second)


# Compare the single incident embedding (index 0) with each candidate error
# embedding in turn and print one similarity result per error.
for error in errors_embedding:
    print(
        calculate_cosine_similarity((incidencias_embedding[0], error))
    )

In [23]:
# Rebind `model` to the checkpoint named "all-MiniLM-L6-v2"; any cell run after
# this one sees this model, not the one loaded earlier.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Example inputs to embed
sentences = [
    "This framework generates embeddings for each input sentence",
    "Sentences are passed as a list of strings.",
    "The quick brown fox jumps over the lazy dog.",
]

# A single encode() call embeds the whole list
embeddings = model.encode(sentences)

# Walk sentences and embeddings in lockstep, printing each pair followed by a
# blank separator line
for embedding, sentence in zip(embeddings, sentences):
    print("Sentence:", sentence)
    print("Embedding:", embedding)
    print("")

In [2]:
import pandas as pd

# Read the processed Spanish corpus. Fields are delimited by "¬" and the file is
# parsed with pandas' Python engine.
# NOTE(review): the dated, relative path only resolves from the notebook's own
# directory and only for this snapshot — confirm before re-running elsewhere.
corpus = pd.read_csv(
    filepath_or_buffer="../DATA/processed/2024-05-15/corpus_spanish.csv",
    sep="¬",
    engine="python",
)

# Preview the first rows.
corpus.head()

In [13]:
# Draw a reproducible random subset of the corpus to embed.
# - random_state pins the draw, so re-running the notebook embeds the same rows.
# - n is capped at len(corpus): DataFrame.sample raises ValueError when asked
#   for more rows than exist (sampling without replacement).
df = corpus.sample(n=min(5000, len(corpus)), random_state=42)

# Embed the sampled texts with model2.
# NOTE(review): assumes every "text_to_analyse" value is a string; missing
# values would be passed to encode() as NaN — confirm against the data.
teste = model2.encode(df["text_to_analyse"].tolist())

In [14]:
import torch

# Report whether PyTorch can see a CUDA device.
if not torch.cuda.is_available():
    print("GPU is not available")
else:
    print("GPU is available")
    # Name of the device at index 0
    print("GPU name:", torch.cuda.get_device_name(0))
    # How many CUDA devices are visible
    print("Number of GPUs available:", torch.cuda.device_count())