## Example of performing pr_auc evaluation

This demo shows how to evaluate the embeddings produced by an embedding model using pairwise similarity scores.


In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'
from easydl.dml.simulation import generate_2d_gaussian_points
import pandas as pd
import numpy as np
import plotly.express as px
from easydl.data import GenericXYLambdaAutoLabelEncoderDataset
from easydl.dml.evaluation import calculate_cosine_similarity_matrix, create_pairwise_similarity_ground_truth_matrix, evaluate_pairwise_score_matrix_with_true_label, StandardEmbeddingEvaluationV1
from easydl.visualization import plot_precision_recall_vs_threshold_curve, PairwiseScoreAnalysisPlot


# Build two synthetic clusters of 2D Gaussian points to stand in for model
# embeddings; keeping them two-dimensional lets us plot them directly.
group_1 = generate_2d_gaussian_points(100, [0, 1], 0.1, random_seed=42)
group_2 = generate_2d_gaussian_points(100, [1, 0], 0.1, random_seed=43)
embeddings = np.concatenate((group_1, group_2), axis=0)

# One row per point, tagged with the cluster it came from. The labels follow
# the concatenation order above: all of group_1 first, then all of group_2.
cluster_labels = ['G1'] * len(group_1)
cluster_labels += ['G2'] * len(group_2)
df_points = pd.DataFrame(embeddings, columns=['x', 'y'])
df_points['label'] = cluster_labels

# Scatter plot of the simulated points, colored by cluster label.
scatter_options = dict(x='x', y='y', color='label', width=600, height=600)
fig = px.scatter(df_points, **scatter_options)
fig.show()

def _load_point_xy(index):
    """Return the 'x' and 'y' entries of row `index` of df_points."""
    row = df_points.iloc[index]
    return row[['x', 'y']]


def _load_point_label(index):
    """Return the 'label' entry of row `index` of df_points."""
    row = df_points.iloc[index]
    return row['label']


# Wrap the DataFrame in a dataset with one item per row, so the evaluator can
# read the labels it needs.
test_dataset = GenericXYLambdaAutoLabelEncoderDataset(
    x_loader_lambda=_load_point_xy,
    y_loader_lambda=_load_point_label,
    length=len(df_points),
)
evaluator = StandardEmbeddingEvaluationV1(
    test_dataset,
    save_pairwise_score_matrix_in_metric_dict=True,
)

# Evaluate the raw 2D coordinates as if they were the model's embeddings.
embeddings = df_points[['x', 'y']].to_numpy()
metrics = evaluator.evaluate_given_embeddings(embeddings)

# Ground-truth pair matrix comes from the evaluator; the score matrix is read
# back from the metrics dict (presumably stored there because of the
# save_pairwise_score_matrix_in_metric_dict flag above).
pair_gt_matrix = evaluator.pairwise_similarity_ground_truth_matrix
pair_score_matrix = metrics['pairwise_similarity_score_matrix']

# Analyze the pairwise scores against the ground-truth pair matrix using
# 50 bins, then render the resulting figure.
score_bin_count = 50
analyzer = PairwiseScoreAnalysisPlot(pair_gt_matrix, pair_score_matrix)
analyzer.analyze(n_bins=score_bin_count)

fig = analyzer.plot()
fig.show()