# Toy examples for correlation tiles

This Jupyter notebook reproduces the nine tiles of Figure 4 in :cite:t:`Pierard2024TheTile`.

It provides toy examples of correlation tiles, which show the rank correlations (Kendall's τ) between each of 9 probabilistic scores (those that belong to the ranking scores) and all ranking scores, for a uniform distribution of performances.
The correlation values have been estimated based on 10,000 performances drawn at random.

In [5]:
import scipy.stats

from sorbetto.performance.distribution.uniform_distribution_of_two_class_classification_performances import UniformDistributionOfTwoClassClassificationPerformances
from sorbetto.parameterization.parameterization_default import ParameterizationDefault
from sorbetto.flavor.correlation_flavor import CorrelationFlavor
from sorbetto.tile.correlation_tile import CorrelationTile
from sorbetto.ranking.ranking_score import RankingScore
from sorbetto.core.importance import Importance

# Rank-correlation coefficient shared by every tile of this notebook: Kendall's tau (SciPy).
correlation_fct = scipy.stats.kendalltau

# Number of performances sampled to estimate the correlations.
number_of_performances_drawn_at_random = 10_000

# Build the uniform distribution first, then sample from it.
# NOTE(review): no random seed is set in this notebook, so the sample presumably
# changes from one run to the next -- confirm how drawAtRandom is seeded.
uniform_distribution = UniformDistributionOfTwoClassClassificationPerformances(
    "Uniform distribution of performances"
)
performances_drawn_at_random = uniform_distribution.drawAtRandom(
    number_of_performances_drawn_at_random
)

In [6]:
# Show only a preview of the sample: its text form has one line per performance,
# so printing it in full would write thousands of lines into the notebook output.
max_preview_lines = 6
performance_lines = str(performances_drawn_at_random).splitlines()
print("\n".join(performance_lines[:max_preview_lines]))
if len(performance_lines) > max_preview_lines:
    print(f"... ({len(performance_lines) - max_preview_lines} more lines)")

FiniteSetOfTwoClassClassificationPerformances(name=random performances and performances=
TwoClassClassificationPerformance(name=unnamed two-class classification performance, ptn=0.10862352142464343, pfp=0.17083247733935752, pfn=0.6598163852723112, ptp=0.060727615963687906)
TwoClassClassificationPerformance(name=unnamed two-class classification performance, ptn=0.04257583511984791, pfp=0.7179812253967371, pfn=0.06859757784605608, ptp=0.17084536163735894)
TwoClassClassificationPerformance(name=unnamed two-class classification performance, ptn=0.12495580588810579, pfp=0.09710812475446133, pfn=0.389626340191721, ptp=0.3883097291657119)
TwoClassClassificationPerformance(name=unnamed two-class classification performance, ptn=0.07632557086765247, pfp=0.347693272877498, pfn=0.4216606376742155, ptp=0.1543205185806341)
TwoClassClassificationPerformance(name=unnamed two-class classification performance, ptn=0.00980845635324551, pfp=0.6313291919998051, pfn=0.28827354369092983, ptp=0.07058880795601

## Negative Predictive Value $NPV$

In [None]:
# Score under study: the negative predictive value, obtained from the RankingScore factory.
npv_score = RankingScore.getNegativePredictiveValueScore()

# Flavor pairing the correlation coefficient with the NPV score.
npv_flavor = CorrelationFlavor(
    correlation_coefficient=correlation_fct,
    score=npv_score,
)

# Tile built from the randomly drawn performances, then drawn.
npv_correlation_tile = CorrelationTile(
    name="NPV Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=npv_flavor,
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
npv_correlation_tile.draw()

## $X^{C}_{{tn,tp}|{tn,fn,tp}}$

In [None]:
# Importance given to each of the four cases (tn, fp, fn, tp); false positives get 0.
importance = Importance(itn=1, ifp=0, ifn=1, itp=1)
score2 = RankingScore(importance)

# Flavor pairing the correlation coefficient with this ranking score.
score2_flavor = CorrelationFlavor(
    correlation_coefficient=correlation_fct,
    score=score2,
)

# Tile built from the randomly drawn performances, then drawn.
score2_correlation_tile = CorrelationTile(
    name="$X^{C}_{{tn,tp}|{tn,fn,tp}}$ Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=score2_flavor,
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
score2_correlation_tile.draw()

## True Positive Rate $TPR$

In [None]:
# Build the TPR tile in this cell so that it runs on a fresh kernel
# (the tile object is not created anywhere else in the notebook).
# TPR = ptp / (pfn + ptp).
# NOTE(review): written with the RankingScore(Importance(...)) construction already used
# for the X^C_{{tn,tp}|{tn,fn,tp}} score; assumes the ranking score is
# (itn*ptn + itp*ptp) / (itn*ptn + ifp*pfp + ifn*pfn + itp*ptp) -- confirm against RankingScore.
tpr_score = RankingScore(Importance(itn=0, ifp=0, ifn=1, itp=1))
tpr_correlation_tile = CorrelationTile(
    name="TPR Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=CorrelationFlavor(correlation_coefficient=correlation_fct, score=tpr_score),
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
tpr_correlation_tile.draw()

## Inverse Jaccard $J_{-}$

In [None]:
# Build the inverse Jaccard tile in this cell so that it runs on a fresh kernel
# (the tile object is not created anywhere else in the notebook).
# J- = ptn / (ptn + pfp + pfn), i.e. the Jaccard index of the negative class.
# NOTE(review): written with the RankingScore(Importance(...)) construction already used
# for the X^C_{{tn,tp}|{tn,fn,tp}} score; assumes the ranking score is
# (itn*ptn + itp*ptp) / (itn*ptn + ifp*pfp + ifn*pfn + itp*ptp) -- confirm against RankingScore.
inverse_jaccard_score = RankingScore(Importance(itn=1, ifp=1, ifn=1, itp=0))
inverse_jaccard_correlation_tile = CorrelationTile(
    name="Inverse Jaccard Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=CorrelationFlavor(correlation_coefficient=correlation_fct, score=inverse_jaccard_score),
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
inverse_jaccard_correlation_tile.draw()

## Accuracy $A$

In [None]:
# Build the accuracy tile in this cell so that it runs on a fresh kernel
# (the tile object is not created anywhere else in the notebook).
# A = (ptn + ptp) / (ptn + pfp + pfn + ptp): all four cases are equally important.
# NOTE(review): written with the RankingScore(Importance(...)) construction already used
# for the X^C_{{tn,tp}|{tn,fn,tp}} score; assumes the ranking score is
# (itn*ptn + itp*ptp) / (itn*ptn + ifp*pfp + ifn*pfn + itp*ptp) -- confirm against RankingScore.
accuracy_score = RankingScore(Importance(itn=1, ifp=1, ifn=1, itp=1))
accuracy_correlation_tile = CorrelationTile(
    name="Accuracy Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=CorrelationFlavor(correlation_coefficient=correlation_fct, score=accuracy_score),
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
accuracy_correlation_tile.draw()

## Jaccard $J_{+}$

In [None]:
# Build the Jaccard tile in this cell so that it runs on a fresh kernel
# (the tile object is not created anywhere else in the notebook).
# J+ = ptp / (pfp + pfn + ptp), i.e. the Jaccard index of the positive class.
# NOTE(review): written with the RankingScore(Importance(...)) construction already used
# for the X^C_{{tn,tp}|{tn,fn,tp}} score; assumes the ranking score is
# (itn*ptn + itp*ptp) / (itn*ptn + ifp*pfp + ifn*pfn + itp*ptp) -- confirm against RankingScore.
jaccard_score = RankingScore(Importance(itn=0, ifp=1, ifn=1, itp=1))
jaccard_correlation_tile = CorrelationTile(
    name="Jaccard Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=CorrelationFlavor(correlation_coefficient=correlation_fct, score=jaccard_score),
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
jaccard_correlation_tile.draw()

## True Negative Rate $TNR$

In [None]:
# Build the TNR tile in this cell so that it runs on a fresh kernel
# (the tile object is not created anywhere else in the notebook).
# TNR = ptn / (ptn + pfp).
# NOTE(review): written with the RankingScore(Importance(...)) construction already used
# for the X^C_{{tn,tp}|{tn,fn,tp}} score; assumes the ranking score is
# (itn*ptn + itp*ptp) / (itn*ptn + ifp*pfp + ifn*pfn + itp*ptp) -- confirm against RankingScore.
TNR_score = RankingScore(Importance(itn=1, ifp=1, ifn=0, itp=0))
TNR_correlation_tile = CorrelationTile(
    name="TNR Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=CorrelationFlavor(correlation_coefficient=correlation_fct, score=TNR_score),
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
TNR_correlation_tile.draw()

## $X^{C}_{{tn,tp}|{tn,fp,tp}}$

In [None]:
# Build the X^C_{{tn,tp}|{tn,fp,tp}} tile in this cell so that it runs on a fresh kernel
# (the tile object is not created anywhere else in the notebook).
# Mirror of the X^C_{{tn,tp}|{tn,fn,tp}} score: here false negatives, rather than
# false positives, get a zero importance.
# NOTE(review): assumes the ranking score is
# (itn*ptn + itp*ptp) / (itn*ptn + ifp*pfp + ifn*pfn + itp*ptp) -- confirm against RankingScore.
score8 = RankingScore(Importance(itn=1, ifp=1, ifn=0, itp=1))
score8_correlation_tile = CorrelationTile(
    name="$X^{C}_{{tn,tp}|{tn,fp,tp}}$ Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=CorrelationFlavor(correlation_coefficient=correlation_fct, score=score8),
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
score8_correlation_tile.draw()

## Positive Predictive Value $PPV$

In [None]:
# Build the PPV tile in this cell so that it runs on a fresh kernel
# (the tile object is not created anywhere else in the notebook).
# PPV = ptp / (pfp + ptp).
# NOTE(review): written with the RankingScore(Importance(...)) construction already used
# for the X^C_{{tn,tp}|{tn,fn,tp}} score; assumes the ranking score is
# (itn*ptn + itp*ptp) / (itn*ptn + ifp*pfp + ifn*pfn + itp*ptp) -- confirm against RankingScore.
PPV_score = RankingScore(Importance(itn=0, ifp=1, ifn=0, itp=1))
PPV_correlation_tile = CorrelationTile(
    name="PPV Correlation Tile",
    parameterization=ParameterizationDefault(),
    symbolic_flavor=CorrelationFlavor(correlation_coefficient=correlation_fct, score=PPV_score),
    performances_list=performances_drawn_at_random,
    resolution=1001,
)
PPV_correlation_tile.draw()

## The 9 Correlation Tiles showing the rank correlations between the 9 probabilistic scores above, and all ranking scores, for a uniform distribution of performances

In [None]:
import matplotlib.pyplot as plt

# One figure holding a 3x3 grid of axes, one per correlation tile.
figure, axis = plt.subplots(3, 3)

# Tiles laid out exactly as they appear in the figure, row by row.
tile_grid = [
    [npv_correlation_tile, score2_correlation_tile, tpr_correlation_tile],
    [inverse_jaccard_correlation_tile, accuracy_correlation_tile, jaccard_correlation_tile],
    [TNR_correlation_tile, score8_correlation_tile, PPV_correlation_tile],
]

# Draw each tile on the axes that share its (row, column) position.
for row_index, tile_row in enumerate(tile_grid):
    for column_index, tile in enumerate(tile_row):
        tile.draw(fig=figure, ax=axis[row_index, column_index])