In [3]:
import pandas as pd

In [9]:
scores = pd.DataFrame({
    'Name': ['Query1', 'Query2', 'Query3', 'Query4', 'Query5', 'Query6', 'Query7'],
    'Score1': [4, 2, 1, 5, 5, 1, 3],
    'Score2': [5, 4, 3, 5, 5, 1, 2],
    'Score3': [5, 3, 2, 5, 5, 1, 2],
    'Score4': [4, 3, 1, 5, 5, 1, 2],
    'Score5': [4, 3, 1, 5, 5, 1, 3],
    'Score6': [5, 3, 1, 5, 5, 2, 4],
    'Score7': [4, 3, 3, 5, 5, 3, 2],
    'Score8': [4, 2, 1, 5, 5, 2, 3],
})
scores

Unnamed: 0,Name,Score1,Score2,Score3,Score4,Score5,Score6,Score7,Score8
0,Query1,4,5,5,4,4,5,4,4
1,Query2,2,4,3,3,3,3,3,2
2,Query3,1,3,2,1,1,1,3,1
3,Query4,5,5,5,5,5,5,5,5
4,Query5,5,5,5,5,5,5,5,5
5,Query6,1,1,1,1,1,2,3,2
6,Query7,3,2,2,2,3,4,2,3


In [12]:
# Calculate the mean score for each row, excluding the 'Name' column
scores['Average'] = scores.loc[:, 'Score1':'Score8'].mean(axis=1)
scores['Average']

0    4.375
1    2.875
2    1.625
3    5.000
4    5.000
5    1.500
6    2.625
Name: Average, dtype: float64

In [13]:
from sklearn.metrics import pairwise_distances

# Extract the scores for pairwise computation, excluding the 'Name' column
score_values = scores.loc[:, 'Score1':'Score8']

# Compute pairwise Euclidean distances
pairwise_results = pairwise_distances(score_values, metric='euclidean')

# Create a DataFrame to represent the pairwise distances
pairwise_df = pd.DataFrame(pairwise_results, index=scores['Name'], columns=scores['Name'])

pairwise_df

Name,Query1,Query2,Query3,Query4,Query5,Query6,Query7
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Query1,0.0,4.472136,8.124038,2.236068,2.236068,8.544004,5.477226
Query2,4.472136,0.0,4.0,6.244998,6.244998,4.795832,3.162278
Query3,8.124038,4.0,0.0,9.848858,9.848858,2.645751,4.898979
Query4,2.236068,6.244998,9.848858,0.0,0.0,10.099505,7.0
Query5,2.236068,6.244998,9.848858,0.0,0.0,10.099505,7.0
Query6,8.544004,4.795832,2.645751,10.099505,10.099505,0.0,4.123106
Query7,5.477226,3.162278,4.898979,7.0,7.0,4.123106,0.0


In [15]:
from itertools import combinations

# Generate all possible pairs of queries
query_pairs = list(combinations(scores['Name'], 2))

pairwise_rankings = []

# Iterate through the query pairs and compute pairwise rank based on the sum of scores
for (q1, q2) in query_pairs:
    score_q1 = score_values.loc[scores['Name'] == q1].sum(axis=1).values[0]
    score_q2 = score_values.loc[scores['Name'] == q2].sum(axis=1).values[0]
    
    # Label: 1 if q1 is ranked higher than q2, -1 otherwise
    if score_q1 > score_q2:
        pairwise_rankings.append((q1, q2, 1))
    else:
        pairwise_rankings.append((q1, q2, -1))

# Display the pairwise rankings
pairwise_rankings[:10]  # Displaying first 10 results for brevity

[('Query1', 'Query2', 1),
 ('Query1', 'Query3', 1),
 ('Query1', 'Query4', -1),
 ('Query1', 'Query5', -1),
 ('Query1', 'Query6', 1),
 ('Query1', 'Query7', 1),
 ('Query2', 'Query3', 1),
 ('Query2', 'Query4', -1),
 ('Query2', 'Query5', -1),
 ('Query2', 'Query6', 1)]

In [16]:
from collections import defaultdict

# Initialize a dictionary to count wins for each query
wins_count = defaultdict(int)

# Count the number of wins for each query
for (q1, q2, result) in pairwise_rankings:
    if result == 1:
        wins_count[q1] += 1
    else:
        wins_count[q2] += 1

# Sort queries by number of wins in descending order
final_ranking = sorted(wins_count.items(), key=lambda x: x[1], reverse=True)

final_ranking

[('Query5', 6),
 ('Query4', 5),
 ('Query1', 4),
 ('Query2', 3),
 ('Query7', 2),
 ('Query3', 1)]