In [4]:
import math

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [5]:
# Earth radius in kilometers
Earth_Radius_KM = 6371

# ref : https://en.wikipedia.org/wiki/Haversine_formula
def haversine_distance(origin: tuple[float, float], destination: tuple[float, float]) -> float:
    '''
        Calculates the haversine (great-circle) distance between two geographic coordinates in kilometers.

        Both points are given as (latitude, longitude) in degrees. They are converted to radians,
        the haversine formula is evaluated, and the resulting central angle is multiplied by
        Earth_Radius_KM.

        Args:
            origin (tuple): A tuple of (latitude, longitude) in degrees for the first location.
            destination (tuple): A tuple of (latitude, longitude) in degrees for the second location.

        Returns:
            float: The distance between the two locations in kilometers.
    '''
    # convert to radians
    origin_lat_rad, origin_lon_rad = map(math.radians, origin)
    dest_lat_rad, dest_lon_rad = map(math.radians, destination)

    # differences in coordinates
    diff_in_lat = origin_lat_rad - dest_lat_rad
    diff_in_lon = origin_lon_rad - dest_lon_rad

    # haversine formula
    haversine_value = (
        math.sin(diff_in_lat / 2)**2
        + math.cos(origin_lat_rad) * math.cos(dest_lat_rad) * math.sin(diff_in_lon / 2)**2
    )
    # For (near-)antipodal points floating-point rounding can push the value a hair above 1,
    # which would make math.sqrt(1 - haversine_value) raise ValueError. Only the upper bound is
    # clamped, and with an explicit comparison so that NaN inputs still propagate as NaN.
    if haversine_value > 1.0:
        haversine_value = 1.0
    great_circle_angle = 2 * math.atan2(math.sqrt(haversine_value), math.sqrt(1 - haversine_value))

    # return distance in kilometers
    return Earth_Radius_KM * great_circle_angle

[1.7946459460383746,
 2.345332820322107,
 2.4152431456443124,
 2.3889656330519986,
 3.8864963676299915,
 4.915107145183374,
 12.546019960900033,
 22.446085333650725,
 22.60725075555429,
 28.256855697968415]

In [None]:
# generate a dataset with distance, ranking index [0, 1000], ranking score [0, 100], and recall score [0, 1]
origin_location = (13.730535078167222, 100.51722744907757)
# hard-coded sample of (latitude, longitude) pairs in degrees
random_locations = [
    (13.741460172760418, 100.52945703900683),
    (13.722747225901431, 100.49704947551552),
    (13.751713487254452, 100.51226159917519),
    (13.743477711408703, 100.4995739810327),
    (13.714646756745294, 100.54927435500979),
    (13.693737924271923, 100.49201727752865),
    (13.619711506655612, 100.53902398019436),
    (13.912907423320988, 100.6063458466953),
    (13.914512135214704, 100.60634242176562),
    (13.522935037016458, 100.66803031320607),
]

# haversine distance (km) from the origin to every sample location, in list order
distance = [
    haversine_distance(origin_location, candidate)
    for candidate in random_locations
]

# ranking index proportional to distance (scaled to [0, 1000])
def calculate_ranking_index(distances, max_distance_km=100, max_index=1000):
    """
    Map distances onto an integer ranking index in [0, max_index].

    A distance of ``max_distance_km`` (or more) maps to ``max_index``; negative
    values map to 0. Fractions are truncated by the integer cast.

    Args:
        distances: A single distance or any array-like of distances (list, tuple,
            ndarray, pandas Series). Plain Python lists are accepted because the
            input is coerced with ``np.asarray`` before the arithmetic.
        max_distance_km: Distance that corresponds to the top of the index scale.
        max_index: Upper bound of the ranking index.

    Returns:
        NumPy integer scalar for scalar input, integer ndarray for array-like input.
    """
    # coerce first so that list/tuple inputs support the element-wise division
    scaled = (np.asarray(distances, dtype=float) / max_distance_km) * max_index
    return np.clip(scaled, 0, max_index).astype(int)
# compute the ranking index of each individual distance value
ranking_index = list(map(calculate_ranking_index, distance))

# ranking score inversely related to ranking index (clipped to [0, 100])
def calculate_inverse_ranking_score(index):
    """
    Turn a ranking index into a score where a higher index yields a lower score.

    The raw score is 1000 / (index + 1e-5); the small offset keeps an index of 0
    from dividing by zero. The raw score is then clipped to the range [0, 100].
    """
    raw_score = 1000 / (index + 1e-5)
    return np.clip(raw_score, 0, 100)
# ranking score for every ranking index, collected into one array
ranking_score = np.array([calculate_inverse_ranking_score(index) for index in ranking_index])

# recall score [0, 1]
# Drawn from a seeded generator so that Restart & Run All reproduces the same values.
RANDOM_SEED = 42
recall_score = np.random.default_rng(RANDOM_SEED).uniform(0, 1, size=len(random_locations))

# one row per location, columns aligned by position
df = pd.DataFrame({
    "Distance": distance,
    "Ranking Index": ranking_index,
    "Ranking Score": ranking_score,
    "Recall Score": recall_score
})
