In [1]:
import numpy as np
import pandas as pd

In [35]:
def loss_function(
    x: np.ndarray,
    y: np.ndarray,
    a: float,
    b: float,
    dist: callable
) -> float:
    """Compute the matching loss between a ground-truth set and a prediction set.

    Every prediction ``y_j`` is assigned to its nearest ground-truth element
    ``x_i`` according to ``dist`` (ties go to the lowest index ``i``). The loss
    is then the sum of:

    * for each ``x_i`` with at least one assigned prediction: the smallest
      distance among its assigned predictions, plus ``b`` for every additional
      prediction assigned to it (treated as false positives);
    * for each ``x_i`` with no assigned prediction: ``a`` (a false negative).

    Args:
        x (np.ndarray): Ground truth set (array-like; iterated along axis 0).
        y (np.ndarray): Prediction set (array-like; iterated along axis 0).
        a (float): False negative penalty.
        b (float): False positive penalty.
        dist (callable): Distance function called as ``dist(x_i, y_j)``.

    Returns:
        float: The loss value. If ``x`` is empty, every prediction counts as a
        false positive and the result is ``b * len(y)``; if ``y`` is empty,
        every ground-truth element is missed and the result is ``a * len(x)``.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n_x, n_y = len(x), len(y)

    # Degenerate inputs: argmin over an empty axis would raise, so handle
    # them explicitly instead of crashing.
    if n_x == 0:
        return float(b * n_y)
    if n_y == 0:
        return float(a * n_x)

    # Pairwise distances, shape (len(x), len(y)).
    distances = np.array([[dist(xi, yj) for yj in y] for xi in x])

    # For each y_j: index of its nearest x_i and the corresponding distance.
    nearest_x = np.argmin(distances, axis=0)
    nearest_dist = distances[nearest_x, np.arange(n_y)].astype(float)

    # Number of predictions assigned to each x_i.
    counts = np.bincount(nearest_x, minlength=n_x)

    # Smallest assigned distance per x_i. minimum.at applies the reduction
    # correctly even when several y_j map to the same x_i; unmatched x_i stay
    # at +inf and are masked out below.
    best_dist = np.full(n_x, np.inf)
    np.minimum.at(best_dist, nearest_x, nearest_dist)

    matched = counts > 0
    loss = np.sum(best_dist[matched] + b * (counts[matched] - 1))

    # Penalty for ground-truth elements that received no prediction.
    loss += a * np.count_nonzero(~matched)

    return float(loss)

# Example distance functions
def euclidean_distance(xi, yj):
    """Return the Euclidean (L2) distance between two points.

    Args:
        xi: First point, a scalar or array-like of coordinates.
        yj: Second point, with the same shape as ``xi``.

    Returns:
        The norm of ``xi - yj`` as computed by ``np.linalg.norm``.
    """
    # Coerce to arrays so plain lists/tuples can be subtracted element-wise.
    return np.linalg.norm(np.asarray(xi) - np.asarray(yj))

def manhattan_distance(xi, yj):
    """Return the Manhattan (L1) distance between two points.

    Args:
        xi: First point, a scalar or array-like of coordinates.
        yj: Second point, with the same shape as ``xi``.

    Returns:
        The sum of absolute coordinate differences between ``xi`` and ``yj``.
    """
    # Coerce to arrays so plain lists/tuples can be subtracted element-wise.
    return np.sum(np.abs(np.asarray(xi) - np.asarray(yj)))

# Example usage: three ground-truth points and four predicted points in 2D.
x = [[1, 2], [3, 4], [7, 8]]  # Ground-truth points
y = [[3, 3], [3, 5], [5, 6], [8, 9]]  # Predicted points
a = 3  # False negative penalty (also reused by the timestamp examples below)
b = 3  # False positive penalty (also reused by the timestamp examples below)

# Use the Manhattan distance.
# Expected: x[1] is matched by three predictions (nearest at distance 1, two extras
# cost 2 * b), x[2] by one prediction (distance 2), x[0] by none (cost a) -> 12.
L = loss_function(x, y, a, b, manhattan_distance)
L

12

In [69]:
def timestamp_distance(xi, yj):
    """Return the absolute time difference between two timestamps, in seconds.

    Scalars are compared directly. When both inputs are lists, tuples or numpy
    arrays they are compared element by element and the per-element distances
    are summed; nested sequences are handled recursively.

    Args:
        xi: A timestamp (e.g. ``pd.Timestamp``, ``datetime.datetime`` or
            ``np.datetime64``) or a sequence of timestamps.
        yj: A timestamp or sequence with the same structure as ``xi``.

    Returns:
        float: The summed absolute difference in seconds (``0.0`` for two
        empty sequences).

    Raises:
        ValueError: If both inputs are sequences of different lengths.
    """
    sequence_types = (list, tuple, np.ndarray)
    if isinstance(xi, sequence_types) and isinstance(yj, sequence_types):
        if len(xi) != len(yj):
            raise ValueError(
                f"timestamp vectors must have the same length, got {len(xi)} and {len(yj)}"
            )
        # Start from 0.0 so the result is a float even for empty sequences.
        return sum((timestamp_distance(p, q) for p, q in zip(xi, yj)), 0.0)

    delta = xi - yj
    # numpy timedeltas have no total_seconds(); pandas Timedelta provides it.
    if isinstance(delta, np.timedelta64):
        delta = pd.Timedelta(delta)
    return abs(delta.total_seconds())

In [70]:
# Example usage with scalar timestamps: each element of x and y is a single pd.Timestamp.
x = np.array([
    pd.Timestamp('2023-01-01 12:00:00'),
    pd.Timestamp('2023-01-01 13:00:00'),
    pd.Timestamp('2023-01-01 14:00:00')
])  # Ground-truth timestamps

y = np.array([
    pd.Timestamp('2023-01-01 12:00:05'),
    pd.Timestamp('2023-01-01 13:00:03'),
    pd.Timestamp('2023-01-01 13:00:01'),
    pd.Timestamp('2023-01-01 14:00:07')
])  # Predicted timestamps

# a and b are the penalties defined in the earlier example cell (both 3).
# Expected: 5 s + (1 s + one extra prediction * b) + 7 s = 16.0
L = loss_function(x, y, a, b, timestamp_distance)
L

16.0

In [72]:
# Example usage with 2-component timestamp vectors: each element of x and y is a
# pair of pd.Timestamp values, so timestamp_distance sums the two per-component gaps.
x = np.array([
    [pd.Timestamp('2023-01-01 12:00:00'), pd.Timestamp('2023-01-01 12:00:00')],
    [pd.Timestamp('2023-01-01 13:00:00'), pd.Timestamp('2023-01-01 13:00:00')],
    [pd.Timestamp('2023-01-01 14:00:00'), pd.Timestamp('2023-01-01 14:00:00')]
])  # Ground-truth timestamp pairs

y = np.array([
    [pd.Timestamp('2023-01-01 12:00:05'), pd.Timestamp('2023-01-01 12:00:05')],
    [pd.Timestamp('2023-01-01 13:00:03'), pd.Timestamp('2023-01-01 13:00:03')],
    [pd.Timestamp('2023-01-01 13:00:01'), pd.Timestamp('2023-01-01 13:00:01')],
    [pd.Timestamp('2023-01-01 14:00:07'), pd.Timestamp('2023-01-01 14:00:07')]
])  # Predicted timestamp pairs

# a and b are the penalties defined in the earlier example cell (both 3).
# Expected: 10 s + (2 s + one extra prediction * b) + 14 s = 29.0
L = loss_function(x, y, a, b, timestamp_distance)
L

29.0