In [1]:
import numpy as np
import os, sys
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [4]:
class ProbabilisticData:
    """
    A discrete probability distribution over a finite set of values.

    Both the values and their probabilities are stored as NumPy arrays, in the
    order they were given, as `self.values` and `self.probabilities`.
    """

    def __init__(self, values, probabilities):
        """
        Initialize probabilistic data with values and their associated probabilities.

        Parameters:
        - values: List of values (e.g., numbers, categories).
        - probabilities: List of probabilities corresponding to each value.

        Raises:
        - ValueError: If the two sequences differ in length, if any probability
          is negative, or if the probabilities do not sum to 1.
        """
        if len(values) != len(probabilities):
            raise ValueError("Values and probabilities must have the same length.")

        probabilities = np.array(probabilities)

        # A negative entry can be offset by another entry so that the total is
        # still 1 (e.g. [1.5, -0.5]); the sum check alone would accept it and
        # the failure would only surface later when sampling.
        if np.any(probabilities < 0):
            raise ValueError("Probabilities must be non-negative.")

        if not np.isclose(probabilities.sum(), 1):
            raise ValueError("Probabilities must sum to 1.")

        self.values = np.array(values)
        self.probabilities = probabilities

    def expected_value(self):
        """
        Calculate the expected value of the probabilistic data.

        The values must be numeric, since each one is multiplied by its
        probability.

        Returns:
        - The expected value (sum of value * probability).
        """
        return np.sum(self.values * self.probabilities)

    def sample(self, size=1, rng=None):
        """
        Sample from the probabilistic distribution.

        Parameters:
        - size: Number of samples to draw.
        - rng: Optional random generator exposing a NumPy-style `choice`
          method (e.g. `np.random.default_rng(seed)`), for reproducible draws.
          When omitted, NumPy's global random state is used.

        Returns:
        - A NumPy array of sampled values.
        """
        # Fall back to the module-level np.random.choice so that callers who
        # do not pass `rng` get the same behavior as before.
        source = np.random if rng is None else rng
        return source.choice(self.values, size=size, p=self.probabilities)


In [5]:
# Classification demo: turn probabilistic predictions into hard labels and score them
if __name__ == "__main__":
    # Ground truth for ten samples of a two-class (0 / 1) problem, alternating 0, 1
    true_labels = [0, 1] * 5

    # One distribution per sample; each pair below is (P(class 0), P(class 1))
    predicted_probabilities = [
        ProbabilisticData([0, 1], [p_zero, p_one])
        for p_zero, p_one in (
            (0.7, 0.3),
            (0.2, 0.8),
            (0.9, 0.1),
            (0.1, 0.9),
            (0.6, 0.4),
            (0.4, 0.6),
            (0.8, 0.2),
            (0.3, 0.7),
            (0.5, 0.5),
            (0.6, 0.4),
        )
    ]

    # Predict class 1 only when it is strictly more likely than class 0;
    # an exact tie therefore resolves to class 0.
    predicted_labels = [
        int(dist.probabilities[0] < dist.probabilities[1])
        for dist in predicted_probabilities
    ]

    # Score the hard labels against the ground truth
    accuracy, precision, recall, f1 = (
        accuracy_score(true_labels, predicted_labels),
        precision_score(true_labels, predicted_labels),
        recall_score(true_labels, predicted_labels),
        f1_score(true_labels, predicted_labels),
    )

    # One metric per line
    print(
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        sep="\n",
    )


NameError: name 'accuracy_score' is not defined