In [1]:
import numpy as np
import pandas as pd


def generate_data(num_samples=100, output_file="arthritis_data.csv", seed=42, accuracy=0.8):
    """Generate a synthetic arthritis dataset with true and simulated predicted labels.

    Each row holds a random weight, height and age, a true arthritis label whose
    probability grows linearly with age, and a "predicted" label that agrees with
    the true label with probability ``accuracy``.

    Args:
        num_samples: Number of rows to generate.
        output_file: Path of the CSV file to write (without the index). Pass
            ``None`` to skip writing and only return the DataFrame.
        seed: Seed of the private random generator; the same seed always yields
            the same data.
        accuracy: Probability in [0, 1] that a predicted label equals the true
            label.

    Returns:
        pandas.DataFrame with columns ``weight`` (kg), ``height`` (cm), ``age``
        (years), ``arthritis_true`` and ``arthritis_pred``.

    Raises:
        ValueError: If ``accuracy`` is outside [0, 1].
    """
    if not 0.0 <= accuracy <= 1.0:
        raise ValueError(f"accuracy must be in [0, 1], got {accuracy!r}")

    # Private legacy generator: yields the same stream as np.random.seed(seed)
    # followed by np.random.* calls, but leaves the caller's global RNG untouched.
    rng = np.random.RandomState(seed)

    # Random features, rounded to one decimal place
    weight = np.round(rng.uniform(45, 120, num_samples), 1)  # kg
    height = np.round(rng.uniform(150, 200, num_samples), 1)  # cm
    age = rng.randint(20, 80, num_samples)  # years, upper bound exclusive

    # True label: Bernoulli draw whose probability rises linearly with age
    arthritis_prob = (age - 20) / 100
    arthritis_true = rng.binomial(1, arthritis_prob)

    # Predicted label: keep the truth with probability `accuracy`, otherwise flip it.
    # A single vectorized draw consumes the random stream in the same order as one
    # rand() call per row.
    keep_truth = rng.rand(num_samples) < accuracy
    arthritis_pred = np.where(keep_truth, arthritis_true, 1 - arthritis_true)

    df = pd.DataFrame(
        {
            "weight": weight,
            "height": height,
            "age": age,
            "arthritis_true": arthritis_true,
            "arthritis_pred": arthritis_pred,
        }
    )

    # Persist only when a destination was requested
    if output_file is not None:
        df.to_csv(output_file, index=False)
        print(f"CSV file '{output_file}' generated with {num_samples} samples.")
    return df


# Example usage: build (and save) a 200-row synthetic dataset
df = generate_data(num_samples=200)


df

CSV file 'arthritis_data.csv' generated with 200 samples.


Unnamed: 0,weight,height,age,arthritis_true,arthritis_pred
0,73.1,182.1,60,1,1
1,116.3,154.2,49,1,1
2,99.9,158.1,36,0,0
3,89.9,194.9,68,0,0
4,56.7,180.3,39,1,1
...,...,...,...,...,...
195,71.2,196.5,48,0,0
196,99.4,192.9,32,0,0
197,112.3,171.4,65,1,0
198,111.5,187.5,54,0,1


In [3]:
from artifact_core.binary_classification import (
    BinaryClassificationEngine,
    BinaryClassificationScoreType,
    BinaryFeatureSpec,
)

# Binary label spec ("1" is the positive class) and the engine built from it
class_spec = BinaryFeatureSpec(positive_category="1", ls_categories=["0", "1"])
engine = BinaryClassificationEngine(resource_spec=class_spec)

# Label columns as {row index: label} dicts, the form passed to the engine below
true = df["arthritis_true"].to_dict()
predicted = df["arthritis_pred"].to_dict()


In [4]:
# Accuracy score of the simulated predictions against the true labels
engine.produce_classification_score(
    true=true,
    predicted=predicted,
    score_type=BinaryClassificationScoreType.ACCURACY,
)

0.805

In [5]:
# Precision score of the simulated predictions against the true labels
engine.produce_classification_score(
    true=true,
    predicted=predicted,
    score_type=BinaryClassificationScoreType.PRECISION,
)

0.5588235294117647

In [6]:
# Recall score of the simulated predictions against the true labels
engine.produce_classification_score(
    true=true,
    predicted=predicted,
    score_type=BinaryClassificationScoreType.RECALL,
)

0.8085106382978723