In [1]:
import numpy as np
import pandas as pd
import random as rnd
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib
from utils import PCP
from sklearn.model_selection import train_test_split
from datasets import datasets
from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
import time
import pickle

# Seed NumPy's global RNG for reproducibility. The train/test splits and the
# shuffled KFold used later pass no `random_state`, so (per scikit-learn's
# documented default) they draw from this global state.
# NOTE(review): Python's own `random` module (imported as `rnd`) is not seeded
# here — confirm nothing downstream relies on it.
np.random.seed(123456)

In [2]:
# Load the "meps_19" and "meps_20" datasets from the local dataset folder and
# stack them along the first axis into one pooled (X, Y) pair.
dataset_base_path = "datasets/"

(X_1, Y_1), (X_2, Y_2) = (
    datasets.GetDataset(dataset_name, dataset_base_path)
    for dataset_name in ("meps_19", "meps_20")
)

X = np.concatenate((X_1, X_2), axis=0)
Y = np.concatenate((Y_1, Y_2), axis=0)

In [None]:
# Timing experiment: measure how long `PCP.calibrate` takes as the number of
# calibration points grows, repeated over independent random splits.
n_repeats = 30
calib_sizes = range(1000, 11000, 1000)
alpha = 0.1  # level forwarded to PCP.calibrate; loop-invariant, so set once

# time_pcp[i, j] holds the elapsed seconds of repeat i for calib_sizes[j].
time_pcp = np.zeros((n_repeats, len(calib_sizes)))

for t1 in range(n_repeats):

    # Hold out 1000 test points, then 10000 validation (calibration) points;
    # whatever remains is used for training.
    X_, X_test, Y_, Y_test = train_test_split(X, Y, test_size=1000)
    X_train, X_val, Y_train, Y_val = train_test_split(X_, Y_, test_size=10000)

    # Absolute residuals of a ridge fit on the validation and test sets.
    CF = Ridge(alpha=0.001).fit(X_train, Y_train)
    R_val = np.abs(Y_val - CF.predict(X_val))
    R_test = np.abs(Y_test - CF.predict(X_test))

    kf = KFold(n_splits=20, shuffle=True)

    # Out-of-fold absolute residuals on the training set. Allocate an explicit
    # float buffer: `np.zeros_like(Y_train)` would inherit Y_train's dtype and
    # silently truncate the residuals if the target were integer-typed.
    R_train = np.zeros(np.shape(Y_train), dtype=float)

    for train_index, test_index in kf.split(X_train):
        model = Ridge(alpha=0.001).fit(X_train[train_index], Y_train[train_index])
        R_train[test_index] = np.abs(
            Y_train[test_index] - model.predict(X_train[test_index])
        )

    PCP_model = PCP()
    PCP_model.train(X_train, R_train)

    for t2, size in enumerate(calib_sizes):
        # perf_counter is monotonic and high-resolution, unlike the wall clock
        # returned by time.time(), which can jump if the system clock changes.
        start = time.perf_counter()
        q_pcp, coverage_pcp = PCP_model.calibrate(X_val[:size], R_val[:size], X_test, R_test, alpha)
        end = time.perf_counter()
        time_pcp[t1, t2] = end - start
        print(f"Time for size {size}: {time_pcp[t1,t2]} seconds")
        # Pause outside the timed region; presumably a cool-down between
        # measurements — TODO confirm it is still needed (adds 20 s per run).
        time.sleep(20)

In [4]:
# Median elapsed time per calibration size across repeats (axis 0); the first
# size column is dropped before plotting.
median_seconds = np.median(time_pcp, axis=0)
time_pcp_ = median_seconds[1:]

# Convert seconds to minutes for the y-axis.
computation_times_minutes = [seconds / 60 for seconds in time_pcp_]

# Calibration sizes matching the remaining columns (x-axis).
sample_size = np.arange(2000, 11000, 1000)

# Line appearance gathered in one place.
line_style = dict(
    marker='o',
    linestyle='-',
    color='tab:olive',
    linewidth=7,
    markersize=12,
)

# Draw the timing curve.
plt.figure(figsize=(15, 6), layout='constrained')
plt.plot(sample_size, computation_times_minutes, **line_style)
plt.xlabel('Sample Size $n$', fontsize=25)
plt.ylabel('Computational Time (minutes)', fontsize=24)
plt.ylim(1.5, 6.5)
plt.yticks(fontsize=24)
plt.xticks(sample_size, fontsize=24.5)
plt.grid(True)
plt.show()