In [None]:
import numpy as np
import pandas as pd
import nonconformist as nc

import pyodbc

db = "UCM"
table = "[dbo].[student_course_matrix]"

# Student ID plus the three course columns used by the experiments in this notebook.
query = "SELECT [Student ID], [COR1003], [COR1004], [COR1002] FROM " + db + "." + table

# The database name is taken from `db` so the connection and the query
# cannot silently point at different databases.
connection_string = (
    'Driver={SQL Server};'
    'Server=DESKTOP-8LSE8HT;'
    'Database=' + db + ';'
    'Trusted_Connection=yes;'
)
conn = pyodbc.connect(connection_string)
try:
    df = pd.read_sql_query(query, conn)
finally:
    # Release the database connection even if the query fails.
    conn.close()

# The student identifier is not a feature; keep only the course columns.
df = df.drop('Student ID', axis=1)


In [None]:
import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import load_diabetes

from nonconformist.base import RegressorAdapter
from nonconformist.icp import IcpRegressor
from nonconformist.nc import RegressorNc
from nonconformist.acp import AggregatedCp
from nonconformist.acp import RandomSubSampler, BootstrapSampler, CrossSampler
from nonconformist.evaluation import reg_mean_errors



# -----------------------------------------------------------------------------
# Experiment setup
# -----------------------------------------------------------------------------

course = 'COR1002'
# Target: the `course` column of `df`; features: every other column.
Y = df[course]
X = df.drop(course, axis=1)

# Random split over row positions: first 2/3 for training, the rest for testing.
idx = np.random.permutation(len(df))
train = idx[:int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# Labels for the per-model result table built in the evaluation loop of this
# cell (two interval bounds followed by the true value).
columns = ['min', 'max', 'truth']
# NOTE(review): nonconformist documents `significance` as the allowed error
# rate, so 0.95 requests intervals that only need to cover ~5% of the cases.
# If 95% confidence was intended this should be 0.05 - confirm.
significance = 0.95

# `train`/`test` are row positions, so select with .iloc; .loc would look the
# numbers up as index labels and only works by accident on a default index.
train_x = X.iloc[train, :].to_numpy()
train_y = Y.iloc[train].to_numpy()

test_x = X.iloc[test, :].to_numpy()
test_y = Y.iloc[test].to_numpy()
truth = test_y

x, y = train_x, train_y

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

# Every entry is an AggregatedCp around its own freshly constructed
# decision-tree ICP regressor; the entries differ only in the sampler object
# handed to AggregatedCp.
_acp_samplers = {
    'ACP-RandomSubSampler': RandomSubSampler,
    'ACP-CrossSampler': CrossSampler,
    'ACP-BootstrapSampler': BootstrapSampler,
}

models = {
    label: AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        make_sampler(),
    )
    for label, make_sampler in _acp_samplers.items()
}

# -----------------------------------------------------------------------------
# Train, predict and evaluate
# -----------------------------------------------------------------------------
for name, model in models.items():
    model.fit(train_x, train_y)
    # Prediction without a significance level is what reg_mean_errors
    # receives (together with the level) to compute the error rate.
    prediction = model.predict(test_x)
    # Prediction at the chosen significance level, shown next to the truth.
    prediction_sign = model.predict(test_x,
                                    significance=significance)
    # Use dedicated names for the per-model output: rebinding the notebook-wide
    # `df` / `table` here would replace the loaded course data that later
    # cells read via `df[course]`.
    result_table = np.vstack((prediction_sign.T, truth)).T
    result_df = pd.DataFrame(result_table, columns=columns)
    print('\n{}'.format(name))
    print('Error rate: {}'.format(reg_mean_errors(prediction,
                                                  truth,
                                                  significance)))
    print(result_df)

In [None]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc



course = 'COR1002'
# Target: the `course` column of `df`; features: every other column.
# NOTE(review): this expects `df` to still be the course-grade frame loaded
# from the database - confirm no earlier cell has rebound it.
Y = df[course]
X = df.drop(course, axis=1)

# Random split over row positions: first 2/3 kept for modelling, last 1/3 held out.
idx = np.random.permutation(len(df))
train = idx[:int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# `train`/`test` are row positions, so select with .iloc; .loc would look the
# numbers up as index labels and only works by accident on a default index.
train_x = X.iloc[train, :].to_numpy()
train_y = Y.iloc[train].to_numpy()

# Held-out rows; the rest of this cell only works on x / y below.
test_x = X.iloc[test, :].to_numpy()
test_y = Y.iloc[test].to_numpy()

x, y = train_x, train_y
# Test data
#data = load_iris()
#x, y = data.data, data.target

# Encode the distinct target values as consecutive integer class codes.
# np.unique computes the mapping in one pass from the original values; an
# in-place "y[y == value] = code" loop can merge classes whenever a freshly
# written code equals a label that has not been processed yet
# (e.g. labels 0.5, 0.7, 1.0), and it also mutates train_y through the alias.
classes, y = np.unique(y, return_inverse=True)

n_instances = y.size
idx = np.random.permutation(n_instances)

# Three equally sized parts: proper training set, calibration set, test set
# (the test part also receives any remainder).
n_third = int(n_instances / 3)
train_idx = idx[:n_third]
cal_idx = idx[n_third:2 * n_third]
test_idx = idx[2 * n_third:]

# The nonconformity function is built inline rather than bound to `nc`, which
# would shadow the `import nonconformist as nc` module alias.
icp = IcpClassifier(ClassifierNc(ClassifierAdapter(RandomForestClassifier())))

icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])


print(pd.DataFrame(icp.predict_conf(x[test_idx, :]), columns=['Label', 'Confidence', 'Credibility']))

In [None]:

# Authors: Henrik Linusson

import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Shuffle all sample positions once, then cut the shuffled order at the 1/3
# and 2/3 marks to get the training, calibration and test positions.
idx = np.random.permutation(data.target.size)
first_cut = int(idx.size / 3)
second_cut = int(2 * idx.size / 3)
train, calibrate, test = idx[:first_cut], idx[first_cut:second_cut], idx[second_cut:]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# Decision-tree classifier scored with the margin error function, wrapped in
# an inductive conformal classifier.
margin_nc = ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()), MarginErrFunc())
icp = IcpClassifier(margin_nc)

# Fit on the training rows, then calibrate on the separate calibration rows.
train_features, train_labels = data.data[train, :], data.target[train]
icp.fit(train_features, train_labels)
cal_features, cal_labels = data.data[calibrate, :], data.target[calibrate]
icp.calibrate(cal_features, cal_labels)

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# NOTE(review): nonconformist documents `significance` as the allowed error
# rate, so 0.9 is a very permissive level; if 90% confidence was intended this
# should be 0.1 - confirm.
prediction = icp.predict(data.data[test, :], significance=0.9)
header = ['c0', 'c1', 'c2', 'Truth']
# One row per test sample: the three per-class prediction columns followed by
# the true class.
iris_table = np.vstack([prediction.T, data.target[test]]).T
# Pass the header as column labels instead of stacking it as a data row (which
# coerces every value to a string), and do not rebind the notebook-wide `df`,
# which the following cell still reads as the course-grade frame.
iris_results = pd.DataFrame(iris_table, columns=header)
print(iris_results)

In [None]:

import numpy as np
import pandas as pd

from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.datasets import load_boston

from nonconformist.base import RegressorAdapter
from nonconformist.icp import IcpRegressor
from nonconformist.nc import RegressorNc, AbsErrorErrFunc, RegressorNormalizer

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
# NOTE(review): this cell's `load_boston` import is unused and that loader was
# removed in scikit-learn 1.2, so the import fails on current releases -
# confirm and drop it.
course = 'COR1002'
# Target: the `course` column of `df`; features: every other column.
# NOTE(review): this expects `df` to still be the course-grade frame loaded
# from the database - confirm no earlier cell has rebound it.
Y = df[course]
X = df.drop(course, axis=1)

# Shuffle the row positions and cut them into thirds:
# proper training set, calibration set and test set.
idx = np.random.permutation(Y.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Without normalization
# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
underlying_model = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
nc = RegressorNc(underlying_model, AbsErrorErrFunc())
icp = IcpRegressor(nc)
# X / Y are pandas objects and train / calibrate / test hold row positions:
# `X[train, :]` is not valid DataFrame indexing, so select positionally with
# .iloc and hand plain NumPy arrays to the conformal predictor.
icp.fit(X.iloc[train].to_numpy(), Y.iloc[train].to_numpy())
icp.calibrate(X.iloc[calibrate].to_numpy(), Y.iloc[calibrate].to_numpy())

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(X.iloc[test].to_numpy(), significance=0.1)
header = ['min','max','truth','size']
# Width of each predicted interval.
size = prediction[:, 1] - prediction[:, 0]
table = np.vstack([prediction.T, Y.iloc[test].to_numpy(), size.T]).T
# Keep the result under its own name: rebinding `df` would destroy the source
# data and make this cell fail when it is run a second time.
results_plain = pd.DataFrame(table, columns=header)
print(results_plain)

# -----------------------------------------------------------------------------
# With normalization
# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
underlying_model = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
normalizing_model = RegressorAdapter(KNeighborsRegressor(n_neighbors=1))
normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())
nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
icp = IcpRegressor(nc)
# X / Y are pandas objects and train / calibrate / test hold row positions:
# select positionally with .iloc and pass plain NumPy arrays.
icp.fit(X.iloc[train].to_numpy(), Y.iloc[train].to_numpy())
icp.calibrate(X.iloc[calibrate].to_numpy(), Y.iloc[calibrate].to_numpy())

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Predict from the test *features* (the target series was passed here before).
# NOTE(review): the un-normalized run in this cell uses significance=0.1;
# 0.9 here makes the two interval tables incomparable - confirm which level
# is intended.
prediction = icp.predict(X.iloc[test].to_numpy(), significance=0.9)
header = ['min','max','truth','size']
# Width of each predicted interval.
size = prediction[:, 1] - prediction[:, 0]
table = np.vstack([prediction.T, Y.iloc[test].to_numpy(), size.T]).T
# Keep the result under its own name instead of rebinding `df`, and actually
# show it (the previous placeholder print never displayed the table).
results_normalized = pd.DataFrame(table, columns=header)
print(results_normalized)