In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings
import numpy as np
# Load the prepared dataset; the file is parsed as semicolon-separated.
final = pd.read_csv('final_data.csv', sep=';')

# From here on, suppress every warning so cell output stays compact.
import warnings
warnings.filterwarnings(action='ignore')

Preparing data for building machine learning models

In [None]:
def triple_split(data, t_size, v_size):
    """Shuffle ``data`` and cut it into train / validation / test parts.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to split.
    t_size, v_size : int or float
        Percentage (0-100) of rows that go to the training and validation
        parts. Whatever remains becomes the test part.

    Returns
    -------
    list of pandas.DataFrame
        ``[train, valid, test]`` in that order. The shuffle uses a fixed seed,
        so repeated calls on the same data return the same split.
    """
    shuffled = data.sample(frac=1, random_state=123)
    n_rows = len(shuffled)
    train_end = int(n_rows * t_size / 100)
    valid_end = int(n_rows * (t_size + v_size) / 100)
    # Positional slicing instead of np.split: np.split on a DataFrame goes
    # through DataFrame.swapaxes, which pandas has deprecated.
    return [
        shuffled.iloc[:train_end],
        shuffled.iloc[train_end:valid_end],
        shuffled.iloc[valid_end:],
    ]

# Split proportions in percent; rows not covered by these two form the test set.
train_size, val_size = 70, 20

train, valid, test = triple_split(data=final, t_size=train_size, v_size=val_size)

In [None]:
# Training features: every column except the label and the valuation / raise columns.
X_train = train.drop(
    columns=[
        'target1',
        'valuation_currency_code',
        'valuation_amount',
        'raised_amount',
        'raised_currency_code',
        'investment_rounds',
    ]
)
# `target1` is the label column.
y_train = train['target1']

In [None]:
# Validation features: same column selection as for the training split.
X_val = valid.drop(
    columns=[
        'target1',
        'valuation_currency_code',
        'valuation_amount',
        'raised_amount',
        'raised_currency_code',
        'investment_rounds',
    ]
)
# `target1` is the label column.
y_val = valid['target1']

In [None]:
# Test features: same column selection as for the training split.
X_test = test.drop(
    columns=[
        'target1',
        'valuation_currency_code',
        'valuation_amount',
        'raised_amount',
        'raised_currency_code',
        'investment_rounds',
    ]
)
# `target1` is the label column.
y_test = test['target1']

In [None]:
from sklearn.preprocessing import MinMaxScaler

cols = X_train.columns

# The scaler is fitted on the training features only; validation and test are
# transformed with the training min/max so nothing leaks from them.
scaler = MinMaxScaler()

# `columns=cols`, not `[cols]`: wrapping the Index in a list makes pandas build
# a one-level MultiIndex, so the column labels would become tuples.
# The original row index is kept so each X stays label-aligned with its y.
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=cols, index=X_train.index)

X_test = pd.DataFrame(scaler.transform(X_test), columns=cols, index=X_test.index)

X_val = pd.DataFrame(scaler.transform(X_val), columns=cols, index=X_val.index)

In [None]:
# Persist the scaled, not-yet-resampled training split (to_csv defaults:
# comma-separated, row index written as the first column).
X_train.to_csv(path_or_buf='X_unbalanced.csv')
y_train.to_csv(path_or_buf='y_unbalanced.csv')

Balanced data

In [None]:
import imblearn
from imblearn.under_sampling import TomekLinks
from imblearn.combine import SMOTETomek

In [None]:
# Under-sampling: remove majority-class rows that take part in Tomek links.
tl = TomekLinks(sampling_strategy='majority')
X_tl, y_tl = tl.fit_resample(X_train, y_train)

# X_train is already min-max scaled, so the scaler is deliberately NOT refit
# here: refitting would overwrite the shared `scaler` and could put the
# resampled training data on a different scale from X_val / X_test.
# `columns=cols` (not `[cols]`) keeps plain column labels instead of a MultiIndex.
X_tl = pd.DataFrame(np.asarray(X_tl), columns=cols)

X_tl.to_csv('X_balanced by TL.csv')
y_tl.to_csv('y_balanced by TL.csv')

In [None]:
# Combined over/under-sampling; seeded so the synthetic rows are reproducible.
smt = SMOTETomek(sampling_strategy='minority', random_state=123)
X_stl, y_stl = smt.fit_resample(X_train, y_train)

# X_train is already min-max scaled, so the scaler is deliberately NOT refit
# here: refitting would overwrite the shared `scaler` and could put the
# resampled training data on a different scale from X_val / X_test.
# `columns=cols` (not `[cols]`) keeps plain column labels instead of a MultiIndex.
X_stl = pd.DataFrame(np.asarray(X_stl), columns=cols)

X_stl.to_csv('X_balanced by STL.csv')
y_stl.to_csv('y_balanced by STL.csv')

Choosing the training dataset

In [None]:
# The CSV files were written by to_csv with its defaults (comma-separated, row
# index in the first column), so they are read back the same way. Reading them
# with delimiter=';' would collapse every row into a single text column.
# The labels are squeezed to a Series, which is the 1-D shape estimators expect.
X_train = pd.read_csv('X_unbalanced.csv', index_col=0)
y_train = pd.read_csv('y_unbalanced.csv', index_col=0).squeeze('columns')

# Alternative training sets; file names match the ones written above.
#X_train = pd.read_csv('X_balanced by TL.csv', index_col=0)
#y_train = pd.read_csv('y_balanced by TL.csv', index_col=0).squeeze('columns')

#X_train = pd.read_csv('X_balanced by STL.csv', index_col=0)
#y_train = pd.read_csv('y_balanced by STL.csv', index_col=0).squeeze('columns')


Linear regression

In [None]:
from sklearn.preprocessing import MinMaxScaler

cols_fin = final.columns

# NOTE: this rebinds the notebook-wide `scaler` to one fitted on the full dataset.
scaler = MinMaxScaler()

# Pass the Index itself: `columns=[cols_fin]` would build a one-level
# MultiIndex whose labels are tuples rather than the plain column names
# that the regression cell looks up.
final_std = pd.DataFrame(scaler.fit_transform(final), columns=cols_fin)

In [None]:
import statsmodels.api as sm

# Ordinary least squares of the scaled target on the selected features.
# No `family` argument: it is an option of sm.GLM, not of sm.OLS, and the
# `families` name it referenced is never imported in this notebook.
# NOTE(review): no intercept term is included; wrap the regressors in
# sm.add_constant(...) if one is wanted.
model_1 = sm.OLS(
    final_std["target1"],
    final_std[["category_code", "invested_companies", "funding_rounds", "funding_total_usd", "milestones", "relationships", "angel", "crowdfunding", "other", "post_ipo", "series_a", "series_b", "series_c", "venture", "number_of_merges", "fin_org_financed", "person_financed", "offices", "country_code", "new_york", "california"]],
).fit()
print(model_1.summary())

Logistic regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the liblinear solver and a fixed seed,
# fitted on the currently selected training set.
logreg = LogisticRegression(random_state=0, solver='liblinear')
logreg.fit(X_train, y_train)

In [None]:
# Class predictions for the validation split; the bare name displays them.
y_pred_val = logreg.predict(X=X_val)
y_pred_val

In [None]:
from sklearn.inspection import permutation_importance

# Seeded so the random shuffles, and therefore the ranking, are reproducible.
imps = permutation_importance(logreg, X_train, y_train, random_state=0)

# One row per feature: absolute mean importance over the repeats,
# sorted ascending so the largest bar is drawn at the top.
avg_importance = pd.DataFrame(
    {"Importance": np.abs(imps.importances_mean)},
    index=["category_code", "invested_companies", "funding_rounds", "funding_total_usd", "milestones", "relationships", "angel", "crowdfunding", "other", "post_ipo", "series_a", "series_b", "series_c", "venture", "number_of_merges", "fin_org_financed", "person_financed", "offices", "country_code", "new_york", "california"],
).sort_values("Importance", ascending=True)
avg_importance.plot(kind='barh', figsize=(10, 6))

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the current validation predictions.
print('Model accuracy score: {0:0.4f}'.format(accuracy_score(y_true=y_val, y_pred=y_pred_val)))

In [None]:
# Accuracy on the training data itself, for comparison with the validation score.
y_pred_train = logreg.predict(X=X_train)

print('Training-set accuracy score: {0:0.4f}'.format(accuracy_score(y_true=y_train, y_pred=y_pred_train)))

In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_val, y_pred_val)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[1,1])

print('\nTrue Negatives(TN) = ', cm[0,0])

print('\nFalse Positives(FP) = ', cm[1,0])

print('\nFalse Negatives(FN) = ', cm[0,1])

In [None]:
cm_matrix = pd.DataFrame(data=cm, columns=['Actual Negative:0', 'Actual Positive:1'], 
                                 index=['Predict Negative:0', 'Predict Positive:1'])

sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='YlGnBu')

In [None]:
TP = cm[1,1]
TN = cm[0,0]
FP = cm[1,0]
FN = cm[0,1]

In [None]:
classification_error = (FP + FN) / float(TP + TN + FP + FN)

print('Classification error : {0:0.4f}'.format(classification_error))

In [None]:
precision = TP / float(TP + FP)


print('Precision : {0:0.4f}'.format(precision))

In [None]:
recall = TP / float(TP + FN)

print('Recall or Sensitivity : {0:0.4f}'.format(recall))

In [None]:
false_positive_rate = FP / float(FP + TN)

print('False Positive Rate : {0:0.4f}'.format(false_positive_rate))

In [None]:
specificity = TN / (TN + FP)

print('Specificity : {0:0.4f}'.format(specificity))

In [None]:
from sklearn.metrics import f1_score

# F1 score of the current validation predictions.
f1score = f1_score(y_true=y_val, y_pred=y_pred_val)

print('f1-score : {0:0.4f}'.format(f1score))

In [None]:
from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_val, y_pred_val, pos_label = 1)

plt.figure(figsize=(6,4))

plt.plot(fpr, tpr, linewidth=2)

plt.plot([0,1], [0,1], 'k--' )

plt.rcParams['font.size'] = 12

plt.title('ROC curve for Lofistic regression for Predicting Success')

plt.xlabel('False Positive Rate (1 - Specificity)')

plt.ylabel('True Positive Rate (Sensitivity)')

plt.show()

In [None]:
from sklearn.metrics import roc_auc_score

ROC_AUC = roc_auc_score(y_val, y_pred_val)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

Cross-validation on Logistic Regression

In [None]:
import statistics as stat
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.metrics import recall_score
from sklearn.metrics import make_scorer

# Specificity is the recall of the negative class (label 0).
specificity = make_scorer(recall_score, pos_label=0)

# One 9-fold run that scores every metric on the same fitted fold models,
# instead of refitting the estimator once per metric.
# NOTE(review): the folds are drawn from the validation split (X_val), not
# from the training data - confirm this is intended.
cv_results = cross_validate(
    logreg, X_val, y_val, cv=9,
    scoring={
        'accuracy': 'accuracy',
        'recall': 'recall',
        'precision': 'precision',
        'f1': 'f1',
        'roc_auc': 'roc_auc',
        'specificity': specificity,
    },
)

score_acc = stat.mean(cv_results['test_accuracy'])
score_rec = stat.mean(cv_results['test_recall'])
score_prec = stat.mean(cv_results['test_precision'])
score_f1 = stat.mean(cv_results['test_f1'])
score_RA = stat.mean(cv_results['test_roc_auc'])

score_spec = stat.mean(cv_results['test_specificity'])
score_FPR = 1 - score_spec
score_error = 1 - score_acc

Naive Bayes Classifier

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings, fitted on the selected training set.
gnb = GaussianNB()
gnb.fit(X_train, y_train)

In [None]:
# Class predictions for the validation split; the bare name displays them.
y_pred_val = gnb.predict(X=X_val)
y_pred_val

In [None]:
from sklearn.inspection import permutation_importance

# Seeded so the random shuffles, and therefore the ranking, are reproducible.
imps = permutation_importance(gnb, X_train, y_train, random_state=0)

# One row per feature: absolute mean importance over the repeats,
# sorted ascending so the largest bar is drawn at the top.
avg_importance = pd.DataFrame(
    {"Importance": np.abs(imps.importances_mean)},
    index=["category_code", "invested_companies", "funding_rounds", "funding_total_usd", "milestones", "relationships", "angel", "crowdfunding", "other", "post_ipo", "series_a", "series_b", "series_c", "venture", "number_of_merges", "fin_org_financed", "person_financed", "offices", "country_code", "new_york", "california"],
).sort_values("Importance", ascending=True)
avg_importance.plot(kind='barh', figsize=(10, 6))

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the current validation predictions.
print('Model accuracy score: {0:0.4f}'.format(accuracy_score(y_true=y_val, y_pred=y_pred_val)))

In [None]:
y_pred_train = gnb.predict(X_train)

y_pred_train

In [None]:
print('Training-set accuracy score: {0:0.4f}'. format(accuracy_score(y_train, y_pred_train)))

In [None]:
cm = confusion_matrix(y_val, y_pred_val)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[1,1])

print('\nTrue Negatives(TN) = ', cm[0,0])

print('\nFalse Positives(FP) = ', cm[1,0])

print('\nFalse Negatives(FN) = ', cm[0,1])

In [None]:
cm_matrix = pd.DataFrame(data=cm, columns=['Actual Negative:0', 'Actual Positive:1'], 
                                 index=['Predict Negative:0', 'Predict Positive:1'])

sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='YlGnBu')

In [None]:
TP = cm[1,1]
TN = cm[0,0]
FP = cm[1,0]
FN = cm[0,1]

In [None]:
classification_error = (FP + FN) / float(TP + TN + FP + FN)

print('Classification error : {0:0.4f}'.format(classification_error))

In [None]:
precision = TP / float(TP + FP)

print('Precision : {0:0.4f}'.format(precision))

In [None]:
recall = TP / float(TP + FN)

print('Recall or Sensitivity : {0:0.4f}'.format(recall))

In [None]:
false_positive_rate = FP / float(FP + TN)

print('False Positive Rate : {0:0.4f}'.format(false_positive_rate))

In [None]:
specificity = TN / (TN + FP)

print('Specificity : {0:0.4f}'.format(specificity))

In [None]:
# F1 score of the current validation predictions.
f1score = f1_score(y_true=y_val, y_pred=y_pred_val)

print('f1-score : {0:0.4f}'.format(f1score))

In [None]:
from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_val, y_pred_val, pos_label = 1)

plt.figure(figsize=(6,4))

plt.plot(fpr, tpr, linewidth=2)

plt.plot([0,1], [0,1], 'k--' )

plt.rcParams['font.size'] = 12

plt.title('ROC curve for Gaussian Naive Bayes Classifier for Predicting Success')

plt.xlabel('False Positive Rate (1 - Specificity)')

plt.ylabel('True Positive Rate (Sensitivity)')

plt.show()

In [None]:
from sklearn.metrics import roc_auc_score

ROC_AUC = roc_auc_score(y_val, y_pred_val)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

Cross-validation on Naive Bayes Classifier

In [None]:
# Specificity is the recall of the negative class (label 0).
specificity = make_scorer(recall_score, pos_label=0)

# One 9-fold run that scores every metric on the same fitted fold models,
# instead of refitting the estimator once per metric.
# NOTE(review): the folds are drawn from the validation split (X_val), not
# from the training data - confirm this is intended.
cv_results = cross_validate(
    gnb, X_val, y_val, cv=9,
    scoring={
        'accuracy': 'accuracy',
        'recall': 'recall',
        'precision': 'precision',
        'f1': 'f1',
        'roc_auc': 'roc_auc',
        'specificity': specificity,
    },
)

score_acc = stat.mean(cv_results['test_accuracy'])
score_rec = stat.mean(cv_results['test_recall'])
score_prec = stat.mean(cv_results['test_precision'])
score_f1 = stat.mean(cv_results['test_f1'])
score_RA = stat.mean(cv_results['test_roc_auc'])

score_spec = stat.mean(cv_results['test_specificity'])
score_FPR = 1 - score_spec
score_error = 1 - score_acc

Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-criterion decision tree, depth capped at 16, fixed seed.
clf_en = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=16,
    random_state=42,
)
clf_en.fit(X_train, y_train)

In [None]:
from sklearn.inspection import permutation_importance

# Seeded so the random shuffles, and therefore the ranking, are reproducible.
imps = permutation_importance(clf_en, X_train, y_train, random_state=0)

# One row per feature: absolute mean importance over the repeats,
# sorted ascending so the largest bar is drawn at the top.
avg_importance = pd.DataFrame(
    {"Importance": np.abs(imps.importances_mean)},
    index=["category_code", "invested_companies", "funding_rounds", "funding_total_usd", "milestones", "relationships", "angel", "crowdfunding", "other", "post_ipo", "series_a", "series_b", "series_c", "venture", "number_of_merges", "fin_org_financed", "person_financed", "offices", "country_code", "new_york", "california"],
).sort_values("Importance", ascending=True)
avg_importance.plot(kind='barh', figsize=(10, 6))

In [None]:
# Validation predictions of the entropy tree and their accuracy.
y_pred_val = clf_en.predict(X=X_val)

print('Model accuracy score with criterion entropy: {0:0.4f}'.format(accuracy_score(y_true=y_val, y_pred=y_pred_val)))

In [None]:
# Accuracy on the training data itself, for comparison with the validation score.
y_pred_train = clf_en.predict(X=X_train)

print('Training-set accuracy score: {0:0.4f}'.format(accuracy_score(y_true=y_train, y_pred=y_pred_train)))

In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_val, y_pred_val)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[1,1])

print('\nTrue Negatives(TN) = ', cm[0,0])

print('\nFalse Positives(FP) = ', cm[1,0])

print('\nFalse Negatives(FN) = ', cm[0,1])

In [None]:
cm_matrix = pd.DataFrame(data=cm, columns=['Actual Negative:0', 'Actual Positive:1'], 
                                 index=['Predict Negative:0', 'Predict Positive:1'])

sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='YlGnBu')

In [None]:
TP = cm[1,1]
TN = cm[0,0]
FP = cm[1,0]
FN = cm[0,1]

In [None]:
classification_error = (FP + FN) / float(TP + TN + FP + FN)

print('Classification error : {0:0.4f}'.format(classification_error))

In [None]:
precision = TP / float(TP + FP)

print('Precision : {0:0.4f}'.format(precision))

In [None]:
recall = TP / float(TP + FN)

print('Recall or Sensitivity : {0:0.4f}'.format(recall))

In [None]:
false_positive_rate = FP / float(FP + TN)

print('False Positive Rate : {0:0.4f}'.format(false_positive_rate))

In [None]:
specificity = TN / (TN + FP)

print('Specificity : {0:0.4f}'.format(specificity))

In [None]:
# F1 score of the current validation predictions.
f1score = f1_score(y_true=y_val, y_pred=y_pred_val)

print('f1-score : {0:0.4f}'.format(f1score))

In [None]:
from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_val, y_pred_val, pos_label = 1)

plt.figure(figsize=(6,4))

plt.plot(fpr, tpr, linewidth=2)

plt.plot([0,1], [0,1], 'k--' )

plt.rcParams['font.size'] = 12

plt.title('ROC curve for Decision Tree for Predicting Success')

plt.xlabel('False Positive Rate (1 - Specificity)')

plt.ylabel('True Positive Rate (Sensitivity)')

plt.show()

In [None]:
from sklearn.metrics import roc_auc_score

ROC_AUC = roc_auc_score(y_val, y_pred_val)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

Cross-validation on Decision Tree

In [None]:
# Specificity is the recall of the negative class (label 0).
specificity = make_scorer(recall_score, pos_label=0)

# One 9-fold run that scores every metric on the same fitted fold models,
# instead of refitting the estimator once per metric.
# NOTE(review): the folds are drawn from the validation split (X_val), not
# from the training data - confirm this is intended.
cv_results = cross_validate(
    clf_en, X_val, y_val, cv=9,
    scoring={
        'accuracy': 'accuracy',
        'recall': 'recall',
        'precision': 'precision',
        'f1': 'f1',
        'roc_auc': 'roc_auc',
        'specificity': specificity,
    },
)

score_acc = stat.mean(cv_results['test_accuracy'])
score_rec = stat.mean(cv_results['test_recall'])
score_prec = stat.mean(cv_results['test_precision'])
score_f1 = stat.mean(cv_results['test_f1'])
score_RA = stat.mean(cv_results['test_roc_auc'])

score_spec = stat.mean(cv_results['test_specificity'])
score_FPR = 1 - score_spec
score_error = 1 - score_acc

CatBoost

In [None]:
from catboost import CatBoostClassifier

# Gradient-boosted trees with log-loss objective and a fixed seed,
# fitted on the selected training set.
model = CatBoostClassifier(
    loss_function='Logloss',
    iterations=1000,
    learning_rate=0.1,
    depth=16,
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
from sklearn.inspection import permutation_importance

# Seeded so the random shuffles, and therefore the ranking, are reproducible.
imps = permutation_importance(model, X_train, y_train, random_state=0)

# One row per feature: absolute mean importance over the repeats,
# sorted ascending so the largest bar is drawn at the top.
avg_importance = pd.DataFrame(
    {"Importance": np.abs(imps.importances_mean)},
    index=["category_code", "invested_companies", "funding_rounds", "funding_total_usd", "milestones", "relationships", "angel", "crowdfunding", "other", "post_ipo", "series_a", "series_b", "series_c", "venture", "number_of_merges", "fin_org_financed", "person_financed", "offices", "country_code", "new_york", "california"],
).sort_values("Importance", ascending=True)
avg_importance.plot(kind='barh', figsize=(10, 6))


In [None]:
# Validation predictions of the CatBoost model and their accuracy.
y_pred_val = model.predict(X_val)

val_accuracy = accuracy_score(y_true=y_val, y_pred=y_pred_val)
print(f"Val Accuracy: {val_accuracy:.4f}")

In [None]:
# Accuracy on the training data itself, for comparison with the validation score.
y_pred_train = model.predict(X_train)

train_accuracy = accuracy_score(y_true=y_train, y_pred=y_pred_train)
print(f"Train Accuracy: {train_accuracy:.4f}")

In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_val, y_pred_val)

print('Confusion matrix\n\n', cm)

print('\nTrue Positives(TP) = ', cm[1,1])

print('\nTrue Negatives(TN) = ', cm[0,0])

print('\nFalse Positives(FP) = ', cm[1,0])

print('\nFalse Negatives(FN) = ', cm[0,1])

In [None]:
cm_matrix = pd.DataFrame(data=cm, columns=['Actual Negative:0', 'Actual Positive:1'], 
                                 index=['Predict Negative:0', 'Predict Positive:1'])

sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='YlGnBu')

In [None]:
TP = cm[1,1]
TN = cm[0,0]
FP = cm[1,0]
FN = cm[0,1]

In [None]:
classification_error = (FP + FN) / float(TP + TN + FP + FN)

print('Classification error : {0:0.4f}'.format(classification_error))

In [None]:
precision = TP / float(TP + FP)

print('Precision : {0:0.4f}'.format(precision))

In [None]:
recall = TP / float(TP + FN)

print('Recall or Sensitivity : {0:0.4f}'.format(recall))

In [None]:
false_positive_rate = FP / float(FP + TN)

print('False Positive Rate : {0:0.4f}'.format(false_positive_rate))

In [None]:
specificity = TN / (TN + FP)

print('Specificity : {0:0.4f}'.format(specificity))

In [None]:
# F1 score of the current validation predictions.
f1score = f1_score(y_true=y_val, y_pred=y_pred_val)

print('f1-score : {0:0.4f}'.format(f1score))

In [None]:

from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_val, y_pred_val, pos_label = 1)

plt.figure(figsize=(6,4))

plt.plot(fpr, tpr, linewidth=2)

plt.plot([0,1], [0,1], 'k--' )

plt.rcParams['font.size'] = 12

plt.title('ROC curve for CatBoost for Predicting Success')

plt.xlabel('False Positive Rate (1 - Specificity)')

plt.ylabel('True Positive Rate (Sensitivity)')

plt.show()

In [None]:
from sklearn.metrics import roc_auc_score

ROC_AUC = roc_auc_score(y_val, y_pred_val)

print('ROC AUC : {:.4f}'.format(ROC_AUC))

Cross-validation on CatBoost

from catboost import Pool, cv

# Training features and labels wrapped in a CatBoost Pool.
catboost_pool = Pool(X_train, label=y_train)

# Hyper-parameters used for every fold.
# NOTE(review): learning_rate here is 0.01 while the standalone CatBoost model
# above is built with 0.1 - confirm the difference is intentional.
params = dict(
    iterations=1000,
    learning_rate=0.01,
    depth=16,
    loss_function='Logloss',
    random_state=42,
)

# 9-fold CatBoost cross-validation, silent, returning the per-fold models
# alongside the results table.
cv_results, cv_model = cv(
    pool=catboost_pool,
    params=params,
    fold_count=9,
    verbose=False,
    return_models=True,
)

In [None]:
import statistics as stat
from sklearn.metrics import accuracy_score

def Accuracy_Score(cv_model,y_test):
    """Accuracy of each cross-validation model on the validation features.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> accuracy.
    """
    return {
        fold: accuracy_score(y_test, model.predict(X_val.values, prediction_type='Class'))
        for fold, model in enumerate(cv_model, start=1)
    }

# The labels passed in must belong to X_val, hence y_val.
data=Accuracy_Score(cv_model,y_val)
stat.mean(data.values())

In [None]:
def Error_Score(cv_model,y_test):
    """Classification error (1 - accuracy) of each cross-validation model.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> error rate.
    """
    return {
        fold: 1 - accuracy_score(y_test, model.predict(X_val.values, prediction_type='Class'))
        for fold, model in enumerate(cv_model, start=1)
    }

# The labels passed in must belong to X_val, hence y_val.
data=Error_Score(cv_model,y_val)
stat.mean(data.values())


In [None]:
from sklearn.metrics import recall_score

def Recall_Score(cv_model,y_test):
    """Recall of each cross-validation model on the validation features.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> recall.
    """
    return {
        fold: recall_score(y_test, model.predict(X_val.values, prediction_type='Class'))
        for fold, model in enumerate(cv_model, start=1)
    }

data=Recall_Score(cv_model,y_val)
stat.mean(data.values())

In [None]:
from sklearn.metrics import precision_score

def Precision_Score(cv_model,y_test):
    """Precision of each cross-validation model on the validation features.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> precision.
    """
    return {
        fold: precision_score(y_test, model.predict(X_val.values, prediction_type='Class'))
        for fold, model in enumerate(cv_model, start=1)
    }

data=Precision_Score(cv_model,y_val)
stat.mean(data.values())

In [None]:
from sklearn.metrics import f1_score

def f1_Score(cv_model,y_test):
    """F1 score of each cross-validation model on the validation features.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> F1 score.
    """
    return {
        fold: f1_score(y_test, model.predict(X_val.values, prediction_type='Class'))
        for fold, model in enumerate(cv_model, start=1)
    }

data=f1_Score(cv_model,y_val)
stat.mean(data.values())

from sklearn.metrics import make_scorer
# Scorer form of specificity (recall of class 0); not used by the function below.
specificity = make_scorer(recall_score, pos_label=0)

def Specificity_Score(cv_model,y_test):
    """Specificity (recall of class 0) of each cross-validation model.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> specificity.
    """
    return {
        fold: recall_score(y_test, model.predict(X_val.values, prediction_type='Class'), pos_label=0)
        for fold, model in enumerate(cv_model, start=1)
    }

data=Specificity_Score(cv_model,y_val)
stat.mean(data.values())

In [None]:
def FPR_Score(cv_model,y_test):
    """False positive rate (1 - specificity) of each cross-validation model.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> false positive rate.
    """
    return {
        fold: 1 - recall_score(y_test, model.predict(X_val.values, prediction_type='Class'), pos_label=0)
        for fold, model in enumerate(cv_model, start=1)
    }

data=FPR_Score(cv_model,y_val)
stat.mean(data.values())

In [None]:
from sklearn.metrics import roc_auc_score

def ROC_AUC_score(cv_model,y_test):
    """ROC AUC of each cross-validation model on the validation features.

    The AUC is computed from predicted probabilities rather than hard class
    labels, so it reflects how well each model ranks the rows.

    Parameters
    ----------
    cv_model : iterable
        Fitted models, one per fold (as returned by catboost ``cv``).
    y_test : array-like
        True labels for the rows of the global ``X_val``, which every model
        predicts on.

    Returns
    -------
    dict
        1-based fold number -> ROC AUC.
    """
    RA_score = {}
    for fold, model in enumerate(cv_model, start=1):
        # Column 1 is taken as the positive-class probability
        # (assumes binary 0/1 labels - TODO confirm).
        proba = model.predict(X_val.values, prediction_type='Probability')[:, 1]
        RA_score[fold] = roc_auc_score(y_test, proba)
    return RA_score

# The labels passed in must belong to X_val, hence y_val.
data=ROC_AUC_score(cv_model,y_val)
stat.mean(data.values())