In [104]:
import nbformat
import numpy as np
from sklearn.metrics import (
    precision_score, recall_score, f1_score, balanced_accuracy_score, roc_auc_score, 
    average_precision_score
)
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import pandas as pd


In [105]:
%run LogRegCCD.ipynb

## Speech dataset

In [None]:
df = pd.read_csv('./data/speech.csv')
X = np.asmatrix(df.drop(columns='target'))
y = np.asarray(df['target'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
ccd_model = LogRegCCD(num_lambdas=10)
results, best_lambda = ccd_model.optimize_lambda(X_train, y_train, X_test, y_test, measure="f1")
ccd_model.fit(X_train, y_train, best_lambda)

ccd_probs = ccd_model.predict_proba(X_test)
ccd_preds = (ccd_probs >= 0.5).astype(int)
ccd_f1 = f1_score(y_test, ccd_preds)
print(f"LogRegCCD F1 Score: {ccd_f1:.4f}")

Fitting lmbda: 1.0


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.1


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.01


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.001


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.0001


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285


  return 1 / (1 + np.exp(-z))


LogRegCCD precision Score: 0.1200


In [None]:
# ccd_model.plot(results, measure="precision")

In [107]:
# ccd_model.plot_coefficients(results)

In [31]:
logreg = LogisticRegression(max_iter=1000, penalty='elasticnet', solver='saga', l1_ratio=0.9)
logreg.fit(np.asarray(X_train), y_train)
sklearn_preds = logreg.predict(np.asarray(X_test))
sklearn_f1 = f1_score(y_test, sklearn_preds)



## Arrhythmia dataset

In [108]:
df = pd.read_csv('./data/arrhythmia.csv')
X = np.asmatrix(df.drop(columns='target'))
y = np.asarray(df['target'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [109]:
ccd_model = LogRegCCD(num_lambdas=10)
results, best_lambda = ccd_model.optimize_lambda(X_train, y_train, X_test, y_test, measure="f1")
ccd_model.fit(X_train, y_train, best_lambda)

ccd_probs = ccd_model.predict_proba(X_test)
ccd_preds = (ccd_probs >= 0.5).astype(int)
ccd_f1 = f1_score(y_test, ccd_preds)
print(f"LogRegCCD F1 Score: {ccd_f1:.4f}")

Fitting lmbda: 1.0
F1 score: 0.0
Fitting lmbda: 0.35938136638046275
F1 score: 0.0
Fitting lmbda: 0.1291549665014884
F1 score: 0.0
Fitting lmbda: 0.046415888336127795
F1 score: 0.0
Fitting lmbda: 0.016681005372000592
F1 score: 0.0
Fitting lmbda: 0.005994842503189409
F1 score: 0.0
Fitting lmbda: 0.0021544346900318843
F1 score: 0.0
Fitting lmbda: 0.0007742636826811277
F1 score: 0.0
Fitting lmbda: 0.0002782559402207126
F1 score: 0.0
Fitting lmbda: 0.0001
F1 score: 0.0
LogRegCCD F1 Score: 0.0000


In [None]:
# ccd_model.plot(results, measure="precision")

In [None]:
# ccd_model.plot_coefficients(results)

In [None]:
logreg = LogisticRegression(max_iter=1000, penalty='elasticnet', solver='saga', l1_ratio=0.9)
logreg.fit(np.asarray(X_train), y_train)
sklearn_preds = logreg.predict(np.asarray(X_test))
sklearn_f1 = f1_score(y_test, sklearn_preds)



## Secom dataset

In [None]:
df = pd.read_csv('./data/secom.csv')
X = np.asmatrix(df.drop(columns='target'))
y = np.asarray(df['target'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
ccd_model = LogRegCCD(num_lambdas=10)
results, best_lambda = ccd_model.optimize_lambda(X_train, y_train, X_test, y_test, measure="f1")
ccd_model.fit(X_train, y_train, best_lambda)

ccd_probs = ccd_model.predict_proba(X_test)
ccd_preds = (ccd_probs >= 0.5).astype(int)
ccd_f1 = f1_score(y_test, ccd_preds)
print(f"LogRegCCD F1 Score: {ccd_f1:.4f}")

Fitting lmbda: 1.0


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.1


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.01


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.001


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285
Fitting lmbda: 0.0001


  return 1 / (1 + np.exp(-z))


PRECISION score: 0.14285714285714285


  return 1 / (1 + np.exp(-z))


LogRegCCD precision Score: 0.1200


In [None]:
# ccd_model.plot(results, measure="precision")

In [None]:
# ccd_model.plot_coefficients(results)

In [None]:
logreg = LogisticRegression(max_iter=1000, penalty='elasticnet', solver='saga', l1_ratio=0.9)
logreg.fit(np.asarray(X_train), y_train)
sklearn_preds = logreg.predict(np.asarray(X_test))
sklearn_f1 = f1_score(y_test, sklearn_preds)

