In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
from sklearn.datasets import make_classification

In [None]:
# Synthetic binary classification data: 1000 samples, 10 features, fixed seed.
X,y=make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

In [None]:
# Wrap the feature matrix in a DataFrame so the notebook renders it as a table.
pd.DataFrame(X)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing; the seed keeps the split repeatable.
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression
# Baseline classifier with scikit-learn's default hyperparameters.
logistic=LogisticRegression()

In [None]:
# Train the baseline model on the training split only.
logistic.fit(X_train , y_train)

In [None]:
# Predicted class labels for the held-out test split.
y_pred=logistic.predict(X_test)
print(y_pred)

In [None]:
# Class-membership probability estimates for the test split (one column per class).
logistic.predict_proba(X_test)

In [None]:
from sklearn.metrics import accuracy_score , confusion_matrix , classification_report

In [None]:
# Evaluate the baseline model: compute the scalar and matrix metrics first,
# then print accuracy, confusion matrix and the per-class report in that order.
score=accuracy_score(y_test,y_pred)
cm=confusion_matrix(y_test,y_pred)
print(score)
print(cm)
print(classification_report(y_test,y_pred))

## Hyperparameter Tuning and Cross-Validation


In [None]:
# Unfitted estimator handed to the hyperparameter searches below.
model=LogisticRegression()
# Candidate values for the hyperparameter search.
penalty=[
    'l1',
    'l2',
    'elasticnet',
]
c_values=[
    100,
    10,
    1.0,
    0.1,
    0.01,
]
solver=[
    'newton-cg',
    'lbfgs',
    'liblinear',
    'sag',
    'saga',
]

In [None]:
# Search only penalty/solver pairs that LogisticRegression accepts.
# The plain cross-product pairs 'l1' and 'elasticnet' with solvers that reject
# them, so those candidates fail to fit (FitFailedWarning, NaN score) and only
# waste time. Every solver listed supports 'l2'; only liblinear and saga
# support 'l1'.
# 'elasticnet' is left out: it needs solver='saga' plus an explicit l1_ratio.
# To search it, add e.g. dict(penalty=['elasticnet'],C=c_values,solver=['saga'],l1_ratio=[...]).
l1_capable_solvers={'liblinear','saga'}
params=[
    dict(penalty=['l2'],C=c_values,solver=solver),
    dict(penalty=['l1'],C=c_values,solver=[s for s in solver if s in l1_capable_solvers]),
]

In [None]:
from sklearn.model_selection import StratifiedKFold
# Stratified K-fold splitter with scikit-learn's default settings.
cv=StratifiedKFold()

In [None]:
## Exhaustive grid search over `params`, scored by accuracy on the `cv` folds.
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(
    estimator=model,
    param_grid=params,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)


In [None]:
# Show the configured search object before fitting.
grid

In [None]:
# Run the cross-validated grid search on the training split.
grid.fit(X_train,y_train)

In [None]:
# Hyperparameter combination that achieved the best mean cross-validated score.
grid.best_params_

In [None]:
# Mean cross-validated score of the best candidate (accuracy, per the scoring argument).
grid.best_score_

In [None]:
# Predict test labels with the best estimator found by the search.
y_pred=grid.predict(X_test)

In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy is
# symmetric, but swapping the arguments transposes the confusion matrix and
# exchanges precision with recall (and reports support for predicted labels).
score=accuracy_score(y_test , y_pred)
print(score)
print(classification_report(y_test , y_pred))
print(confusion_matrix(y_test , y_pred))

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Randomized search evaluates a random sample of the candidates in `params`
# (n_iter defaults to 10). Fixing random_state makes that sample, and so the
# reported best score and parameters, repeatable across runs.
randomcv=RandomizedSearchCV(estimator=model , param_distributions=params,cv=5,scoring='accuracy',random_state=42)

In [None]:
# Run the randomized search on the training split.
randomcv.fit(X_train,y_train)

In [None]:
# Mean cross-validated score of the best sampled candidate.
randomcv.best_score_

In [None]:
# Hyperparameters of the best sampled candidate.
randomcv.best_params_

In [None]:
# Predict test labels with the best estimator found by the randomized search.
y_pred=randomcv.predict(X_test)

In [None]:
# Pass the ground truth first: scikit-learn metrics expect (y_true, y_pred).
# With the arguments reversed the confusion matrix is transposed and the
# report's precision and recall columns are exchanged.
score=accuracy_score(y_test , y_pred)
print(score)
print(classification_report(y_test , y_pred))
print(confusion_matrix(y_test , y_pred))

## Logistic Regression for Multiclass Classification

In [None]:
# Synthetic three-class data: 1000 samples, 10 features (3 informative), fixed seed.
X,y=make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=3,
    n_classes=3,
    random_state=15,
)

In [None]:
# Inspect the raw feature array.
X

In [None]:
from sklearn.model_selection import train_test_split
# 70/30 train/test split of the three-class data, seeded for repeatability.
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
# One-vs-rest: fit one binary logistic model per class and predict the class
# whose model scores highest. LogisticRegression's `multi_class` argument is
# deprecated (since scikit-learn 1.5) and its documentation recommends
# wrapping the estimator in OneVsRestClassifier instead.
logistic=OneVsRestClassifier(LogisticRegression())
logistic.fit(X_train,y_train)
y_pred=logistic.predict(X_test)

In [None]:
# Inspect the predicted class labels for the test split.
y_pred

In [None]:
# Ground truth goes first: scikit-learn metrics are defined as f(y_true, y_pred).
# Reversing them leaves accuracy unchanged but transposes the confusion matrix
# and swaps each class's precision and recall in the report.
score=accuracy_score(y_test , y_pred)
print(score)
print(classification_report(y_test , y_pred))
print(confusion_matrix(y_test , y_pred))

In [None]:
from collections import Counter
from sklearn.datasets import make_classification

In [None]:
# Two-feature synthetic data set with a heavy class imbalance:
# weights=[0.99] assigns roughly 99% of the samples to the first class.
X,y=make_classification(
    n_samples=10000,
    n_features=2,
    n_clusters_per_class=1,
    n_redundant=0,
    weights=[0.99],
    random_state=10,
)

In [None]:
import seaborn as sns
import pandas as pd

# Plot feature 0 against feature 1, colouring each point by its class label.
# The frame is built once and both columns are read from it.
features_df=pd.DataFrame(X)
sns.scatterplot(x=features_df[0], y=features_df[1], hue=y)

In [None]:
from sklearn.model_selection import train_test_split
# Keep 25% of the imbalanced data aside for testing; seeded for repeatability.
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression
# Fresh unfitted estimator for the class-weighted search.
model = LogisticRegression()
# Candidate hyperparameter values for the class-weighted search.
penalty=[
    'l1',
    'l2',
    'elasticnet',
]
c_values=[
    100,
    10,
    1.0,
    0.1,
    0.01,
]
solver=[
    'newton-cg',
    'lbfgs',
    'liblinear',
    'sag',
    'saga',
]
# Every pairing of a weight for class 0 with a weight for class 1 (4 x 4 = 16 dicts).
class_weight=[
    {0:weight_0,1:weight_1}
    for weight_0 in [1,10,50,100]
    for weight_1 in [1,10,50,100]
]

In [None]:
# Restrict the grid to penalty/solver pairs that LogisticRegression accepts.
# The plain cross-product pairs 'l1' and 'elasticnet' with solvers that reject
# them; with 16 class-weight settings on top, that is hundreds of candidates
# that can only fail to fit. Every solver listed supports 'l2'; only liblinear
# and saga support 'l1'.
# 'elasticnet' is left out: it needs solver='saga' plus an explicit l1_ratio.
l1_capable_solvers={'liblinear','saga'}
params=[
    dict(penalty=['l2'],C=c_values, solver=solver, class_weight=class_weight),
    dict(penalty=['l1'],C=c_values, solver=[s for s in solver if s in l1_capable_solvers], class_weight=class_weight),
]

In [None]:
# Inspect the search space that will be handed to the grid search.
params

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
# Stratified folds preserve the class proportions in every fold.
cv=StratifiedKFold()
# n_jobs=-1 runs the candidate fits on all CPU cores, as the earlier grid
# search in this notebook does; this grid is much larger, so a serial run is slow.
# NOTE(review): the data is heavily imbalanced, so accuracy can look high for a
# model that mostly predicts the majority class; consider 'f1' or
# 'balanced_accuracy' as the scoring metric.
grid=GridSearchCV(estimator=model , param_grid=params , scoring = 'accuracy' , cv=cv , n_jobs=-1)

In [None]:
# Run the class-weighted grid search on the training split.
grid.fit(X_train , y_train)

In [None]:
# Best hyperparameter combination, including the selected class weights.
grid.best_params_

In [None]:
# Predict test labels with the best estimator from the class-weighted search.
y_pred=grid.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix

In [None]:
# scikit-learn metrics expect (y_true, y_pred). On imbalanced data the order
# matters a great deal: reversed arguments swap precision and recall for the
# rare class and transpose the confusion matrix.
score=accuracy_score(y_test , y_pred)
print(score)
print(classification_report(y_test , y_pred))
print(confusion_matrix(y_test , y_pred))

In [None]:
from sklearn.datasets import make_classification 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split 
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot

In [None]:
# Fresh binary data set of 1000 samples for the ROC comparison (seeded).
X,y=make_classification(
    n_samples=1000,
    n_classes=2,
    random_state=1,
)


In [None]:
from sklearn.model_selection import train_test_split
# 75/25 train/test split with a fixed seed.
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Constant baseline: a score of 0 for every test sample.
dummy_model_prob=[0]*len(y_test)
dummy_model_prob

In [None]:
# Default logistic regression fitted on the training split.
model= LogisticRegression()
model.fit(X_train,y_train)

In [None]:
model_prob = model.predict_pro