In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Synthetic data: y = 4 + 3x plus standard-normal noise. The global NumPy seed
# is fixed so the generated samples are the same on every run.
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Hold out 20% of the samples so the fit can be checked on unseen data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# y is 2-D (n, 1), hence the nested indexing into coef_ and intercept_.
print(f"Coefficient: {model.coef_[0][0]}")
print(f"Intercept: {model.intercept_[0]}")

train_mse = mean_squared_error(y_train, y_train_pred)
train_r2 = r2_score(y_train, y_train_pred)

# Score the held-out split as well: training metrics alone say nothing about
# how the model generalises, and y_test_pred was otherwise only used for the plot.
test_mse = mean_squared_error(y_test, y_test_pred)
test_r2 = r2_score(y_test, y_test_pred)

print(f"Training MSE: {train_mse}")
print(f"Training R2: {train_r2}")
print(f"Testing MSE: {test_mse}")
print(f"Testing R2: {test_r2}")

plt.figure(figsize=(10, 6))

# Held-out points with the fitted line drawn through their predictions.
plt.scatter(X_test, y_test, color='green', label='Testing data')
plt.plot(X_test, y_test_pred, color='red', linewidth=2, label='Model')
plt.title('Testing Data')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Binary problem: keep the first two feature columns and only the rows whose
# target is not class 2.
iris = datasets.load_iris()
two_class_mask = iris.target != 2
X = iris.data[two_class_mask, :2]
y = iris.target[two_class_mask]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Linear-kernel SVM fitted on the training split.
clf = SVC(kernel='linear', C=0.1).fit(X_train, y_train)

# Scatter every sample first so the axis limits cover the full data range.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='winter')
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()

# 30 x 30 grid over the visible area; 'ij' indexing makes axis 0 follow xx.
xx = np.linspace(*xlim, 30)
yy = np.linspace(*ylim, 30)
XX, YY = np.meshgrid(xx, yy, indexing='ij')
xy = np.column_stack([XX.ravel(), YY.ravel()])
Z = clf.decision_function(xy).reshape(XX.shape)

# Level 0 is the decision boundary; levels -1 and +1 are the margins.
ax.contour(
    XX, YY, Z,
    colors='k',
    levels=[-1, 0, 1],
    alpha=0.5,
    linestyles=['--', '-', '--'],
)
ax.set_title("Linear SVM Decision Boundary")
plt.show()

In [None]:
from sklearn.datasets import make_circles
# Two noisy concentric circles. random_state pins the generated samples so the
# figure is reproducible (the train/test split below was already seeded).
X,y=make_circles(n_samples=300,factor=.3,noise=.1,random_state=42)

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

# RBF-kernel SVM fitted on the training split.
clf=SVC(kernel='rbf',C=1,gamma=0.1)

clf.fit(X_train,y_train)

# Scatter every sample first so the axis limits cover the full data range.
plt.scatter(X[:,0],X[:,1],c=y,cmap='winter')
ax=plt.gca()
xlim=ax.get_xlim()
ylim=ax.get_ylim()

# Evaluate the decision function on a 30 x 30 grid spanning the visible area.
xx=np.linspace(xlim[0],xlim[1],30)
yy=np.linspace(ylim[0],ylim[1],30)
YY,XX=np.meshgrid(yy,xx)
xy=np.vstack([XX.ravel(),YY.ravel()]).T
Z=clf.decision_function(xy).reshape(XX.shape)

# Level 0 is the decision boundary; levels -1 and +1 are the margins.
ax.contour(XX,YY,Z,colors='k',levels=[-1,0,1],alpha=0.5,
           linestyles=['--','-','--'])
plt.title("Non-Linear SVM Decision Boundary with RBF Kernel")
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from sklearn.datasets import make_moons
# Two interleaving half-moons. random_state pins the generated samples so the
# figure is reproducible (the train/test split below was already seeded).
X,y=make_moons(n_samples=300,noise=0.2,random_state=42)

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

# Degree-3 polynomial-kernel SVM fitted on the training split.
clf=SVC(kernel='poly',degree=3,C=1.0)

clf.fit(X_train,y_train)

# Scatter every sample first so the axis limits cover the full data range.
plt.scatter(X[:,0],X[:,1],c=y,cmap='winter')
ax=plt.gca()
xlim=ax.get_xlim()
ylim=ax.get_ylim()

# Evaluate the decision function on a 30 x 30 grid spanning the visible area.
xx=np.linspace(xlim[0],xlim[1],30)
yy=np.linspace(ylim[0],ylim[1],30)
YY,XX=np.meshgrid(yy,xx)
xy=np.vstack([XX.ravel(),YY.ravel()]).T
Z=clf.decision_function(xy).reshape(XX.shape)

# Level 0 is the decision boundary; levels -1 and +1 are the margins.
ax.contour(XX,YY,Z,colors='k',levels=[-1,0,1],alpha=0.5,
           linestyles=['--','-','--'])
plt.title("Non-Linear SVM Decision Boundary with Polynomial Kernel")
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC


# Binary problem: keep the first two feature columns and only the rows whose
# target is not class 2.
iris = datasets.load_iris()
two_class_mask = iris.target != 2
X = iris.data[two_class_mask, :2]
y = iris.target[two_class_mask]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Candidate hyper-parameters: 4 values of C x 4 values of gamma x 3 kernels.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[1, 0.1, 0.01, 0.001],
    kernel=['rbf', 'poly', 'sigmoid'],
)

# Exhaustive 5-fold cross-validated search on the training split, using all cores.
svc = SVC()
grid_search = GridSearchCV(svc, param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Winning parameter combination, then the estimator refitted with it.
print(grid_search.best_params_, grid_search.best_estimator_, sep='\n')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw CSV into df and preview its first rows.
# NOTE(review): the absolute /content/ path looks environment-specific
# (presumably Google Colab) — confirm before running elsewhere.
df=pd.read_csv('/content/UniversalBank.csv')
df.head()

In [None]:
# Count the missing values in each column of the raw frame.
df.isnull().sum()

In [None]:
# Build df1 without the ID and ZIP Code columns; df itself is left unchanged.
# NOTE(review): presumably dropped because they are identifiers rather than
# predictive features — confirm.
df1=df.drop(["ID","ZIP Code"],axis=1)
df1.head()

In [None]:
# Pairwise correlation of every column in df1, drawn as an annotated heatmap.
correlations = df1.corr()

plt.figure(figsize=(15, 8))
plt.title(
    "Heatmap showing Correlation between all the features",
    fontsize=20,
)
sns.heatmap(correlations, annot=True, cmap='mako')

In [None]:
# Rows whose CreditCard flag is 0, and how many there are.
zero_class = df1.loc[df1['CreditCard'] == 0]
zero_class.shape

In [None]:
# Rows whose CreditCard flag is 1, and how many there are.
one_class = df1.loc[df1['CreditCard'] == 1]
one_class.shape

In [None]:
# Income vs. Experience, one marker style per CreditCard value.
plt.scatter(zero_class['Income'],zero_class['Experience'],color='green',marker='+',
            label='CreditCard = 0')
plt.scatter(one_class['Income'],one_class['Experience'],color='red',marker='.',
            label='CreditCard = 1')
plt.xlabel('Income')
plt.ylabel('Experience')
# Without a legend the two colours cannot be mapped back to a class.
plt.legend()
plt.show()

In [None]:
# CCAvg vs. Family, one marker style per CreditCard value.
plt.scatter(zero_class['CCAvg'],zero_class['Family'],color='blue',marker='+',
            label='CreditCard = 0')
plt.scatter(one_class['CCAvg'],one_class['Family'],color='red',marker='.',
            label='CreditCard = 1')
plt.xlabel('CCAvg')
plt.ylabel('Family')
# Without a legend the two colours cannot be mapped back to a class.
plt.legend()
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise every column except the CreditCard target.
scaler=StandardScaler()
features=df1.drop('CreditCard',axis=1)
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test rows influence the scaling statistics. Consider splitting
# first and fitting the scaler on the training rows only.
scaled=scaler.fit_transform(features)
# Take column names and index from the frame that was actually scaled, rather
# than assuming CreditCard is the last column of df1.
df_scaled=pd.DataFrame(scaled,columns=features.columns,index=features.index)
df_scaled.head()

In [None]:
# Feature matrix: the standardised columns held in df_scaled.
x=df_scaled
# Target vector: the CreditCard column of the unscaled frame.
y=df1['CreditCard']

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state=0 makes the split repeatable.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Baseline: SVC with its default settings, scored on the held-out rows.
svc = SVC().fit(x_train, y_train)
y_pred = svc.predict(x_test)
baseline_accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy:{0:0.3f}'.format(baseline_accuracy))

In [None]:
from sklearn.metrics import confusion_matrix
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, in the order given by `labels` (0 first), so cm[0, 0] is the
# true-negative count. The axis labels below follow that layout.
cm=confusion_matrix(y_test,y_pred,labels=[0,1])
cm_matrix=pd.DataFrame(data=cm, index=['Actual Negative:0', 'Actual Positive:1'],
                       columns=['Predict Negative:0', 'Predict Positive:1'])
sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='mako')

In [None]:
from sklearn.metrics import classification_report
# Per-class precision, recall, F1-score and support for the current y_pred.
print(classification_report(y_test,y_pred))

In [None]:
# SVM with a linear kernel, scored on the held-out rows.
linear_classifier = SVC(kernel='linear')
linear_classifier.fit(x_train, y_train)
y_pred = linear_classifier.predict(x_test)
linear_accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy:{0:0.3f}'.format(linear_accuracy))

In [None]:
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, in the order given by `labels` (0 first), so cm[0, 0] is the
# true-negative count. The axis labels below follow that layout.
cm=confusion_matrix(y_test,y_pred,labels=[0,1])
cm_matrix=pd.DataFrame(data=cm, index=['Actual Negative:0', 'Actual Positive:1'],
                       columns=['Predict Negative:0', 'Predict Positive:1'])
sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='mako')

In [None]:
# Per-class precision, recall, F1-score and support for the current y_pred.
print(classification_report(y_test,y_pred))

In [None]:
# SVM with an RBF kernel, scored on the held-out rows.
rbf_svc = SVC(kernel='rbf')
rbf_svc.fit(x_train, y_train)
y_pred = rbf_svc.predict(x_test)
rbf_accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy with rbf kernel:{0:0.3f}'.format(rbf_accuracy))

In [None]:
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, in the order given by `labels` (0 first), so cm[0, 0] is the
# true-negative count. The axis labels below follow that layout.
cm=confusion_matrix(y_test,y_pred,labels=[0,1])
cm_matrix=pd.DataFrame(data=cm, index=['Actual Negative:0', 'Actual Positive:1'],
                       columns=['Predict Negative:0', 'Predict Positive:1'])
sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='mako')

In [None]:
# Per-class precision, recall, F1-score and support for the current y_pred.
print(classification_report(y_test,y_pred))

In [None]:
# SVM with a polynomial kernel, scored on the held-out rows.
Poly_svc = SVC(kernel='poly')
Poly_svc.fit(x_train, y_train)
y_pred = Poly_svc.predict(x_test)
poly_accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy with polynomial kernel:{0:0.3f}'.format(poly_accuracy))

In [None]:
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, in the order given by `labels` (0 first), so cm[0, 0] is the
# true-negative count. The axis labels below follow that layout.
cm=confusion_matrix(y_test,y_pred,labels=[0,1])
cm_matrix=pd.DataFrame(data=cm, index=['Actual Negative:0', 'Actual Positive:1'],
                       columns=['Predict Negative:0', 'Predict Positive:1'])
sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='mako')

In [None]:
# Per-class precision, recall, F1-score and support for the current y_pred.
print(classification_report(y_test,y_pred))

In [None]:
# SVM with a sigmoid kernel, scored on the held-out rows. It is stored under
# its own name so the polynomial model in Poly_svc is not overwritten, and the
# printed label names the kernel that was actually fitted.
sigmoid_svc=SVC(kernel='sigmoid').fit(x_train,y_train)
y_pred=sigmoid_svc.predict(x_test)
print('Model accuracy with sigmoid kernel:{0:0.3f}'.format(accuracy_score(y_test,y_pred)))

In [None]:
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, in the order given by `labels` (0 first), so cm[0, 0] is the
# true-negative count. The axis labels below follow that layout.
cm=confusion_matrix(y_test,y_pred,labels=[0,1])
cm_matrix=pd.DataFrame(data=cm, index=['Actual Negative:0', 'Actual Positive:1'],
                       columns=['Predict Negative:0', 'Predict Positive:1'])
sns.heatmap(cm_matrix, annot=True, fmt='d', cmap='mako')

In [None]:
# Per-class precision, recall, F1-score and support for the current y_pred.
print(classification_report(y_test,y_pred))

In [None]:
# Flag rows of df1 that repeat an earlier row, then report how many there are
# and which ones.
duplicate_rows = df1.duplicated()
duplicates = df1.loc[duplicate_rows]

print(f"Number of duplicate rows: {duplicate_rows.sum()}")
print("Duplicate rows:")
print(duplicates)

In [None]:
# Removing duplicate values: the first occurrence of each row is kept and the
# result is stored in df2, so df1 is left unchanged.
df2 = df1.drop_duplicates()

In [None]:
# Checking duplicates again: counts the rows of df2 that repeat an earlier row
# (expected to be 0 because df2 comes from drop_duplicates()).
df2.duplicated().sum()

In [None]:
# Column dtypes, non-null counts and memory usage of the de-duplicated frame.
df2.info()

In [None]:
# Preview the first rows of the de-duplicated frame.
df2.head()

In [None]:
# Percentage of missing values in each column of df2, to three decimals.
(df2.isnull().sum() / len(df2) * 100).round(3)

In [None]:
# Removing outlier rows with the 1.5 * IQR rule.
#
# The rule is applied only to numeric columns with more than two distinct
# values. For a 0/1 flag (such as CreditCard) where one value covers fewer than
# 25% of the rows, q1 == q3, so the IQR is 0 and every row carrying the rarer
# value would be flagged and dropped, removing that class from the data.
numeric_part = df2.select_dtypes(include='number')
iqr_columns = numeric_part.columns[numeric_part.nunique() > 2]

q1 = df2[iqr_columns].quantile(0.25)
q3 = df2[iqr_columns].quantile(0.75)

# Calculate IQR
iqr = q3 - q1

# Determine outliers: a cell is flagged when it lies outside
# [q1 - 1.5 * IQR, q3 + 1.5 * IQR] for its column.
is_outlier = (df2[iqr_columns] < (q1 - 1.5 * iqr)) | (df2[iqr_columns] > (q3 + 1.5 * iqr))

# Drop outliers: keep only the rows with no flagged cell; all columns of df2
# are retained in df3.
df3 = df2[~is_outlier.any(axis=1)]

In [None]:
# Preview the first rows that remain after outlier removal.
df3.head()

In [None]:
# (rows, columns) of the raw frame as loaded.
df.shape

In [None]:
# (rows, columns) left after de-duplication and outlier removal.
df3.shape