In [0]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_breast_cancer

In [0]:
# Load scikit-learn's bundled breast-cancer dataset. The cells below read its
# .data, .feature_names and .target attributes.
dataset = load_breast_cancer()


In [0]:
type(dataset) # target encoding: 0 = malignant, 1 = benign

In [0]:
# Tabular view of the dataset: one column per feature, with the class label
# appended as a final 'Target' column.
df = pd.DataFrame(
  data=dataset.data,
  columns=dataset.feature_names,
).assign(Target=dataset.target)

In [0]:
# Preview the first rows of the frame (features plus the 'Target' column).
df.head()

In [0]:
# (rows, columns) of the frame; the column count includes the added 'Target'.
df.shape

In [0]:
def plotFeatures(X, y):
  """Scatter-plot the first two columns of X, coloured by class label.

  Parameters
  ----------
  X : 2-D array
      Feature matrix; only columns 0 and 1 are drawn.
  y : 1-D array
      Class labels aligned with the rows of X. Rows with ``y == 0`` are drawn
      in blue and labelled 'Malignant'; rows with ``y == 1`` are drawn in red
      and labelled 'Benign'. Rows with any other label are not drawn.

  Notes
  -----
  Opens a new 8x8 figure and adds a legend, but does not call ``plt.show()``,
  so callers can overlay further artists on the same axes.
  """
  plt.figure(figsize=(8, 8))
  # One scatter call per class so each class gets its own legend entry.
  for label_value, colour, name in ((0, 'b', 'Malignant'), (1, 'r', 'Benign')):
    members = X[y == label_value]
    plt.scatter(members[:, 0], members[:, 1], color=colour, label=name)
  plt.legend()

In [0]:
# Annotated heatmap of the pairwise correlations between every column of df
# (all features plus 'Target').
# plt.subplots returns a (Figure, Axes) pair, so unpack it and draw on the Axes
# explicitly rather than relying on pyplot's implicit current axes.
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(df.corr(), square=True, cbar=True, annot=True, annot_kws={'size': 9}, ax=ax)
ax.set_title('Feature correlation matrix')
plt.show()

In [0]:
# Train on just two features (columns 5 and 6 of the feature matrix) so the
# data and the decision boundary can be drawn in 2-D.
# NOTE(review): per the dataset's feature ordering these should be
# 'mean compactness' and 'mean concavity' — confirm with dataset.feature_names[5:7].
X = dataset.data[:, 5:7]
y = dataset.target

In [0]:
def sigmoid(z):
  """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

  Parameters
  ----------
  z : scalar or array-like
      Input value(s).

  Returns
  -------
  NumPy scalar for scalar input, otherwise an array with the shape of ``z``,
  with every element in [0, 1].

  Notes
  -----
  The direct formula computes ``exp(-z)``, which overflows (and emits a
  RuntimeWarning) for large negative ``z``. Here only ``exp(-|z|)`` is ever
  evaluated, which lies in [0, 1], and the algebraically equivalent form
  ``exp(z) / (1 + exp(z))`` is used for negative inputs.
  """
  z = np.asarray(z)
  decay = np.exp(-np.abs(z))
  result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
  # np.where yields a 0-d array for scalar input; indexing with () unwraps it
  # to a NumPy scalar and leaves n-d arrays as arrays.
  return result[()]

In [0]:
def predict(x, theta):
  """Return sigmoid(x . theta): the model's predicted probabilities.

  ``x`` and ``theta`` are combined with ``np.dot`` and the result is passed
  through ``sigmoid``.
  """
  linear_score = np.dot(x, theta)
  return sigmoid(linear_score)

In [0]:
# Sanity check: sigmoid(0) is exactly 0.5, so a positive input should give a
# value between 0.5 and 1.
sigmoid(5)

In [0]:
def calculateCost(h, y, eps=1e-15):
  """Mean binary cross-entropy between predicted probabilities and labels.

  Parameters
  ----------
  h : array-like
      Predicted probabilities.
  y : array-like
      Labels (0 or 1), broadcastable against ``h``.
  eps : float, optional
      Probabilities are clipped to ``[eps, 1 - eps]`` before taking logs.

  Returns
  -------
  The mean of ``-y*log(h) - (1-y)*log(1-h)`` over all elements.

  Notes
  -----
  Without clipping, a prediction of exactly 0 or 1 makes one of the logs
  ``-inf``; multiplied by a zero weight that becomes ``nan``, so even a
  perfect prediction would yield a ``nan`` cost.
  """
  y = np.asarray(y)
  h = np.clip(h, eps, 1 - eps)
  return (-y*np.log(h) - (1 - y)*np.log(1 - h)).mean()

In [0]:
def plotDecisionBoundary(X, theta, y):
  """Shade the two predicted-class regions of the model on the current axes.

  Parameters
  ----------
  X : 2-D array
      Feature matrix; the min/max of columns 0 and 1 set the plotted extent.
  theta : 1-D array
      Model weights, passed to ``predict`` together with the grid points.
  y : unused
      Kept so existing call sites keep working.

  Notes
  -----
  Draws with ``plt.contourf`` on whatever axes are current; it neither creates
  a figure nor calls ``plt.show()``.
  """
  # Plot extent: the bounding box of the data in the first two feature columns.
  x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
  x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
  # Regular grid over that box (np.linspace defaults to 50 points per axis),
  # flattened to one (x1, x2) row per grid point so predict can score it.
  xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
  grid = np.c_[xx1.ravel(), xx2.ravel()]
  probabilities = predict(grid, theta).reshape(xx1.shape)
  # Explicit levels put the border between the two filled regions exactly at
  # probability 0.5, i.e. the classifier's decision boundary. An integer
  # `levels` would let matplotlib choose the level itself.
  # `alpha` here is the fill transparency (not the learning rate).
  plt.contourf(xx1, xx2, probabilities, levels=[0.0, 0.5, 1.0], alpha=0.5)

In [0]:
# Visualise the two selected features by class before fitting the model.
plotFeatures(X,y)

In [0]:
def gradientDescentLogisticRegression(alpha = 0.001, iterations = 5001):
  """Fit logistic-regression weights by batch gradient descent.

  Reads the training data from the notebook-level variables ``X`` (feature
  matrix) and ``y`` (labels); they must be defined before this is called.

  Parameters
  ----------
  alpha : float
      Learning rate (step size of each update).
  iterations : int
      Number of gradient steps.

  Returns
  -------
  theta : 1-D array with one weight per column of ``X``. The model has no
  intercept term.

  Side effects
  ------------
  About five times over the run (every ``iterations // 5`` steps, starting at
  the first) it prints the cost and weights and shows the data with the
  current decision boundary. After the loop it plots the cost history on the
  current axes without calling ``plt.show()``. The progress plots use a 2-D
  grid, so ``X`` is expected to have exactly two columns.
  """
  costs = []
  # One weight per feature column, starting from zero.
  theta = np.zeros(X.shape[1])
  # Guard against iterations < 5, where iterations // 5 is 0 and the modulo
  # below would raise ZeroDivisionError.
  report_every = max(1, iterations // 5)
  for i in range(iterations):
    pred = predict(X, theta)
    # Cost of the weights used for this prediction, i.e. before the update.
    J = calculateCost(pred, y)
    costs.append(J)
    # Gradient of the mean cross-entropy with respect to theta.
    theta = theta - alpha*np.dot(X.T, (pred - y))/y.size
    if i % report_every == 0:
      print(f"Iteration: {i+1}, Cost = {J}, theta = {theta}")
      plotFeatures(X, y)
      plotDecisionBoundary(X, theta, y)
      plt.show()
  print("Cost function plot: ")
  plt.plot(range(iterations), costs)
  plt.xlabel('No. of iterations')
  plt.ylabel('J')
  return theta

In [0]:
# Fit on the two selected features. With 300001 iterations, progress is
# printed and plotted every 300001 // 5 = 60000 steps (6 reports in total).
theta = gradientDescentLogisticRegression(alpha = 0.001,iterations = 300001)