In [None]:
import numpy as np
import requests
import zipfile
import io
import pandas as pd
# URL for the dataset (UCI Bike-Sharing archive)
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases"
    "/00275/Bike-Sharing-Dataset.zip"
)
# Send a HTTP request to the URL; the timeout keeps the cell from hanging
# forever if the server does not answer
response = requests.get(url, timeout=30)
# Stop here with a clear HTTP error instead of handing an error page to ZipFile
response.raise_for_status()

# Open the archive from the in-memory response body and read 'day.csv'
# ('hour.csv' is the other table mentioned for this archive).
# The context managers close both the archive and the member handle once the
# CSV has been parsed.
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
    with zip_file.open('day.csv') as csv_file:
        # Read the CSV data
        data = pd.read_csv(csv_file)

In [None]:
class MyLinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    ``fit`` stores the coefficient vector in ``self.beta`` (intercept first,
    then one weight per feature column); ``predict`` applies it to new rows.
    """

    def __init__(self):
        # Coefficients [intercept, w_1, ..., w_d]; None until fit() has run.
        self.beta = None

    @staticmethod
    def _add_intercept(X):
        """Return X as a float 2-D array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            # Treat a flat vector as a single feature column
            X = X.reshape(-1, 1)
        return np.hstack([np.ones((X.shape[0], 1)), X])

    def fit(self, X, y):
        """Estimate ``beta`` by least squares.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix (a DataFrame or ndarray).
        y : array-like of shape (n_samples,) or (n_samples, k)
            Target values.
        """
        # Add a column of ones to X for the intercept term
        X = self._add_intercept(X)
        y = np.asarray(y, dtype=float)

        # Solve min ||X beta - y|| directly instead of inverting X.T @ X:
        # lstsq still returns a (minimum-norm) solution when the columns are
        # collinear, where the explicit inverse raises or loses all precision.
        self.beta, *_ = np.linalg.lstsq(X, y, rcond=None)

    def predict(self, X):
        """Return the fitted linear response ``[1, X] @ beta`` for each row of X."""
        # Add a column of ones to X for the intercept term
        X = self._add_intercept(X)

        # Make predictions
        y_pred = X @ self.beta
        return y_pred

In [None]:
class MyLinearClassification(MyLinearRegression):
    """Binary classifier that thresholds a least-squares linear fit.

    Training is inherited unchanged from ``MyLinearRegression.fit``; the
    fitted linear response is then used as a class-1 score.
    """

    def predict_proba(self, X):
        """Return the raw linear score ``[1, X] @ beta`` for each row of X.

        The score is not clipped, so it can fall outside [0, 1]; it is only
        a probability-like quantity.
        """
        # Add a column of ones to X for the intercept term
        X = np.hstack([np.ones([X.shape[0], 1]), X])

        # Make predictions
        return np.asarray(X @ self.beta)

    def predict(self, X, threshold=0.5):
        """Return integer labels: 1 where the score exceeds ``threshold``, else 0."""
        # Use predict_proba to predict the label 0 or 1
        return (self.predict_proba(X) > threshold).astype(int)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

# Use the 'temp' and 'windspeed' columns as the two input features
features = ['temp', 'windspeed']

# Name of the binary target column that is created just below
target = 'cnt_more_than_4000'

# Define the target variable: 1 where 'cnt' exceeds 4000, otherwise 0.
# Note: this adds the new column to `data` in place.
data[target] = (data['cnt'] > 4000).astype(int)

# Define the feature matrix X and the target variable y
X = data[features]
y = data[target]

# Split the data into a training set (80%) and a test set (20%);
# random_state is fixed so the same split is produced on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the model on the training data
linear_classifier = MyLinearClassification()
linear_classifier.fit(X_train, y_train)

# Predict the class labels on the test set (predict() uses its default threshold)
y_pred = linear_classifier.predict(X_test)

# Calculate the accuracy of the model and print it with two decimals
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

In [None]:
from matplotlib.colors import ListedColormap

def draw_decision_boundary(
    X, y, model, file_name, 
    title="2-Class classification (temperature vs windspeed)", 
    x_label='temp',
    y_label='windspeed'):
    """Plot a model's two-class decision regions with the data points on top.

    Parameters
    ----------
    X : DataFrame or array-like of shape (n_samples, 2)
        The two features; column 0 goes on the x-axis, column 1 on the y-axis.
    y : Series or array-like of shape (n_samples,) or (n_samples, 1)
        Labels; points equal to 1 are drawn as 'True', all others as 'False'.
    model : object with ``predict(array of shape (n, 2))``
        Its predictions over a regular grid colour the background.
    file_name : str, path-like or file object
        Destination handed to ``plt.savefig``. For a path, a missing parent
        directory is created first.
    title, x_label, y_label : str
        Figure title and axis labels.
    """
    import os  # local import: only needed to prepare the output directory

    # Create color maps
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00'])

    # Step size in the mesh
    h = .02

    # Accept DataFrames/Series as well as plain arrays
    points = np.asarray(X, dtype=float)
    labels = np.asarray(y).ravel()

    # Calculate the min, max and limits (0.1 margin around the data)
    x_min, x_max = points[:, 0].min() - 0.1, points[:, 0].max() + 0.1
    y_min, y_max = points[:, 1].min() - 0.1, points[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict the class for each mesh point
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot. Predictions may arrive as booleans
    # or as an (n, 1) column, so normalise them to a float grid first.
    Z = np.asarray(Z).astype(float).reshape(xx.shape)
    plt.figure()
    # Pin the colour scale to [0, 1]: with the default autoscaling a grid that
    # contains a single class would be coloured independently of which class
    # it is.
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light, vmin=0, vmax=1)

    # Create a mask for the two classes
    mask = labels == 1

    # Plot the points for class 0
    plt.scatter(points[~mask, 0], points[~mask, 1], c=cmap_bold.colors[0], edgecolor='k', s=20, label='False')

    # Plot the points for class 1
    plt.scatter(points[mask, 0], points[mask, 1], c=cmap_bold.colors[1], edgecolor='k', s=20, label='True')

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    # Add a legend
    plt.legend()

    # Make sure the target directory exists so savefig does not fail on a
    # fresh checkout (file objects are passed through untouched)
    if isinstance(file_name, (str, os.PathLike)):
        out_dir = os.path.dirname(os.fspath(file_name))
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    plt.savefig(file_name)
# plt.show()

In [None]:
# Draw the linear classifier's decision regions together with all points in X / y
# (the full feature matrix, not only the test split) and save the figure as a PDF.
draw_decision_boundary(X=X, y=y, model=linear_classifier, file_name='pictures/lr_boundary.pdf')

In [None]:
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
import matplotlib.pyplot as plt

def domain_comparasion():
    """Compare linear and logistic regression on a synthetic 1-D binary task.

    Draws 100 standard-normal samples (NumPy's global seed is reset to 0),
    labels each one 1.0 when it is positive and 0.0 otherwise, fits both
    scikit-learn models, and plots their outputs over [-3, 3] on top of the
    data. The figure is written to 'pictures/logistic_vs_lr.pdf'.
    """
    # Synthetic sample: the label is simply the sign of the single feature
    np.random.seed(0)
    samples = np.random.randn(100, 1)
    labels = (samples[:, 0] > 0).astype(float)

    # Fit the two models on the same data
    linear_model = LinearRegression().fit(samples, labels)
    logistic_model = LogisticRegression().fit(samples, labels)

    # Evaluate both on an evenly spaced grid of inputs
    grid = np.linspace(-3, 3, 1000).reshape(-1, 1)
    linear_output = linear_model.predict(grid)
    # Column 1 of predict_proba is the probability of class 1
    logistic_output = logistic_model.predict_proba(grid)[:, 1]

    # Data points first, then one curve per model
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(x=samples[:, 0], y=labels, ax=ax, color='blue', alpha=0.5, label='Data')
    curves = (
        (linear_output, 'red', 'Linear regression'),
        (logistic_output, 'green', 'Logistic regression'),
    )
    for values, colour, curve_label in curves:
        sns.lineplot(x=grid[:, 0], y=values, ax=ax, color=colour, alpha=0.8, label=curve_label)

    # Axis labels, legend, and output file
    ax.set_xlabel('X')
    ax.set_ylabel('y')
    plt.legend()
    plt.savefig('pictures/logistic_vs_lr.pdf')


In [None]:
# Produce and save the linear-vs-logistic comparison figure on synthetic data
domain_comparasion()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Define the logit transformation function
def logit(p):
    """Return the log-odds ``log(p / (1 - p))`` of a probability.

    Parameters
    ----------
    p : float or array-like
        Probability value(s). The result is infinite at 0 and 1 and NaN
        outside [0, 1].

    Returns
    -------
    float or ndarray
        Element-wise log-odds with the same shape as ``p``.
    """
    # Coerce first so plain Python lists and tuples work as well as scalars
    # and NumPy arrays (``1 - p`` is undefined for a list).
    p = np.asarray(p, dtype=float)
    return np.log(p / (1 - p))

# Generate 100 evenly spaced probabilities from 0.01 to 0.99
# (the end points 0 and 1 are left out: log(p / (1 - p)) is unbounded there)
p = np.linspace(0.01, 0.99, 100)

# Compute the logit transformation for each probability
l_p = logit(p)

# Create the plot: logit value against probability, with the formula as legend entry
plt.figure(figsize=(6, 4))
plt.plot(p, l_p, label=r'$l(p) = \log\left(\frac{p}{1-p}\right)$')
plt.xlabel('Probability (p)')
plt.ylabel('Logit transformation (l(p))')
plt.title('Logit Transformation Function')
plt.grid(True)
plt.legend()
# Save the current figure as a PDF under pictures/
plt.savefig('pictures/logit_func.pdf')

In [None]:
class MyLogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size of each gradient-descent update.
    num_iterations : int
        Number of gradient-descent updates performed by ``fit``.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        # Coefficients [intercept, w_1, ..., w_d]; None until fit() has run.
        self.beta = None

    def _sigmoid(self, x):
        """Map real-valued scores to (0, 1) with the logistic function."""
        return 1 / (1 + np.exp(-x))

    def fit(self, X, y):
        """Fit ``beta`` by minimising the mean log-loss.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix (a DataFrame or ndarray).
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary labels (0 or 1).
        """
        # Add a column of ones for the bias term
        X = np.hstack([np.ones([X.shape[0], 1]), np.asarray(X, dtype=float)])
        # Flat float vector so that (p - y) is element-wise and never
        # index-aligned (a pandas Series would otherwise align on its index)
        y = np.asarray(y, dtype=float).reshape(-1)

        num_samples, num_features = X.shape

        # Initialize beta
        self.beta = np.ones(num_features)

        # Gradient descent
        for _ in range(self.num_iterations):
            # Compute prediction probability
            p = self._sigmoid(X @ self.beta)
            # Compute gradients of the mean log-loss with respect to beta
            dw = X.T @ (p - y) / num_samples
            # Update beta
            self.beta = self.beta - self.learning_rate * dw

    def predict_proba(self, X):
        """Return the estimated probability of class 1 for each row of X."""
        # Add a column of ones for the bias term
        X = np.hstack([np.ones([X.shape[0], 1]), np.asarray(X, dtype=float)])

        return self._sigmoid(X @ self.beta)

    def predict(self, X, threshold=0.5):
        """Return integer labels: 1 where the probability exceeds ``threshold``, else 0."""
        probabilities = self.predict_proba(X)
        return (probabilities > threshold).astype(int)


In [None]:
# Fit the model on the training data
# Build the logistic model with its default hyperparameters
# (learning_rate=0.01, num_iterations=1000) and fit it on the training split
logistic_classifier = MyLogisticRegression()
logistic_classifier.fit(X_train, y_train)

# Predict the class labels on the test set (overwrites the earlier y_pred)
y_pred = logistic_classifier.predict(X_test)

# Calculate the accuracy of the model and print it with two decimals
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

In [None]:
# Comparing actual result to the predicted result
# Put the true test labels next to the predictions currently held in y_pred
df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
# Show 20 randomly chosen rows; no random_state is passed, so the selection
# differs from run to run
df.sample(20)

In [None]:
# Draw the logistic classifier's decision regions together with all points in X / y
# and save the figure as a PDF.
draw_decision_boundary(X=X, y=y, model=logistic_classifier, file_name='pictures/logistic_boundary.pdf')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Define the grid over the feature space
# Define the grid over the feature space (0.5 margin around the data, step 0.02)
x_min, x_max = X['temp'].min() - .5, X['temp'].max() + .5
y_min, y_max = X['windspeed'].min() - .5, X['windspeed'].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))

# Predict probabilities on the grid
Z_linear = linear_classifier.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z_logistic = logistic_classifier.predict_proba(np.c_[xx.ravel(), yy.ravel()])

# Reshape the predicted probabilities for plotting
Z_linear = Z_linear.reshape(xx.shape)
Z_logistic = Z_logistic.reshape(xx.shape)

# Define the colormap for the plot (two colours: below / above the midpoint)
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])

# Plot the predicted probabilities
plt.figure(figsize=(12, 6))

# The colour scale is pinned to [0, 1] in both panels so that the colour
# changes exactly at 0.5 (the classifiers' default threshold). With the
# default autoscaling each panel would switch colour at the midpoint of its
# own value range, and the two panels would not be comparable.
plt.subplot(1, 2, 1)
plt.pcolormesh(xx, yy, Z_linear, cmap=cmap_light, vmin=0, vmax=1)
plt.scatter(X['temp'], X['windspeed'], c=y, edgecolor='k', s=20)
plt.title('Linear Regression')
plt.xlabel('temp')
plt.ylabel('windspeed')

plt.subplot(1, 2, 2)
plt.pcolormesh(xx, yy, Z_logistic, cmap=cmap_light, vmin=0, vmax=1)
plt.scatter(X['temp'], X['windspeed'], c=y, edgecolor='k', s=20)
plt.title('Logistic Regression')
plt.xlabel('temp')
plt.ylabel('windspeed')

plt.tight_layout()
plt.savefig('pictures/linear_vs_logistics_boundaries.pdf')


In [None]:
# Choose a single feature for simplicity
def plot_probabilities(linear_classifier, logistic_classifier, X, feature):
    """Plot both classifiers' predicted probabilities along a single feature.

    Both classifiers are re-fitted in place on the one selected column, then
    evaluated on 1000 evenly spaced values between that column's minimum and
    maximum. The curves are drawn over the raw data and the figure is saved
    to 'pictures/linear_vs_logistic_probabilities.pdf'.

    Note: the labels are taken from the module-level variable ``y``; they are
    not a parameter of this function.

    Parameters
    ----------
    linear_classifier, logistic_classifier : objects with ``fit`` and ``predict_proba``
    X : DataFrame containing the column ``feature``
    feature : str
        Name of the column to use; also used as the x-axis label.
    """
    # Keep a one-column DataFrame so the models still receive 2-D input
    single_column = X[[feature]]

    # Refit each model on that single feature (linear first, then logistic)
    for classifier in (linear_classifier, logistic_classifier):
        classifier.fit(single_column, y)

    # Evenly spaced inputs spanning the observed range of the feature
    lowest, highest = single_column.min(), single_column.max()
    grid = np.linspace(lowest, highest, 1000)

    # One probability curve per model
    curves = (
        ('Linear Regression', linear_classifier.predict_proba(grid)),
        ('Logistic Regression', logistic_classifier.predict_proba(grid)),
    )

    plt.figure(figsize=(7, 6))
    for curve_label, probabilities in curves:
        plt.plot(grid, probabilities, label=curve_label)
    # Raw observations, drawn faintly behind the curves
    plt.scatter(single_column, y, edgecolor='k', alpha=0.1)
    plt.xlabel(feature)
    plt.ylabel('Predicted Probability')
    plt.legend()
    plt.grid(True)
    plt.savefig('pictures/linear_vs_logistic_probabilities.pdf')


In [None]:
# Compare the two models' probability curves along 'temp' alone.
# Note: plot_probabilities calls fit() on both classifiers, so after this cell
# they are models of that single feature.
plot_probabilities(
    linear_classifier=linear_classifier, 
    logistic_classifier=logistic_classifier, 
    X=X, feature='temp')

In [None]:
# Define the XOR problem
# Define the XOR problem: the label is 1 exactly when the two inputs differ.
# Note: this overwrites the module-level X and y used by the earlier cells.
X = np.array([[0, 0],[0, 1],[1, 0],[1, 1]])
y = np.array([0, 1, 1, 0])

# Fit a logistic regression model (default hyperparameters)
model = MyLogisticRegression()
# model.beta = np.ones(X.shape[1]) 
model.fit(X, y)

# Predict the output for the same four inputs the model was fitted on
predictions = model.predict(X)

# Display the predictions
print(predictions)

In [None]:
# Plot the four XOR points over the fitted model's decision regions.
# The NumPy arrays are wrapped in pandas objects, matching how
# draw_decision_boundary is called elsewhere in this notebook.
draw_decision_boundary(
    X=pd.DataFrame(X), y=pd.DataFrame(y)[0], model=model, 
    file_name='XOR_boundary.pdf',
    title='XOR classification',
    x_label='x_1',
    y_label='x_2')

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

class MyPerceptron:
    """Small sigmoid neural network (Keras) for binary classification.

    The network is one hidden ``Dense`` layer followed by a single sigmoid
    output unit, trained with plain SGD on mean squared error. A hidden layer
    is included because a single sigmoid unit can only draw a linear boundary.

    Parameters
    ----------
    learning_rate : float
        SGD step size.
    num_iterations : int
        Number of training epochs run by ``fit``.
    input_dim : int
        Number of input features.
    hidden_units : int
        Width of the hidden layer.
    verbose : int
        Keras verbosity for ``fit`` and ``predict``; 0 keeps the notebook
        free of one log line per epoch.
    """

    def __init__(self, learning_rate=10, num_iterations=1000,
                 input_dim=2, hidden_units=4, verbose=0):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.verbose = verbose
        self.model = Sequential([
            # Hidden layer: non-linear combinations of the inputs
            Dense(units=hidden_units, input_dim=input_dim, activation='sigmoid'),
            # Output layer: one sigmoid unit giving a score in (0, 1)
            Dense(units=1, activation='sigmoid'),
        ])
        self.model.compile(
            loss='mean_squared_error', 
            optimizer=SGD(learning_rate=self.learning_rate))

    def fit(self, X, y):
        """Train the network on (X, y) for ``num_iterations`` epochs."""
        self.model.fit(X, y, epochs=self.num_iterations, verbose=self.verbose)

    def predict(self, X):
        """Return a boolean array: True where the network output exceeds 0.5."""
        y_pred = self.model.predict(X, verbose=self.verbose)
        return y_pred > 0.5


In [None]:
# Define the XOR problem
# Define the XOR problem (overwrites the module-level X and y)
X = np.array([[0, 0],[0, 1],[1, 0],[1, 1]])
# Labels as a column vector of shape (4, 1)
y = np.array([0, 1, 1, 0]).reshape(-1,1)

# Fit the Keras-based MyPerceptron model (not a logistic regression)
model = MyPerceptron()
# model.beta = np.ones(X.shape[1]) 
model.fit(X, y)

# Predict the output for the same four inputs the model was fitted on
predictions = model.predict(X)

# Display the predictions (MyPerceptron.predict returns booleans)
print(predictions)

In [None]:
# Plot the four XOR points over the neural network's decision regions.
# y is a (4, 1) column here, so column 0 of the wrapping DataFrame is passed
# as the label Series.
draw_decision_boundary(X=pd.DataFrame(X), y=pd.DataFrame(y)[0], model=model, 
    file_name='XOR_boundary_nn.pdf',
    title='XOR classification',
    x_label='x_1',
    y_label='x_2')

In [None]:
from sklearn.linear_model import LinearRegression, LogisticRegression,\
    Perceptron, SGDClassifier, PassiveAggressiveClassifier

# Fit several scikit-learn linear models with default settings on the
# training split and collect each one's test-set predictions, in the order
# the models are listed.
y_pred_list = []
for model in (
    LinearRegression(),
    LogisticRegression(),
    Perceptron(),
    SGDClassifier(),
    PassiveAggressiveClassifier(),
):
    model.fit(X_train, y_train)
    y_pred_list += [model.predict(X_test)]

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Same procedure with non-linear classifiers (default settings): fit on the
# training split and collect the test-set predictions in listing order.
# y_pred_list is started afresh, so earlier results are discarded.
y_pred_list = []
for model in (
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    SVC(),
    KNeighborsClassifier(),
    GradientBoostingClassifier(),
):
    model.fit(X_train, y_train)
    y_pred_list += [model.predict(X_test)]

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Same procedure with regressors (default settings). They are fitted on the
# 0/1 training labels, so their predictions are continuous values rather
# than class labels. y_pred_list is started afresh once more.
y_pred_list = []
for model in (
    GaussianProcessRegressor(),
    MLPRegressor(),
    SVR(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
):
    model.fit(X_train, y_train)
    y_pred_list += [model.predict(X_test)]