<a href="https://colab.research.google.com/github/zrghassabi/machine-learning-interview/blob/main/ML_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

k-means from scratch

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def initialize_centroids(X, k):
    """
    Pick k rows of X as the starting centroids.

    The global NumPy RNG is re-seeded with 42 on every call, so the same
    rows are chosen each time for a given number of samples.

    Parameters:
    X: 2-D array of samples, one row per data point
    k: number of centroids to draw

    Returns:
    array: the first k rows of X after shuffling the row indices
    """
    np.random.seed(42)  # fixed seed keeps the selection reproducible
    shuffled_rows = np.random.permutation(X.shape[0])
    chosen_rows = shuffled_rows[:k]
    return X[chosen_rows]

def compute_distances(X, centroids):
    """
    Build the table of Euclidean distances from every sample to every centroid.

    Parameters:
    X: 2-D array of samples, one row per data point
    centroids: sequence of centroid vectors, each with X.shape[1] entries

    Returns:
    array: shape (X.shape[0], len(centroids)); entry [i, j] is the distance
    from sample i to centroid j
    """
    table = np.empty((X.shape[0], len(centroids)))
    # Each row of the transposed view is one column of the table, so writing
    # into it fills the distances for a single centroid.
    for column, center in zip(table.T, centroids):
        column[...] = np.linalg.norm(X - center, axis=1)
    return table

def assign_clusters(distances):
    """
    Pick, for every sample, the index of its closest centroid.

    Parameters:
    distances: 2-D table with one row per sample and one column per centroid

    Returns:
    array: for each row, the column index holding the smallest distance
    (the first such column on ties)
    """
    nearest = np.argmin(distances, axis=1)
    return nearest

def update_centroids(X, labels, k, previous_centroids=None):
    """
    Move each centroid to the mean of the samples assigned to it.

    Parameters:
    X: 2-D array of samples, one row per data point
    labels: 1-D array giving the cluster index of every row of X
    k: number of clusters
    previous_centroids: optional (k, n_features) array. When given, a cluster
        with no assigned samples keeps its previous position; when omitted,
        such a cluster is placed at the origin (the historical behaviour).

    Returns:
    array: float array of shape (k, X.shape[1]) with the updated centroids

    Raises:
    ValueError: if previous_centroids does not have shape (k, X.shape[1])
    """
    n_features = X.shape[1]
    if previous_centroids is None:
        new_centroids = np.zeros((k, n_features))
    else:
        # Work on a float copy so the caller's array is never modified.
        new_centroids = np.array(previous_centroids, dtype=float)
        if new_centroids.shape != (k, n_features):
            raise ValueError(
                f"previous_centroids must have shape {(k, n_features)}, "
                f"got {new_centroids.shape}"
            )

    for i in range(k):
        cluster_points = X[labels == i]
        # The mean of an empty selection is NaN, so empty clusters keep the
        # fallback value chosen above.
        if len(cluster_points) > 0:
            new_centroids[i] = cluster_points.mean(axis=0)
    return new_centroids

def k_means(X, k, max_iters=100):
    """
    Run the K-Means algorithm.

    Parameters:
    X: 2-D array of samples, one row per data point
    k: number of clusters; must satisfy 1 <= k <= number of samples
    max_iters: maximum number of assignment/update rounds

    Returns:
    tuple: (centroids, labels) where centroids holds one row per cluster and
    labels gives the index of the nearest returned centroid for every sample

    Raises:
    ValueError: if k is smaller than 1 or larger than the number of samples
    """
    n_samples = X.shape[0]
    if k < 1 or k > n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    # Step 1: Initialize centroids
    centroids = initialize_centroids(X, k)

    for _ in range(max_iters):
        # Step 2: Compute distances and assign clusters
        labels = assign_clusters(compute_distances(X, centroids))

        # Step 3: Update centroids
        new_centroids = update_centroids(X, labels, k)

        # A cluster that lost all of its points keeps its previous position
        # instead of being moved to an arbitrary default location.
        empty = np.bincount(labels, minlength=k) == 0
        new_centroids[empty] = centroids[empty]

        # Step 4: Check for convergence (if centroids do not change)
        if np.array_equal(centroids, new_centroids):
            break

        centroids = new_centroids
    else:
        # No convergence break happened (iteration budget exhausted, or
        # max_iters == 0): label the points against the centroids that are
        # actually returned so both outputs are consistent.
        labels = assign_clusters(compute_distances(X, centroids))

    return centroids, labels

# Example usage
if __name__ == "__main__":
    # sklearn is only needed to fabricate the demo data
    from sklearn.datasets import make_blobs

    # 300 two-dimensional points drawn around 3 centers, clustered with k = 3
    k = 3
    X, y = make_blobs(n_samples=300, centers=3, random_state=42)
    centroids, labels = k_means(X, k)

    # Samples coloured by assigned cluster, final centroids as large red crosses
    plt.scatter(
        X[:, 0],
        X[:, 1],
        c=labels,
        cmap='viridis',
        marker='o',
        edgecolor='k',
    )
    plt.scatter(
        centroids[:, 0],
        centroids[:, 1],
        s=300,
        c='red',
        marker='X',
    )
    plt.title('K-Means Clustering Results')
    plt.show()


Linear Regression from scratch

In [None]:
import numpy as np

# Linear Regression implementation
def predict(X, weights, bias):
    """
    Evaluate the linear model on X.

    Parameters:
    X: input samples
    weights: learned coefficients
    bias: learned intercept

    Returns:
    The dot product of X and weights, shifted by bias
    """
    linear_term = np.dot(X, weights)
    return linear_term + bias

def compute_loss(y_true, y_pred):
    """
    Compute Mean Squared Error (MSE) as the loss function.

    Parameters:
    y_true: target values, one entry per sample
    y_pred: predicted values, one entry per sample

    Returns:
    float: sum of squared errors divided by the number of samples
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # A column vector (n, 1) subtracted from a flat vector (n,) would
    # broadcast to an (n, n) matrix and inflate the loss. When both hold the
    # same number of values, compare them element by element instead.
    if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
        y_pred = y_pred.reshape(y_true.shape)

    n_samples = len(y_true)
    return (1 / n_samples) * np.sum((y_pred - y_true) ** 2)

def gradient_descent(X, y, weights, bias, learning_rate):
    """
    Perform one step of gradient descent to update weights and bias.

    Parameters:
    X: 2-D array of samples, one row per data point
    y: target values, one entry per sample (flat or column vector)
    weights: current coefficients
    bias: current intercept
    learning_rate: step size applied to both gradients

    Returns:
    tuple: (weights, bias) after the update. New objects are returned; the
    arrays passed in are left unmodified.
    """
    n_samples = len(y)

    # Calculate predictions
    y_pred = predict(X, weights, bias)

    # Align the targets with the predictions. Without this, a column-vector
    # y of shape (n, 1) against predictions of shape (n,) broadcasts to an
    # (n, n) residual matrix and yields a gradient of the wrong shape.
    y = np.asarray(y)
    if y.shape != np.shape(y_pred) and y.size == np.size(y_pred):
        y = y.reshape(np.shape(y_pred))
    residual = y_pred - y

    # Calculate gradients of the MSE loss
    dW = (2 / n_samples) * np.dot(X.T, residual)  # Gradient with respect to weights
    db = (2 / n_samples) * np.sum(residual)  # Gradient with respect to bias

    # Out-of-place update: does not mutate the caller's weights and also
    # works when the incoming weights have an integer dtype.
    weights = weights - learning_rate * dW
    bias = bias - learning_rate * db

    return weights, bias

def train(X, y, learning_rate=0.01, n_iters=1000):
    """
    Train the Linear Regression model using gradient descent.

    Prints the loss every 100 iterations.

    Parameters:
    X: 2-D array of samples with shape (n_samples, n_features)
    y: target values, either flat (n_samples,) or a column (n_samples, 1)
    learning_rate: step size for each gradient descent update
    n_iters: number of gradient descent steps to run

    Returns:
    tuple: (weights, bias) learned by the model
    """
    # Unpacking also rejects inputs that are not 2-D.
    _, n_features = X.shape

    # The model predicts a flat vector, so a column-vector target is
    # flattened up front; otherwise prediction - target would broadcast to
    # an (n_samples, n_samples) matrix.
    y = np.asarray(y)
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    # Initialize weights and bias
    weights = np.zeros(n_features)
    bias = 0.0

    # Gradient descent loop
    for i in range(n_iters):
        # Perform a single step of gradient descent
        weights, bias = gradient_descent(X, y, weights, bias, learning_rate)

        # Report the loss every 100 iterations
        if i % 100 == 0:
            loss = compute_loss(y, predict(X, weights, bias))
            print(f"Iteration {i}: Loss = {loss}")

    return weights, bias

# Example usage
if __name__ == "__main__":
    # Synthetic single-feature data: y = 4 + 3x plus standard-normal noise
    np.random.seed(42)
    X = np.random.rand(100, 1) * 2
    y = 4 + 3 * X + np.random.randn(100, 1)

    # Fit the model on the synthetic data
    weights, bias = train(X, y, n_iters=1000, learning_rate=0.01)

    # Evaluate the fitted model on one unseen input
    X_test = np.array([[1.5]])
    prediction = predict(X_test, weights, bias)
    print(f"Prediction for input {X_test}: {prediction}")


In [None]:
import numpy as np

# Prediction function for a single data point
def predict_single(x, weights, bias):
    """
    Evaluate the linear model on one data point without vectorised NumPy calls.

    Parameters:
    x: feature values of the data point, indexable by position
    weights: learned coefficients, one per feature
    bias: learned intercept

    Returns:
    The sum of weights[i] * x[i] over all weights, plus bias
    """
    # Index x by position so only the first len(weights) features are read.
    weighted_sum = sum(w * x[idx] for idx, w in enumerate(weights))
    return weighted_sum + bias

# Loss function (Mean Squared Error)
def compute_loss(y_true, y_pred):
    """
    Squared error of one prediction.

    Parameters:
    y_true: the target value
    y_pred: the predicted value

    Returns:
    The square of the difference between y_true and y_pred
    """
    residual = y_true - y_pred
    return residual ** 2

# Gradient descent for one step (updating weights and bias)
def gradient_descent(X, y, weights, bias, learning_rate):
    """
    Take one gradient descent step using explicit Python loops.

    The gradients are accumulated sample by sample and then applied to the
    parameters. The weights container is updated in place.

    Parameters:
    X: samples, indexable as X[i][j] (sample i, feature j)
    y: target values, one per sample
    weights: current coefficients; modified element by element
    bias: current intercept
    learning_rate: step size applied to both gradients

    Returns:
    tuple: (weights, bias) after the update
    """
    n_samples = len(y)
    n_weights = len(weights)

    # Gradient accumulators
    grad_w = np.zeros(n_weights)
    grad_b = 0

    # Accumulate each sample's contribution to the gradients
    for idx in range(n_samples):
        sample = X[idx]
        residual = predict_single(sample, weights, bias) - y[idx]

        # Factor shared by every weight gradient and by the bias gradient
        scaled = (2 / n_samples) * residual
        for j in range(n_weights):
            grad_w[j] += scaled * sample[j]
        grad_b += scaled

    # Apply the accumulated gradients
    for j, slope in enumerate(grad_w):
        weights[j] -= learning_rate * slope
    bias -= learning_rate * grad_b

    return weights, bias

# Training function for Linear Regression
def train(X, y, learning_rate=0.01, n_iters=1000):
    """
    Train the Linear Regression model using gradient descent.

    Built on the loop-based helpers predict_single, compute_loss and
    gradient_descent. Prints the mean squared error every 100 iterations.

    Parameters:
    X: samples, indexable as X[i][j] (sample i, feature j)
    y: target values, one per sample
    learning_rate: step size for each gradient descent update
    n_iters: number of gradient descent steps to run

    Returns:
    tuple: (weights, bias) learned by the model
    """
    n_samples = len(y)
    n_features = len(X[0]) if len(X) > 0 else 0

    # Initialize weights and bias
    weights = np.zeros(n_features)
    bias = 0.0

    # Gradient descent loop
    for i in range(n_iters):
        weights, bias = gradient_descent(X, y, weights, bias, learning_rate)

        # Report the average per-sample squared error every 100 iterations
        if i % 100 == 0 and n_samples > 0:
            total_loss = 0.0
            for idx in range(n_samples):
                y_pred = predict_single(X[idx], weights, bias)
                total_loss += compute_loss(y[idx], y_pred)
            loss = total_loss / n_samples
            print(f"Iteration {i}: Loss = {loss}")

    return weights, bias



In [None]:
def compute_iou(box1, box2):
    """
    Compute the Intersection over Union (IoU) of two bounding boxes.

    Parameters:
    box1: list or array of coordinates [x1, y1, x2, y2]
    box2: list or array of coordinates [x1, y1, x2, y2]

    Returns:
    float: IoU score; 0.0 when the union has no positive area (for example
    when both boxes are degenerate)
    """
    # Extent of the overlap along each axis, clamped at zero so that
    # non-overlapping boxes contribute no intersection area.
    overlap_width = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    overlap_height = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersection_area = overlap_width * overlap_height

    # Compute the area of both bounding boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Compute the area of union
    union_area = box1_area + box2_area - intersection_area

    # A non-positive union means there is nothing to compare; return a float
    # so the result type is the same on every path.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area
