Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: TheAlgorithms/Python
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: prajwal-38/Python
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Able to merge. These branches can be automatically merged.
  • 1 commit
  • 1 file changed
  • 1 contributor

Commits on Feb 3, 2025

  1. added ridge_regression.py

    prajwal-38 committed Feb 3, 2025
    Copy the full SHA
    6991fb0 View commit details
Showing with 100 additions and 0 deletions.
  1. +100 −0 machine_learning/ridge_regression.py
100 changes: 100 additions & 0 deletions machine_learning/ridge_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets

Check failure on line 3 in machine_learning/ridge_regression.py

GitHub Actions / ruff

Ruff (I001)

machine_learning/ridge_regression.py:1:1: I001 Import block is un-sorted or un-formatted

# Ridge Regression function
# reference : https://en.wikipedia.org/wiki/Ridge_regression
def ridge_cost_function(
    X: np.ndarray,  # noqa: N803
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
) -> float:
    """
    Compute the Ridge regression cost function with L2 regularization.

    J(theta) = (1 / (2m)) * sum_i (h(x_i) - y_i)^2
               + (alpha / 2) * sum_j theta_j^2      (for j = 1 .. n-1)

    Where:
        - J(theta) is the cost function we aim to minimize
        - m is the number of training examples (``len(y)``)
        - h(x) = X @ theta is the prediction
        - y_i is the actual target value for example i
        - alpha is the regularization parameter

    The first weight, ``theta[0]``, is excluded from the penalty; callers are
    expected to put the bias (intercept) column first in ``X``.

    @param X: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The parameters (weights) of the model (n,)
    @param alpha: The regularization parameter
    @returns: The computed cost value

    >>> x = np.array([[1.0, 1.0], [1.0, 2.0]])
    >>> y = np.array([1.0, 2.0])
    >>> float(ridge_cost_function(x, y, np.array([0.0, 1.0]), alpha=2.0))
    1.0
    >>> float(ridge_cost_function(x, y, np.zeros(2), alpha=2.0))
    1.25
    """
    # Accept any array-like (lists, tuples, integer arrays) without touching
    # the caller's objects.
    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float)
    weights = np.asarray(theta, dtype=float)

    m = len(targets)
    predictions = np.dot(features, weights)

    squared_error = (1 / (2 * m)) * np.sum((predictions - targets) ** 2)
    # Skip weights[0] so the intercept is not shrunk towards zero.
    l2_penalty = (alpha / 2) * np.sum(weights[1:] ** 2)
    cost = squared_error + l2_penalty
    return cost

def ridge_gradient_descent(
    X: np.ndarray,  # noqa: N803
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
    learning_rate: float,
    max_iterations: int,
) -> np.ndarray:
    """
    Fit a Ridge regression model with batch gradient descent.

    Each iteration applies the gradient of the cost computed by
    ``ridge_cost_function``:

        grad_0 = (1 / m) * X[:, 0]^T (X @ theta - y)
        grad_j = (1 / m) * X[:, j]^T (X @ theta - y) + alpha * theta_j   (j >= 1)

    The first weight is not regularized, matching the cost function. The
    current cost is printed every 100 iterations (starting with iteration 0).

    @param X: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The initial parameters (weights) of the model (n,);
        this array is not modified
    @param alpha: The regularization parameter
    @param learning_rate: The learning rate for gradient descent
    @param max_iterations: The number of iterations for gradient descent
    @returns: The optimized parameters (weights) of the model (n,)

    >>> start = np.zeros(2)
    >>> ridge_gradient_descent(np.ones((2, 2)), np.ones(2), start, 0.1, 0.01, 0).tolist()
    [0.0, 0.0]
    """
    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float)
    # Work on a float copy: the in-place update below must neither overwrite
    # the caller's initial weights nor fail on an integer-dtype array.
    theta = np.array(theta, dtype=float)

    m = len(targets)

    for iteration in range(max_iterations):
        error = np.dot(features, theta) - targets

        # Data term of the gradient, then the L2 term for every weight
        # except the intercept. d/d(theta_j) of (alpha / 2) * theta_j^2 is
        # alpha * theta_j, so no extra 1/m factor belongs here.
        gradient = (1 / m) * np.dot(features.T, error)
        gradient[1:] += alpha * theta[1:]
        theta -= learning_rate * gradient

        if iteration % 100 == 0:
            cost = ridge_cost_function(features, targets, theta, alpha)
            print(f"Iteration {iteration}, Cost: {cost}")

    return theta



if __name__ == "__main__":
    import doctest

    # Run any docstring examples in this module before the demo.
    doctest.testmod()

    # Load California Housing dataset
    california_housing = datasets.fetch_california_housing()
    features = california_housing.data[:, :2]  # 2 features for simplicity
    y = california_housing.target

    # Standardize each feature column to zero mean and unit variance.
    features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)

    # Add a bias column (intercept) as the first column of the feature matrix
    features = np.c_[np.ones(features.shape[0]), features]

    # Initialize parameters (theta)
    theta_initial = np.zeros(features.shape[1])

    # Set hyperparameters
    alpha = 0.1
    learning_rate = 0.01
    max_iterations = 1000

    optimized_theta = ridge_gradient_descent(
        features, y, theta_initial, alpha, learning_rate, max_iterations
    )
    print(f"Optimized theta: {optimized_theta}")

    # Prediction
    def predict(data: np.ndarray, theta: np.ndarray) -> np.ndarray:
        """Return the linear model's predictions ``data @ theta``."""
        return np.dot(data, theta)

    y_pred = predict(features, optimized_theta)

    # Plotting the results (here we visualize predicted vs actual values)
    y_low, y_high = y.min(), y.max()
    plt.figure(figsize=(10, 6))
    plt.scatter(y, y_pred, color="b", label="Predictions vs Actual")
    # Diagonal y = x: points on this line are predicted exactly.
    plt.plot([y_low, y_high], [y_low, y_high], color="r", label="Perfect Fit")
    plt.xlabel("Actual values")
    plt.ylabel("Predicted values")
    plt.title("Ridge Regression: Actual vs Predicted Values")
    plt.legend()
    plt.show()