# VotingRegressor 
The VotingRegressor is a meta-estimator from scikit-learn's `sklearn.ensemble` module. It combines several regression models and forms its final prediction by averaging their individual predictions. This can perform better than any single model, because the errors of different kinds of models tend to partially cancel out when averaged.

# How It Works
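- Model Initialization: Several different regression models are instantiated (they are not trained yet) and passed to the VotingRegressor as named estimators.
- Fitting the Ensemble: Calling `fit` on the VotingRegressor fits every base model on the same training data.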
- Predicting: For new data, each model makes a prediction, and the final prediction is obtained by averaging the predictions of all the models.

## Simple Example

In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

# Generate a simple regression dataset.
# random_state is fixed so the data itself is reproducible; seeding only the
# train/test split below would still yield different numbers on every run.
X, y = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base models (the tree is seeded so repeated runs give the same fit)
model1 = LinearRegression()
model2 = DecisionTreeRegressor(random_state=42)

# Combine the models into a Voting Regressor; each estimator gets a unique name
voting_regressor = VotingRegressor(estimators=[('lr', model1), ('dt', model2)])

# Train the Voting Regressor (this fits every base model on the training data)
voting_regressor.fit(X_train, y_train)

# Make predictions: the average of the base models' predictions
predictions = voting_regressor.predict(X_test)

# Print the predictions
print(predictions)


## Complex Example

In [None]:
import numpy as np
# load_boston was removed from scikit-learn in version 1.2, so importing it
# fails on current releases. load_diabetes is a bundled (no download needed)
# real-world regression dataset that serves the same purpose here.
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor

# Load a more complex, multi-feature real-world dataset
data = load_diabetes()
X, y = data.data, data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the base models: two linear models, a tree and a nearest-neighbour model
model1 = LinearRegression()
model2 = Ridge(alpha=1.0)
model3 = DecisionTreeRegressor()
model4 = KNeighborsRegressor()

# Combine the models into a Voting Regressor
voting_regressor = VotingRegressor(estimators=[('lr', model1), ('ridge', model2), ('dt', model3), ('knn', model4)])

# Train the Voting Regressor
voting_regressor.fit(X_train, y_train)

# Make predictions
predictions = voting_regressor.predict(X_test)

# Print the predictions
print(predictions)


## Very Complex Example

In [None]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

# Fetch the California housing data, a considerably larger real-world dataset
data = fetch_california_housing()
X = data.data
y = data.target

# Hold out 20% of the samples as a test set (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Five heterogeneous base regressors
model1 = LinearRegression()
model2 = Ridge(alpha=1.0)
model3 = DecisionTreeRegressor()
model4 = KNeighborsRegressor()
model5 = SVR()

# Give every regressor a name; these names are the prefixes used in the
# parameter grid below ("<name>__<parameter>").
named_models = [
    ('lr', model1),
    ('ridge', model2),
    ('dt', model3),
    ('knn', model4),
    ('svr', model5),
]
voting_regressor = VotingRegressor(estimators=named_models)

# Candidate hyperparameters: 3 values for each of 4 parameters, i.e. 81
# combinations, each evaluated with 5-fold cross-validation.
param_grid = {
    'ridge__alpha': [0.1, 1.0, 10.0],
    'dt__max_depth': [None, 10, 20],
    'knn__n_neighbors': [3, 5, 7],
    'svr__C': [0.1, 1.0, 10.0],
}

# Exhaustive search over the grid, using all available cores
grid_search = GridSearchCV(voting_regressor, param_grid, cv=5, n_jobs=-1)

# Fitting the search trains the ensemble for every candidate combination
grid_search.fit(X_train, y_train)

# Predict on the held-out data with the best ensemble found by the search
best_ensemble = grid_search.best_estimator_
predictions = best_ensemble.predict(X_test)

# Show the predictions
print(predictions)


## Test the examples

In [None]:
import unittest
import numpy as np
# load_boston was removed from scikit-learn in version 1.2; importing it makes
# this whole cell fail with ImportError. The bundled load_diabetes dataset is
# imported in its place.
from sklearn.datasets import make_regression, load_diabetes, fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

### test_simple example

In [None]:
# Seeded data so the test is reproducible from run to run
X, y = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model1 = LinearRegression()
model2 = DecisionTreeRegressor()
voting_regressor = VotingRegressor(estimators=[('lr', model1), ('dt', model2)])

voting_regressor.fit(X_train, y_train)
predictions = voting_regressor.predict(X_test)

# One finite prediction per test sample
assert predictions.shape == y_test.shape
assert np.isfinite(predictions).all()

scores = cross_val_score(voting_regressor, X, y, cv=5)

# One finite score per fold (a failed fit would surface as NaN)
assert scores.shape == (5,)
assert np.isfinite(scores).all()

### test_complex example

In [None]:
# load_boston was removed from scikit-learn in version 1.2; the bundled
# diabetes dataset is used instead. Imported here so this cell does not depend
# on the shared import cell providing it.
from sklearn.datasets import load_diabetes

data = load_diabetes()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model1 = LinearRegression()
model2 = Ridge(alpha=1.0)
model3 = DecisionTreeRegressor()
model4 = KNeighborsRegressor()
voting_regressor = VotingRegressor(estimators=[('lr', model1), ('ridge', model2), ('dt', model3), ('knn', model4)])

voting_regressor.fit(X_train, y_train)
predictions = voting_regressor.predict(X_test)

# One finite prediction per test sample
assert predictions.shape == y_test.shape
assert np.isfinite(predictions).all()

scores = cross_val_score(voting_regressor, X, y, cv=5)

# One finite score per fold (a failed fit would surface as NaN)
assert scores.shape == (5,)
assert np.isfinite(scores).all()

### test_very_complex example

In [None]:
data = fetch_california_housing()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model1 = LinearRegression()
model2 = Ridge(alpha=1.0)
model3 = DecisionTreeRegressor()
model4 = KNeighborsRegressor()
model5 = SVR()
voting_regressor = VotingRegressor(estimators=[('lr', model1), ('ridge', model2), ('dt', model3), ('knn', model4), ('svr', model5)])

# Keys follow the "<estimator name>__<parameter>" convention
param_grid = {
    'ridge__alpha': [0.1, 1.0, 10.0],
    'dt__max_depth': [None, 10, 20],
    'knn__n_neighbors': [3, 5, 7],
    'svr__C': [0.1, 1.0, 10.0]
}

grid_search = GridSearchCV(estimator=voting_regressor, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# The search must have selected a value for exactly the parameters in the grid
assert set(grid_search.best_params_) == set(param_grid)

predictions = grid_search.best_estimator_.predict(X_test)

# One finite prediction per test sample
assert predictions.shape == y_test.shape
assert np.isfinite(predictions).all()

scores = cross_val_score(grid_search.best_estimator_, X, y, cv=5)

# One finite score per fold (a failed fit would surface as NaN)
assert scores.shape == (5,)
assert np.isfinite(scores).all()

## VotingRegressor with BaggingRegressor

In [None]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import VotingRegressor, BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

# Load the California housing dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define base models wrapped with BaggingRegressor.
# The wrapped estimator is passed positionally: the keyword was called
# `base_estimator` in older scikit-learn releases and was renamed to
# `estimator` in 1.2 (the old name was removed in 1.4), whereas the first
# positional argument works on every version.
model1 = BaggingRegressor(DecisionTreeRegressor(), n_estimators=10, random_state=42)
model2 = BaggingRegressor(LinearRegression(), n_estimators=10, random_state=42)
model3 = BaggingRegressor(KNeighborsRegressor(), n_estimators=10, random_state=42)

# Combine the bagged models into a VotingRegressor
voting_regressor = VotingRegressor(estimators=[('bag_dt', model1), ('bag_lr', model2), ('bag_knn', model3)])

# Train the VotingRegressor
voting_regressor.fit(X_train, y_train)

# Evaluate the performance using 5-fold cross-validation on the full dataset
scores = cross_val_score(voting_regressor, X, y, cv=5)

# Print cross-validation scores
print("Cross-validation scores for VotingRegressor with BaggingRegressor:", scores)

# Make predictions on the test set
predictions = voting_regressor.predict(X_test)

# Print the predictions
print("Predictions:", predictions)


- We use BaggingRegressor with different base regressors (DecisionTree, LinearRegression, KNeighbors).
- These bagged models are combined into a VotingRegressor.
- We evaluate the regressor using cross-validation and print the predictions on the test set.

## BaggingRegressor with VotingRegressor

In [None]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import BaggingRegressor, VotingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

# Load the California housing dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define base models for VotingRegressor
model1 = DecisionTreeRegressor()
model2 = LinearRegression()
model3 = KNeighborsRegressor()

# Combine the models into a VotingRegressor
voting_regressor = VotingRegressor(estimators=[('dt', model1), ('lr', model2), ('knn', model3)])

# Use the VotingRegressor as the base estimator for BaggingRegressor.
# It is passed positionally: the keyword was called `base_estimator` in older
# scikit-learn releases and was renamed to `estimator` in 1.2 (the old name
# was removed in 1.4), whereas the first positional argument works on every
# version.
bagging_regressor = BaggingRegressor(voting_regressor, n_estimators=10, random_state=42)

# Train the BaggingRegressor
bagging_regressor.fit(X_train, y_train)

# Evaluate the performance using 5-fold cross-validation on the full dataset
scores = cross_val_score(bagging_regressor, X, y, cv=5)

# Print cross-validation scores
print("Cross-validation scores for BaggingRegressor with VotingRegressor:", scores)

# Make predictions on the test set
predictions = bagging_regressor.predict(X_test)

# Print the predictions
print("Predictions:", predictions)


- We create a VotingRegressor using DecisionTreeRegressor, LinearRegression, and KNeighborsRegressor.
- We then use this VotingRegressor as the base estimator for a BaggingRegressor.
- We train the BaggingRegressor, evaluate its performance using cross-validation, and make predictions on the test set.