<a href="https://colab.research.google.com/github/vaishak-krishnan/ML-Project-Solar-Panel-Tilt-and-Angle-Prediction/blob/main/ML_Project_Solar_Panel_Tilt_and_Angle_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from google.colab import drive
drive.mount('/content/drive')
# Load the dataset
df = pd.read_csv("/content/drive/MyDrive/solar_radiation_data_kerala_seasons.csv")

# Display the first few rows of the dataset
print("First few rows of the dataset:")
print(df.head())

# Check for missing values
print("\nMissing values in the dataset:")
print(df.isnull().sum())

# Summary statistics
print("\nSummary statistics:")
print(df.describe())

# Explore correlations between variables
correlation_matrix = df.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Matrix')
plt.show()

# Explore distribution of variables
plt.figure(figsize=(15, 10))

plt.subplot(2, 2, 1)
sns.histplot(df['Relative Humidity (%)'], kde=True)
plt.title('Relative Humidity Distribution')

plt.subplot(2, 2, 2)
sns.histplot(df['Solar Zenith Angle'], kde=True)
plt.title('Solar Zenith Angle Distribution')

plt.subplot(2, 2, 3)
sns.histplot(df['Pressure'], kde=True)
plt.title('Pressure Distribution')

plt.subplot(2, 2, 4)
sns.histplot(df['Wind Speed'], kde=True)
plt.title('Wind Speed Distribution')

plt.tight_layout()
plt.show()

# Pre-processing (if required)
# For example, handling categorical variables or scaling numerical variables

# Convert categorical 'Season' variable to numerical using one-hot encoding
df = pd.get_dummies(df, columns=['Season'], drop_first=True)

# Scale numerical variables if needed (using MinMaxScaler or StandardScaler)

# Save pre-processed data to a new CSV file
df.to_csv('preprocessed_solar_radiation_data_kerala.csv', index=False)
print(df.head())

Mounted at /content/drive
First few rows of the dataset:
    Latitude  Longitude  Season  Relative Humidity (%)  Solar Zenith Angle  \
0  12.286790  74.961267  Summer                  70.17              105.57   
1  12.267178  77.044225  Summer                  89.46              125.37   
2  11.945807  77.242545  Autumn                  84.29               85.53   
3   9.338162  74.846201  Summer                  84.40              109.30   
4   9.281156  75.384534  Winter                  70.86              132.42   

   Pressure  Wind Speed  Panel Angle  Capacity  
0     26.48        4.00        32.26    296.24  
1     28.76        3.06        33.21    252.63  
2     29.32        4.32        28.39    269.96  
3     31.55        3.63        27.55    258.97  
4     30.62        3.26        26.61    280.91  

Missing values in the dataset:
Latitude                 0
Longitude                0
Season                   0
Relative Humidity (%)    0
Solar Zenith Angle       0
Pressure     

ValueError: could not convert string to float: 'Summer'

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the dataset
df = pd.read_csv("/content/drive/MyDrive/preprocessed_solar_radiation_data_kerala.csv")

# Compute correlation with Panel Angle
correlation_panel = df.corr()['Panel Angle'].sort_values(ascending=False)
print(correlation_panel)

Panel Angle              1.000000
Solar Zenith Angle       0.137451
Season_Spring            0.099797
Season_Summer            0.067726
Latitude                -0.016233
Longitude               -0.038334
Wind Speed              -0.041142
Relative Humidity (%)   -0.100716
Season_Winter           -0.104049
Pressure                -0.120670
Capacity                -0.133602
Name: Panel Angle, dtype: float64


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Separate features and target variable
X = df.drop('Panel Angle', axis=1)
y = df['Panel Angle']

# Initialize Random Forest Regressor
rf = RandomForestRegressor(n_estimators=100, random_state=42)

# Fit the model
rf.fit(X, y)

# Feature importance
feature_importance = pd.Series(rf.feature_importances_, index=X.columns).sort_values(ascending=False)
print(feature_importance)

Wind Speed               0.196905
Solar Zenith Angle       0.173421
Latitude                 0.132270
Pressure                 0.132253
Capacity                 0.128325
Longitude                0.099007
Relative Humidity (%)    0.091347
Season_Spring            0.019406
Season_Summer            0.015923
Season_Winter            0.011142
dtype: float64


In [None]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression  # or any other suitable model

# Initialize Linear Regression model
model = LinearRegression()

# Initialize RFE
rfe = RFE(model, n_features_to_select=7)  # Choose the number of features you want

# Fit RFE
rfe.fit(X, y)

# Selected features
selected_features = X.columns[rfe.support_]
print(selected_features)

Index(['Longitude', 'Relative Humidity (%)', 'Pressure', 'Wind Speed',
       'Season_Spring', 'Season_Summer', 'Season_Winter'],
      dtype='object')


In [None]:
from sklearn.model_selection import train_test_split

# Load the preprocessed dataset
df = pd.read_csv("/content/drive/MyDrive/preprocessed_solar_radiation_data_kerala.csv")

# Separate features and target variable
X = df.drop('Panel Angle', axis=1)
y = df['Panel Angle']

# Split the dataset into training (60%) and testing (40%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Display the shapes of the resulting sets
print("Training set - X shape:", X_train.shape, "y shape:", y_train.shape)
print("Testing set - X shape:", X_test.shape, "y shape:", y_test.shape)


Training set - X shape: (60, 10) y shape: (60,)
Testing set - X shape: (40, 10) y shape: (40,)


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Initialize the Linear Regression model
model = LinearRegression()

# Train the model using the training data
model.fit(X_train, y_train)

# Predict on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared Score:", r2)

Mean Squared Error: 10.8465127614991
R-squared Score: -0.11912370663613125


In [None]:
threshold = 0.5  # Define your threshold for acceptable error

# Calculate absolute errors
absolute_errors = abs(y_test - y_pred)

# Calculate the percentage of predictions within the threshold
within_threshold = sum(1 for error in absolute_errors if error <= threshold)
accuracy_within_threshold = within_threshold / len(y_test) * 100

print(f"Accuracy within {threshold} units: {accuracy_within_threshold:.2f}%")


Accuracy within 0.5 units: 10.00%


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor

# Separate features and target variable
X = df.drop('Panel Angle', axis=1)
y = df['Panel Angle']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize Gradient Boosting Regressor
gb_reg = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Fit the model
gb_reg.fit(X_train, y_train)

# Predictions
y_pred_train = gb_reg.predict(X_train)
y_pred_test = gb_reg.predict(X_test)

# Evaluate the model
train_rmse = mean_squared_error(y_train, y_pred_train, squared=False)
test_rmse = mean_squared_error(y_test, y_pred_test, squared=False)
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)

print("Train RMSE:", train_rmse)
print("Test RMSE:", test_rmse)
print("Train R^2 Score:", train_r2)
print("Test R^2 Score:", test_r2)


Train RMSE: 0.5683310817845851
Test RMSE: 3.8752727371272067
Train R^2 Score: 0.9691747476150827
Test R^2 Score: -0.6455633705906905


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor

# Load the preprocessed dataset
df = pd.read_csv('preprocessed_solar_radiation_data_kerala.csv')

# Separate features and target variable
X = df.drop('Panel Angle', axis=1)
y = df['Panel Angle']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define hyperparameters grid for Gradient Boosting Regressor
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7]
}

# Initialize Gradient Boosting Regressor
gb_reg = GradientBoostingRegressor(random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=gb_reg, param_grid=param_grid, cv=5, scoring='r2')

# Fit GridSearchCV to find the best hyperparameters
grid_search.fit(X, y)

# Get the best estimator
best_gb_reg = grid_search.best_estimator_

# Predictions
y_pred_train = best_gb_reg.predict(X_train)
y_pred_test = best_gb_reg.predict(X_test)

# Evaluate the model
train_rmse = mean_squared_error(y_train, y_pred_train, squared=False)
test_rmse = mean_squared_error(y_test, y_pred_test, squared=False)
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)

# Print results
print("Best Parameters:", grid_search.best_params_)
print("Train RMSE:", train_rmse)
print("Test RMSE:", test_rmse)
print("Train R^2 Score:", train_r2)
print("Test R^2 Score:", test_r2)


Best Parameters: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}
Train RMSE: 2.8046268934309184
Test RMSE: 2.7041733949119084
Train R^2 Score: 0.24932143822743247
Test R^2 Score: 0.19872953134712557


In [None]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

# Initialize the Ridge Regression model
ridge = Ridge()

# Define a range of alpha values (regularization strength)
alphas = [0.01, 0.1, 1, 10, 100]  # Adjust this list as needed

# Hyperparameter grid
param_grid = {'alpha': alphas}

# Perform GridSearchCV to find the best alpha
grid_search = GridSearchCV(ridge, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Get the best alpha and refit the model with the optimal parameter
best_alpha = grid_search.best_params_['alpha']
best_ridge = Ridge(alpha=best_alpha)
best_ridge.fit(X_train, y_train)

# Predict on the testing set
y_pred_ridge = best_ridge.predict(X_test)

# Evaluate the model
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)

print("Best alpha:", best_alpha)
print("Mean Squared Error (Ridge):", mse_ridge)
print("R-squared Score (Ridge):", r2_ridge)

Best alpha: 100
Mean Squared Error (Ridge): 9.817065629477185
R-squared Score (Ridge): -0.0757014644845222


In [None]:
# Example latitude and longitude values (Kovalam)
new_latitude = 8.383230
new_longitude = 76.983566

# Create a DataFrame with new values for Latitude and Longitude
new_data = pd.DataFrame({
    'Latitude': [new_latitude],
    'Longitude': [new_longitude],
    'Capacity': [250],  # Placeholder value for Capacity
    'Pressure': [30.0],  # Placeholder value for Pressure
    'Relative Humidity (%)': [80],  # Placeholder value for Relative Humidity
    'Season_Spring': [0],  # Placeholder value for Season_Spring (0 or 1)
    'Season_Summer': [1],  # Placeholder value for Season_Summer (0 or 1)
    'Season_Winter': [0],  # Placeholder value for Season_Winter (0 or 1)
    'Solar Zenith Angle': [120.0],  # Placeholder value for Solar Zenith Angle
    'Wind Speed': [3.0]  # Placeholder value for Wind Speed
})

# Rearrange columns in new_data to match the order of features used during training
new_data = new_data[X_test.columns]

# Use the trained Ridge Regression model to predict Panel Angle for the new data
predicted_panel_angle = best_ridge.predict(new_data)

print("Predicted Panel Angle:", predicted_panel_angle[0])

Predicted Panel Angle: 30.82318876487257


In [None]:
# Example 2  latitude and longitude values (Edappaly)
new_latitude = 10.0261
new_longitude = 76.3125

# Create a DataFrame with new values for Latitude and Longitude
new_data = pd.DataFrame({
    'Latitude': [new_latitude],
    'Longitude': [new_longitude],
    'Capacity': [250],  # Placeholder value for Capacity
    'Pressure': [30.0],  # Placeholder value for Pressure
    'Relative Humidity (%)': [80],  # Placeholder value for Relative Humidity
    'Season_Spring': [0],  # Placeholder value for Season_Spring (0 or 1)
    'Season_Summer': [1],  # Placeholder value for Season_Summer (0 or 1)
    'Season_Winter': [0],  # Placeholder value for Season_Winter (0 or 1)
    'Solar Zenith Angle': [120.0],  # Placeholder value for Solar Zenith Angle
    'Wind Speed': [3.0]  # Placeholder value for Wind Speed
})

# Rearrange columns in new_data to match the order of features used during training
new_data = new_data[X_test.columns]

# Use the trained Ridge Regression model to predict Panel Angle for the new data
predicted_panel_angle = best_ridge.predict(new_data)

print("Predicted Panel Angle:", predicted_panel_angle[0])

Predicted Panel Angle: 30.79290926225083


In [None]:
# Example 3  latitude and longitude values (Kasaragod)
new_latitude = 12.4996
new_longitude = 74.9869

# Create a DataFrame with new values for Latitude and Longitude
new_data = pd.DataFrame({
    'Latitude': [new_latitude],
    'Longitude': [new_longitude],
    'Capacity': [250],  # Placeholder value for Capacity
    'Pressure': [30.0],  # Placeholder value for Pressure
    'Relative Humidity (%)': [80],  # Placeholder value for Relative Humidity
    'Season_Spring': [0],  # Placeholder value for Season_Spring (0 or 1)
    'Season_Summer': [1],  # Placeholder value for Season_Summer (0 or 1)
    'Season_Winter': [0],  # Placeholder value for Season_Winter (0 or 1)
    'Solar Zenith Angle': [120.0],  # Placeholder value for Solar Zenith Angle
    'Wind Speed': [3.0]  # Placeholder value for Wind Speed
})

# Rearrange columns in new_data to match the order of features used during training
new_data = new_data[X_test.columns]

# Use the trained Ridge Regression model to predict Panel Angle for the new data
predicted_panel_angle = best_ridge.predict(new_data)

print("Predicted Panel Angle:", predicted_panel_angle[0])

Predicted Panel Angle: 30.763028713641603


In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Define categories for Panel Angle (for demonstration purposes)
# You should adapt this based on your actual data distribution
# For example, you can use pandas' qcut to create categories
bins = [-float('inf'), 250, 300, float('inf')]
labels = ['Low', 'Medium', 'High']
df['Panel Angle Category'] = pd.cut(df['Panel Angle'], bins=bins, labels=labels)

# Separate features and target variable
X = df.drop(['Panel Angle', 'Panel Angle Category'], axis=1)
y = df['Panel Angle Category']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# Initialize and train the Multinomial Naive Bayes classifier
model_nb = MultinomialNB()
model_nb.fit(X_train, y_train)

# Predict on the testing set
y_pred_nb = model_nb.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred_nb)
print("Accuracy using MultinomialNB:", accuracy)

Accuracy using MultinomialNB: 1.0
