<a href="https://colab.research.google.com/github/vidhi2919/Farmify/blob/main/Biogas_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the biogas dataset CSV from the Colab working directory into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='/content/biogas_prediction_tool_dataset.csv',
)

In [None]:
# Preview the first five rows to sanity-check the columns that were loaded.
data.head(n=5)

Unnamed: 0,waste_type,crop_residue_weight_kg,manure_volume_liters,temperature_C,ph_level,retention_time_days,biogas_output_m3
0,0,496,54,42.476684,7.861214,30,65.821963
1,1,194,23,34.310675,6.994625,39,64.94864
2,0,250,39,30.223322,6.460502,38,58.012721
3,0,466,77,29.451992,6.089128,10,51.339301
4,0,261,27,55.263114,6.831284,29,51.815022


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Target: the biogas output column. Features: every remaining column of `data`.
y = data['biogas_output_m3']
X = data.drop(columns='biogas_output_m3')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Decision tree baseline: fit on the training split (fit() returns the estimator
# itself), then predict the held-out test rows.
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

In [None]:
# Random forest with a fixed seed: fit on the training split, then predict the
# held-out test rows.
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

In [None]:
# Ordinary linear regression: fit on the training split, then predict the
# held-out test rows.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)


In [None]:
def evaluate_model(y_test, y_pred, model_name):
    """Print the mean squared error and R-squared of one model's predictions.

    Args:
        y_test: True target values, passed as the first argument to the
            scikit-learn metric functions.
        y_pred: Predicted target values to compare against ``y_test``.
        model_name: Label placed at the start of each printed line.

    Returns:
        None. The two metrics are only printed, one per line.
    """
    # Each metric is computed immediately before it is printed, so the MSE line
    # is emitted before R-squared is evaluated.
    mse = mean_squared_error(y_test, y_pred)
    print(f'{model_name} - Mean Squared Error: {mse}')
    r_squared = r2_score(y_test, y_pred)
    print(f'{model_name} - R-squared: {r_squared}')


# Report the test-set metrics for each fitted model, in the same order as before.
for test_predictions, display_name in (
    (y_pred_dt, "Decision Tree"),
    (y_pred_rf, "Random Forest"),
    (y_pred_lr, "Linear Regression"),
):
    evaluate_model(y_test, test_predictions, display_name)

Decision Tree - Mean Squared Error: 42.82419459106673
Decision Tree - R-squared: 0.7801914713971582
Random Forest - Mean Squared Error: 18.742755475326124
Random Forest - R-squared: 0.9037969647220475
Linear Regression - Mean Squared Error: 9.00989309669435
Linear Regression - R-squared: 0.9537539149687495


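We will use Linear Regression (LR) because, on the test set, it has the lowest mean squared error (≈ 9.01; MSE is in squared output units, not a percentage) and the highest R-squared (≈ 0.954, i.e. about 95.4% of the variance explained).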

In [None]:
# One hand-written sample to score. The keys use the same feature names, in the
# same order, as the columns the model was trained on.
new_data = pd.DataFrame([
    {
        'waste_type': 1,
        'crop_residue_weight_kg': 500,
        'manure_volume_liters': 200,
        'temperature_C': 35,
        'ph_level': 7.0,
        'retention_time_days': 20,
    },
])

# predict() returns one value per input row; only the single row's value is printed.
predicted_biogas = lr_model.predict(new_data)
print(f'Predicted biogas production (m^3): {predicted_biogas[0]}')

Predicted biogas production (m^3): 113.85186205062965


In [None]:
import pickle

In [None]:
# Serialize the fitted linear regression model to a pickle file in the current
# working directory; the context manager closes the file once the dump is done.
with open('linear_regression_model.pkl', mode='wb') as pickle_out:
    pickle.dump(lr_model, pickle_out)

print("Linear Regression model saved as linear_regression_model.pkl")

Linear Regression model saved as linear_regression_model.pkl


In [None]:
import pickle

# Write `lr_model` (the trained linear regression model) to
# linear_regression_model.pkl again. This overwrites the file with the same
# object that the earlier save cell already wrote.
with open('linear_regression_model.pkl', mode='wb') as pickle_out:
    pickle.dump(lr_model, pickle_out)

In [None]:
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this cell elsewhere.
from google.colab import files

# Hand the pickled model file to Colab's download helper. The model was saved
# with a relative path, so this absolute /content path refers to the same file
# only if the working directory is /content — TODO confirm.
files.download('/content/linear_regression_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Inspect the fitted linear model: one coefficient per input feature, so the
# length of coef_ is the number of features the model expects at predict time.
# `lr_model` is read directly because the joblib-reloaded `model` is only
# created in a later cell; referencing it here raises NameError on a fresh
# Restart & Run All.
print("Model Coefficients:", lr_model.coef_)
print("Number of Features Expected:", len(lr_model.coef_))


Model Coefficients: [15.20742851  0.01968812  0.29659487  0.499904   -2.23684636  0.5187981 ]
Number of Features Expected: 6


In [None]:
from joblib import dump, load

# Persist the fitted linear regression model with joblib ...
dump(lr_model, filename='model.joblib')

# ... and read it straight back, binding the reloaded estimator to `model`.
model = load(filename='model.joblib')


In [None]:
# Debug check of the sample passed to predict(): one row, six feature columns.
print(f"Input DataFrame shape: {new_data.shape}")  # Should be (1, 6)
print(f"Input DataFrame: {new_data}")


Input DataFrame shape: (1, 6)
Input DataFrame:    waste_type  crop_residue_weight_kg  manure_volume_liters  temperature_C  \
0           1                     500                   200             35   

   ph_level  retention_time_days  
0       7.0                   20  
