In [1]:
import joblib
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor


from sklearn.metrics import mean_squared_error, r2_score

import pandas as pd

## Data for training our model

In [2]:
data_cleaned = pd.read_csv('NO2_07112024_NoOutliers.csv')

data = data_cleaned[['Timestamp', 'Value']]
data['Timestamp'] = pd.to_datetime(data['Timestamp'])

# Check for missing values
data = data.dropna()

# Extract features and target variable
X = data.index.astype(int).values.reshape(-1, 1)  # Use timestamp as a feature
y = data['Value'].values

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Timestamp'] = pd.to_datetime(data['Timestamp'])


## Pickle for Random Forest

In [6]:

# Initialize the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, max_depth=None, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)


# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

Mean Squared Error: 2.80162171370069
R^2 Score: 0.7480909854987039


In [7]:

# Creating pickle for our trained Random Forest model
joblib.dump(rf_model, 'rf_model.pkl')

['rf_model.pkl']

## Pickle for GradientBoostRegressor

In [3]:

# Initialize the Gradient Boosting Regressor
gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Train the model
gb_model.fit(X_train, y_train)




# Make predictions
y_pred = gb_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")


Mean Squared Error: 4.001585054084567
R^2 Score: 0.6401957685836054


In [5]:

# Creating pickle for our trained Gradient Boosting Regressor model
joblib.dump(gb_model, 'gb_model.pkl')

['gb_model.pkl']