In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import joblib


In [None]:
# Source CSV: the "daily-min-temperatures" file from the jbrownlee/Datasets repository.
# NOTE(review): later cells expect 'next_day_max_temp' and 'next_day_min_temp'
# columns; confirm this file actually provides them (its name suggests a
# single minimum-temperature series).
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv'

# `data` keeps the untouched download; `df` is the working copy that every
# later cell reads and cleans. Previously only `data` was assigned, so the
# `df.head()` below raised NameError on a fresh kernel.
data = pd.read_csv(url)
df = data.copy()

df.head()


In [None]:
# Summary statistics of the frame.
print(df.describe())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
df.info()

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
print(df.isna().sum())

In [None]:

# Remove exact duplicate rows, then fill remaining gaps with each column's mean.
# numeric_only=True restricts the means to numeric columns: on pandas >= 2.0
# DataFrame.mean() raises TypeError when the frame contains a non-numeric
# column (e.g. a date stored as text). Non-numeric columns are left unfilled.
# Reassignment is used instead of inplace=True; later cells see the same `df`.
df = df.drop_duplicates()
df = df.fillna(df.mean(numeric_only=True))


In [None]:


# Histograms of the DataFrame's columns.
df.hist(figsize=(10, 8))
plt.show()

# Pairwise correlation heatmap. Only numeric columns are passed to corr():
# on pandas >= 2.0 DataFrame.corr() raises when a non-numeric column is
# present, and select_dtypes works on every pandas version.
numeric_corr = df.select_dtypes(include='number').corr()
ax = sns.heatmap(numeric_corr, annot=True, cmap='coolwarm')
ax.set_title('Correlation between numeric columns')
plt.show()

In [None]:

# The two target columns, one per model family trained below.
y_max, y_min = df['next_day_max_temp'], df['next_day_min_temp']
# Feature matrix: every column of df except the two targets.
X = df.drop(columns=['next_day_max_temp', 'next_day_min_temp'])

In [None]:

# 80/20 hold-out split, done once per target. Both calls receive the same X,
# test_size and random_state.
X_train_max, X_test_max, y_train_max, y_test_max = train_test_split(
    X,
    y_max,
    random_state=42,
    test_size=0.2,
)
X_train_min, X_test_min, y_train_min, y_test_min = train_test_split(
    X,
    y_min,
    random_state=42,
    test_size=0.2,
)

In [None]:

# Standardize the features. The scaler is fitted on training rows only and
# then applied to the matching test rows.
scaler = StandardScaler()

# Max-temperature split.
X_train_max = scaler.fit(X_train_max).transform(X_train_max)
X_test_max = scaler.transform(X_test_max)

# Min-temperature split: the same scaler object is refitted here, so after
# this cell `scaler` holds the statistics learned from X_train_min.
X_train_min = scaler.fit(X_train_min).transform(X_train_min)
X_test_min = scaler.transform(X_test_min)


# Linear Regression for Max Temperature
# Fit on the scaled training split, predict the scaled test split.
linear_model_max = LinearRegression()
linear_model_max.fit(X_train_max, y_train_max)
y_pred_lin_max = linear_model_max.predict(X_test_max)

# Report the hold-out metrics in the order MAE, MSE, R2.
for metric_label, metric_fn in (
    ("MAE", mean_absolute_error),
    ("MSE", mean_squared_error),
    ("R2", r2_score),
):
    print(f"Linear Regression (Max Temp) - {metric_label}:", metric_fn(y_test_max, y_pred_lin_max))

In [None]:

# Decision Tree Regressor for Max Temperature
# random_state is fixed so that Restart & Run All rebuilds the same tree and
# therefore prints the same metrics; 42 matches the seed used for the split.
tree_model_max = DecisionTreeRegressor(random_state=42)
tree_model_max.fit(X_train_max, y_train_max)
y_pred_tree_max = tree_model_max.predict(X_test_max)

# Hold-out metrics on the max-temperature test split.
print("Decision Tree (Max Temp) - MAE:", mean_absolute_error(y_test_max, y_pred_tree_max))
print("Decision Tree (Max Temp) - MSE:", mean_squared_error(y_test_max, y_pred_tree_max))
print("Decision Tree (Max Temp) - R2:", r2_score(y_test_max, y_pred_tree_max))

In [None]:
# Random Forest Regressor for Max Temperature
# The forest is built from random bootstrap samples and feature subsets, so
# random_state is fixed to make the printed metrics reproducible between runs.
forest_model_max = RandomForestRegressor(random_state=42)
forest_model_max.fit(X_train_max, y_train_max)
y_pred_forest_max = forest_model_max.predict(X_test_max)

# Hold-out metrics on the max-temperature test split.
print("Random Forest (Max Temp) - MAE:", mean_absolute_error(y_test_max, y_pred_forest_max))
print("Random Forest (Max Temp) - MSE:", mean_squared_error(y_test_max, y_pred_forest_max))
print("Random Forest (Max Temp) - R2:", r2_score(y_test_max, y_pred_forest_max))


In [None]:

# Linear Regression for Min Temperature
# Fit on the scaled training split, predict the scaled test split.
linear_model_min = LinearRegression()
linear_model_min.fit(X_train_min, y_train_min)
y_pred_lin_min = linear_model_min.predict(X_test_min)

# Report the hold-out metrics in the order MAE, MSE, R2.
for metric_label, metric_fn in (
    ("MAE", mean_absolute_error),
    ("MSE", mean_squared_error),
    ("R2", r2_score),
):
    print(f"Linear Regression (Min Temp) - {metric_label}:", metric_fn(y_test_min, y_pred_lin_min))

In [None]:


# Decision Tree Regressor for Min Temperature
# random_state is fixed so that Restart & Run All rebuilds the same tree and
# therefore prints the same metrics; 42 matches the seed used for the split.
tree_model_min = DecisionTreeRegressor(random_state=42)
tree_model_min.fit(X_train_min, y_train_min)
y_pred_tree_min = tree_model_min.predict(X_test_min)

# Hold-out metrics on the min-temperature test split.
print("Decision Tree (Min Temp) - MAE:", mean_absolute_error(y_test_min, y_pred_tree_min))
print("Decision Tree (Min Temp) - MSE:", mean_squared_error(y_test_min, y_pred_tree_min))
print("Decision Tree (Min Temp) - R2:", r2_score(y_test_min, y_pred_tree_min))

In [None]:


# Random Forest Regressor for Min Temperature
# The forest is built from random bootstrap samples and feature subsets, so
# random_state is fixed to make the printed metrics reproducible between runs.
forest_model_min = RandomForestRegressor(random_state=42)
forest_model_min.fit(X_train_min, y_train_min)
y_pred_forest_min = forest_model_min.predict(X_test_min)

# Hold-out metrics on the min-temperature test split.
print("Random Forest (Min Temp) - MAE:", mean_absolute_error(y_test_min, y_pred_forest_min))
print("Random Forest (Min Temp) - MSE:", mean_squared_error(y_test_min, y_pred_forest_min))
print("Random Forest (Min Temp) - R2:", r2_score(y_test_min, y_pred_forest_min))


In [None]:

# Hyperparameter tuning for Random Forest Regressor (Max Temp)
# Exhaustive search over 3 x 3 = 9 settings, each scored with 5-fold
# cross-validation on the training split only.
param_grid_max = {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 7]}
# A fixed random_state makes every candidate forest deterministic, so the
# selected parameters do not change from one run to the next.
grid_max = GridSearchCV(RandomForestRegressor(random_state=42), param_grid_max, cv=5)
grid_max.fit(X_train_max, y_train_max)

print("Best parameters for Random Forest Regressor (Max Temp):", grid_max.best_params_)
# The tuned model is the one that gets saved, so report how it does on the
# hold-out split as well. GridSearchCV.score delegates to the refitted best
# estimator, whose default score for a regressor is R2.
print("Tuned Random Forest (Max Temp) - test R2:", grid_max.score(X_test_max, y_test_max))


In [None]:

# Hyperparameter tuning for Random Forest Regressor (Min Temp)
# Exhaustive search over 3 x 3 = 9 settings, each scored with 5-fold
# cross-validation on the training split only.
param_grid_min = {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 7]}
# A fixed random_state makes every candidate forest deterministic, so the
# selected parameters do not change from one run to the next.
grid_min = GridSearchCV(RandomForestRegressor(random_state=42), param_grid_min, cv=5)
grid_min.fit(X_train_min, y_train_min)

print("Best parameters for Random Forest Regressor (Min Temp):", grid_min.best_params_)
# The tuned model is the one that gets saved, so report how it does on the
# hold-out split as well. GridSearchCV.score delegates to the refitted best
# estimator, whose default score for a regressor is R2.
print("Tuned Random Forest (Min Temp) - test R2:", grid_min.score(X_test_min, y_test_min))


In [None]:

# Persist the fitted preprocessing together with the tuned models.
# The forests were trained on standardized features, so the scaler has to be
# saved with them: predictions on new data are only meaningful after the same
# scaler.transform(). One scaler serves both models because the max and min
# splits were drawn from the same X with the same test_size and random_state.
# It is written first so the cell's displayed result (the last dump) is unchanged.
joblib.dump(scaler, 'temp_feature_scaler.pkl')
joblib.dump(grid_max.best_estimator_, 'best_max_temp_model.pkl')
joblib.dump(grid_min.best_estimator_, 'best_min_temp_model.pkl')
