In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the data

In [105]:
data = pd.read_csv("Weather_Data.csv")

# Inspect the data

In [4]:
data.head()

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,"Freezing Drizzle,Fog"
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,"Freezing Drizzle,Fog"
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog


In [5]:
data.count()

Date/Time           8784
Temp_C              8784
Dew Point Temp_C    8784
Rel Hum_%           8784
Wind Speed_km/h     8784
Visibility_km       8784
Press_kPa           8784
Weather             8784
dtype: int64

In [9]:
data.Weather.unique()

array(['Fog', 'Freezing Drizzle,Fog', 'Mostly Cloudy', 'Cloudy', 'Rain',
       'Rain Showers', 'Mainly Clear', 'Snow Showers', 'Snow', 'Clear',
       'Freezing Rain,Fog', 'Freezing Rain', 'Freezing Drizzle',
       'Rain,Snow', 'Moderate Snow', 'Freezing Drizzle,Snow',
       'Freezing Rain,Snow Grains', 'Snow,Blowing Snow', 'Freezing Fog',
       'Haze', 'Rain,Fog', 'Drizzle,Fog', 'Drizzle',
       'Freezing Drizzle,Haze', 'Freezing Rain,Haze', 'Snow,Haze',
       'Snow,Fog', 'Snow,Ice Pellets', 'Rain,Haze', 'Thunderstorms,Rain',
       'Thunderstorms,Rain Showers', 'Thunderstorms,Heavy Rain Showers',
       'Thunderstorms,Rain Showers,Fog', 'Thunderstorms',
       'Thunderstorms,Rain,Fog',
       'Thunderstorms,Moderate Rain Showers,Fog', 'Rain Showers,Fog',
       'Rain Showers,Snow Showers', 'Snow Pellets', 'Rain,Snow,Fog',
       'Moderate Rain,Fog', 'Freezing Rain,Ice Pellets,Fog',
       'Drizzle,Ice Pellets,Fog', 'Drizzle,Snow', 'Rain,Ice Pellets',
       'Drizzle,Snow,Fog', 

In [7]:
data.nunique()

Date/Time           8784
Temp_C               533
Dew Point Temp_C     489
Rel Hum_%             83
Wind Speed_km/h       34
Visibility_km         24
Press_kPa            518
Weather               50
dtype: int64

In [12]:
data.Weather.value_counts()

Weather
Mainly Clear                               2106
Mostly Cloudy                              2069
Cloudy                                     1728
Clear                                      1326
Snow                                        390
Rain                                        306
Rain Showers                                188
Fog                                         150
Rain,Fog                                    116
Drizzle,Fog                                  80
Snow Showers                                 60
Drizzle                                      41
Snow,Fog                                     37
Snow,Blowing Snow                            19
Rain,Snow                                    18
Thunderstorms,Rain Showers                   16
Haze                                         16
Drizzle,Snow,Fog                             15
Freezing Rain                                14
Freezing Drizzle,Snow                        11
Freezing Drizzle                

# Handle missing values

In [106]:
data = data.dropna()

In [107]:
# Replace data
data['Weather'] = data['Weather'].replace({'Freezing Rain,Snow Grains': 'Rain',
                                           'Thunderstorms,Heavy Rain Showers': 'Rain',
                                           'Rain,Snow Grains': 'Rain',
                                           'Rain,Ice Pellets': 'Rain',
                                           'Thunderstorms,Rain,Fog': 'Rain',
                                           'Drizzle,Ice Pellets,Fog': 'Fog',
                                           'Freezing Rain,Ice Pellets,Fog': 'Rain',
                                           'Moderate Rain,Fog': 'Rain',
                                           'Rain,Snow,Fog': 'Rain',
                                           'Snow Pellets': 'Snow',
                                           'Thunderstorms,Moderate Rain Showers,Fog': 'Rain',
                                           'Rain Showers,Fog': 'Rain',
                                           'Moderate Snow,Blowing Snow': 'Snow',
                                           'Thunderstorms': 'Rain',
                                           'Rain Showers,Snow Showers': 'Rain',
                                           'Drizzle,Snow': 'Snow',
                                           'Freezing Rain,Haze': 'Rain',
                                           'Thunderstorms,Rain Showers,Fog': 'Rain',
                                           'Thunderstorms,Rain': 'Rain',
                                           'Rain,Haze': 'Rain',
                                           'Freezing Drizzle,Haze': 'Fog',
                                           'Freezing Rain,Fog': 'Rain',
                                           'Rain,Snow,Ice Pellets': 'Rain',
                                           'Moderate Snow': 'Snow',
                                           'Snow Showers,Fog': 'Snow',
                                           'Freezing Fog': 'Fog',
                                           'Snow,Haze': 'Snow',
                                           'Freezing Drizzle,Fog': 'Fog',
                                           'Snow,Ice Pellets': 'Snow',
                                           'Freezing Drizzle': 'Rain',
                                           'Freezing Drizzle,Snow': 'Snow',
                                           'Rain Showers': 'Rain',
                                           'Rain,Fog': 'Fog',
                                           'Drizzle,Fog': 'Fog',
                                           'Snow Showers': 'Snow',
                                           'Drizzle': 'Fog',
                                           'Snow,Blowing Snow': 'Snow',
                                           'Snow,Fog': 'Snow',
                                           'Rain,Snow': 'Rain',
                                           'Thunderstorms,Rain Showers': 'Rain',
                                           'Freezing Rain': 'Rain',
                                           'Haze': 'Fog',
                                           'Drizzle,Snow,Fog': 'Snow',
                                           'Mainly Clear': 'Clear' ,
                                           'Mostly Cloudy': 'Cloudy'
                                            })

In [109]:
data.Weather.value_counts()

Weather
Cloudy    3797
Clear     3432
Rain       582
Snow       556
Fog        417
Name: count, dtype: int64

In [110]:
dataLE = data

In [111]:
dataLE.head()

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,Fog
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,Fog
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog


# One-hot encode the 'Weather' column

In [61]:
data_one_hot = pd.get_dummies(data['Weather'])
data = pd.concat([data, data_one_hot], axis=1)

In [62]:
data.head()

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather,Clear,Cloudy,Fog,Rain,Snow
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,Fog,False,False,True,False,False
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,Fog,False,False,True,False,False
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,Fog,False,False,True,False,False
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,Fog,False,False,True,False,False
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,Fog,False,False,True,False,False


# Drop the original 'Weather' column and any other columns you don't need

In [63]:
data = data.drop(['Weather', 'Date/Time'], axis=1)

# Feature and target variables

In [64]:
X = data.drop(data_one_hot.columns, axis=1)
y = data[data_one_hot.columns]

# Split the data into training and testing sets

In [96]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=34)

# Train a RandomForest model

In [97]:
model = RandomForestClassifier(n_estimators=200, random_state=34)
model.fit(X_train, y_train)

# Make predictions

In [98]:
y_pred = model.predict(X_test)

In [104]:
print("Predicted values:")
print(y_pred)
print("Actual values:")
print(y_test)

Predicted values:
[[ True False False False False]
 [False  True False False False]
 [False False False False False]
 ...
 [False  True False False False]
 [False  True False False False]
 [False  True False False False]]
Actual values:
      Clear  Cloudy    Fog   Rain   Snow
4847   True   False  False  False  False
4767  False    True  False  False  False
3693  False    True  False  False  False
619   False    True  False  False  False
2791  False    True  False  False  False
...     ...     ...    ...    ...    ...
7226  False    True  False  False  False
4683   True   False  False  False  False
4095  False    True  False  False  False
1111  False    True  False  False  False
7270  False    True  False  False  False

[1757 rows x 5 columns]


# Evaluate the model

In [99]:
accuracy = accuracy_score(y_test.values.argmax(axis=1), y_pred.argmax(axis=1))
print("Accuracy:", accuracy)

Accuracy: 0.7211155378486056


# Print classification report

In [100]:
print(classification_report(y_test.values.argmax(axis=1), y_pred.argmax(axis=1), target_names=data_one_hot.columns))

              precision    recall  f1-score   support

       Clear       0.67      0.78      0.73       675
      Cloudy       0.73      0.72      0.72       755
         Fog       0.85      0.85      0.85        99
        Rain       0.81      0.34      0.48       116
        Snow       0.93      0.66      0.77       112

    accuracy                           0.72      1757
   macro avg       0.80      0.67      0.71      1757
weighted avg       0.73      0.72      0.72      1757



# **LABEL ENCODER**

# Encode categorical variables (Weather)

In [112]:
le = LabelEncoder()
dataLE['Weather'] = le.fit_transform(dataLE['Weather'])

In [113]:
dataLE.head()

Unnamed: 0,Date/Time,Temp_C,Dew Point Temp_C,Rel Hum_%,Wind Speed_km/h,Visibility_km,Press_kPa,Weather
0,1/1/2012 0:00,-1.8,-3.9,86,4,8.0,101.24,2
1,1/1/2012 1:00,-1.8,-3.7,87,4,8.0,101.24,2
2,1/1/2012 2:00,-1.8,-3.4,89,7,4.0,101.26,2
3,1/1/2012 3:00,-1.5,-3.2,88,6,4.0,101.27,2
4,1/1/2012 4:00,-1.5,-3.3,88,7,4.8,101.23,2


# Feature and target variables

In [114]:
Xle = dataLE.drop(['Date/Time', 'Weather'], axis=1)
yle = dataLE['Weather']

# Split the data

In [122]:
Xle_train, Xle_test, yle_train, yle_test = train_test_split(Xle, yle, test_size=0.2, random_state=34)

# Train a RandomForest model

In [126]:
modelle = RandomForestClassifier(n_estimators=2000, random_state=34)
modelle.fit(Xle_train, yle_train)

# Make predictions

In [127]:
yle_pred = modelle.predict(Xle_test)

# Evaluate the model

In [128]:
print("Accuracy:", accuracy_score(yle_test, yle_pred))
print(classification_report(yle_test, yle_pred, target_names=le.classes_))

Accuracy: 0.7450199203187251
              precision    recall  f1-score   support

       Clear       0.76      0.76      0.76       675
      Cloudy       0.71      0.78      0.74       755
         Fog       0.84      0.88      0.86        99
        Rain       0.73      0.35      0.48       116
        Snow       0.87      0.73      0.80       112

    accuracy                           0.75      1757
   macro avg       0.78      0.70      0.73      1757
weighted avg       0.75      0.75      0.74      1757

