In [1]:
import numpy as np
import pandas as pd 

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import tqdm

In [2]:
# Load the rainfall dataset from the Kaggle input directory into a DataFrame.
# index_col=None keeps pandas' default index handling.
csv_path = '/kaggle/input/clean-data-australia-rainfall/rainfall_data_australia.csv'
df = pd.read_csv(csv_path, index_col=None)

In [3]:
# Check the dataset dimensions as (rows, columns).
df.shape

(351587, 18)

In [4]:
# Preview the first rows of the raw data.
df.head()

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2,2
1,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2,2
2,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2,2
3,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2,2
4,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2,2


In [5]:
# Report, per column, whether at least one value is missing.
df.isna().any()

MinTemp           True
MaxTemp           True
Rainfall         False
Evaporation       True
Sunshine         False
WindGustSpeed    False
WindSpeed9am      True
WindSpeed3pm      True
Humidity9am      False
Humidity3pm       True
Pressure9am       True
Pressure3pm       True
Cloud9am         False
Cloud3pm         False
Temp9am          False
Temp3pm           True
RainToday        False
RainTomorrow     False
dtype: bool

In [6]:
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how='any')

In [7]:
# Re-check every column for missing values after the incomplete rows were dropped.
df.isna().any()

MinTemp          False
MaxTemp          False
Rainfall         False
Evaporation      False
Sunshine         False
WindGustSpeed    False
WindSpeed9am     False
WindSpeed3pm     False
Humidity9am      False
Humidity3pm      False
Pressure9am      False
Pressure3pm      False
Cloud9am         False
Cloud3pm         False
Temp9am          False
Temp3pm          False
RainToday        False
RainTomorrow     False
dtype: bool

In [8]:
# List the column names available in the frame.
df.columns

Index(['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
       'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am',
       'Humidity3pm', 'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm',
       'Temp9am', 'Temp3pm', 'RainToday', 'RainTomorrow'],
      dtype='object')

In [9]:
# Inspect the distinct values present in the RainTomorrow column.
df['RainTomorrow'].unique()

array([2, 1, 3])

In [10]:
# Feature columns used as model inputs: every column of the frame except
# 'RainTomorrow', which is the value being predicted.
x_cols = [
    # temperature, rainfall and sun exposure
    'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
    # wind
    'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm',
    # humidity and pressure
    'Humidity9am', 'Humidity3pm', 'Pressure9am', 'Pressure3pm',
    # cloud cover and temperature at fixed times of day
    'Cloud9am', 'Cloud3pm', 'Temp9am', 'Temp3pm',
    # whether it rained on the current day
    'RainToday',
]

In [11]:
# Separate the prediction target from the model inputs.
# The target is selected with a one-element list so it stays a
# single-column DataFrame rather than becoming a Series.
y = df.loc[:, ['RainTomorrow']]

x = df.loc[:, x_cols]

In [12]:
# Display the feature frame (pandas truncates the rendering of long frames).
x

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday
0,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2
1,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2
2,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2
3,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2
4,17.9,35.2,0.0,12.0,12.3,48,6.0,20.0,20,13.0,1006.3,1004.4,2,5,26.6,33.4,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351582,20.2,31.7,0.0,5.6,10.7,30,15.0,7.0,73,32.0,1013.9,1009.7,6,5,25.4,31.0,2
351583,20.2,31.7,0.0,5.6,10.7,30,15.0,7.0,73,32.0,1013.9,1009.7,6,5,25.4,31.0,2
351584,20.2,31.7,0.0,5.6,10.7,30,15.0,7.0,73,32.0,1013.9,1009.7,6,5,25.4,31.0,2
351585,20.2,31.7,0.0,5.6,10.7,30,15.0,7.0,73,32.0,1013.9,1009.7,6,5,25.4,31.0,2


In [13]:
# Confirm the feature frame dimensions as (rows, columns).
x.shape

(349416, 17)

In [14]:
# Display the single-column target frame.
y

Unnamed: 0,RainTomorrow
0,2
1,2
2,2
3,2
4,2
...,...
351582,2
351583,2
351584,2
351585,2


In [15]:
# Rescale each feature column with min-max scaling; `x` is rebound to the
# transformed values returned by the scaler.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed later in the notebook, so statistics of the
# test rows leak into the scaling. Consider fitting on the training rows only.
scaler = MinMaxScaler()
scaler.fit(x)

x = scaler.transform(x)

In [16]:
# Wrap the scaled feature values in a DataFrame (default integer column labels).
x_ = pd.DataFrame(data=x)

In [17]:
# Convert the scaled features into a float32 tensor for the network.
X = torch.tensor(x_.to_numpy(), dtype=torch.float32)

In [18]:
# Convert the single-column target frame into a float32 tensor.
# NOTE(review): RainTomorrow holds the labels 1, 2 and 3 and is kept here as a
# float column vector, while the model is trained with nn.CrossEntropyLoss,
# which expects either integer class indices in [0, C-1] or per-class
# probabilities shaped like the logits. Confirm that the training loop
# converts these targets accordingly.
y = torch.tensor(y.to_numpy(), dtype=torch.float32)

In [19]:
# Hold out 30% of the rows for evaluation and train on the remaining 70%.
# The rows are shuffled before splitting; random_state pins that shuffle so a
# fresh "Restart & Run All" produces the same partition (and comparable
# metrics) every time instead of a different split on each run.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, shuffle=True, random_state=42
)

In [20]:
class Multiclass(nn.Module):
    """Feed-forward classifier with a single ReLU hidden layer.

    The network maps a batch of feature vectors to one raw score (logit) per
    class. No softmax is applied in ``forward``, so the output is suitable for
    losses that operate on logits.

    Args:
        n_features: Size of each input feature vector. Defaults to 17, the
            number of feature columns used by this notebook.
        n_hidden: Number of units in the hidden layer. Defaults to 34.
        n_classes: Number of output classes. Defaults to 3.
    """

    def __init__(self, n_features=17, n_hidden=34, n_classes=3):
        super().__init__()
        # Attribute names are kept stable because they determine the keys of
        # the module's state_dict.
        self.hidden = nn.Linear(n_features, n_hidden)
        self.act = nn.ReLU()
        self.output = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Return the class logits for ``x``.

        Args:
            x: Tensor whose last dimension has ``n_features`` entries.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``n_classes`` unnormalised scores.
        """
        hidden_activation = self.act(self.hidden(x))
        return self.output(hidden_activation)

In [21]:
# Instantiate the network together with its training objective and optimiser.
model = Multiclass()

# Cross-entropy is applied to the raw logits returned by the model.
loss_fn = nn.CrossEntropyLoss()

# Adam updates all of the model's parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [22]:
# Training hyper-parameters.
n_epochs = 200
batch_size = 5
# Number of full mini-batches that fit into the training set. The previous
# value, len(x_train), was the number of samples, i.e. batch_size times too
# many batches per epoch. Floor division keeps every batch full; at most
# batch_size - 1 trailing samples are left out of an epoch.
batches_per_epoch = len(x_train) // batch_size

# Best accuracy seen so far and the matching model weights. They start at
# "worse than anything" / "nothing saved"; presumably the training loop,
# which is not visible in this section, updates them.
best_acc = -np.inf
best_weights = None
# Loss and accuracy histories for the training and test sets; they start
# empty and are meant to be appended to during training.
train_loss_hist = []
train_acc_hist = []
test_loss_hist = []
test_acc_hist = []