# Random Forest Prediction 


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.metrics import confusion_matrix

In [16]:
# read in dataset with date column parsed
df = pd.read_csv('cleanedWeatherAUS.csv',
    parse_dates=['Date'],
    index_col='Date')

# drop samples with missing target values
df = df[df['RainTomorrow'].notna()]

# impute missing feature values
imputer = SimpleImputer(strategy='mean')
imputed_df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns, index=df.index)


In [17]:
# split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(imputed_df.drop('RainTomorrow', axis=1), imputed_df['RainTomorrow'], test_size=0.2, random_state=42)


In [18]:
# Initialize the random forest classifier with hyperparameters
rfc = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

# Train the random forest classifier on the training data
rfc.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = rfc.predict(X_test)


# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy Score:', accuracy)

# Calculate the confusion matrix
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:', cm)


Accuracy Score: 0.8176818369311151
Confusion Matrix: [[21416   596     0]
 [ 4198  2219     3]
 [  454    53   153]]


In [24]:
# Calculate the precision, recall, and F1 score
from sklearn.metrics import precision_recall_fscore_support
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average="weighted")
print('Precision:', precision)
print('Recall:', recall)
print('F1 Score:', f1)

# Micro averaging for multiclass classification
from sklearn.metrics import roc_auc_score
y_prob = rfc.predict_proba(X_test)
auc = roc_auc_score(pd.get_dummies(y_test).values.ravel(), y_prob.ravel())
print('AUC-ROC Score (micro):', auc)

Precision: 0.8146000343951012
Recall: 0.8176818369311151
F1 Score: 0.7879990680372982
AUC-ROC Score (micro): 0.9432782642978333
