In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical


In [3]:
# Numeric weather measurements used as model inputs.
features = ['MinTemp', 'MaxTemp', 'Rainfall', 'Humidity9am', 'Humidity3pm', 'Pressure9am', 'Pressure3pm', 'Temp9am', 'Temp3pm']

# Read the CSV, keep only rows with a known target, encode the target
# (Yes -> 1, No -> 0), then discard rows missing any input feature.
df = (
    pd.read_csv('weather_data_09_to_16.csv')
    .dropna(subset=['RainTomorrow'])
    .assign(RainTomorrow=lambda frame: frame['RainTomorrow'].map({'No': 0, 'Yes': 1}))
    .dropna(subset=features)
)

# Feature matrix and binary target for the single-timestep experiment.
X = df[features]
y = df['RainTomorrow']

In [5]:
# Split the raw rows FIRST so the held-out rows never influence the scaler.
# Same test_size and random_state as before, applied to the same number of rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the min/max on the training rows only, then reuse those statistics for
# the test rows (test values may therefore fall slightly outside [0, 1]).
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Reshape to [samples, time steps, features]; here using 1 timestep
n_features = X_train_scaled.shape[1]
X_train = X_train_scaled.reshape((-1, 1, n_features))
X_test = X_test_scaled.reshape((-1, 1, n_features))

# Full-dataset arrays are still exposed under their original names, now
# scaled with the training-only statistics.
X_scaled = scaler.transform(X)
X_reshaped = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense

# Stacked-LSTM binary classifier: two LSTM layers with dropout, a small
# dense layer, and a sigmoid output giving the probability of rain.
model = Sequential()

# Declare the input with an explicit Input layer instead of passing
# input_shape to the first LSTM (Keras warns about the latter in Sequential
# models). The (timesteps, features) shape is read from the training data so
# it cannot drift from the preprocessing cell.
model.add(Input(shape=X_train.shape[1:]))

# First LSTM layer: use more units and return full sequence
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.3))

# Second LSTM layer: compress sequence to single vector
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.2))

# Dense layer before output
model.add(Dense(32, activation='relu'))

# Output layer
model.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model summary
model.summary()



  super().__init__(**kwargs)


In [9]:
# Train for 30 epochs in mini-batches of 32; per-epoch metrics are kept in `history`.
# NOTE(review): validation_data is the test split, so the val_* numbers are
# test-set metrics; a separate validation split would keep the test set untouched.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=30,
)


Epoch 1/30
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8012 - loss: 0.4758 - val_accuracy: 0.8398 - val_loss: 0.3826
Epoch 2/30
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8406 - loss: 0.3799 - val_accuracy: 0.8403 - val_loss: 0.3772
Epoch 3/30
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8382 - loss: 0.3843 - val_accuracy: 0.8395 - val_loss: 0.3743
Epoch 4/30
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8406 - loss: 0.3763 - val_accuracy: 0.8410 - val_loss: 0.3738
Epoch 5/30
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8430 - loss: 0.3807 - val_accuracy: 0.8392 - val_loss: 0.3746
Epoch 6/30
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8427 - loss: 0.3769 - val_accuracy: 0.8412 - val_loss: 0.3728
Epoch 7/30
[1m731/731[0m 

In [11]:
# Score the trained model on the held-out split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy:.4f}")


[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 725us/step - accuracy: 0.8372 - loss: 0.3759
Test Accuracy: 0.8424


In [13]:
from sklearn.metrics import confusion_matrix, classification_report

# Sigmoid outputs for the test set, thresholded at 0.5 into hard 0/1 labels.
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype("int32")

# Confusion matrix first, then per-class precision / recall / F1.
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[[4365  211]
 [ 710  558]]
              precision    recall  f1-score   support

           0       0.86      0.95      0.90      4576
           1       0.73      0.44      0.55      1268

    accuracy                           0.84      5844
   macro avg       0.79      0.70      0.73      5844
weighted avg       0.83      0.84      0.83      5844



In [17]:
from sklearn.metrics import roc_auc_score

# Predicted rain probabilities for the test set, flattened to 1-D for scoring.
y_probs = model.predict(X_test).flatten()

# Threshold-independent ranking quality of the predicted probabilities.
auc_score = roc_auc_score(y_test, y_probs)
print(f"AUC: {auc_score:.4f}")

[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 689us/step
AUC: 0.8557


In [None]:
# Experiment 2: 7-day input window (each sample is a sequence of 7 timesteps)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Feature columns
features = ['MinTemp', 'MaxTemp', 'Rainfall', 'Humidity9am', 'Humidity3pm',
            'Pressure9am', 'Pressure3pm', 'Temp9am', 'Temp3pm']

# Load data, keep rows with a known target, encode it (Yes -> 1, No -> 0),
# drop rows missing any feature, and renumber the surviving rows 0..n-1 so
# positional windowing lines up with the index.
df = (
    pd.read_csv('weather_data_09_to_16.csv')
    .dropna(subset=['RainTomorrow'])
    .assign(RainTomorrow=lambda frame: frame['RainTomorrow'].map({'No': 0, 'Yes': 1}))
    .dropna(subset=features)
    .reset_index(drop=True)
)


In [None]:
# Learn each feature's min/max, then rescale every column to [0, 1].
# NOTE(review): the scaler is fitted on all rows, including rows that later
# end up in the test split — consider fitting on training rows only.
scaler = MinMaxScaler()
scaler.fit(df[features])
X_scaled = scaler.transform(df[features])


In [None]:
sequence_length = 7  # 7 days input

# Fail early with a clear message instead of silently producing empty arrays.
if len(X_scaled) < sequence_length:
    raise ValueError(
        f"Need at least {sequence_length} rows to build a window, got {len(X_scaled)}"
    )

# Pull the target out once as an array rather than calling .iloc per window.
targets = df['RainTomorrow'].to_numpy()

# Window i covers rows i .. i + sequence_length - 1 and is labelled with the
# RainTomorrow value of its LAST row. This matches the single-timestep
# experiment, where a row's features are paired with that same row's
# RainTomorrow. (Indexing the label at i + sequence_length would take it from
# the row after the window and also drop the final window.)
# NOTE(review): rows with missing values were dropped earlier, so adjacent
# rows are not guaranteed to be consecutive days — confirm the row ordering.
n_windows = len(X_scaled) - sequence_length + 1
X_seq = np.array([X_scaled[i:i + sequence_length] for i in range(n_windows)])  # shape: (samples, 7, 9)
y_seq = targets[sequence_length - 1:].copy()  # shape: (samples,)


In [None]:
# Consecutive windows overlap in all but one row, so a shuffled split would
# place near-duplicates of every test window in the training set and inflate
# the test score. Keep the original order instead: the first 80% of windows
# train the model and the last 20% are held out.
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)


In [None]:
from tensorflow.keras.layers import Input

# Stacked-LSTM binary classifier over multi-day windows.
model = Sequential()

# Explicit Input layer (Keras warns when input_shape is passed to a layer in
# a Sequential model). The (timesteps, features) shape is read from the
# training windows so it always agrees with the window-building cell.
model.add(Input(shape=X_train.shape[1:]))

# First LSTM layer: returns full sequence to stack
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.3))

# Second LSTM layer: compresses sequence to 64-dim vector
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.2))

# Dense hidden layer before output
model.add(Dense(32, activation='relu'))

# Final output layer for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Model summary
model.summary()


In [None]:
# 30 epochs, mini-batches of 32; per-epoch metrics are stored in `history`.
# NOTE(review): the test split is also used as validation data here.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=30,
)


In [None]:
# Loss and accuracy of the trained model on the held-out windows.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy:.4f}")


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Sigmoid outputs for every test window.
y_pred_prob = model.predict(X_test)

# Threshold at 0.5 and flatten to a 1-D vector of 0/1 labels.
y_pred = (y_pred_prob > 0.5).astype("int32").flatten()

# Confusion matrix.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")

# Per-class precision / recall / F1 with readable class names.
report = classification_report(y_test, y_pred, target_names=['No Rain', 'Rain'])
print("\nClassification Report:", report, sep="\n")


In [None]:
# ===== 1. Import Libraries =====
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


In [None]:
# ===== 2. Load and Preprocess Data =====
# Numeric input features
features = ['MinTemp', 'MaxTemp', 'Rainfall', 'Humidity9am', 'Humidity3pm',
            'Pressure9am', 'Pressure3pm', 'Temp9am', 'Temp3pm']

# Read the CSV and discard rows whose target is missing
df = pd.read_csv('weather_data_09_to_16.csv').dropna(subset=['RainTomorrow'])

# Encode the target as binary: Yes -> 1, No -> 0
df = df.assign(RainTomorrow=df['RainTomorrow'].map({'No': 0, 'Yes': 1}))

# Discard rows missing any feature and renumber the remaining rows from 0
df = df.dropna(subset=features).reset_index(drop=True)


In [None]:
# ===== 3. Normalize Features =====
# Min-max scale every feature column to [0, 1].
# NOTE(review): fitted on all rows, including rows that later land in the
# test split — consider fitting on the training portion only.
scaler = MinMaxScaler()
scaler.fit(df[features])
X_scaled = scaler.transform(df[features])


In [None]:
# ===== 4. Create 14-Day Rolling Sequences =====
sequence_length = 14  # 14 timesteps (days)

# Fail early with a clear message instead of silently producing empty arrays.
if len(X_scaled) < sequence_length:
    raise ValueError(
        f"Need at least {sequence_length} rows to build a window, got {len(X_scaled)}"
    )

# Pull the target out once as an array rather than calling .iloc per window.
targets = df['RainTomorrow'].to_numpy()

# Window i covers rows i .. i + sequence_length - 1 and is labelled with the
# RainTomorrow value of its LAST row, i.e. a row's features are paired with
# that same row's RainTomorrow. (Indexing the label at i + sequence_length
# would take it from the row after the window and drop the final window.)
# NOTE(review): rows with missing values were dropped earlier, so adjacent
# rows are not guaranteed to be consecutive days — confirm the row ordering.
n_windows = len(X_scaled) - sequence_length + 1
X_seq = np.array([X_scaled[i:i + sequence_length] for i in range(n_windows)])  # shape: (samples, 14, 9)
y_seq = targets[sequence_length - 1:].copy()  # shape: (samples,)


In [None]:
# ===== 5. Train/Test Split =====
# Consecutive windows overlap in all but one row, so a shuffled split would
# place near-duplicates of every test window in the training set. Split in
# order instead: first 80% of windows for training, last 20% held out.
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, shuffle=False)


In [None]:
# ===== 6. Build LSTM Model =====
from tensorflow.keras.layers import Input

# Stacked-LSTM binary classifier: LSTM(128) -> LSTM(64) -> Dense(32) -> sigmoid.
model = Sequential()
# Explicit Input layer (Keras warns when input_shape is passed to a layer in
# a Sequential model); the (timesteps, features) shape comes from the
# training windows so it always matches sequence_length.
model.add(Input(shape=X_train.shape[1:]))
model.add(LSTM(128, return_sequences=True))  # full sequence feeds the next LSTM
model.add(Dropout(0.3))
model.add(LSTM(64, return_sequences=False))  # last hidden state only
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # probability of rain

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


In [None]:
# ===== 7. Train the Model =====
# 30 epochs, mini-batches of 32; per-epoch metrics are stored in `history`.
# NOTE(review): the test split is also used as validation data here.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=30,
)


In [None]:
# ===== 8. Evaluate & Report =====
# Loss and accuracy on the held-out windows.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predicted probabilities, thresholded at 0.5 into a flat vector of 0/1 labels
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32").flatten()

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")

# Per-class precision / recall / F1 with readable class names
report = classification_report(y_test, y_pred, target_names=['No Rain', 'Rain'])
print("\nClassification Report:", report, sep="\n")
