In [None]:
import pandas as pd
import numpy as np
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Input, Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dense, Add

In [None]:
# Load the daily city temperatures and narrow them to the modelling subset in
# one pass: US rows only, the six columns used downstream, every October day
# plus September 30 (the lag day for October 1) from 2007 onward, and without
# the states/regions named in the last filter.
data = (
    pd.read_csv('city_temperature.csv', low_memory=False)
    .loc[lambda df: df['Country'] == 'US',
         ['State', 'City', 'Month', 'Day', 'Year', 'AvgTemperature']]
    .loc[lambda df: (df['Year'] >= 2007)
         & ((df['Month'] == 10) | ((df['Month'] == 9) & (df['Day'] == 30)))]
    .loc[lambda df: ~df['State'].isin(
        ['Alaska', 'Hawaii', 'Additional Territories', 'District of Columbia'])]
    .reset_index(drop=True)
)

### Train: 2007-2012 (6 yrs)
### Validation: 2013-2016 (4 yrs)
### Test: 2017-2019 (3 yrs)

In [None]:
# Coordinate lookup table; it is joined onto the temperature rows by
# (State, City) in the next cell and supplies the Latitude/Longitude columns.
lat_lon = pd.read_csv('all_cities_lat_lon.csv')

In [None]:
# Remove exact duplicate temperature rows, then attach coordinates by
# (State, City). merge() defaults to an inner join, so any city that has no
# entry in lat_lon is dropped here.
# NOTE(review): assumes lat_lon has at most one row per (State, City); a
# duplicate key there would multiply rows — confirm.
final = data.drop_duplicates().merge(lat_lon, on = ['State', 'City'])

In [None]:
# List the rows whose AvgTemperature equals -99 (presumably the dataset's
# missing-reading sentinel — confirm against the data documentation).
final[final['AvgTemperature'] == -99]

Unnamed: 0,State,City,Month,Day,Year,AvgTemperature,Latitude,Longitude
513,Arizona,Phoenix,10,1,2010,-99.0,33.448376,-112.074036
514,Arizona,Phoenix,10,2,2010,-99.0,33.448376,-112.074036
545,Arizona,Phoenix,10,1,2011,-99.0,33.448376,-112.074036
577,Arizona,Phoenix,10,1,2012,-99.0,33.448376,-112.074036
673,Arizona,Phoenix,10,1,2015,-99.0,33.448376,-112.074036
2383,Connecticut,Bridgeport,10,15,2016,-99.0,41.18639,-73.195557
2384,Connecticut,Bridgeport,10,16,2016,-99.0,41.18639,-73.195557
2385,Connecticut,Bridgeport,10,17,2016,-99.0,41.18639,-73.195557
2386,Connecticut,Bridgeport,10,18,2016,-99.0,41.18639,-73.195557
15589,South Dakota,Rapid City,10,5,2013,-99.0,44.080544,-103.231018


In [None]:
# Drop every state that has at least one -99 reading (the value inspected in the
# previous cell, presumably a missing-data sentinel) so that every remaining day
# carries a complete set of valid temperatures. The list of states is derived
# from the data itself instead of being hard-coded from a manual inspection, so
# the filter stays correct if the input file changes; re-running the cell is a
# no-op once the offending states are gone.
final = final[
    ~final['State'].isin(final.loc[final['AvgTemperature'] == -99, 'State'].unique())
].reset_index(drop = True)

In [None]:
### Convert the masked temperature to ZERO

def generate_data(year_min, year_max, frame=None):
    """Build masked-token training samples for every October day in a year range.

    For each day, one sample is produced per row (city) of that day. A sample is
    a sequence of ``2 * nrow`` tokens: the ``nrow`` same-day temperatures (with
    the target row's temperature replaced by 0) followed by the ``nrow``
    temperatures of the previous day (September 30 for October 1).

    Parameters
    ----------
    year_min, year_max : int
        Inclusive range of years to generate samples for.
    frame : pandas.DataFrame, optional
        Source rows with columns ``Month``, ``Day``, ``Year``,
        ``AvgTemperature``, ``Latitude`` and ``Longitude``. Defaults to the
        notebook-level ``final`` DataFrame, which keeps existing calls working.

    Returns
    -------
    tuple of numpy.ndarray
        ``(masked_temps, masked_bools, masked_idxs, lag_inds, coords,
        target_temps)`` where, per sample, ``masked_temps`` holds the token
        temperatures, ``masked_bools`` is 1 at the masked position and 0
        elsewhere, ``masked_idxs`` is the masked position, ``lag_inds`` is 0 for
        same-day tokens and 1 for previous-day tokens, ``coords`` holds the
        (Latitude, Longitude) of each token, and ``target_temps`` is the
        temperature that was masked out.

    Raises
    ------
    ValueError
        If a day and its lag day have a different number of rows. The masks,
        lag indicators and coordinates are all built as ``2 * nrow`` long, so a
        mismatch would otherwise yield silently inconsistent sequences.

    Notes
    -----
    The previous-day rows are assumed to be in the same city order as the
    same-day rows (the same-day coordinates are reused for the lag tokens);
    only the row count is verified here.
    """
    if frame is None:
        frame = final  # notebook-level merged DataFrame

    masked_temps = []
    masked_bools = []
    masked_idxs = []
    lag_inds = []
    coords = []
    target_temps = []

    # Split once by month so the per-day filters below scan fewer rows.
    october = frame[frame['Month'] == 10]
    sept_30 = frame[(frame['Month'] == 9) & (frame['Day'] == 30)]

    for year in range(year_min, year_max + 1):
        for day in range(1, 32):

            temp = october[(october['Day'] == day) & (october['Year'] == year)].reset_index(drop = True)
            nrow = temp.shape[0]
            if nrow == 0:
                # No observations for this day: nothing to mask.
                continue

            if day == 1:
                temp2 = sept_30[sept_30['Year'] == year].reset_index(drop = True)
            else:
                temp2 = october[(october['Day'] == day - 1) & (october['Year'] == year)].reset_index(drop = True)

            if temp2.shape[0] != nrow:
                raise ValueError(
                    f"Day {day} of October {year} has {nrow} rows but its lag day has "
                    f"{temp2.shape[0]}; every day needs the same set of cities."
                )

            # Everything below is identical for all samples of this day, so it is
            # computed once instead of once per masked row.
            current_temps = list(temp['AvgTemperature'])
            lagged_temps = list(temp2['AvgTemperature'])
            day_coords = np.tile(np.array(temp[['Latitude', 'Longitude']]), (2, 1))
            lag_ind = [0] * nrow + [1] * nrow

            for i in range(nrow):
                masked_temp = current_temps.copy()
                target_temps.append(masked_temp[i])
                masked_temp[i] = 0  # the masked temperature is represented as zero
                masked_temps.append(masked_temp + lagged_temps)
                masked_idxs.append(i)
                masked_bool = [0] * (2 * nrow)
                masked_bool[i] = 1
                masked_bools.append(masked_bool)
                # Sharing these objects between samples is safe: np.array()
                # below copies them into the stacked result.
                lag_inds.append(lag_ind)
                coords.append(day_coords)

    return np.array(masked_temps), np.array(masked_bools), np.array(masked_idxs), \
        np.array(lag_inds), np.array(coords), np.array(target_temps)

In [None]:
# Training split: Octobers of 2007-2012.
(
    train_masked_temps,
    train_masked_bools,
    train_masked_idxs,
    train_lag_inds,
    train_coords,
    train_target_temps,
) = generate_data(2007, 2012)

In [None]:
# Validation split: Octobers of 2013-2016.
(
    val_masked_temps,
    val_masked_bools,
    val_masked_idxs,
    val_lag_inds,
    val_coords,
    val_target_temps,
) = generate_data(2013, 2016)

In [None]:
# Test split: Octobers of 2017-2019.
(
    test_masked_temps,
    test_masked_bools,
    test_masked_idxs,
    test_lag_inds,
    test_coords,
    test_target_temps,
) = generate_data(2017, 2019)

In [None]:
# Sanity check: (number of test samples, tokens per sample). The second value
# is twice the number of rows per day (same-day tokens + previous-day tokens).
test_lag_inds.shape

(4092, 88)

In [None]:

# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable under "Restart Kernel -> Run All".
tf.keras.utils.set_random_seed(42)

embedding_size = 32

# Tokens per sample (same-day tokens followed by previous-day tokens). Read it
# from the generated arrays rather than from 2 * number of states: the state
# count only matches when every state contributes exactly one city.
sequence_length = train_masked_temps.shape[1]

# One model input per array returned by generate_data.
masked_temp_input = layers.Input(shape=(sequence_length,), dtype=tf.float32, name="masked_temp_input")
masked_bool_input = layers.Input(shape=(sequence_length,), dtype=tf.float32, name="masked_bool_input")
masked_lag_input = layers.Input(shape=(sequence_length,), dtype=tf.float32, name="masked_lag_input")
masked_idx_input = layers.Input(shape=(1,), dtype=tf.int32, name="masked_idx_input")
coords_input = layers.Input(shape=(sequence_length, 2,), dtype=tf.float32, name="coords_input")

### Step 1: Masked temp input, convert to 32-dim embedding

class ExpandInput(layers.Layer):
    """Lift every scalar token value to an embedding vector.

    A trailing axis of size 1 is added to the input so that each scalar becomes
    a one-feature vector, which is then passed through two position-wise dense
    layers (ReLU hidden layer, linear output layer).

    Parameters
    ----------
    hidden_units : int, default 128
        Width of the hidden dense layer.
    output_units : int, optional
        Width of the output embedding. Defaults to the notebook-level
        ``embedding_size``, which is what the original zero-argument
        constructor used.
    **kwargs
        Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...). Accepting
        them, together with ``get_config``, lets a saved model that contains
        this layer be reloaded.
    """

    def __init__(self, hidden_units=128, output_units=None, **kwargs):
        super().__init__(**kwargs)
        self.hidden_units = hidden_units
        self.output_units = embedding_size if output_units is None else output_units
        self.dense_1 = layers.TimeDistributed(
            layers.Dense(self.hidden_units, activation='relu'), name="masked_dense_1"
        )
        self.dense_2 = layers.TimeDistributed(
            layers.Dense(self.output_units, activation='linear'), name="masked_dense_2"
        )

    def call(self, inputs):
        # Add a trailing feature axis so each scalar is a length-1 vector.
        masked_temp_input = tf.expand_dims(inputs, axis=-1)
        masked_temp_hidden = self.dense_1(masked_temp_input)
        masked_temp_final = self.dense_2(masked_temp_hidden)
        return masked_temp_final

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = super().get_config()
        config.update({"hidden_units": self.hidden_units, "output_units": self.output_units})
        return config

# Embed every token temperature (the masked one was set to 0 in generate_data).
masked_temp_final = ExpandInput()(masked_temp_input)

### Step 2: Masked bool embedding
# Two-row lookup table indexed by the 0/1 mask flag.
# NOTE(review): masked_bool_input is declared float32 while Embedding looks up
# integer indices — this relies on Keras casting the flags; confirm.
masked_bool_final = layers.Embedding(input_dim=2, output_dim=embedding_size)(masked_bool_input)

### Step 3: Weighted embeddings
class WeightedEmbeddings(layers.Layer):
    """Blend two per-token embeddings using a per-token flag.

    ``inputs`` is a 3-item sequence: the temperature embeddings, the mask-flag
    embeddings, and the raw flags. The flags get a trailing axis so they
    broadcast over the embedding dimension; the result is
    ``temperature * (1 - flag) + mask * flag``.
    """

    def call(self, inputs):
        temp_embed, flag_embed, flags = inputs
        gate = tf.expand_dims(flags, axis=-1)
        keep = 1 - gate
        return temp_embed * keep + flag_embed * gate

# With 0/1 flags, the masked position takes the mask-flag embedding and every
# other position keeps its temperature embedding.
weighted_embeddings = WeightedEmbeddings()([masked_temp_final, masked_bool_final, masked_bool_input])

### Step 4: Coordinates, convert to 32-dim embedding

class ExpandCoord(layers.Layer):
    """Map each token's coordinate vector to an embedding vector.

    The input is passed through two position-wise dense layers (ReLU hidden
    layer, linear output layer). In this notebook the input holds a
    (Latitude, Longitude) pair per token.

    Parameters
    ----------
    hidden_units : int, default 128
        Width of the hidden dense layer.
    output_units : int, optional
        Width of the output embedding. Defaults to the notebook-level
        ``embedding_size``, which is what the original zero-argument
        constructor used.
    **kwargs
        Standard Keras ``Layer`` arguments (``name``, ``dtype``, ...). Accepting
        them, together with ``get_config``, lets a saved model that contains
        this layer be reloaded.
    """

    def __init__(self, hidden_units=128, output_units=None, **kwargs):
        super().__init__(**kwargs)
        self.hidden_units = hidden_units
        self.output_units = embedding_size if output_units is None else output_units
        self.dense_1 = layers.TimeDistributed(
            layers.Dense(self.hidden_units, activation='relu'), name="coord_dense_1"
        )
        self.dense_2 = layers.TimeDistributed(
            layers.Dense(self.output_units, activation='linear'), name="coord_dense_2"
        )

    def call(self, inputs):
        coords_hidden = self.dense_1(inputs)
        coords_final = self.dense_2(coords_hidden)
        return coords_final

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = super().get_config()
        config.update({"hidden_units": self.hidden_units, "output_units": self.output_units})
        return config

# Embed the (Latitude, Longitude) pair of every token.
coords_final = ExpandCoord()(coords_input)

### Step 5: Masked lag embedding
# Two-row lookup table indexed by the lag indicator (0 = same day, 1 = previous day).
# NOTE(review): masked_lag_input is declared float32 while Embedding looks up
# integer indices — this relies on Keras casting the indicator; confirm.
masked_lag_final = layers.Embedding(input_dim=2, output_dim=embedding_size)(masked_lag_input)

### Step 6: Combined embeddings and apply dense

# Join the three per-token embeddings (Concatenate's default axis is the last
# one), then project the result down to a 32-dim token representation.
combined_embedding = layers.Concatenate()([weighted_embeddings, coords_final, masked_lag_final])
combined_embedding = layers.Dense(64, activation='relu')(combined_embedding)
combined_embedding = layers.Dense(32, activation='linear')(combined_embedding)

### Step 7: Apply multi-head attention with dense and layer normalization
def apply_attention_layers(embedding_input, num_layers, num_heads=2, key_dim=16, ff_dim=64):
    """Stack `num_layers` self-attention blocks on top of `embedding_input`.

    Each block applies multi-head self-attention, then a two-layer feed-forward
    network (ReLU of width `ff_dim`, then a linear layer back to the input's
    last dimension). Both sub-blocks are wrapped in a residual addition
    followed by layer normalization. Returns the output of the last block.
    """

    def add_and_norm(skip, branch, norm_name):
        # Residual connection first, normalization second.
        summed = Add()([skip, branch])
        return LayerNormalization(name=norm_name)(summed)

    hidden = embedding_input
    for block_no in range(1, num_layers + 1):
        # --- self-attention sub-block ---
        self_attention = MultiHeadAttention(
            num_heads=num_heads, key_dim=key_dim, name=f"multi_head_attention_{block_no}"
        )
        attended = self_attention(query=hidden, value=hidden, key=hidden)
        hidden = add_and_norm(hidden, attended, f"layer_norm_attn_{block_no}")

        # --- position-wise feed-forward sub-block ---
        widened = Dense(ff_dim, activation='relu', name=f"dense_ff_{block_no}")(hidden)
        projected = Dense(
            hidden.shape[-1], activation='linear', name=f"dense_ff_linear_{block_no}"
        )(widened)
        hidden = add_and_norm(hidden, projected, f"layer_norm_ff_{block_no}")

    return hidden

# Four stacked self-attention blocks let every token attend to all same-day and
# previous-day tokens of its sample.
context_embedding = apply_attention_layers(combined_embedding, num_layers=4)

### Step 8: Take embedding corresponding to the masked token

class GatherLayer(layers.Layer):
    """Select one position per sample from a batch of token sequences.

    ``inputs`` is a 2-item sequence: the sequence tensor and an index tensor
    whose last axis is squeezed away before gathering with ``batch_dims=1``,
    so each sample uses its own index.
    """

    def call(self, inputs):
        sequence, positions = inputs
        flat_positions = tf.squeeze(positions, axis=-1)
        return tf.gather(sequence, indices=flat_positions, batch_dims=1)

# Keep only the contextual embedding at each sample's masked position.
extracted_embeddings = GatherLayer()([context_embedding, masked_idx_input])

### Step 9: Predict the temperature

# Small regression head: 128 -> 16 -> 1, with a linear output for the temperature.
hidden_layer = layers.Dense(128, activation='relu')(extracted_embeddings)
hidden_layer = layers.Dense(16, activation='relu')(hidden_layer)
output_layer_exp = layers.Dense(1, activation='linear')(hidden_layer)

# The input order here must match the order of the arrays passed to fit() and evaluate().
model = Model(inputs=[masked_temp_input, masked_bool_input, masked_idx_input, masked_lag_input, coords_input],
              outputs=output_layer_exp)

# Plain regression objective: mean squared error, optimized with Adam.
model.compile(optimizer=Adam(learning_rate = 1e-3), loss='mean_squared_error')

# Stop once val_loss has gone `patience` consecutive epochs without improving,
# then roll back to the best weights seen. Patience has to be well below the
# epoch budget: with patience equal to epochs (200 / 200) the stop condition
# could never be reached and training always ran to the last epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True
)

# Fit the model with validation data and early stopping
history = model.fit(
    [train_masked_temps, train_masked_bools, train_masked_idxs, train_lag_inds, train_coords],
    train_target_temps,
    validation_data=([val_masked_temps, val_masked_bools, val_masked_idxs, val_lag_inds, val_coords], val_target_temps),
    epochs=200,
    batch_size=128,
    callbacks=[early_stopping]
)

Epoch 1/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 1s/step - loss: 2722.6147 - val_loss: 823.7673
Epoch 2/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 30ms/step - loss: 326.3911 - val_loss: 99.8120
Epoch 3/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - loss: 101.2029 - val_loss: 55.6138
Epoch 4/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 58.6472 - val_loss: 46.1459
Epoch 5/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - loss: 41.5299 - val_loss: 36.7678
Epoch 6/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step - loss: 39.3908 - val_loss: 28.9146
Epoch 7/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 34.8230 - val_loss: 28.5028
Epoch 8/200
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 32.0425 - val_loss: 24.2700
Epoch 9/200
[1m64

In [None]:
# Score the trained model on the held-out 2017-2019 samples. The model was
# compiled with a loss and no extra metrics, so the result is the mean squared error.
test_loss = model.evaluate([test_masked_temps, test_masked_bools, test_masked_idxs, test_lag_inds, test_coords], test_target_temps)

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 127ms/step - loss: 7.9170


In [None]:
# Test-set mean squared error, in squared temperature units (take the square root for RMSE).
test_loss

9.247624397277832