# Melting Point Prediction with TPU

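This notebook is designed to run on **Kaggle** or **Google Colab** with a TPU accelerator. If no TPU is detected, it falls back to TensorFlow's default distribution strategy, so it also runs on CPU/GPU.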

## 1. TPU Initialization

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Probe for a TPU runtime. A ValueError from the resolver is treated as
# "no TPU here" (presumably what TensorFlow raises when none is attached),
# in which case `tpu` is left as None.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

# Pick the distribution strategy: the default one without a TPU, otherwise
# connect to the TPU cluster, initialise it, and wrap it in a TPUStrategy.
if not tpu:
    strategy = tf.distribute.get_strategy()
else:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)

# The replica count is reused later to scale the training batch size.
print("REPLICAS: ", strategy.num_replicas_in_sync)

## 2. Load and Preprocess Data

In [None]:
# Load data (adjust paths if needed for Kaggle/Colab)
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
submission_df = pd.read_csv('sample_submission.csv')

# Features and Target: every training column except the identifier, the
# SMILES string and the target itself is used as a model input.
features = [c for c in train_df.columns if c not in ['id', 'SMILES', 'Tm']]
target = 'Tm'

X = train_df[features].values
y = train_df[target].values
X_test = test_df[features].values

# Split BEFORE scaling so the validation rows stay truly held out.
# test_size and random_state are unchanged, and the input has the same
# number of rows, so the same rows land in each split as before.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling is critical for Neural Networks. Fit the scaler on the training
# split only: fitting on all of X would leak validation statistics into the
# features and make the validation MAE (and early stopping) optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test_scaled = scaler.transform(X_test)

# Full training matrix in the same scaled space, kept so that any later cell
# relying on `X_scaled` keeps working.
X_scaled = scaler.transform(X)

## 3. Build Model (Deep Neural Network)

In [None]:
def build_model(input_dim, learning_rate=0.001):
    """Build and compile the fully connected regression network.

    The network is three hidden stages of Dense(relu) -> BatchNormalization
    -> Dropout with widths 512/256/128 and dropout rates 0.3/0.3/0.2,
    followed by a single linear output unit.

    Args:
        input_dim: Number of input features per sample.
        learning_rate: Step size for the Adam optimizer. Defaults to 0.001.

    Returns:
        A compiled ``tf.keras.Sequential`` model using MAE as both the loss
        and the reported metric.
    """
    # (units, dropout rate) for each hidden stage, in order.
    hidden_stages = ((512, 0.3), (256, 0.3), (128, 0.2))

    # Declare the input shape with an explicit Input object instead of the
    # deprecated `input_dim=` keyword on the first Dense layer.
    layers = [tf.keras.Input(shape=(input_dim,))]
    for units, drop_rate in hidden_stages:
        layers.append(tf.keras.layers.Dense(units, activation='relu'))
        layers.append(tf.keras.layers.BatchNormalization())
        layers.append(tf.keras.layers.Dropout(drop_rate))
    layers.append(tf.keras.layers.Dense(1))  # Regression output

    model = tf.keras.Sequential(layers)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mae',  # Mean Absolute Error as per competition metric
        # Also tracked as a metric so that 'mae' / 'val_mae' appear in the
        # training logs under those names.
        metrics=['mae']
    )
    return model

# Build and compile the model inside the distribution strategy's scope;
# the input width is the number of feature columns in the training matrix.
n_features = X_train.shape[1]
with strategy.scope():
    model = build_model(n_features)

# Print the layer-by-layer overview.
model.summary()

## 4. Train

In [None]:
# Scale batch size with TPU replicas: 64 samples per replica.
global_batch_size = 64 * strategy.num_replicas_in_sync

# Stop once validation MAE has not improved for 20 epochs and roll the model
# back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_mae', patience=20, restore_best_weights=True, verbose=1
)

# Options for the training run, collected in one place.
fit_options = dict(
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=global_batch_size,
    callbacks=[early_stopping],
    verbose=1,
)

history = model.fit(X_train, y_train, **fit_options)

## 5. Evaluation

In [None]:
# Plot the per-epoch training and validation loss curves (the loss is MAE).
curves = (('loss', 'Train MAE'), ('val_loss', 'Val MAE'))
for history_key, curve_label in curves:
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Model Training History')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend()
plt.show()

# Recompute the validation MAE directly from the model's predictions.
val_preds = model.predict(X_val).flatten()
abs_errors = np.abs(y_val - val_preds)
val_mae = np.mean(abs_errors)
print(f"Final Validation MAE: {val_mae:.4f}")

## 6. Submission

In [None]:
# Predict on the scaled test features and flatten to a 1-D array.
raw_test_output = model.predict(X_test_scaled)
test_preds = raw_test_output.flatten()

# Write the predictions into the sample submission's 'Tm' column.
# NOTE(review): assumes sample_submission.csv rows are in the same order as
# test.csv - confirm.
submission_df['Tm'] = test_preds
submission_df.to_csv('submission_tpu.csv', index=False)
print("Saved submission_tpu.csv")