In [1]:
#import skl;ear,pandas,pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import pickle


In [None]:
#load dataset
data = pd.read_csv('C:\work\learnai\ANN\Churn_Modelling.csv')

#drop irrelevant columns such as rownumber,customerid,surname
data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)


In [3]:
#label encode gender
gender_encoder = LabelEncoder()
data['Gender'] = gender_encoder.fit_transform(data['Gender'])

#onehot encode geography
geo_encoder = OneHotEncoder()
geo_encoded = geo_encoder.fit_transform(data[['Geography']])
geo_df = pd.DataFrame(geo_encoded.toarray(), columns=geo_encoder.get_feature_names_out(['Geography']))
data = pd.concat([data, geo_df], axis=1)
data.drop(['Geography'], axis=1, inplace=True)


In [4]:
#Split data into features and target(estimatedsalary)
X = data.drop('EstimatedSalary', axis=1)
y = data['EstimatedSalary']


In [8]:
from sklearn.model_selection import train_test_split
#split the dataset into training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [9]:
#feature scaling
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)


In [10]:
#save scaler into pickle format
scaler_file = 'scaler.pkl'
pickle.dump(sc, open(scaler_file, 'wb'))

#save encoder into pickle format
gender_encoder_file = 'gender_encoder.pkl'
pickle.dump(gender_encoder, open(gender_encoder_file, 'wb'))
geo_encoder_file = 'geo_encoder.pkl'
pickle.dump(geo_encoder, open(geo_encoder_file, 'wb'))


In [11]:
#now train model using ANN regression
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import datetime


In [None]:
#build ANN model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mean_absolute_error', optimizer='adam',metrics=['mae'])


In [None]:
model.summary()

In [14]:
#setupo tensorboard
log_dir = "regression_logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
callbacks = [tf.keras.callbacks.TensorBoard(log_dir=log_dir)]

#setup early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5,restore_best_weights=True)
callbacks.append(early_stopping)


In [None]:
#train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=callbacks)


In [16]:
%load_ext tensorboard

In [None]:
%tensorboard --logdir regression_logs/fit

In [19]:
_, mae = model.evaluate(X_test, y_test)
print(f"Mean Absolute Error: {mae}")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 50098.8477 - mae: 50098.8477  
Mean Absolute Error: 49909.6953125


In [20]:
#save the model
model.save('regression_model.h5')


