# MLP with updated loss function v3.0

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [2]:
# preprocessing
exp_coeff_df = pd.read_csv("may_sep_exp_coeff.csv")
exp_coeff_df = exp_coeff_df.loc[exp_coeff_df['day']<=3]
exp_coeff_df = exp_coeff_df.drop(columns=['normalised_interpolated_facebook_interactions'])
exp_coeff_df = exp_coeff_df.dropna()
exp_coeff_df['text'] = exp_coeff_df['headline'] + ' ' + exp_coeff_df['summary']
exp_coeff_df['/1000_fitted_interpolated_facebook_interactions'] = exp_coeff_df['fitted_interpolated_facebook_interactions']/1000
filter_columns = ['day', 
                  # 'published', 
                  'text', 
                  # 'link', 
                  # 'domain', 
                  'facebook_interactions', 
                  # 'mean_monthly_visits',
                  'hours_since_published', 
                  'text_sentiment', 
                  'text_subjectivity',
                  # 'interpolated_facebook_interactions', 
                  '/1000_normalised_interpolated_facebook_interactions',
                  # '/100_normalised_interpolated_facebook_interactions', 
                  # '/10_normalised_interpolated_facebook_interactions',
                  # 'interpolated_exp_coeff_a', 
                  # 'interpolated_exp_coeff_b', 
                  # 'fitted_interpolated_facebook_interactions',
                  '/1000_fitted_interpolated_facebook_interactions',
                  '/1000_interpolated_exp_coeff_a', 
                  '/1000_interpolated_exp_coeff_b'] 
                  # '/100_interpolated_exp_coeff_a', 
                  # '/100_interpolated_exp_coeff_b', 
                  # '/10_interpolated_exp_coeff_a', 
                  # '/10_interpolated_exp_coeff_b'                  
exp_coeff_df = exp_coeff_df[filter_columns]

In [3]:
from statistics import mean
exp_model_df = exp_coeff_df.groupby(['text'])['hours_since_published', '/1000_interpolated_exp_coeff_a',
'/1000_interpolated_exp_coeff_b', 'text_sentiment','text_subjectivity',
'/1000_fitted_interpolated_facebook_interactions', '/1000_normalised_interpolated_facebook_interactions'].agg(list).reset_index()

exp_model_df['hours_since_published'] = [mean(i) for i in exp_model_df['hours_since_published']]
exp_model_df['text_sentiment'] = [mean(i) for i in exp_model_df['text_sentiment']]
exp_model_df['text_subjectivity'] = [mean(i) for i in exp_model_df['text_subjectivity']]
exp_model_df['/1000_interpolated_exp_coeff_a'] = [mean(i) for i in exp_model_df['/1000_interpolated_exp_coeff_a']]
exp_model_df['/1000_interpolated_exp_coeff_b'] = [mean(i) for i in exp_model_df['/1000_interpolated_exp_coeff_b']]


exp_model_df['day1_actual_interactions'] = [i[0] for i in exp_model_df['/1000_normalised_interpolated_facebook_interactions']]
exp_model_df['day2_actual_interactions'] = [i[1] for i in exp_model_df['/1000_normalised_interpolated_facebook_interactions']]
exp_model_df['day3_actual_interactions'] = [i[2] for i in exp_model_df['/1000_normalised_interpolated_facebook_interactions']]

exp_model_df['day1_fitted_interactions'] = [i[0] for i in exp_model_df['/1000_fitted_interpolated_facebook_interactions']]
exp_model_df['day2_fitted_interactions'] = [i[1] for i in exp_model_df['/1000_fitted_interpolated_facebook_interactions']]
exp_model_df['day3_fitted_interactions'] = [i[2] for i in exp_model_df['/1000_fitted_interpolated_facebook_interactions']]

  exp_model_df = exp_coeff_df.groupby(['text'])['hours_since_published', '/1000_interpolated_exp_coeff_a',


In [4]:
# train-test split
from sklearn.model_selection import train_test_split
from statistics import mean

def get_train_test_df(exp_model_df):

    all_train_links = []
    all_test_links = []
    
    links = exp_model_df['text'].unique()
    train_links, test_links = train_test_split(links, test_size = 0.2, random_state = 42)
    all_train_links.append(train_links)
    all_test_links.append(test_links)

    flat_all_train_links = [item for sublist in all_train_links for item in sublist]
    flat_all_test_links = [item for sublist in all_test_links for item in sublist]
    
    train_df = exp_model_df.loc[exp_model_df['text'].isin(flat_all_train_links)]    
    test_df = exp_model_df.loc[exp_model_df['text'].isin(flat_all_test_links)]

    return train_df, test_df

In [5]:
train_df, test_df = get_train_test_df(exp_model_df)

In [6]:
from sentence_transformers import SentenceTransformer
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
train_df['text_embedding'] = train_df['text'].apply(lambda x: sentence_model.encode(x))
test_df['text_embedding'] = test_df['text'].apply(lambda x: sentence_model.encode(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['text_embedding'] = train_df['text'].apply(lambda x: sentence_model.encode(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['text_embedding'] = test_df['text'].apply(lambda x: sentence_model.encode(x))


In [7]:
# train/test dataframes with text embeddings as columns
merged_train_df = train_df['text_embedding'].apply(pd.Series)
merge_train_columns = train_df.drop(['text_embedding'], axis='columns').columns.values
merged_train_df.columns = merged_train_df.columns.astype(str)
merged_train_df[merge_train_columns] = train_df[merge_train_columns]

merged_test_df = test_df['text_embedding'].apply(pd.Series)
merge_test_columns = test_df.drop(['text_embedding'], axis='columns').columns.values
merged_test_df.columns = merged_test_df.columns.astype(str)
merged_test_df[merge_test_columns] = test_df[merge_test_columns]

In [8]:
# Get train variables
non_feature_cols = ['text', 
                    '/1000_normalised_interpolated_facebook_interactions',
                    '/1000_fitted_interpolated_facebook_interactions',
                    '/1000_interpolated_exp_coeff_a', 
                    '/1000_interpolated_exp_coeff_b']

features_train = merged_train_df.drop(non_feature_cols, axis='columns').values
daily_interactions_train = merged_train_df[['day1_actual_interactions', 'day2_actual_interactions', 'day3_actual_interactions']].values
fitted_interactions_train = merged_train_df[['day1_fitted_interactions', 'day2_fitted_interactions', 'day3_fitted_interactions']].values
# actual_interactions_train = merged_train_df.explode('/1000_normalised_interpolated_facebook_interactions')['/1000_normalised_interpolated_facebook_interactions'].values.astype(float)
# fitted_interactions_train = merged_train_df.explode('/1000_fitted_interpolated_facebook_interactions')['/1000_fitted_interpolated_facebook_interactions'].values.astype(float)
# days_train = [len(i) for i in merged_train_df['/1000_normalised_interpolated_facebook_interactions']]

# Get test variables
features_test = merged_test_df.drop(non_feature_cols, axis='columns').values
daily_interactions_test = merged_test_df[['day1_actual_interactions', 'day2_actual_interactions', 'day3_actual_interactions']].values
fitted_interactions_test = merged_test_df[['day1_fitted_interactions', 'day2_fitted_interactions', 'day3_fitted_interactions']].values
# actual_interactions_test = merged_test_df.explode('/1000_normalised_interpolated_facebook_interactions')['/1000_normalised_interpolated_facebook_interactions'].values.astype(float)
# fitted_interactions_test = merged_test_df.explode('/1000_fitted_interpolated_facebook_interactions')['/1000_fitted_interpolated_facebook_interactions'].values.astype(float)
# days_test = [len(i) for i in merged_test_df['/1000_normalised_interpolated_facebook_interactions']]

In [59]:
# Define MLP Model
class MLPModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLPModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [60]:
# Define Custom Loss Function
def custom_loss(predicted_interactions, actual_interactions, fitted_interactions, a=1):
    mse1 = nn.MSELoss()(predicted_interactions, actual_interactions)
    mse2 = a*nn.MSELoss()(predicted_interactions, fitted_interactions)
    return mse1 + mse2

In [61]:
# Convert the split data to PyTorch tensors
features_train_tensor = torch.FloatTensor(features_train).requires_grad_()
daily_interactions_train_tensor = torch.FloatTensor(daily_interactions_train).requires_grad_()
fitted_interactions_train_tensor = torch.FloatTensor(fitted_interactions_train).requires_grad_()

features_test_tensor = torch.FloatTensor(features_test).requires_grad_()
daily_interactions_test_tensor = torch.FloatTensor(daily_interactions_test).requires_grad_()
fitted_interactions_test_tensor = torch.FloatTensor(fitted_interactions_test).requires_grad_()

In [62]:
# Combine features and coefficients into a single input tensor for training and testing
input_train_tensor = torch.cat((features_train_tensor, daily_interactions_train_tensor), dim=1)
input_test_tensor = torch.cat((features_test_tensor, daily_interactions_test_tensor), dim=1)

In [63]:
# Define the model, loss function, and optimizer
input_size = features_train.shape[1] + daily_interactions_train_tensor.shape[1] 
hidden_size = 64  # Adjust this based on specific needs
output_size = 3 # 3 days

In [64]:
model = MLPModel(input_size, hidden_size, output_size)
criterion = custom_loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Hyperparameter tune to get best `a`

In [65]:
# import itertools
# num_epochs = 100  # Adjust based on your specific needs

# # Define a range of hyperparameter values for 'a'
# a_values = [-0.1, 0, 0.1, 0.5, 1.0, 2.0]

# # Store the best hyperparameters and corresponding MSE
# best_a = None
# best_mse = float('inf')

# # Iterate over hyperparameter values
# for a in a_values:
#     # Define the model, loss function, and optimizer
#     model = MLPModel(input_size, hidden_size, output_size)
#     criterion1 = lambda pred, actual, fitted: custom_loss(pred, actual, fitted, a=a)
#     optimizer = optim.Adam(model.parameters(), lr=0.001)

#     # Training loop
#     for epoch in range(num_epochs):
#         train_predictions = model(input_train_tensor)
#         loss = criterion1(train_predictions, daily_interactions_train_tensor, fitted_interactions_train_tensor)
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#     # Evaluate on the test set
#     with torch.no_grad():
#         test_predictions = model(input_test_tensor)
#         mse_test = nn.MSELoss()(test_predictions, daily_interactions_test_tensor)

#         # Check if current hyperparameters are the best
#         if mse_test < best_mse:
#             best_mse = mse_test
#             best_a = a

# print(f'Best hyperparameter a: {best_a}')
# print(f'Best MSE on test set: {best_mse.item()}')

### Use best `a`

In [66]:
# Train the model
num_epochs = 100  # Adjust based on your specific needs

for epoch in range(num_epochs):
    # Forward pass
    train_predictions = model(input_train_tensor)

    # Calculate the loss
    loss = criterion(train_predictions, daily_interactions_train_tensor, fitted_interactions_train_tensor, 0)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

Epoch [100/100], Loss: 0.07132197171449661


In [67]:
# Get test predictions
test_predictions = model(input_test_tensor)

In [68]:
test_predictions[0]

tensor([0.4032, 0.5439, 0.7550], grad_fn=<SelectBackward0>)

In [69]:
# Calculate the MSE between predicted Facebook interactions and actual/fitted interactions on the test set
mse_actual_test = nn.MSELoss()(torch.FloatTensor(test_predictions), daily_interactions_test_tensor)
mse_fitted_test = nn.MSELoss()(torch.FloatTensor(test_predictions), fitted_interactions_test_tensor)

print(f'MSE between predicted and actual interactions on test set: {mse_actual_test.item()}')
print(f'MSE between predicted and fitted interactions on test set: {mse_fitted_test.item()}')

abs_perc_errors = abs(test_predictions.detach().numpy() - daily_interactions_test_tensor.detach().numpy())/(daily_interactions_test_tensor.detach().numpy())*100
abs_mean_perc_errors = [np.mean(i) for i in abs_perc_errors]
print(f"Absolute mean error percentage: {np.mean(abs_mean_perc_errors)}%")
print(f"Range of absolute error: ({min(abs_mean_perc_errors)}%, {max(abs_mean_perc_errors)}%)")

perc_errors = (test_predictions.detach().numpy() - daily_interactions_test_tensor.detach().numpy())/(daily_interactions_test_tensor.detach().numpy())*100
mean_perc_errors = [np.mean(i) for i in perc_errors]
print(f"Mean error percentage: {np.mean(mean_perc_errors)}%")
print(f"Range of error: ({min(mean_perc_errors)}%, {max(mean_perc_errors)}%)")

MSE between predicted and actual interactions on test set: 0.17508111894130707
MSE between predicted and fitted interactions on test set: 5.957621097564697
Absolute mean error percentage: 32.617095947265625%
Range of absolute error: (1.1827205419540405%, 303.0917663574219%)
Mean error percentage: -1.476983904838562%
Range of error: (-113.73685455322266%, 272.83935546875%)


In [71]:
import plotly.express as px
fig = px.box(abs_mean_perc_errors)
fig.update_layout(title='Absolute Error (%): 3 days (lambda = 0)') 
fig.update_yaxes(title="Absolute Error (%)")
fig.update_xaxes(title="All absolute errors")
fig.show()

In [None]:
# perc_errors = (test_predictions.detach().numpy() - daily_interactions_test_tensor.detach().numpy())/(daily_interactions_test_tensor.detach().numpy())*100
# mean_perc_errors = [np.mean(i) for i in perc_errors]
# np.mean(mean_perc_errors)

In [None]:
# print(min(mean_perc_errors))
# print(max(mean_perc_errors))

In [None]:
# perc_errors = abs(test_predictions.detach().numpy() - daily_interactions_test_tensor.detach().numpy())/(daily_interactions_test_tensor.detach().numpy())*100
# mean_perc_errors = [np.mean(i) for i in perc_errors]
# print(f"Absolute mean error percentage: {np.mean(mean_perc_errors)}%")

In [None]:
# text embedding
# from sentence_transformers import SentenceTransformer
# sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
# exp_coeff_df['text_embedding'] = exp_coeff_df['text'].apply(lambda x: sentence_model.encode(x))

# change day categorical variable to dummies
# dummies = pd.get_dummies(exp_coeff_df.day).rename(columns={1:"day1", 2:"day2", 3:"day3", 4:"day4", 5:"day5", 6:"day6", 7:"day7", 8:"day8"}) 
# exp_coeff_df1 = pd.concat([exp_coeff_df, dummies], axis='columns')
# exp_coeff_df1 = exp_coeff_df1.drop(['day'], axis='columns')

# dataframe with text embeddings as columns
# merged_df = exp_coeff_df1['text_embedding'].apply(pd.Series)
# merge_columns = exp_coeff_df1.drop(['text_embedding'], axis='columns').columns.values
# merged_df.columns = merged_df.columns.astype(str)
# merged_df[merge_columns] = exp_coeff_df[merge_columns]

# train-test split
# from sklearn.model_selection import train_test_split
# from statistics import mean

# def get_train_test_df(exp_coeff_df):
#     exp_model_df = exp_coeff_df.copy()
#     unique_texts = exp_model_df['text'].unique()
#     train_texts, test_texts = train_test_split(unique_texts, test_size=0.2, random_state=42, shuffle=False)
#     train_df = exp_model_df.loc[exp_model_df['text'].isin(train_texts)]
#     test_df = exp_model_df.loc[exp_model_df['text'].isin(test_texts)]
#     return train_df, test_df

# Get train variables
# non_feature_cols = ['text', 
#                     'link', 
#                     'domain', 
#                     'facebook_interactions', 
#                     'mean_monthly_visits',
#                     '/1000_normalised_interpolated_facebook_interactions',
#                     '/1000_fitted_interpolated_facebook_interactions',
#                     '/1000_interpolated_exp_coeff_a', 
#                     '/1000_interpolated_exp_coeff_b']

# features_train = train_df.drop(non_feature_cols, axis='columns').values
# coefficients_train = train_df[['/1000_interpolated_exp_coeff_a', '/1000_interpolated_exp_coeff_b']].values
# actual_interactions_train = train_df['/1000_normalised_interpolated_facebook_interactions'].values
# fitted_interactions_train = train_df['/1000_fitted_interpolated_facebook_interactions'].values

# train_df, test_df = get_train_test_df(merged_df)

# # Get test variables
# features_test = test_df.drop(non_feature_cols, axis='columns').values
# coefficients_test = test_df[['/1000_interpolated_exp_coeff_a', '/1000_interpolated_exp_coeff_b']].values
# actual_interactions_test = test_df['/1000_normalised_interpolated_facebook_interactions'].values
# fitted_interactions_test = test_df['/1000_fitted_interpolated_facebook_interactions'].values