In [1]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error


In [2]:
# Read the workbook 'Data.xlsx' (resolved relative to the working directory)
# into a DataFrame.
data = pd.read_excel(io='Data.xlsx')

# Preview the first five rows to sanity-check the columns that were read.
data.head(n=5)

Unnamed: 0,AdjustedMonth,Entity Who Posted,REACTIONS,COMMENTS,REPOSTS,IMPRESSIONS
0,2025-02-01,Andy Durman,4,1,0,246
1,2025-02-02,Karla Hoff,4,0,0,165
2,2025-02-03,Lightcast,5,0,2,161
3,2025-02-04,Andy Durman,6,0,0,211
4,2025-02-05,Lightcast,7,0,0,343


In [4]:
# Predictors are the three engagement counts; the target is the impression count.
X = data.loc[:, ['REACTIONS', 'COMMENTS', 'REPOSTS']]
y = data.loc[:, 'IMPRESSIONS']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Hyperparameters are collected in one mapping so they can be read and tuned
# in a single place before the estimator is built.
xgb_params = {
    'n_estimators': 20,
    'max_depth': 3,
    'learning_rate': 0.3,
    'reg_alpha': 1.0,
    'reg_lambda': 1.0,
    'min_child_weight': 3,
    'random_state': 42,
}
xgb_model = xgb.XGBRegressor(**xgb_params)

# Fit on the training partition only. The call is left as the cell's last
# expression so the notebook displays the fitted estimator.
xgb_model.fit(X_train, y_train)

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [10]:
# Score every row of the full dataset (training and test rows alike) and store
# the predictions next to the actual values in a PREDICTED column.
all_features = data[['REACTIONS', 'COMMENTS', 'REPOSTS']]
data['PREDICTED'] = xgb_model.predict(all_features)

In [17]:
# Show up to the first 50 rows to eyeball actual vs. predicted impressions.
data.head(n=50)

Unnamed: 0,AdjustedMonth,Entity Who Posted,REACTIONS,COMMENTS,REPOSTS,IMPRESSIONS,PREDICTED
0,2025-02-01,Andy Durman,4,1,0,246,496.408112
1,2025-02-02,Karla Hoff,4,0,0,165,340.583496
2,2025-02-03,Lightcast,5,0,2,161,340.583496
3,2025-02-04,Andy Durman,6,0,0,211,340.583496
4,2025-02-05,Lightcast,7,0,0,343,340.583496
5,2025-02-06,Chris Laney,7,0,0,243,340.583496
6,2025-02-07,Andy Durman,8,0,0,317,340.583496
7,2025-02-08,Karla Hoff,10,0,0,557,377.82724
8,2025-02-09,Andy Durman,10,1,0,332,533.651855
9,2025-02-10,Mark Hanson,11,2,1,529,797.350403


In [16]:
# Compare total actual vs. total predicted impressions over the whole dataset.
#
# Series.sum() is vectorised, whereas the builtin sum() walks the Series one
# element at a time in Python. skipna=False keeps the builtin's behaviour of
# propagating NaN, so a missing value still surfaces in this sanity check
# instead of being silently dropped.
total_imp = data['IMPRESSIONS'].sum(skipna=False)

# The predictions are upcast first so the total is accumulated in float64.
# NOTE(review): XGBoost predictions appear to be float32 here -- confirm.
total_pred = data['PREDICTED'].astype('float64').sum(skipna=False)

print(total_imp)
print(total_pred)

151721
150159.88934326172


In [14]:
# Score both partitions with the fitted model so that training and test error
# can be compared side by side.
y_pred_train, y_pred_test = (
    xgb_model.predict(split) for split in (X_train, X_test)
)

In [15]:
# Evaluate the model on both partitions with three complementary metrics:
#   RMSE - root of the mean squared error (same units as the target)
#   R²   - coefficient of determination
#   MAE  - mean absolute error (same units as the target)
train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
train_mae = mean_absolute_error(y_train, y_pred_train)
test_mae = mean_absolute_error(y_test, y_pred_test)

print("\n=== Model Performance ===")
# Every metric goes through the same two format strings, so the report lines
# stay uniform. The original "Test MAE" line had a stray leading "\n" that put
# a blank line in the middle of the report.
for metric_name, train_value, test_value in (
    ("RMSE", train_rmse, test_rmse),
    ("R²", train_r2, test_r2),
    ("MAE", train_mae, test_mae),
):
    print(f"Training {metric_name}: {train_value:.4f}")
    print(f"Test {metric_name}: {test_value:.4f}")


=== Model Performance ===
Training RMSE: 897.5478
Test RMSE: 756.0575
Training R²: 0.9039
Test R²: 0.8072
Training MAE: 432.4618

Test MAE: 415.0421
