In [6]:
import pandas as pd
import numpy as np
from statsmodels.stats.proportion import proportions_ztest
from scipy.stats import ks_2samp

In [7]:
# A/B Testing
df_ad = pd.read_csv('ad_click_dataset.csv')
df_ad.dropna(inplace=True)

group_A = df_ad[df_ad['ad_position'] == 'Top']
group_B = df_ad[df_ad['ad_position'] == 'Bottom']

count_clicks_A = group_A['click'].sum()
count_clicks_B = group_B['click'].sum()
nobs_A = len(group_A)
nobs_B = len(group_B)

z_stat, p_val = proportions_ztest([count_clicks_A, count_clicks_B], [nobs_A, nobs_B])
print("A/B Testing Results")
print("Z-score:", z_stat, "| P-value:", p_val)
if p_val < 0.05:
    print("CTR difference is statistically significant.")
else:
    print("No statistically significant difference in click-through rates between Top and Bottom groups.")


A/B Testing Results
Z-score: -1.1365075404030447 | P-value: 0.2557442115851094
No significant difference in CTR.


There is no statistically significant difference in click-through rates between ads placed at the Top and Bottom positions. This suggests that ad position alone may not be a strong factor influencing user engagement, or the sample size may not be large enough to detect a subtle effect.

In [8]:
# Covariate Shift Detection
df_train = pd.read_csv('train.csv')
df_test1 = pd.read_csv('test1.csv')
df_test2 = pd.read_csv('test2.csv')

feature = 'NO2(GT)'  # Column to test
ks_stat_test1, p_val_test1 = ks_2samp(df_train[feature].dropna(), df_test1[feature].dropna())
ks_stat_test2, p_val_test2 = ks_2samp(df_train[feature].dropna(), df_test2[feature].dropna())

print("\nCovariate Shift Detection (Kolmogorov–Smirnov Test)")
print(f"Test1 vs Train => KS: {ks_stat_test1}, p: {p_val_test1}")
print(f"Test2 vs Train => KS: {ks_stat_test2}, p: {p_val_test2}")

alpha = 0.05
if p_val_test1 < alpha:
    print("test1.csv shows a significant shift.")
else:
    print("test1.csv does NOT show a significant shift.")

if p_val_test2 < alpha:
    print("test2.csv shows a significant shift.")
else:
    print("test2.csv does NOT show a significant shift.")


Covariate Shift Detection (Kolmogorov–Smirnov Test)
Test1 vs Train => KS: 0.0190625, p: 0.9721940612395358
Test2 vs Train => KS: 0.4075, p: 7.2019977111245e-96
test1.csv does NOT show a significant shift.
test2.csv shows a significant shift.


test2.csv exhibits a clear covariate shift relative to the training data in the NO2(GT) feature, indicating a potential mismatch between training and deployment conditions. In contrast, test1.csv aligns well with the training distribution and can be expected to generalize better under the same model.