In [None]:
# Columns whose value proportions are compared between `test` and `inference`.
cols_to_check = [
    'device_model', 'firmware_version', 'calibration_status',
    'battery_level', 'gps_accuracy', 'sensor_source', 'network_type'
]

for col in cols_to_check:
    # Skip any column that is missing from either frame.
    if col not in test.columns or col not in inference.columns:
        continue

    fig, ax = plt.subplots(figsize=(10, 4))

    # Relative frequency of each distinct value in each frame.
    test_counts = test[col].value_counts(normalize=True)
    inf_counts = inference[col].value_counts(normalize=True)

    # Building the frame aligns both series on the union of their values;
    # a value seen in only one frame gets proportion 0 in the other.
    df_compare = pd.DataFrame({'Test': test_counts, 'Inference': inf_counts}).fillna(0)
    df_compare.plot(kind='bar', ax=ax)

    ax.set_title(f'Distribution of {col} in Test vs Inference')
    ax.set_ylabel('Proportion')
    ax.set_xlabel(col)
    plt.setp(ax.get_xticklabels(), rotation=45)
    plt.show()

In [None]:
# Correlation heatmap in absolute values for better visibility: the sign of each
# correlation is dropped for plotting so that colour encodes strength only.
corr = train.select_dtypes(include='number').corr()
# `corr` keeps the signed coefficients; only the plotted copy is made absolute.
abs_corr = corr.abs()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    abs_corr,
    cmap='coolwarm',
    # |r| lies in [0, 1]. No `center` is passed, so the colormap spans the
    # full range: blue at 0 (weak) and red at 1 (strong), matching the title.
    vmin=0, vmax=1,
    annot=False,
    linewidths=0.5,
    cbar_kws={'label': 'Absolute correlation'},
    ax=ax,
)
ax.set_title('Feature Correlation Heatmap (Red = Strong, Blue = Weak)')
plt.show()

In [None]:
# For each of x_mean, y_mean and z_mean, overlay the density histogram and KDE
# of the train, test and inference frames on a single set of axes.
for feature in ['x_mean', 'y_mean', 'z_mean']:
    fig, ax = plt.subplots()
    for split_df, split_color, split_label in (
        (train, 'blue', 'train'),
        (test, 'green', 'test'),
        (inference, 'red', 'inference'),
    ):
        sns.histplot(
            split_df[feature],
            color=split_color,
            label=split_label,
            kde=True,
            stat='density',
            ax=ax,
        )
    ax.legend()
    ax.set_title(f'Comparison of {feature}')
    plt.show()

In [None]:
import numpy as np
from scipy.spatial.distance import jensenshannon

# Columns whose value distributions are compared between `test` and `inference`.
meta_cols = [
    'device_model', 'firmware_version', 'calibration_status',
    'battery_level', 'gps_accuracy', 'sensor_source', 'network_type'
]

# A column is flagged when its Jensen-Shannon distance exceeds this value.
JS_DISTANCE_THRESHOLD = 0.2

# NOTE(review): value_counts treats every distinct value as its own category.
# If battery_level / gps_accuracy are continuous, consider binning them first
# -- confirm the dtypes.
for col in meta_cols:
    if col in test.columns and col in inference.columns:
        # Relative frequency of each distinct (non-null) value in each frame.
        test_counts = test[col].value_counts(normalize=True)
        inf_counts = inference[col].value_counts(normalize=True)

        # A column with no non-null values yields an all-zero vector, which
        # jensenshannon would normalise as 0/0 and report as NaN.
        if test_counts.empty or inf_counts.empty:
            print(f"{col}: no non-null values in test or inference; skipped")
            continue

        # Align both distributions on the union of observed values; a value
        # missing from one frame gets probability 0 there.
        all_categories = set(test_counts.index).union(set(inf_counts.index))
        test_dist = np.array([test_counts.get(cat, 0) for cat in all_categories])
        inf_dist = np.array([inf_counts.get(cat, 0) for cat in all_categories])

        # scipy's jensenshannon returns the Jensen-Shannon *distance*, i.e. the
        # square root of the divergence (natural-log base by default), so it is
        # reported under that name.
        js_dist = jensenshannon(test_dist, inf_dist)
        print(f"{col}: JS distance = {js_dist:.3f}")
        if js_dist > JS_DISTANCE_THRESHOLD:
            print(f"  -> Significant difference detected in {col}!\n")
