In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta

import statsmodels.api as sm
from statsmodels.formula.api import ols

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import gc
import sys

#Utilities
import warnings
from tqdm import tqdm
import time
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.4f' % x) # floats view format

## Globals

In [2]:
data_folder = '/home/vlad/DiplomaWork/data'
RS = 42

## Utilities

In [3]:
# Get execution time and data size
def execution_time(func):
    def wrapper(*args, **kwargs):
        t1 = time.time()
        res = func(*args, **kwargs)
        t2 = time.time()
        ex_mins = (t2 - t1)//60
        ex_sec =  (t2 - t1)%60
        print(f'Process time: {ex_mins} min, {int(ex_sec)} sec')
        if res is not None:
            print(f'Data size: {res.shape[0]} x {res.shape[1]}')
            return res
    return wrapper

# Check NaN ratio per column
def get_nan_ratio(df):
    length = df.shape[0]
    ratios = []
    for col in df.columns:
        nan_ratio = 100 * (1 - df[col].count() / length)
        ratios.append(nan_ratio)
    stat_df = pd.DataFrame()
    stat_df['Column'] = df.columns
    stat_df['NaN_ratio_prc'] = ratios
    return stat_df.sort_values(by = ['NaN_ratio_prc'], ascending = False)

## Data read

In [4]:
df = pd.read_csv(f'{data_folder}/PRE_ABT_POS.csv')
products = pd.read_csv(f'{data_folder}/PRODUCTS.csv')
products_ids = products['PRODUCT_ID'].values
df = df[df['product_id'].isin(products_ids)]
df = df.rename(columns = {'product_id': 'PRODUCT_ID'})

print(df.shape)
df.head()

(17394831, 24)


Unnamed: 0,PRODUCT_ID,LOCATION_id,customer_id,distr_channel_id,period_dt,PRICE_REG,PRICE_ACT,PRICE_PROMO,PRICE_DISCOUNT_RATE,PROMO_1001,PROMO_1002,PROMO_1003,PROMO_FLG,PROMO_FLG_DR,DEFICIT_FLG1,DEFICIT_FLG2,STOCK_QTY,TGT_QTY,TGT_QTY_R,NUM_AUTHORIZATION,out_part,ml_part,vf_part,bl_part
0,203945,4401,-6,-2,22514,81.0,81.0,81.0,0.0,0,0,0,0,0.0,0,0,,,,1,,13,16,2
1,203945,4401,-6,-2,22563,83.0,83.0,83.0,0.0,0,0,0,0,0.0,0,0,,,,1,,13,16,2
2,203945,4401,-6,-2,22576,83.0,83.0,83.0,0.0,0,0,0,0,0.0,0,0,,,,1,,13,16,2
3,203945,9502,-6,-2,22542,,,,0.0,0,0,0,0,0.0,0,0,,,,1,,5,42,2
4,203945,9502,-6,-2,22555,,,,0.0,0,0,0,0,0.0,0,0,,,,1,,5,42,2


## Preprocessing

In [5]:
def preprocess_date(df):
    df['POS_DT'] = pd.to_datetime(df['POS_DT'], format="%d%b%Y")
    df['MONTH'] = df['POS_DT'].dt.month
    df['YEAR'] = df['POS_DT'].dt.year
    return df.drop(columns = ['POS_DT'])

def add_days_to_date(date, days):
    added_date = pd.to_datetime(date) + timedelta(days=days)
    added_date = added_date.strftime("%Y-%m-%d")
    return added_date

def dates_giver(df):
    df['DATE'] = '01-01-1960'
    df['DATE'] = pd.to_datetime(df['DATE'], format="%d-%m-%Y")
    df['DATE'] = df.apply(lambda x: add_days_to_date(x['DATE'], x['period_dt']), axis = 1)
    data = df[['DATE', 'PRODUCT_ID', 'LOCATION_id', 'PRICE_REG', 'PRICE_PROMO', 'STOCK_QTY', 'TGT_QTY']]
    data['week_number'] = pd.to_datetime(data['DATE'], format = "%Y-%m-%d").dt.week
    data['month_number'] = pd.to_datetime(data['DATE'], format = "%Y-%m-%d").dt.month
    data['year'] = pd.to_datetime(data['DATE'], format = "%Y-%m-%d").dt.year
    data['day'] = pd.to_datetime(data['DATE'], format = "%Y-%m-%d").dt.day
    return data

@execution_time
def preprocessing(df):
    df = dates_giver(df)
    # data = data.drop(columns = ['DATE'])
    df = df[df['TGT_QTY'].notnull()]  
    df = df[df['STOCK_QTY'].notnull()]
    df = df.reset_index(drop = True)
    return df
    

In [6]:
df = preprocessing(df)
df.head()

Process time: 4.0 min, 49 sec
Data size: 13020968 x 11


Unnamed: 0,DATE,PRODUCT_ID,LOCATION_id,PRICE_REG,PRICE_PROMO,STOCK_QTY,TGT_QTY,week_number,month_number,year,day
0,2019-02-22,205880,145,105.0,94.5,2.0,0.0,8,2,2019,22
1,2019-03-16,205880,145,105.0,89.47,2.0,0.0,11,3,2019,16
2,2019-05-13,205880,145,105.0,94.5,2.0,0.0,20,5,2019,13
3,2019-07-28,205880,145,105.0,105.0,2.0,0.0,30,7,2019,28
4,2019-09-19,205880,145,105.0,105.0,2.0,0.0,38,9,2019,19


## Feature creation

In [7]:
def add_holidays(df):
    df['Feb23'] = ((df['month_number'] == 2) & (df['day'] == 23)).astype(int)
    df['May1'] = ((df['month_number'] == 5) & (df['day'] == 1)).astype(int)
    df['Jun12'] = ((df['month_number'] == 6) & (df['day'] == 12)).astype(int)
    df['Nov4'] = ((df['month_number'] == 11) & (df['day'] == 4)).astype(int)
    df['NY'] = ((df['month_number'] == 12) & (df['day'].isin([24, 25, 26, 27, 28, 29, 30, 31]))).astype(int)
    return df

def get_weekly_sales(df):
    df_sums = df.groupby(by = ['PRODUCT_ID', 'LOCATION_id', 'year','week_number']).sum().reset_index()
    df_sums = df_sums[['PRODUCT_ID', 'LOCATION_id', 'year','week_number', 'TGT_QTY']]
    return df_sums

def get_week_mean_price(df):
    df_means = df.groupby(by = ['PRODUCT_ID', 'LOCATION_id', 'year','week_number']).mean().reset_index()
    df_means = df_means[['PRODUCT_ID', 'LOCATION_id', 'year','week_number', 'PRICE_REG', 'PRICE_PROMO']]
    return df_means
    
def get_weekly_stocks(df):
    weekly_stocks = df.sort_values(by = ['PRODUCT_ID', 'LOCATION_id', 'DATE'])
    weekly_stocks = weekly_stocks[~weekly_stocks['STOCK_QTY'].isnull()]
    weekly_stocks = weekly_stocks.drop_duplicates(subset = ['PRODUCT_ID', 'LOCATION_id', 'week_number','year'], keep = 'first')
    weekly_stocks = weekly_stocks[['PRODUCT_ID', 'LOCATION_id', 'week_number','year', 'STOCK_QTY']]
    return weekly_stocks
    
def get_ohe_holidays(df):
    holi_cols = ['Feb23','May1','Jun12', 'Nov4', 'NY']
    df = add_holidays(df)
    holiweeks = df.groupby(by = ['year', 'week_number']).sum()
    holiweeks = holiweeks[holi_cols]
    for col in holi_cols:
        holiweeks[col] = (holiweeks[col] > 0).astype(int)
    holiweeks = holiweeks.reset_index()
    return holiweeks

def top_3_min_prices(df_means):
    agr_cols = ['LOCATION_id', 'year', 'week_number']
    to_agr_cols = ['PRICE_REG']
    grouped = df_means[agr_cols + to_agr_cols].groupby(by = agr_cols)['PRICE_REG']
    df_means['1st_min_price'] = grouped.transform(lambda x: x.nsmallest(1).min())
    df_means['2nd_min_price'] = grouped.transform(lambda x: x.nsmallest(2).min())
    df_means['3rd_min_price'] = grouped.transform(lambda x: x.nsmallest(3).min())
    return df_means
    
@execution_time
def agregate_data(df):
    df_sums = get_weekly_sales(df) # Total sales per week
    df_means = get_week_mean_price(df) # Mean price per week
    weekly_stocks = get_weekly_stocks(df) # Stock per week
    holiweeks = get_ohe_holidays(df) # Get OHE holidays
    df_means = top_3_min_prices(df_means) # Get 3 min prices of concurents per week 
    
    # Combining agr dfs together
    ped_df = df_sums.merge(df_means, how = 'left', on = ['PRODUCT_ID', 'LOCATION_id', 'year','week_number'])
    ped_df = ped_df.merge(holiweeks, how = 'left', on = ['year','week_number'])
    
    ped_df['PRICE_PROMO'] = ped_df['PRICE_PROMO'].fillna(ped_df['PRICE_REG'])
    ped_df['promo_discount_%'] = (1 - ped_df['PRICE_PROMO'] / ped_df['PRICE_REG']) * 100 # Discount Calculation
    return ped_df
    

In [8]:
ped_df = agregate_data(df)

ped_df = ped_df.dropna().reset_index(drop = True)
print('Non Nan Data size:', ped_df.shape)
ped_df.head()

Process time: 0.0 min, 37 sec
Data size: 1883081 x 16
Non Nan Data size: (1440304, 16)


Unnamed: 0,PRODUCT_ID,LOCATION_id,year,week_number,TGT_QTY,PRICE_REG,PRICE_PROMO,1st_min_price,2nd_min_price,3rd_min_price,Feb23,May1,Jun12,Nov4,NY,promo_discount_%
0,370,223,2018,1,0.0,166.25,154.9333,0.9913,0.9913,0.9913,0,0,0,0,1,6.807
1,370,223,2018,2,0.0,166.0,166.0,1.04,1.04,1.04,0,0,0,0,0,0.0
2,370,223,2018,3,1.0,166.0,149.4,1.04,1.04,1.04,0,0,0,0,0,10.0
3,370,223,2018,4,0.0,166.0,149.4,1.04,1.04,1.04,0,0,0,0,0,10.0
4,370,223,2018,5,0.0,166.0,149.4,0.76,0.76,0.76,0,0,0,0,0,10.0


## Pairs selection
- (Shop - product) pairs for experiments

In [9]:
def get_pairs_for_experiment(ped_df, quantile = 0.5):
    popular_pairs = ped_df.groupby(['PRODUCT_ID', 'LOCATION_id']).size().reset_index(name='counts')
    popular_pairs = popular_pairs.sort_values(by='counts', ascending=False).reset_index(drop=True)
    print('Pairs with count >', popular_pairs['counts'].quantile(quantile), 'selected')
    popular_pairs = popular_pairs[popular_pairs['counts'] > popular_pairs['counts'].quantile(quantile)]
    pairs_for_experiment = list(popular_pairs[['PRODUCT_ID', 'LOCATION_id']].to_records(index=False))
    popular_pairs = popular_pairs.rename(columns = {'PRODUCT_ID' : 'SKU', 'LOCATION_id' : 'Store'})
    print(f'Total pairs selected: {len(pairs_for_experiment)}')
    return popular_pairs, pairs_for_experiment

In [10]:
popular_pairs_df, pairs_for_experiment = get_pairs_for_experiment(ped_df, quantile = 0.5)

Pairs with count > 57.0 selected
Total pairs selected: 8962


In [11]:
ped_df

Unnamed: 0,PRODUCT_ID,LOCATION_id,year,week_number,TGT_QTY,PRICE_REG,PRICE_PROMO,1st_min_price,2nd_min_price,3rd_min_price,Feb23,May1,Jun12,Nov4,NY,promo_discount_%
0,370,223,2018,1,0.0000,166.2500,154.9333,0.9913,0.9913,0.9913,0,0,0,0,1,6.8070
1,370,223,2018,2,0.0000,166.0000,166.0000,1.0400,1.0400,1.0400,0,0,0,0,0,0.0000
2,370,223,2018,3,1.0000,166.0000,149.4000,1.0400,1.0400,1.0400,0,0,0,0,0,10.0000
3,370,223,2018,4,0.0000,166.0000,149.4000,1.0400,1.0400,1.0400,0,0,0,0,0,10.0000
4,370,223,2018,5,0.0000,166.0000,149.4000,0.7600,0.7600,0.7600,0,0,0,0,0,10.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1440299,5201142,7002,2021,32,0.0000,830.0000,830.0000,1.5400,1.5400,1.5400,0,0,0,0,0,0.0000
1440300,5300493,51,2021,32,1.0000,83.0000,83.0000,8.0000,8.0000,8.0000,0,0,0,0,0,0.0000
1440301,5500002,153,2021,31,1.0000,170.0000,170.0000,1.0800,1.0800,1.0800,0,0,0,0,0,0.0000
1440302,5500002,153,2021,32,0.0000,170.0000,170.0000,1.0800,1.0800,1.0800,0,0,0,0,0,0.0000


## Log-Log regression

In [12]:
def transform(x, ttype):
    '''
    Accepts vector and performs transformation on it based on ttype
    '''
    if ttype == 'log':
        return np.log1p(x)
    
    return np.array(x)

def coef(model):
    """
    return regression coefficient of model
    """
    return model.params[1] if len(model.params) > 1 else model.params[0]

def pvalue(model):
    """
    return regression coefficient's t-test p-value of model
    """
    return model.pvalues[1] if len(model.pvalues) > 1 else model.pvalues[0]

def elasticity(X, y, model, model_type):
    '''
    Computes elasticity of model based on X, y, model itself and model's type
    '''
    if model_type == 'lin-lin':
        return coef(model) * np.mean(X) / np.mean(y)
    elif model_type == 'log-lin':
        return coef(model) * np.mean(X)
    elif model_type == 'log-log':
        return coef(model)
    
def PEDmodel(X, y, model_type='lin-lin'):
    '''
    Makes PED model based on input data X, y and model type
    '''
    _types = model_type.split('-')
    
    act_x = transform(X, _types[1])
    act_x = sm.add_constant(act_x)
    
    act_y = transform(y, _types[0])
    return sm.OLS(act_y, act_x).fit()

def get_model_stats(X, y, model, model_type):
    '''
    returns model's statistics
        regression coefficient,
        R-squared,
        T-test p-value,
        elasticity
    '''
    elas = elasticity(X, y, model, model_type)
    return coef(model), model.rsquared, pvalue(model), elas

def GeneratePEDModels(dataset, model_type):
    col_list = ['PRICE_REG', 'Feb23','May1','Jun12','Nov4','NY']
    sku_id, store_id, elas, qty, ttpvalue = [], [], [], [], []
    iter_count = 0
    for good_id, shop_id in tqdm(pairs_for_experiment):
        data = dataset[(dataset['LOCATION_id'] == shop_id) & (dataset['PRODUCT_ID'] == good_id)]
        try: # TODO (Understand error)
            model = PEDmodel(data[col_list], data['TGT_QTY'], model_type)
            c, r2, tp, e = get_model_stats(data[col_list], data['TGT_QTY'], model, model_type)
            sku_id.append(good_id)
            store_id.append(shop_id)
            elas.append(e)
            ttpvalue.append(tp)
            qty.append(data['TGT_QTY'].sum() / data.shape[0])
        except:
            continue
    
    return pd.DataFrame({
        'SKU': sku_id,
        'Store': store_id,
        'Elasticity': elas,
        'Qty': qty,
        'P_value': ttpvalue
    })

def get_robust_pairs(elast_df):
    ok_elasts = len(elast_df[(elast_df['Elasticity'] > -10) & (elast_df['Elasticity'] < 0)])
    ok_pvals = len(elast_df[elast_df['P_value'] < 0.05])
    robust_elasts = elast_df[(elast_df['Elasticity'] > -10) & 
                             (elast_df['Elasticity'] < 0) & 
                             (elast_df['P_value'] < 0.05)]
    
    print('# Normal elasticities:', ok_elasts)
    print('# Normal p-values:', ok_pvals)
    print('# Robust elasticities:', len(robust_elasts))
    print('% Robust elasticities:', np.round((len(robust_elasts) / len(elast_df)) * 100, 2))
    return robust_elasts

In [13]:
elasts = GeneratePEDModels(ped_df, model_type = 'log-log')
elasts

100%|██████████| 8962/8962 [00:53<00:00, 166.91it/s]


Unnamed: 0,SKU,Store,Elasticity,Qty,P_value
0,370,223,-0.2740,0.0421,0.1692
1,181916,29,-0.5868,0.4000,0.0322
2,51786,148,0.0800,0.1263,0.6385
3,181621,22,-0.4174,0.4632,0.2326
4,51893,128,0.7414,0.4263,0.0135
...,...,...,...,...,...
8957,82619,194,3.1876,0.2759,0.0945
8958,137895,250,-0.6142,2.3793,0.5993
8959,5426,248,-0.0434,0.0862,0.6740
8960,53688,229,-0.2519,0.2586,0.3171


In [14]:
robust_elasts = get_robust_pairs(elasts)
robust_elasts = robust_elasts.reset_index()
elasts = elasts.reset_index()

# Normal elasticities: 4963
# Normal p-values: 1896
# Robust elasticities: 1021
% Robust elasticities: 11.39


In [15]:
non_robust_elasts = elasts[~elasts['index'].isin(robust_elasts['index'].to_list())]
del non_robust_elasts['index']
non_robust_elasts

Unnamed: 0,SKU,Store,Elasticity,Qty,P_value
0,370,223,-0.2740,0.0421,0.1692
2,51786,148,0.0800,0.1263,0.6385
3,181621,22,-0.4174,0.4632,0.2326
4,51893,128,0.7414,0.4263,0.0135
5,51939,154,-0.0250,0.0526,0.6303
...,...,...,...,...,...
8957,82619,194,3.1876,0.2759,0.0945
8958,137895,250,-0.6142,2.3793,0.5993
8959,5426,248,-0.0434,0.0862,0.6740
8960,53688,229,-0.2519,0.2586,0.3171


In [16]:
problematic_skus = non_robust_elasts['SKU'].unique()
len(problematic_skus)

4966

## Elasticity on products

In [17]:
ped_df2 = ped_df.copy()
ped_df2 = ped_df2[ped_df2['PRODUCT_ID'].isin(problematic_skus)].drop(columns = ['LOCATION_id'])
ped_df2.head()

Unnamed: 0,PRODUCT_ID,year,week_number,TGT_QTY,PRICE_REG,PRICE_PROMO,1st_min_price,2nd_min_price,3rd_min_price,Feb23,May1,Jun12,Nov4,NY,promo_discount_%
0,370,2018,1,0.0,166.25,154.9333,0.9913,0.9913,0.9913,0,0,0,0,1,6.807
1,370,2018,2,0.0,166.0,166.0,1.04,1.04,1.04,0,0,0,0,0,0.0
2,370,2018,3,1.0,166.0,149.4,1.04,1.04,1.04,0,0,0,0,0,10.0
3,370,2018,4,0.0,166.0,149.4,1.04,1.04,1.04,0,0,0,0,0,10.0
4,370,2018,5,0.0,166.0,149.4,0.76,0.76,0.76,0,0,0,0,0,10.0


In [18]:
def GeneratePEDModels_sku(dataset, model_type):
    col_list = ['PRICE_REG', 'Feb23','May1','Jun12','Nov4','NY']
    sku_id, elas, qty, ttpvalue = [], [], [], []
    iter_count = 0
    for good_id in tqdm(dataset['PRODUCT_ID'].unique()):
        data = dataset[dataset['PRODUCT_ID'] == good_id]
        model = PEDmodel(data[col_list], data['TGT_QTY'], model_type)
        c, r2, tp, e = get_model_stats(data[col_list], data['TGT_QTY'], model, model_type)
        sku_id.append(good_id)
        elas.append(e)
        ttpvalue.append(tp)
        qty.append(data['TGT_QTY'].sum() / data.shape[0])
    
    return pd.DataFrame({
        'SKU': sku_id,
        'Elasticity': elas,
        'Qty': qty,
        'P_value': ttpvalue
    })

In [19]:
new_elasts = GeneratePEDModels_sku(ped_df2, model_type = 'log-log')
new_robust_elasts = get_robust_pairs(new_elasts)

100%|██████████| 4966/4966 [00:21<00:00, 233.33it/s]

# Normal elasticities: 2601
# Normal p-values: 1442
# Robust elasticities: 697
% Robust elasticities: 14.04





## Polling on all data (for metrics)

In [20]:
ped_df3 = ped_df.copy()
new_elasts = GeneratePEDModels_sku(ped_df3, model_type = 'log-log')
new_robust_elasts = get_robust_pairs(new_elasts)
new_robust_elasts = new_robust_elasts.rename(columns = {'Elasticity': 'Elasticity_pooling'})

check = pd.read_csv('new_robust.csv')
check = check.rename(columns = {'Elasticity': 'Elasticity_xgb'})
check = check.merge(new_robust_elasts[['SKU', 'Elasticity_pooling']], how = 'left', on = ['SKU']).fillna(0)

100%|██████████| 7354/7354 [00:32<00:00, 228.01it/s]

# Normal elasticities: 3679
# Normal p-values: 1962
# Robust elasticities: 1052
% Robust elasticities: 14.31





## Final Metrics

In [22]:
def weighted_absolute_percentage_error(y_true, y_hat):
    wape = np.sum(np.abs(y_true - y_hat)) / np.sum(np.abs(y_true))
    return wape

def get_metrics(y_true, y_hat, method, gain):
    rmse = np.round(np.sqrt(mean_squared_error(y_true, y_hat)), 3)
    mae = np.round(mean_absolute_error(y_true, y_hat), 3)
    mape = np.round(mean_absolute_percentage_error(y_true, y_hat), 3)
    wape_ = np.round(weighted_absolute_percentage_error(y_true, y_hat), 3)
    metrics = pd.DataFrame(data = {'Metric' : ['RMSE', 'MAE', 'MAPE', 'WAPE'],
                                   'Score' : [rmse, mae, mape, wape_],
                                   'Method' : method,
                                   'Gain' : gain})
    return metrics

pooling_metrics = get_metrics(check['Elasticity_before'], check['Elasticity_pooling'], 'Pooling', 14.04)
boosting_metrics = get_metrics(check['Elasticity_before'], check['Elasticity_xgb'], 'XGB Segmentation', 72.32)
recent_research_metrics = pd.DataFrame(data = {'Metric' : ['RMSE', 'MAE', 'MAPE', 'WAPE'],
                                               'Score' : [1.77, 1.43, np.nan, 0.79],
                                               'Method' : 'TSNE',
                                               'Gain' : 62.76})


In [23]:
boosting_metrics

Unnamed: 0,Metric,Score,Method,Gain
0,RMSE,1.288,XGB Segmentation,72.32
1,MAE,0.768,XGB Segmentation,72.32
2,MAPE,0.497,XGB Segmentation,72.32
3,WAPE,0.495,XGB Segmentation,72.32


In [24]:
pooling_metrics

Unnamed: 0,Metric,Score,Method,Gain
0,RMSE,1.532,Pooling,14.04
1,MAE,0.888,Pooling,14.04
2,MAPE,0.546,Pooling,14.04
3,WAPE,0.572,Pooling,14.04


In [25]:
recent_research_metrics

Unnamed: 0,Metric,Score,Method,Gain
0,RMSE,1.77,TSNE,62.76
1,MAE,1.43,TSNE,62.76
2,MAPE,,TSNE,62.76
3,WAPE,0.79,TSNE,62.76


In [26]:
all_metrics = boosting_metrics.copy()
all_metrics = all_metrics.append(pooling_metrics.append(recent_research_metrics)).reset_index(drop = True)
all_metrics

Unnamed: 0,Metric,Score,Method,Gain
0,RMSE,1.288,XGB Segmentation,72.32
1,MAE,0.768,XGB Segmentation,72.32
2,MAPE,0.497,XGB Segmentation,72.32
3,WAPE,0.495,XGB Segmentation,72.32
4,RMSE,1.532,Pooling,14.04
5,MAE,0.888,Pooling,14.04
6,MAPE,0.546,Pooling,14.04
7,WAPE,0.572,Pooling,14.04
8,RMSE,1.77,TSNE,62.76
9,MAE,1.43,TSNE,62.76


In [31]:
fig = px.scatter(all_metrics,
                 x="Gain", y="Score", color="Method",
                 symbol = 'Metric', title = 'Visual metrics comparison')
fig.show()