In [1]:
import pandas as pd

In [2]:
# Read 'efficiency_result_processed.csv' (path relative to the working
# directory) into the DataFrame that every later cell starts from.
df = pd.read_csv(filepath_or_buffer='efficiency_result_processed.csv')

In [4]:
# Keep only the rows whose `allocator` column equals 'LRU'; `df` is rebound to
# the filtered frame, so later cells never see the other allocators.
df = df.loc[df['allocator'].eq('LRU')]

In [8]:
# Rows whose miss_ratio_reduction_from_disabled is below -0.05, most negative
# first. NOTE(review): the values shown in this notebook are consistent with the
# column being (disabled miss_ratio - miss_ratio), i.e. negative means the
# strategy did worse than no rebalancing -- confirm against the preprocessing.
worse_than_disabled = df['miss_ratio_reduction_from_disabled'] < -0.05
shown_columns = [
    'trace_name',
    'wsr',
    'rebalance_strategy',
    'miss_ratio',
    'miss_ratio_reduction_from_disabled',
]
df.loc[worse_than_disabled].sort_values(by='miss_ratio_reduction_from_disabled')[shown_columns]

Unnamed: 0,trace_name,wsr,rebalance_strategy,miss_ratio,miss_ratio_reduction_from_disabled
1817,twitter_cluster43,0.1,eviction-rate,0.264679,-0.20141
2088,twitter_cluster5,0.01,eviction-rate,0.366673,-0.136705
2044,twitter_cluster49,0.01,hits,0.497902,-0.111038
1111,twitter_cluster27,0.01,eviction-rate,0.715686,-0.08548
2041,twitter_cluster49,0.01,eviction-rate,0.465548,-0.078684
1796,twitter_cluster43,0.01,eviction-rate,0.326978,-0.074763
849,twitter_cluster19,0.1,eviction-rate,0.315759,-0.072342
1199,twitter_cluster29,0.01,eviction-rate,0.173706,-0.072039
2428,wiki_2019t,0.01,eviction-rate,0.650405,-0.071711
1220,twitter_cluster29,0.1,eviction-rate,0.109,-0.061504


In [None]:
"""
Group df by wsr and trace_name, then find the (wsr, trace_name) pairs
where the miss_ratio for rebalance_strategy = marginal-hits is lower than
the miss_ratio for each of rebalance_strategy = hits, eviction-rate, and tail-age.
"""

In [13]:
# Find every (wsr, trace_name) pair in which the marginal-hits strategy has a
# strictly lower miss_ratio than each of the other compared strategies.
#
# Reads:  df (columns used: rebalance_strategy, wsr, trace_name, miss_ratio,
#         rebalanced_slabs).
# Writes: strategies_to_compare, comparison_df, grouped, favorable_pairs and
#         favorable_df (one row per favorable pair, with a `<strategy>_mr` and
#         `<strategy>_rs` column for every compared strategy).
# Raises: ValueError if a complete group holds more than one row for the same
#         strategy, because the per-strategy lookups below would then return
#         Series instead of scalars and corrupt favorable_df.
strategies_to_compare = ['marginal-hits', 'hits', 'eviction-rate', 'tail-age', 'disabled']
target_strategy = 'marginal-hits'
# Everything the target is measured against, derived so the two lists cannot drift apart
baseline_strategies = [s for s in strategies_to_compare if s != target_strategy]

# Filter for only the strategies we want to compare
comparison_df = df[df['rebalance_strategy'].isin(strategies_to_compare)].copy()

# Group by wsr and trace_name, then check if marginal-hits has lowest miss_ratio
grouped = comparison_df.groupby(['wsr', 'trace_name'])

favorable_pairs = []

for (wsr, trace_name), group in grouped:
    # Skip groups that are missing any of the compared strategies
    available_strategies = set(group['rebalance_strategy'].unique())
    if available_strategies != set(strategies_to_compare):
        continue

    # One row per strategy is required for the scalar lookups below
    if group['rebalance_strategy'].duplicated().any():
        raise ValueError(
            f"Multiple rows per rebalance_strategy for wsr={wsr!r}, "
            f"trace_name={trace_name!r}; cannot compare strategies unambiguously."
        )

    by_strategy = group.set_index('rebalance_strategy')
    strategy_miss_ratios = by_strategy['miss_ratio']
    strategy_rebalanced_slabs = by_strategy['rebalanced_slabs']

    marginal_hits_mr = strategy_miss_ratios[target_strategy]
    other_strategies_mr = strategy_miss_ratios[baseline_strategies]

    # Strict comparison: a tie with any baseline disqualifies the pair
    if (marginal_hits_mr < other_strategies_mr).all():
        record = {'wsr': wsr, 'trace_name': trace_name}
        for strategy in strategies_to_compare:
            # Column stem uses underscores, e.g. 'eviction-rate' -> 'eviction_rate'
            stem = strategy.replace('-', '_')
            record[f'{stem}_mr'] = strategy_miss_ratios[strategy]
            record[f'{stem}_rs'] = strategy_rebalanced_slabs[strategy]
        favorable_pairs.append(record)

# Convert to DataFrame for better display
favorable_df = pd.DataFrame(favorable_pairs)

print(f"Found {len(favorable_pairs)} (wsr, trace_name) pairs where marginal-hits outperforms all other strategies:")
print()
favorable_df

Found 23 (wsr, trace_name) pairs where marginal-hits outperforms all other strategies:



Unnamed: 0,wsr,trace_name,marginal_hits_mr,marginal_hits_rs,hits_mr,hits_rs,eviction_rate_mr,eviction_rate_rs,tail_age_mr,tail_age_rs,disabled_mr,disabled_rs
0,0.01,meta_202401_kv,0.110586,23786,0.120679,16394,0.11784,248,0.155271,978,0.153689,0
1,0.01,meta_reag,0.388816,808,0.401073,445,0.411753,808,0.415377,701,0.415231,0
2,0.01,meta_rnha,0.654199,1661,0.671962,420,0.681801,1661,0.691019,1432,0.69301,0
3,0.01,meta_rprn,0.546044,1576,0.564783,494,0.573869,1576,0.578409,921,0.578613,0
4,0.01,twitter_cluster10,0.499899,21,0.499899,2,0.499899,2,0.499899,2,0.499899,0
5,0.01,twitter_cluster19,0.508894,40066,0.511749,550,0.526081,235,0.515876,574,0.575939,0
6,0.01,twitter_cluster29,0.087921,138075,0.088028,4074,0.173706,1682,0.098001,623,0.101667,0
7,0.01,twitter_cluster43,0.167366,180739,0.181841,739,0.326978,1502,0.205203,128,0.252214,0
8,0.01,twitter_cluster50,0.288502,2792,0.291835,375,0.327253,1917,0.608521,65,0.616099,0
9,0.01,twitter_cluster53,0.181067,4929,0.230266,123,0.237636,227,0.313274,1,0.311465,0


In [None]:
# Relative miss-ratio reduction (in percent) of marginal-hits versus each other
# strategy, computed as (other_mr - marginal_hits_mr) / other_mr * 100.
# Adds one `improvement_over_<strategy>` column per baseline to favorable_df.
if favorable_df.empty:
    print("No cases found where marginal-hits outperforms all other strategies.")
else:
    print("Performance improvements (miss ratio reduction) when using marginal-hits:")
    print()

    # (label used in the printed summary, column-name stem) for each baseline
    baselines = [
        ('hits', 'hits'),
        ('eviction-rate', 'eviction_rate'),
        ('tail-age', 'tail_age'),
        ('disabled', 'disabled'),
    ]

    target_mr = favorable_df['marginal_hits_mr']
    for _, stem in baselines:
        baseline_mr = favorable_df[f'{stem}_mr']
        favorable_df[f'improvement_over_{stem}'] = (baseline_mr - target_mr) / baseline_mr * 100

    # Show the improvements
    improvement_cols = ['wsr', 'trace_name'] + [f'improvement_over_{stem}' for _, stem in baselines]
    print(favorable_df[improvement_cols].round(2))

    print("\nSummary statistics of improvements (%):")
    for label, stem in baselines:
        gains = favorable_df[f'improvement_over_{stem}']
        print(f"Over {label}: mean={gains.mean():.2f}%, max={gains.max():.2f}%")

In [None]:
# Print, for every favorable (wsr, trace_name) pair, each strategy's miss ratio
# (`_mr`) next to its rebalanced-slab count (`_rs`), followed by summary
# statistics of the rebalanced-slab columns.
if favorable_df.empty:
    print("No favorable cases found to display rebalanced slabs data.")
else:
    print("Miss ratios and rebalanced slabs for all strategies in favorable cases:")
    print()

    strategies = ['marginal_hits', 'hits', 'eviction_rate', 'tail_age', 'disabled']

    # Interleave the MR and RS column of each strategy after the two key columns
    display_cols = ['wsr', 'trace_name']
    for strategy in strategies:
        display_cols += [f'{strategy}_mr', f'{strategy}_rs']

    mr_cols = [col for col in display_cols if col.endswith('_mr')]
    rs_cols = [col for col in display_cols if col.endswith('_rs')]

    # Miss ratios to 6 decimal places; rebalanced slabs rounded and cast to integers
    decimals = {**dict.fromkeys(mr_cols, 6), **dict.fromkeys(rs_cols, 0)}
    display_df = (
        favorable_df[display_cols]
        .round(decimals)
        .astype(dict.fromkeys(rs_cols, int))
    )

    print(display_df.to_string(index=False))

    print("\nRebalanced slabs statistics comparison:")
    for strategy in strategies:
        slabs = favorable_df[f'{strategy}_rs']
        label = strategy.replace('_', '-')
        print(f"{label}: mean={slabs.mean():.0f}, median={slabs.median():.0f}, min={slabs.min():.0f}, max={slabs.max():.0f}")