In [8]:
import os
import re
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from typing import List, Dict, Set, Any
import json

def load_material_colors(filepath='material-colors.json'):
    """Load the material design colour palette from a JSON file.

    Args:
        filepath: Path to the JSON file to read. Defaults to
            ``'material-colors.json'`` relative to the working directory.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)

class LatencyExperimentAnalyzer:
    """Collect latency measurements from an experiment tree and plot them.

    Experiment parameters are read from ``KEY=VALUE`` directory names along
    the path of each result folder; the latency is read from the
    ``*_heavyhitter.json`` file found in that folder.
    """

    # Default location of the colour palette used by ``get_design_style``.
    DEFAULT_COLORS_PATH = '/home/vinh/Q32024/CuckooHeavyKeeper/notebooks/material-colors.json'

    # Palette colour name used for each HEAVY_QUERY_RATE value.
    QUERY_RATE_COLORS = {
        '0.000000': 'red',
        '1.000000': 'blue',
        '5.000000': 'yellow',
        '10.000000': 'green',
    }

    # Plotly dash style used for each PARALLEL_DESIGN value.
    DESIGN_DASH_PATTERNS = {
        'GLOBAL_HASHMAP': 'solid',
        'QPOPSS': 'dot',
    }

    def __init__(self, base_path: str, fixed_params: Dict[str, Set[str]],
                 subplot_params: List[str],
                 colors_path: str = DEFAULT_COLORS_PATH):
        """Store the configuration and load the colour palette.

        Args:
            base_path: Root directory that is walked for experiment results.
            fixed_params: Allowed values per parameter name; experiments
                whose path carries a different value are skipped.
            subplot_params: Parameter names whose value combinations each
                get their own subplot row.
            colors_path: JSON file holding the colour palette. Defaults to
                ``DEFAULT_COLORS_PATH``.
        """
        self.base_path = base_path
        self.fixed_params = fixed_params
        self.subplot_params = subplot_params
        self.varying_params = [
            'PARALLEL_DESIGN',
            'HEAVY_QUERY_RATE'
        ]

        # Load colors once at initialization
        with open(colors_path, encoding='utf-8') as f:
            self.material_colors = json.load(f)

    def get_design_style(self, design: str, query_rate: str) -> dict:
        """Return the line style for one (design, query rate) series.

        The colour is chosen by query rate (shade ``'500'`` of the palette
        entry) and the dash pattern by design.

        Raises:
            KeyError: If the design or query rate has no configured style,
                or the palette lacks the colour.
        """
        color_name = self.QUERY_RATE_COLORS[query_rate]
        return {
            'color': self.material_colors[color_name]['500'],
            'dash': self.DESIGN_DASH_PATTERNS[design]
        }

    def parse_experiment_path(self, path: str) -> Dict[str, Any]:
        """Extract ``KEY=VALUE`` experiment parameters from path components.

        Components without ``=`` are ignored. Only the first ``=`` separates
        key from value, so a value may itself contain ``=``.
        """
        params = {}
        for part in path.split(os.sep):
            key, separator, value = part.partition('=')
            if separator:
                params[key] = value
        return params

    def extract_latency(self, file_content: str) -> float:
        """Return the mean of the raw latencies in microseconds.

        Looks for ``Raw latencies (ns): <comma separated integers>`` and
        averages the values. Returns 0.0 when the marker is missing or
        carries no numbers.
        """
        match = re.search(r"Raw latencies \(ns\): ([\d,]+)", file_content)
        if not match:
            return 0.0
        # Drop empty tokens produced by trailing or doubled commas.
        latencies = [int(token) for token in match.group(1).split(',') if token]
        if not latencies:
            return 0.0
        return sum(latencies) / len(latencies) / 1000  # Convert to microseconds

    def read_latency_file(self, folder_path: str) -> float:
        """Read the heavyhitter file in ``folder_path`` and return its latency.

        Returns 0.0 when the folder holds no ``*_heavyhitter.json`` file.
        """
        # Sorted so the chosen file is deterministic if several match.
        for filename in sorted(os.listdir(folder_path)):
            if filename.endswith('_heavyhitter.json'):
                file_path = os.path.join(folder_path, filename)
                with open(file_path, 'r') as f:
                    return self.extract_latency(f.read())
        return 0.0

    def matches_fixed_params(self, params: Dict[str, str]) -> bool:
        """Check whether ``params`` is compatible with the fixed parameters.

        A parameter that is absent from ``params`` is not treated as a
        mismatch; only present parameters with a disallowed value are.
        """
        return all(
            params[key] in allowed
            for key, allowed in self.fixed_params.items()
            if key in params
        )

    def analyze_latency_experiments(self) -> List[Dict[str, Any]]:
        """Walk ``base_path`` and collect one record per matching experiment.

        Each record is the parameter dict parsed from the folder path plus a
        ``'latency'`` entry in microseconds. Folders whose latency evaluates
        to 0 are skipped.
        """
        results = []

        for root, _dirs, files in os.walk(self.base_path):
            if not any(f.endswith('_heavyhitter.json') for f in files):
                continue
            params = self.parse_experiment_path(root)
            if not self.matches_fixed_params(params):
                continue
            latency = self.read_latency_file(root)
            if latency > 0:
                params['latency'] = latency
                results.append(params)

        return results

    def _group_by_subplot(self, results: List[Dict[str, Any]]) -> Dict[tuple, List[Dict[str, Any]]]:
        """Group results by their values for the subplot parameters.

        Keys are tuples of ``(param, value)`` pairs in ``subplot_params``
        order (duplicates removed), so subplot titles are deterministic.
        """
        ordered_params = list(dict.fromkeys(self.subplot_params))
        grouped = {}
        for result in results:
            group_key = tuple(
                (name, result[name]) for name in ordered_params if name in result
            )
            grouped.setdefault(group_key, []).append(result)
        return grouped

    # The return annotation is quoted so it is not evaluated when the class
    # is defined.
    def create_latency_visualization(self, results: List[Dict[str, Any]]) -> "go.Figure":
        """Create a line chart of latency versus thread count.

        One subplot row is drawn per combination of subplot parameter
        values; within a row there is one line per
        (PARALLEL_DESIGN, HEAVY_QUERY_RATE) pair.

        Raises:
            ValueError: If ``results`` is empty.
            KeyError: If a result lacks ``PARALLEL_DESIGN``,
                ``HEAVY_QUERY_RATE`` or ``NUM_THREADS``, or a series has no
                configured style.
        """
        if not results:
            raise ValueError("No latency results to plot")

        grouped_results = self._group_by_subplot(results)
        n_rows = len(grouped_results)

        # plotly rejects vertical_spacing > 1 / (rows - 1); shrink the gap
        # for tall figures so the subplots keep a positive height.
        vertical_spacing = 0.1 if n_rows == 1 else min(0.1, 0.5 / (n_rows - 1))

        fig = make_subplots(
            rows=n_rows,
            cols=1,
            subplot_titles=[', '.join(f"{k}={v}" for k, v in key)
                            for key in grouped_results],
            vertical_spacing=vertical_spacing
        )

        # Each series gets exactly one legend entry, attached to the first
        # row in which it appears.
        legend_names_shown = set()

        for row, group_results in enumerate(grouped_results.values(), start=1):
            design_rate_groups = {}
            for result in group_results:
                combo = (result['PARALLEL_DESIGN'], result['HEAVY_QUERY_RATE'])
                design_rate_groups.setdefault(combo, []).append(result)

            for combo, combo_results in design_rate_groups.items():
                sorted_results = sorted(combo_results, key=lambda x: int(x['NUM_THREADS']))

                x_values = [int(r['NUM_THREADS']) for r in sorted_results]
                y_values = [r['latency'] for r in sorted_results]

                name = f"Design={combo[0]}, Query Rate={combo[1]}"

                style = self.get_design_style(
                    design=combo[0],
                    query_rate=combo[1]
                )

                fig.add_trace(
                    go.Scatter(
                        x=x_values,
                        y=y_values,
                        name=name,
                        mode='lines+markers',
                        line=dict(
                            color=style['color'],
                            dash=style['dash']
                        ),
                        marker=dict(
                            size=10,
                            color=style['color']
                        ),
                        legendgroup=name,
                        showlegend=name not in legend_names_shown,
                        hovertemplate="Threads: %{x}<br>Latency: %{y:.2f} μs<extra></extra>"
                    ),
                    row=row,
                    col=1
                )
                legend_names_shown.add(name)

        fig.update_layout(
            height=400 * n_rows,
            title_text="Latency Analysis",
            showlegend=True,
            template="plotly_white",
            legend=dict(
                yanchor="top",
                y=0.99,
                xanchor="left",
                x=1.01
            )
        )

        # Use the configured thread counts as ticks; fall back to the
        # thread counts present in the data when none are configured.
        if 'NUM_THREADS' in self.fixed_params:
            thread_values = self.fixed_params['NUM_THREADS']
        else:
            thread_values = {r['NUM_THREADS'] for r in results}
        tickvals = sorted(int(x) for x in thread_values)

        for row in range(1, n_rows + 1):
            fig.update_xaxes(
                title_text="Number of Threads",
                row=row,
                col=1,
                tickmode='array',
                tickvals=tickvals
            )
            fig.update_yaxes(
                title_text="Latency (μs)",
                row=row,
                col=1,
                rangemode='tozero',
                tickformat='.0f'
            )

        return fig

# Example usage
# Example usage: restrict the experiment tree to the configurations below,
# plot latency against thread count, and export the figure as a PDF.
fixed_params = {
    'DIST_PARAM': {'1.500000'},
    'NUM_THREADS': {'10', '20', '30', '40', '50', '60', '70'},
    'THETA': {'0.000050'},
    'PARALLEL_DESIGN': {'GLOBAL_HASHMAP', 'QPOPSS'},
    'HEAVY_QUERY_RATE': {'0.000000', '1.000000', '10.000000'}
}

# One subplot row per distinct (DIST_PARAM, THETA) combination.
subplot_params = ['DIST_PARAM', 'THETA']

analyzer = LatencyExperimentAnalyzer(
    base_path="/home/vinh/Q32024/CuckooHeavyKeeper/latency_2024_11_11",
    fixed_params=fixed_params,
    subplot_params=subplot_params
)

# Compute the results in this cell so the plot never depends on a stale or
# undefined `results` variable left over from another cell.
results = analyzer.analyze_latency_experiments()

if not results:
    # Nothing matched the fixed parameters (or no result files were found);
    # a figure with zero subplot rows cannot be built.
    print(f"No latency results found under {analyzer.base_path}; nothing to plot.")
else:
    fig = analyzer.create_latency_visualization(results)
    fig.show()

    figure_path = '/home/vinh/Q32024/CuckooHeavyKeeper/latency_2024_11_11/figures'
    os.makedirs(figure_path, exist_ok=True)
    fig.update_layout(
        height=400,  # Single row height
        width=1000,
    )
    fig.write_image(os.path.join(figure_path, 'par_latency_plot.pdf'))

In [3]:
results

[]