In [None]:
#@title optionally Clone Project
#!git clone https://github.com/twobob/llama2.c.git
#%cd llama2.c

In [None]:
 =================== BUILD EXECUTABLES ===================

In [None]:
#@title Build
!make winall

In [None]:
 =================== SETUP SYSTEM ===================

In [3]:
import ipywidgets as widgets
from ipywidgets import widgets, VBox, HBox, Layout, Box, Button
from IPython.display import display, clear_output
from datetime import datetime
import os
import re
import requests
import subprocess
from IPython.display import HTML



# Directory (relative to the notebook's working directory) where model files are stored.
local_storage_directory = "../out/"
os.makedirs(local_storage_directory, exist_ok=True)  # Create the directory if it does not exist yet
# Global variable holding the path of the most recently selected model file;
# download_model() overwrites it on every call.
model_file_path = ""
# Output area that download_model() prints its progress messages into.
output_widget = widgets.Output()

def download_model(selected_model):
    """Download the checkpoint for ``selected_model`` unless it is already on disk.

    Progress and error messages are printed into ``output_widget``. The global
    ``model_file_path`` is set to the target path of the selected model.

    Args:
        selected_model: One of the keys of ``model_mapping`` below
            ("model15M", "model42M" or "model110M").

    Returns:
        None. Failures (unknown model, HTTP error, I/O error) are reported in
        the output widget instead of being raised.
    """
    global model_file_path
    with output_widget:
        clear_output(wait=True)
        print(f"Starting download for {selected_model}...") # Debug message
        model_mapping = {
            "model15M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin",
            "model42M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories42M.bin",
            "model110M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.bin",
        }
        download_url = model_mapping.get(selected_model)
        if download_url is None:
            print(f"Unknown model '{selected_model}'. Available models: {', '.join(model_mapping)}")
            return
        print(f"download_url: {download_url}")

        model_file_path = os.path.join(local_storage_directory, selected_model + ".bin")
        print(f"Model file path: {model_file_path}")

        if os.path.exists(model_file_path):
            print(f"{model_file_path} already exists. No need to download.")
            return

        print(f"File does not exist. Starting download...") # Debug message
        # Download into a side file first so an interrupted or failed transfer
        # never leaves a truncated "<model>.bin" that later looks like a valid
        # model. The ".part" name deliberately does not end in ".bin".
        partial_path = os.path.join(local_storage_directory, selected_model + ".part")
        try:
            with requests.get(download_url, stream=True, timeout=30) as response:
                # Without this an HTTP error page would be saved as the model.
                response.raise_for_status()
                with open(partial_path, 'wb') as file:
                    # Stream in 1 MiB chunks instead of holding the whole file in memory.
                    for chunk in response.iter_content(chunk_size=1 << 20):
                        file.write(chunk)
            os.replace(partial_path, model_file_path)
        except (requests.RequestException, OSError) as error:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            print(f"Download of {download_url} failed: {error}")
            return
        print(f"{model_file_path} downloaded successfully.")

# Create a dropdown widget to select the model to download
model_dropdown = widgets.Dropdown(
    options=["model15M", "model42M", "model110M"],
    value="model110M",
    description="Choose model:"
)

# Button to trigger the download
download_button = widgets.Button(description="Download Model")

def on_download_button_clicked(b, dropdown=model_dropdown):
    """Download the model currently selected in the download dropdown.

    Args:
        b: The clicked button (supplied by ipywidgets; unused).
        dropdown: The dropdown to read the selection from. It is bound at
            definition time because a later cell rebinds the global name
            ``model_dropdown`` to a different widget whose values are file
            names / "ALL", which are not valid download keys.
    """
    download_model(dropdown.value)

download_button.on_click(on_download_button_clicked)

# Display the widget along with the output widget
display(model_dropdown, download_button, output_widget)


Dropdown(description='Choose model:', index=2, options=('model15M', 'model42M', 'model110M'), value='model110M…

Button(description='Download Model', style=ButtonStyle())

Output()

In [None]:
 =================== RUN OUTPUT ===================

In [17]:
output_widget_story = widgets.Output() # Create an Output widget for the story

# Define control width
control_width = '400px'
inbox_folder = './inbox'
# Read the model files from the local storage directory.
# fullmatch() keeps out names that merely start like a model file
# (e.g. "model15M.bin.bak"); sorting by the numeric size gives a stable,
# smallest-first order instead of whatever order os.listdir() returns.
model_files = sorted(
    (f for f in os.listdir(local_storage_directory) if re.fullmatch(r'model\d{2,3}M\.bin', f)),
    key=lambda name: int(re.search(r'\d+', name).group())
)
model_files.append("ALL")
model_file_paths =[]

# Create a dropdown widget to select the model.
# Widget width has to be set through `layout`; a bare `width=` keyword is not
# a Dropdown trait and has no effect on the rendered control.
model_dropdown = widgets.Dropdown(
    options=model_files,
    value=model_files[0],
    description="Choose model:",
    layout=Layout(width=control_width)
)

# Create a dropdown widget to select the build
# (this cell builds `build_dropdown` again further down before displaying it).
build_dropdown = widgets.Dropdown(
    options=["CLANG", "GCC", "MINGW", "MSVC", "ALL"],
    value="CLANG",
    description="Choose build:",
    layout=Layout(width=control_width)
)

# Create a label widget; generate_single_story() appends program output to it
results_label = widgets.Label(value="")

num_threads_used = 1 # default single multiplier

def generate_single_story(build, model_file_path, temperature, seed, steps, prompt, top_p, print_tokens, save_file_bool, save_log_bool, group_log_bool, single_bos, dirname, num_threads):
    """Run one build of the executable on one model and show its output.

    The command line is printed into ``output_widget_story`` together with the
    program's stdout/stderr, and both streams are appended to ``results_label``.

    Args:
        build: Key of ``build_mapping`` ("CLANG", "GCC", "MINGW" or "MSVC").
        model_file_path: Path of the model file passed as first argument.
        temperature: Passed as ``-t``.
        seed: Passed as ``-s``; 0 is replaced by a value derived from the
            current minutes and seconds.
        steps: Passed as ``-n``.
        prompt: Passed as ``-i``.
        top_p: Passed as ``-p``.
        print_tokens, save_file_bool, save_log_bool, group_log_bool, single_bos:
            Booleans passed as 0/1 via ``-k``, ``-f``, ``-l``, ``-g``, ``-b``.
        dirname: Passed as ``-d``.
        num_threads: Exported to the child process as ``OMP_NUM_THREADS``.

    Raises:
        KeyError: If ``build`` is not a key of ``build_mapping``.
    """
    if seed == 0:
        # "%M%S" is the current minute and second as digits, e.g. "0742" -> 742.
        seed = int(datetime.now().strftime("%M%S"))

    # Map the selected build to the corresponding command
    build_mapping = {
        "CLANG": "run",
        "GCC": "rungcc",
        "MINGW": "runmingw",
        "MSVC": "runmsvc"
    }
    run_command = build_mapping[build]

    # Pass the arguments as a list instead of interpolating them into a shell
    # string: prompt and dirname are free text from the UI, and quotes or shell
    # metacharacters in them would otherwise break (or hijack) the command.
    # NOTE(review): assumes each build is a real executable (e.g. run / run.exe)
    # that can be started without a shell - confirm against the Makefile.
    args = [
        run_command, str(model_file_path),
        "-t", str(temperature),
        "-s", str(seed),
        "-n", str(steps),
        "-i", str(prompt),
        "-p", str(top_p),
        "-k", str(int(print_tokens)),
        "-f", str(int(save_file_bool)),
        "-l", str(int(save_log_bool)),
        "-g", str(int(group_log_bool)),
        "-b", str(int(single_bos)),
        "-d", str(dirname),
    ]
    cmd = subprocess.list2cmdline(args)  # human-readable form, for display only

    env = os.environ.copy()
    env["OMP_NUM_THREADS"] = str(num_threads)

    with output_widget_story:
        # `build` is always a concrete build here (an "ALL" value would already
        # have failed the mapping lookup above), so the output is always cleared.
        clear_output(wait=True)
        print(f"Running command for {build} build:\n{cmd}")
        try:
            result = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
        except OSError as error:
            # E.g. the executable for this build has not been built.
            message = f"Could not start '{run_command}': {error}"
            print(message)
            results_label.value += message
            return
        # The program emits raw token bytes; never let a stray invalid UTF-8
        # sequence abort the display of an otherwise successful run.
        output = result.stdout.decode('utf-8', errors='replace')
        error_output = result.stderr.decode('utf-8', errors='replace')
        print(f"Generated Story:\n{output}")
        results_label.value += output
        if error_output:
            print(f" Output:\n{error_output}")
            results_label.value += error_output

def generate_stories(build, model, temperature, seed, steps, prompt, top_p, dirname, num_threads,
                     print_tokens=True, save_file_bool=False, save_log_bool=True, group_log_bool=True, single_bos=True):
    """Run generate_single_story() for every selected build/model combination.

    Args:
        build: A single build name, or "ALL" to run GCC, MINGW, MSVC and CLANG in turn.
        model: An entry of ``model_files``, or "ALL" to run every listed model.
        temperature, seed, steps, prompt, top_p, dirname, num_threads,
        print_tokens, save_file_bool, save_log_bool, group_log_bool, single_bos:
            Forwarded unchanged to generate_single_story().
    """
    results_label.value += ''

    # Expand the "ALL" build choice into the individual builds.
    if build == "ALL":
        builds = ["GCC", "MINGW", "MSVC", "CLANG"]
    else:
        builds = [build]

    # Collect the paths of the chosen model(s).
    model_file_paths = []
    for candidate in model_files:
        if model == "ALL" or candidate == model:
            model_file_paths.append(f"{local_storage_directory}/{candidate}")

    # "ALL" is a menu entry, not a file: drop its pseudo-path if it slipped in.
    all_model = f"{local_storage_directory}/ALL"
    if all_model in model_file_paths:
        model_file_paths.remove(all_model)

    for selected_build in builds:
        for path in model_file_paths:
            generate_single_story(
                selected_build, path, temperature, seed, steps, prompt, top_p,
                print_tokens, save_file_bool, save_log_bool, group_log_bool,
                single_bos, dirname, num_threads,
            )

# Define a custom CSS class that fixes the width of widget labels to 150px.
# The rule targets <label> elements inside any element carrying the
# "custom-label-width" class (see wrap_with_custom_label below).
custom_css = """
<style>
    .custom-label-width label {
        width: 150px !important;
    }
</style>
"""

# Inject the stylesheet into the notebook page by displaying it as HTML
display(HTML(custom_css))

# Function to wrap widgets with custom label width class
def wrap_with_custom_label(widget):
    """Return a flex Box holding ``widget`` and tagged with the "custom-label-width" CSS class."""
    flex_layout = Layout(display="flex")
    return Box(children=[widget], layout=flex_layout, _dom_classes=["custom-label-width"])

# Create widgets with the adjusted control width.
# continuous_update=False: a value is only committed when the user releases the
# slider / leaves the text field.
temperature_slider = widgets.FloatSlider(value=1.0, min=0.0, max=1.0, step=0.05, description="Temperature:", layout=Layout(width=control_width), continuous_update=False)
seed_slider = widgets.IntSlider(value=0, min=0, max=10000, step=1, description="Random Seed:", layout=Layout(width=control_width), continuous_update=False)
steps_slider = widgets.IntSlider(value=0, min=0, max=1024, step=32, description="Number of Steps:", layout=Layout(width=control_width), continuous_update=False)
prompt_text = widgets.Text(value="Eloquently", description="Prompt:", layout=Layout(width=control_width), continuous_update=False)
top_p_slider = widgets.FloatSlider(value=1.0, min=0.0, max=1.0, step=0.05, description="Top P:", layout=Layout(width=control_width), continuous_update=False)
num_threads_slider = widgets.IntSlider(value=4, min=1, max=16, step=1, description="Number of Threads:", layout=Layout(width=control_width), continuous_update=False)
dirname_text = widgets.Text(value="inbox", description="Dirname:", layout=Layout(width=control_width), continuous_update=False)
print_tokens_checkbox = widgets.Checkbox(value=True, description="Print Tokens:", layout=Layout(width=control_width))
save_file_bool_checkbox = widgets.Checkbox(value=False, description="Save File:", layout=Layout(width=control_width))
save_log_bool_checkbox = widgets.Checkbox(value=True, description="Save Log:", layout=Layout(width=control_width))
group_log_bool_checkbox = widgets.Checkbox(value=True, description="Save Log in groups:", layout=Layout(width=control_width))
single_bos_checkbox = widgets.Checkbox(value=True, description="Single BOS:", layout=Layout(width=control_width))

# This is the build dropdown that is actually displayed; "ALL" is listed first
# and is therefore the initial selection. Same width as the other controls.
build_dropdown = widgets.Dropdown(options=["ALL", "GCC", "MINGW", "MSVC", "CLANG"], description="Choose build:", layout=Layout(width=control_width))

# Wrap every control so its label picks up the custom label width.
# (Each control is wrapped exactly once; wrapping twice would only leave an
# orphaned Box widget behind.)
build_box = wrap_with_custom_label(build_dropdown)
temperature_box = wrap_with_custom_label(temperature_slider)
seed_box = wrap_with_custom_label(seed_slider)
steps_box = wrap_with_custom_label(steps_slider)
prompt_text_box = wrap_with_custom_label(prompt_text)
top_p_box = wrap_with_custom_label(top_p_slider)
num_threads_box = wrap_with_custom_label(num_threads_slider)
dirname_text_box = wrap_with_custom_label(dirname_text)
print_tokens_checkbox_box = wrap_with_custom_label(print_tokens_checkbox)
save_file_bool_checkbox_box = wrap_with_custom_label(save_file_bool_checkbox)
save_log_bool_checkbox_box = wrap_with_custom_label(save_log_bool_checkbox)
group_log_bool_checkbox_box = wrap_with_custom_label(group_log_bool_checkbox)
single_bos_checkbox_box = wrap_with_custom_label(single_bos_checkbox)

# Controls in the order they are stacked in the UI
widgets_to_display = [
    build_box,
    wrap_with_custom_label(model_dropdown), # Add the model dropdown
    temperature_box,
    seed_box,
    steps_box,
    prompt_text_box,
    top_p_box,
    dirname_text_box,
    num_threads_box,
    print_tokens_checkbox_box,
    save_file_bool_checkbox_box,
    save_log_bool_checkbox_box,
    group_log_bool_checkbox_box,
    single_bos_checkbox_box,
    results_label
]

# Button to trigger the generation manually
generate_button = widgets.Button(description="Generate Stories",layout=Layout(width=control_width))

def on_generate_button_clicked(b):
    """Read the current value of every control and start story generation.

    Args:
        b: The clicked button (supplied by ipywidgets; unused).
    """
    # Snapshot of the UI state, keyed by generate_stories() parameter name.
    settings = {
        "build": build_dropdown.value,
        "model": model_dropdown.value, # Add the selected model
        "temperature": temperature_slider.value,
        "seed": seed_slider.value,
        "steps": steps_slider.value,
        "prompt": prompt_text.value,
        "top_p": top_p_slider.value,
        "dirname": dirname_text.value,
        "num_threads": num_threads_slider.value,
        "print_tokens": print_tokens_checkbox.value,
        "save_file_bool": save_file_bool_checkbox.value,
        "save_log_bool": save_log_bool_checkbox.value,
        "group_log_bool": group_log_bool_checkbox.value,
        "single_bos": single_bos_checkbox.value,
    }
    generate_stories(**settings)

generate_button.on_click(on_generate_button_clicked)


def run_reports(_=None): # _ is a placeholder for the button's click event object
    """Show the story-generation controls, provided the inbox folder exists.

    Args:
        _: Ignored; lets the function double as a button click handler.
    """
    if not os.path.exists(inbox_folder):
        print(f"The folder '{inbox_folder}' does not exist. Please make sure the folder is available.")
        return

    # Stack all controls vertically, with the generate button at the bottom
    controls = widgets_to_display + [generate_button]
    interactive_widget = widgets.VBox(controls)

    # Show the controls followed by the story output area
    display(interactive_widget, output_widget_story)

# Button wired to run_reports(); it is created but not displayed (see the
# commented-out display call below).
button_run_reports = Button(description="Run Reports")
button_run_reports.on_click(run_reports)

#display(button_run_reports)

# Call run_reports() directly so the controls are shown as soon as this cell runs
run_reports()



VBox(children=(Box(children=(Dropdown(description='Choose build:', options=('ALL', 'GCC', 'MINGW', 'MSVC', 'CL…

Output()

 =================== REPORTS LOGGING ===================

In [16]:
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import make_interp_spline
from scipy.interpolate import interp1d
import seaborn as sns
import matplotlib.colors as mcolors
from IPython.display import display
import ipywidgets as widgets
import plotly.express as px
import pandas as pd
import glob
import os

def calculate_tokens_per_second_filtered(cumulative_time, num_threads=None):
    """Convert a sequence of time stamps into per-step tokens-per-second rates.

    Consecutive differences of ``cumulative_time`` are divided by 1000
    (presumably milliseconds -> seconds; confirm against the log format),
    zero-length steps are dropped to avoid division by zero, and the
    reciprocal of each remaining step is scaled by the thread count.

    Args:
        cumulative_time: 1-D sequence of numeric time stamps.
        num_threads: Multiplier applied to the rates. When omitted, the
            module-level ``num_threads_used`` is used if it exists, otherwise 1,
            so this cell also works on a fresh kernel where the cell defining
            ``num_threads_used`` has not been run.

    Returns:
        numpy.ndarray of float rates, one per non-zero time step (may be empty).
    """
    if num_threads is None:
        num_threads = globals().get("num_threads_used", 1)

    # Coerce to float so object-dtype input (values taken from a mixed
    # string/number DataFrame row) is handled with plain numeric arithmetic.
    time_diffs_seconds = np.diff(np.asarray(cumulative_time, dtype=float)) / 1000
    time_diffs_seconds_filtered = time_diffs_seconds[time_diffs_seconds != 0]
    tokens_per_second_unadjusted = 1 / time_diffs_seconds_filtered

    # Multiplying the tokens per second by the number of threads used / APPROXIMATION
    return tokens_per_second_unadjusted * num_threads

def process_log_file_per_run_filtered(data_frame):
    """Compute a tokens-per-second series and a label for every row of a log frame.

    Each row is one run: column 0 is a string that splits on '-' into exactly
    three parts (the first two are used as build type and model size), and the
    remaining non-NaN columns are that run's time stamps.

    Args:
        data_frame: pandas DataFrame with one run per row, laid out as above.

    Returns:
        Tuple ``(runs_tokens_per_second, labels)`` of two lists with one entry
        per row; labels read "<build> - <size>M - Run <row index + 1>".
    """
    labels = []
    runs_tokens_per_second = []
    for row_index, log_row in data_frame.iterrows():
        build_type, model_size, _ = log_row[0].split('-')
        labels.append(f"{build_type} - {model_size}M - Run {row_index + 1}")

        # Time elapsed since the run's first time stamp
        recorded_times = log_row[1:].dropna().values
        elapsed = np.cumsum(np.diff(recorded_times))
        runs_tokens_per_second.append(calculate_tokens_per_second_filtered(elapsed))
    return runs_tokens_per_second, labels

def smooth_data(data, window_size=1):
    """Return the trailing rolling mean of ``data`` as a numpy array.

    The window covers up to ``window_size`` most recent values; with
    ``min_periods=1`` the first entries are averaged over however many values
    are available so far, so the result has the same length as ``data``.
    """
    return pd.Series(data).rolling(window=window_size, min_periods=1).mean().values

def _group_key_from_label(label):
    """Return the "<build> - <size>" prefix of a "<build> - <size> - Run N" label."""
    build_type, model_size, _ = label.split(' - ')
    return f"{build_type} - {model_size}"


def _smoothed_curve(run_data, window_size, num_points=300):
    """Return ``(x, y)`` of a smoothed, spline-resampled curve, or None if the run is too short."""
    smoothed_data = smooth_data(run_data, window_size)
    sample_count = len(smoothed_data)
    if sample_count < 2:
        return None  # nothing to draw a line through
    x_values = np.linspace(0, sample_count - 1, sample_count)
    # A cubic spline needs at least 4 samples; fall back to linear below that.
    degree = 3 if sample_count >= 4 else 1
    spl = make_interp_spline(x_values, smoothed_data, k=degree)
    x_smooth = np.linspace(x_values.min(), x_values.max(), num_points)
    return x_smooth, spl(x_smooth)


def plot_smoothed_tokens_per_second(file_name, tokens_per_second_data, labels, window_size=1, usePlotly=False):
    """Plot one smoothed tokens-per-second curve per run, coloured by build/model group.

    Args:
        file_name: Name shown in the plot title.
        tokens_per_second_data: Sequence of per-run rate sequences.
        labels: One "<build> - <size> - Run N" label per run.
        window_size: Rolling-mean window passed to smooth_data().
        usePlotly: Draw with Plotly Express when True, Matplotlib otherwise.

    Runs with fewer than two samples are skipped. Colours repeat once there
    are more groups than palette entries.
    """
    # Custom palette for red-green colorblindness
    colorblind_safe_palette = sns.color_palette([
        "#0000FF", # Blue
        "#FF00FF", # Fuchsia
        "#FFA500", # Orange
        "#808080", # Gray
        "#800080", # Purple
        "#00FFFF", # Aqua
    ])

    # Prepare every run once; both back ends draw from the same curve list.
    group_colors = {}
    curves = []
    for label, run_data in zip(labels, tokens_per_second_data):
        group_key = _group_key_from_label(label)
        if group_key not in group_colors:
            # Wrap around instead of failing when groups outnumber colours.
            group_colors[group_key] = colorblind_safe_palette[len(group_colors) % len(colorblind_safe_palette)]
        curve = _smoothed_curve(run_data, window_size)
        if curve is None:
            continue
        x_smooth, y_smooth = curve
        curves.append((x_smooth, y_smooth, label, group_colors[group_key], group_key))

    if usePlotly:
        # Plotly code
        df = pd.DataFrame(
            [
                {'x': x, 'y': y, 'label': label, 'color': mcolors.to_hex(color), 'group_key': group_key}
                for x_vals, y_vals, label, color, group_key in curves
                for x, y in zip(x_vals, y_vals)
            ],
            columns=['x', 'y', 'label', 'color', 'group_key'],
        )

        # line_group keeps each run as its own trace; colouring by group alone
        # would join all runs of a group into a single zig-zag line.
        fig = px.line(df, x='x', y='y', color='group_key', line_group='label', line_dash_sequence=['solid'],
                      hover_data=['label'], title=f"Smoothed Tokens per Second for {file_name}",
                      labels={'x': 'Timestamp', 'y': 'Smoothed Tokens per Second'},
                      color_discrete_map={group_key: mcolors.to_hex(color) for group_key, color in group_colors.items()})

        fig.show()
    else:
        # Matplotlib code
        plt.figure(figsize=(10, 6))
        labelled_groups = set()
        for x_smooth, y_smooth, _, color, group_key in curves:
            # Only the first curve of a group gets a legend entry.
            legend_label = "" if group_key in labelled_groups else group_key
            labelled_groups.add(group_key)
            plt.plot(x_smooth, y_smooth, label=legend_label, color=color)
        plt.title(f"Smoothed Tokens per Second for {file_name}")
        plt.xlabel("Timestamp")
        plt.ylabel("Smoothed Tokens per Second")
        if labelled_groups:
            plt.legend()
        plt.show()

def interactive_plot(file_name, tokens_per_second_data, labels, usePlotly=False):
    """Display the smoothed plot together with a dropdown that selects the smoothing window.

    Args:
        file_name: Name shown in the plot title.
        tokens_per_second_data: Per-run rate sequences to plot.
        labels: One label per run.
        usePlotly: Forwarded to plot_smoothed_tokens_per_second().
    """
    def plot_with_smoothing(smoothing_level):
        # Redrawn by ipywidgets whenever the dropdown value changes.
        plot_smoothed_tokens_per_second(
            file_name,
            tokens_per_second_data,
            labels,
            window_size=smoothing_level,
            usePlotly=usePlotly,
        )

    smoothing_dropdown = widgets.Dropdown(
        options=[1, 2, 4, 8, 16, 32, 64],
        value=64,
        description='Smoothing:',
    )
    display(widgets.interactive(plot_with_smoothing, smoothing_level=smoothing_dropdown))

# Folder scanned for the '*.csv' log files that the report code at the end of this cell plots.
inbox_folder = './inbox'

def interactive_general_plot(tokens_per_second_data, labels, file_name):
    """Display a Line / Scatter / Histogram / Box plot of the runs with a plot-type dropdown.

    Args:
        tokens_per_second_data: Sequence of per-run rate sequences.
        labels: One "<build> - <size> - Run N" label per run.
        file_name: Name shown in the plot title.

    The first run defines the x-axis span. Longer runs are linearly resampled
    down to the first run's length; shorter runs are spread over the same
    span. Each run is handled independently, so the result does not depend on
    the order of the runs.
    """
    def general_plot(plot_type):
        colorblind_safe_palette = sns.color_palette([
            "#0000FF", # Blue
            "#FF00FF", # Fuchsia
            "#FFA500", # Orange
            "#808080", # Gray
            "#800080", # Purple
            "#00FFFF", # Aqua
        ])
        if len(tokens_per_second_data) == 0:
            # Nothing was parsed from this file; avoid indexing an empty list.
            print(f"No runs to plot for {file_name}.")
            return
        reference_length = len(tokens_per_second_data[0])

        plt.figure(figsize=(10, 6))

        if plot_type == 'Box':
            plt.boxplot(tokens_per_second_data)
        else:
            # One colour per build/model group, in order of first appearance;
            # colours repeat when groups outnumber palette entries.
            group_keys = []
            group_colors = {}
            for label in labels:
                build_type, model_size, _ = label.split(' - ')
                group_key = f"{build_type} - {model_size}"
                group_keys.append(group_key)
                if group_key not in group_colors:
                    group_colors[group_key] = colorblind_safe_palette[len(group_colors) % len(colorblind_safe_palette)]

            custom_lines = [plt.Line2D([0], [0], color=color, lw=4) for color in group_colors.values()]

            for group_key, run_data in zip(group_keys, tokens_per_second_data):
                run_values = np.asarray(run_data, dtype=float)
                if len(run_values) > reference_length:
                    # Linearly resample a longer run down to the reference length.
                    run_values = np.interp(
                        np.linspace(0, len(run_values) - 1, reference_length),
                        np.arange(len(run_values)),
                        run_values,
                    )
                # Per-run x axis over the reference span (never shared/mutated).
                x_values = np.linspace(0, reference_length - 1, len(run_values))
                color = group_colors[group_key]

                if plot_type == 'Line':
                    plt.plot(x_values, run_values, color=color)
                elif plot_type == 'Scatter':
                    plt.scatter(x_values, run_values, color=color)
                elif plot_type == 'Histogram':
                    plt.hist(run_values, bins=20, alpha=0.5, color=color)

            plt.legend(custom_lines, list(group_colors.keys()))

        plt.title(f"{plot_type} Plot for {file_name}")
        plt.xlabel("Timestamp")
        plt.ylabel("Tokens per Second")
        plt.show()

    plot_types = ['Line', 'Scatter', 'Histogram', 'Box']
    plot_type_dropdown = widgets.Dropdown(options=plot_types, description='Plot Type:')
    interactive_widget = widgets.interactive(general_plot, plot_type=plot_type_dropdown)
    display(interactive_widget)

if not os.path.exists(inbox_folder):
    print(f"The folder '{inbox_folder}' does not exist. Please make sure the folder is available.")
else:
    # Parse every CSV log in the inbox: first field stays a string (run id),
    # all remaining fields become floats, anything non-numeric becomes NaN.
    csv_files = glob.glob(os.path.join(inbox_folder, '*.csv'))
    csv_data = {}
    for csv_path in csv_files:
        parsed_rows = []
        with open(csv_path, 'r') as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if not stripped:
                    continue  # skip blank lines
                fields = stripped.split(',')
                numeric_fields = []
                for field in fields[1:]:
                    # Accept plain unsigned decimals only (digits with at most one '.')
                    if field.strip() and field.replace('.', '', 1).isdigit():
                        numeric_fields.append(float(field))
                    else:
                        numeric_fields.append(float('nan'))
                parsed_rows.append(fields[:1] + numeric_fields)
        csv_data[os.path.basename(csv_path)] = pd.DataFrame(parsed_rows)

    # Per file: (list of tokens-per-second series, list of run labels)
    analysis_results = {
        name: process_log_file_per_run_filtered(frame)
        for name, frame in csv_data.items()
    }

    # First all smoothed plots, then all general plots (same order as the files)
    for file_name, (tokens_per_second_data, labels) in analysis_results.items():
        interactive_plot(file_name, tokens_per_second_data, labels, usePlotly=False) # Set to True to use Plotly

    for file_name, (tokens_per_second_data, labels) in analysis_results.items():
        interactive_general_plot(tokens_per_second_data, labels, file_name)

    # TODO:
    #for file_name, (tokens_per_second_data, labels) in analysis_results.items():
    #    interactive_plot(file_name, tokens_per_second_data, labels, usePlotly=True) # Set to True to use Plotly


#display(button_run_reports)


interactive(children=(Dropdown(description='Smoothing:', index=6, options=(1, 2, 4, 8, 16, 32, 64), value=64),…

interactive(children=(Dropdown(description='Smoothing:', index=6, options=(1, 2, 4, 8, 16, 32, 64), value=64),…

interactive(children=(Dropdown(description='Smoothing:', index=6, options=(1, 2, 4, 8, 16, 32, 64), value=64),…

interactive(children=(Dropdown(description='Plot Type:', options=('Line', 'Scatter', 'Histogram', 'Box'), valu…

interactive(children=(Dropdown(description='Plot Type:', options=('Line', 'Scatter', 'Histogram', 'Box'), valu…

interactive(children=(Dropdown(description='Plot Type:', options=('Line', 'Scatter', 'Histogram', 'Box'), valu…