# GPU Cost Analysis

In [1]:
from datasets import load_dataset

ds = load_dataset("allenai/WildChat-1M")

In [84]:
from pathlib import Path

import pandas as pd
import requests

path = Path("data/raw-lmsys-data.json")
# we use the latest data
url = "https://storage.googleapis.com/arena_external_data/public/clean_battle_20240814_public.json"

if not path.exists():
    response = requests.get(url)

    with path.open("wb") as file:
        file.write(response.content)

# load the JSON data from the local file
with path.open() as file:
    battles = pd.read_json(file).sort_values(ascending=True, by=["tstamp"])

In [89]:
from datetime import datetime

tses = [datetime.utcfromtimestamp(ts) for ts in battles.tstamp.tolist()]

In [92]:
battles["dt"] = tses

In [102]:
counts = battles.iloc[-300000:].groupby("model_a").count().sort_values("model_b").model_b

In [105]:
pop = counts / counts.sum()

In [114]:
new_df = counts.reset_index()

In [115]:
new_df

Unnamed: 0,model_a,model_b
0,gemma-1.1-7b-it,49
1,phi-3-mini-4k-instruct,396
2,glm-4-0520,1224
3,phi-3-small-8k-instruct,1386
4,deepseek-coder-v2-0724,2109
5,mistral-large-2402,2269
6,command-r,2343
7,yi-1.5-34b-chat,2386
8,nemotron-4-340b-instruct,2588
9,gpt-3.5-turbo-0125,2743


In [22]:
rows = []

for idx in range(3000):  # a bit more than one day
    rows.append(ds["train"][idx])

In [34]:
from collections import Counter

import tiktoken

encoding = tiktoken.encoding_for_model("gpt-4")
user_counter = Counter()
turn_text_lengths = []

for row in rows:
    conversation = row["conversation"]
    hashed_ip = row["hashed_ip"]
    user_counter[hashed_ip] += len(conversation) // 2

    for turn in conversation:
        turn_text_lengths.append(len(encoding.encode(turn["content"])))

In [35]:
import numpy as np

np.mean(list(user_counter.values()))  # average number of Qs per day per user

7.508445945945946

In [36]:
2 * np.mean(turn_text_lengths)  # average dialogue length (multiply by two for in + out)

479.64409448818895

In [118]:
import pandas as pd

df = pd.read_csv("data/table.csv")
full_df = pd.read_csv("data/full-table.csv")

In [119]:
full_df[full_df.License == "Proprietary"].Votes.sum() / full_df.Votes.sum()

0.6798251708171085

In [120]:
prop_df = full_df[full_df.License == "Proprietary"]

In [121]:
prop_df["Popularity"] = prop_df.Votes / prop_df.Votes.sum() * 100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prop_df['Popularity'] = prop_df.Votes / prop_df.Votes.sum() * 100


In [122]:
prop_df = prop_df.sort_values("Popularity", ascending=False).iloc[:25]

In [123]:
prop_df["PPM"] = [
    15.0 * 0.8 + 75.0 * 0.2,  # Claude 3 Opus
    3.0 * 0.8 + 15.0 * 0.2,  # Claude 3 Sonnet
    0.25 * 0.8 + 1.25 * 0.2,  # Claude 3 Haiku
    5.0 * 0.8 + 15.0 * 0.2,  # GPT-4-1106-preview (estimated)
    5.0 * 0.8 + 15.0 * 0.2,  # GPT-4-0125-preview (estimated)
    10.0 * 0.8 + 30.0 * 0.2,  # GPT-4-Turbo-2024-04-09
    4,  # GPT-4o-2024-05-13
    3.50 * 0.8 + 10.0 * 0.2,  # Gemini-1.5-Pro-001
    3.0 * 0.8 + 6.0 * 0.2,  # GPT-3.5-Turbo-0125
    None,  # Reka-Core-20240501 (not available)
    0.35 * 0.8 + 0.35 * 0.2,  # Gemini-1.5-Flash-001
    10.0 * 0.8 + 30.0 * 0.2,  # GPT-4-0314
    3.50 * 0.8 + 10.0 * 0.2,  # Gemini-1.5-Pro-Preview-0409
    3.50 * 0.8 + 10.0 * 0.2,  # Gemini Advanced App (2024-05-14)
    None,  # Yi-Large-preview (not available)
    3.0 * 0.8 + 15.0 * 0.2,  # Claude 3.5 Sonnet
    8.0 * 0.8 + 24.0 * 0.2,  # Claude-2.1
    None,  # Qwen-Max-0428 (not available)
    3.50 * 0.8 + 15.0 * 0.2,  # Gemini-1.5-Pro-Exp-0801
    0.3,  # GPT-4o-mini-2024-07-18
    4,  # ChatGPT-4o-latest (2024-08-08)
    0.25 * 0.8 + 1.25 * 0.2,  # Claude-Instant-1
    None,  # Reka-Flash-Preview-20240611 (not available)
    None,  # Yi-Large (not available)
    5.0 * 0.8 + 15.0 * 0.2,  # GPT-4o-2024-08-06
]

In [124]:
prop_df["Popularity"] = prop_df.Popularity / prop_df.Popularity.sum()

In [125]:
prop_df["cost weight"] = prop_df.PPM * prop_df.Popularity

In [132]:
prop_df.sort_values("cost weight", ascending=False)

Unnamed: 0,Rank,Model,Arena_Score,95% CI,Votes,Organization,License,Knowledge_Cutoff,Popularity,PPM,cost weight
17,14,Claude 3 Opus,1248.0,+3/-3,158077.0,Anthropic,Proprietary,2023/8,0.032336,27.0,2.924139
13,10,GPT-4-Turbo-2024-04-09,1257.0,+2/-3,86648.0,OpenAI,Proprietary,2023/12,0.023633,14.0,0.831098
36,35,GPT-4-0314,1186.0,+3/-3,55962.0,OpenAI,Proprietary,2021/9,0.015263,14.0,0.536768
16,14,GPT-4-1106-preview,1251.0,+3/-3,93540.0,OpenAI,Proprietary,2023/4,0.076538,7.0,0.448602
31,28,Claude 3 Sonnet,1201.0,+3/-2,113042.0,Anthropic,Proprietary,2023/8,0.184992,5.4,0.418215
19,15,GPT-4-0125-preview,1245.0,+3/-3,86921.0,OpenAI,Proprietary,2023/12,0.071122,7.0,0.416858
50,61,Claude-2.1,1118.0,+4/-3,37685.0,Anthropic,Proprietary,Unknown,0.030835,11.2,0.289169
10,8,Gemini-1.5-Pro-001,1260.0,+3/-2,72623.0,Google,Proprietary,2023/11,0.059423,4.8,0.238826
3,3,GPT-4o-2024-05-13,1286.0,+3/-3,80741.0,OpenAI,Proprietary,2023/10,0.099099,4.0,0.221269
5,5,Claude 3.5 Sonnet,1271.0,+3/-3,51097.0,Anthropic,Proprietary,2024/4,0.04181,5.4,0.18904


In [127]:
prop_df.at[17, "Popularity"] /= 4
prop_df.at[31, "Popularity"] *= 2
prop_df.at[13, "Popularity"] /= 3
prop_df.at[36, "Popularity"] /= 3
prop_df.at[3, "Popularity"] *= 1.5

In [135]:
prop_df.at[31, "Popularity"] /= 1.5

In [128]:
prop_df.at[3, "PPM"] = 4
prop_df.at[4, "PPM"] = 0.3
prop_df.at[9, "PPM"] = 4
prop_df.at[0, "PPM"] = 4

In [129]:
prop_df = prop_df.dropna()

In [130]:
prop_df["Popularity"] = prop_df.Popularity / prop_df.Popularity.sum()

In [137]:
(prop_df.Popularity * prop_df.PPM).sum()

5.289747645114814

prop_df.sort_values('Popularity', ascending=False)

In [71]:
df.sort_values("NPBillions")

Unnamed: 0,Rank,Model,Arena_Score,95% CI,Votes,Organization,License,Knowledge_Cutoff,NPBillions
51,133,Gemma-2b-it,799.0,+10/-12,2443.0,Meta,Non-commercial,2023/2,2.0
31,101,Gemma-1.1-2b-it,1020.0,+4/-6,11351.0,Google,Gemma license,2024/2,2.0
11,58,Gemma-2-2b-it,1132.0,+5/-5,18780.0,Google,Gemma license,2024/7,2.0
36,114,Gemma-2b-it,990.0,+9/-9,4924.0,Google,Gemma license,2024/2,2.0
20,69,Gemma-1.1-2b-it,1099.0,+9/-8,6658.0,AllenAI/UW,AI2 ImpACT Low-risk,2023/11,2.0
47,128,FastChat-T5-3B,879.0,+9/-11,4993.0,Tsinghua,Apache 2.0,2023/3,3.0
28,101,Phi-3-Mini-128k-Instruct,1037.0,+5/-3,21592.0,Microsoft,MIT,2023/10,3.8
37,119,Qwen1.5-4B-Chat,990.0,+9/-9,4924.0,Google,Gemma license,2024/2,4.0
42,123,ChatGLM2-6B,928.0,+12/-10,4016.0,MosaicML,Apache-2.0,2023/5,6.0
46,127,ChatGLM-6B,894.0,+8/-7,6381.0,OpenAssistant,Non-commercial,2023/4,6.0


In [40]:
df = df[df.NPBillions < 100]

In [56]:
def weighted_median(values, weights):
    i = np.argsort(values).astype(int)
    c = np.cumsum(np.array(weights)[i])
    return values[i[np.searchsorted(c, 0.5 * c[-1])]]


weighted_median(df.NPBillions.astype(int).tolist(), (df.Votes / df.Votes.sum()).tolist())

14

In [61]:
df["Popularity"] = df.Votes / df.Votes.sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Popularity'] = df.Votes / df.Votes.sum()


In [66]:
df["NPRound"] = ((df.NPBillions.astype(int) + 10) / 10).astype(int) * 10

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['NPRound'] = ((df.NPBillions.astype(int) + 10) / 10).astype(int) * 10


In [68]:
df.groupby("NPRound").sum("weights")

Unnamed: 0_level_0,Rank,Arena_Score,Votes,NPBillions,weights,Popularity
NPRound,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10,2328,23167.0,211930.0,125.8,0.374245,0.374245
20,1244,11832.0,103528.0,157.0,0.182819,0.182819
30,23,1218.0,30040.0,27.0,0.053047,0.053047
40,419,5359.0,51845.0,163.0,0.091553,0.091553
60,65,1114.0,76039.0,56.0,0.134276,0.134276
80,140,6040.0,89368.0,352.0,0.157814,0.157814
100,23,1213.0,3537.0,94.0,0.006246,0.006246
