In [1]:

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import json

In [2]:
from transformers import AutoTokenizer
tokenizer_path = "./tokenizer/Qwen/Qwen2.5-72B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
    tokenizer_path,
    trust_remote_code=True,
    local_files_only=True
)

In [3]:

def count_tokens_from_local(text: str) -> int:
    if not text:
        return 0

    return len(tokenizer.encode(text, add_special_tokens=False))


In [4]:


# 使用示例

text = "Hello world! 你好世界！"
print(count_tokens_from_local(text))

7


In [5]:
model_params_dict = {
    'qwen/qwq-32b': {
        'total_params': '32B',
        'architecture': 'Dense',
        'activated_params': '32B'
    },
    'google/gemini-2.0-flash-exp:free': {
        'total_params': '32B',
        'architecture': 'Dense',
        'activated_params': '32B'
    },
    'google/gemini-2.0-flash-thinking-exp:free': {
        'total_params': '32B',
        'architecture': 'Dense',
        'activated_params': '32B'
    },
    'deepseek/deepseek-chat': {
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'deepseek/deepseek-r1-distill-llama-70b': {
        'total_params': '70B',
        'architecture': 'Dense',
        'activated_params': '70B'
    },
    'qwen/qwq-32b-preview': {
        'total_params': '32B',
        'architecture': 'Dense',
        'activated_params': '32B'
    },
    'anthropic/claude-3.7-sonnet:thinking': { # Estimated
        'total_params': '175B',
        'architecture': 'Dense',
        'activated_params': '175B'
    },
    'google/gemini-2.0-pro-exp-02-05:free': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'deepseek/deepseek-r1': {
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'anthropic/claude-3.5-haiku-20241022': { # Estimated
        'total_params': '20B',
        'architecture': 'Dense',
        'activated_params': '20B'
    },
    'deepseek/deepseek-r1-distill-qwen-32b': {
        'total_params': '32B',
        'architecture': 'Dense',
        'activated_params': '32B'
    },
    'openai/o1': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'openai/gpt-4.5-preview': { # Estimated
        'total_params': '500B',
        'architecture': 'Dense',
        'activated_params': '500B'
    },
    'mistralai/mistral-large-2411': {
        'total_params': '123B',
        'architecture': 'Dense',
        'activated_params': '123B'
    },
    'openai/o3-mini-high': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
     'amazon/nova-lite-v1': { # Estimated
        'total_params': '8B',
        'architecture': 'Dense',
        'activated_params': '8B'
    },
    'sammcj/qwen2.5-dracarys2-72b:Q4_K_M': {
        'total_params': '72B',
        'architecture': 'Dense',
        'activated_params': '72B'
    },
    'meta-llama/llama-3.1-405b-instruct': {
        'total_params': '405B',
        'architecture': 'Dense',
        'activated_params': '405B'
    },
    'openai/o3-mini': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'openai/gpt-4o-2024-11-20': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'openai/gpt-4o-mini': { # Estimated
        'total_params': '7B',
        'architecture': 'Dense',
        'activated_params': '7B'
    },
    'google/gemma-2-27b-it': {
        'total_params': '27B',
        'architecture': 'Dense',
        'activated_params': '27B'
    },
    'mistralai/mistral-small-24b-instruct-2501': {
        'total_params': '24B',
        'architecture': 'Dense',
        'activated_params': '24B'
    },
    'openai/gpt-4-turbo': { # Estimated
        'total_params': '20B',
        'architecture': 'Dense',
        'activated_params': '20B'
    },
    'mistralai/mistral-small': {
        'total_params': '24B',
        'architecture': 'Dense',
        'activated_params': '24B'
    },
    'openai/o1-mini': { # Estimated
        'total_params': '7B',
        'architecture': 'Dense',
        'activated_params': '7B'
    },
    'meta-llama/llama-3.1-70b-instruct': {
        'total_params': '70B',
        'architecture': 'Dense',
        'activated_params': '70B'
    },
    'qwen/qwen-2.5-72b-instruct': {
        'total_params': '72B',
        'architecture': 'Dense',
        'activated_params': '72B'
    },
    'x-ai/grok-2-1212': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'google/gemini-2.0-flash-lite-001': { # Estimated
        'total_params': '8B',
        'architecture': 'Dense',
        'activated_params': '8B'
    },
    'qwen/qwen-2.5-coder-32b-instruct': {
        'total_params': '32B',
        'architecture': 'Dense',
        'activated_params': '32B'
    },
    'qwen/qwen-2.5-7b-instruct': {
        'total_params': '7B',
        'architecture': 'Dense',
        'activated_params': '7B'
    },
    'qwen/qwen-max': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'anthropic/claude-3.5-sonnet': { # Estimated
        'total_params': '175B',
        'architecture': 'Dense',
        'activated_params': '175B'
    },
    'step-2-16k-202411': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'microsoft/phi-4': {
        'total_params': '14B',
        'architecture': 'Dense',
        'activated_params': '14B'
    },
    'google/gemini-pro-1.5': { # Estimated
        'total_params': '671B',
        'architecture': 'MoE',
        'activated_params': '37B'
    },
    'meta-llama/llama-3.3-70b-instruct': {
        'total_params': '70B',
        'architecture': 'Dense',
        'activated_params': '70B'
     },
    'anthropic/claude-3.7-sonnet': { # Estimated
        'total_params': '175B',
        'architecture': 'Dense',
        'activated_params': '175B'
    },
    'anthropic/claude-3-opus': { # Estimated
        'total_params': '500B',
        'architecture': 'Dense',
        'activated_params': '500B'
    },
    'amazon/nova-pro-v1': { # Estimated
        'total_params': '70B',
        'architecture': 'Dense',
        'activated_params': '70B'
    }
}


# print(json.dumps(model_params_dict, indent=4))

In [6]:
def ecologits_calculator_per100_output_token(active_parameters, total_parameters):
    # data from:
    # https://huggingface.co/spaces/genai-impact/ecologits-calculator
    res = {          # (Wh  , gCO2eq)
        ('70B','70B'): (1.25, 0.762),
        ('32B','32B'): (0.714, 0.437),
        ('37B','671B'): (4.7, 2.88),
        ('175B','175B'): (5.44, 3.32),
        ('20B','20B'): (0.545, 0.335),
        ('123B','123B'): (1.99, 1.22),
        ('8B','8B'): (0.377, 0.232),
        ('72B','72B'): (1.27, 0.779),
        ('405B','405B'): (23.8, 14.5),
        ('7B','7B'): (0.363, 0.223),
        ('27B', '27B'): (0.643, 0.394),
        ('24B', '24B'): (0.601, 0.369),
        ('14B', '14B'): (0.461, 0.283),
        ('500B', '500B'): (29.1, 17.8),
        
    }
    return res[(active_parameters, total_parameters)]

In [7]:
for k,v in model_params_dict.items():
    # print(k)
    active = v['activated_params']
    
    total = v['total_params']
    res = ecologits_calculator_per100_output_token(active, total)
    # print(res)
    
    energy_Wh = res[0]
    GHG_emissions = res[1]
    
    print(k.ljust(50),energy_Wh, 'Wh'.ljust(20),GHG_emissions, 'gCO2eq'.ljust(20))

qwen/qwq-32b                                       0.714 Wh                   0.437 gCO2eq              
google/gemini-2.0-flash-exp:free                   0.714 Wh                   0.437 gCO2eq              
google/gemini-2.0-flash-thinking-exp:free          0.714 Wh                   0.437 gCO2eq              
deepseek/deepseek-chat                             4.7 Wh                   2.88 gCO2eq              
deepseek/deepseek-r1-distill-llama-70b             1.25 Wh                   0.762 gCO2eq              
qwen/qwq-32b-preview                               0.714 Wh                   0.437 gCO2eq              
anthropic/claude-3.7-sonnet:thinking               5.44 Wh                   3.32 gCO2eq              
google/gemini-2.0-pro-exp-02-05:free               4.7 Wh                   2.88 gCO2eq              
deepseek/deepseek-r1                               4.7 Wh                   2.88 gCO2eq              
anthropic/claude-3.5-haiku-20241022                0.545 Wh        

In [8]:
df = pd.read_csv('./csvs/liveideabench_hf.csv')

In [9]:
df['critique_reasoning']

0                                                       NaN
1         Let me evaluate this AI-CAD plug-in idea for r...
2                                                       NaN
3                                                       NaN
4                                                       NaN
                                ...                        
286487    Let's break down this idea and evaluate it acr...
286488                                                  NaN
286489                                                  NaN
286490                                                  NaN
286491                                                  NaN
Name: critique_reasoning, Length: 286492, dtype: object

In [10]:
df['parsed_scores']

0         {'originality': 8, 'feasibility': 6, 'clarity'...
1         {'originality': 7, 'feasibility': 6, 'clarity'...
2         {'originality': 9, 'feasibility': 6, 'clarity'...
3         {'originality': 7, 'feasibility': 6, 'clarity'...
4         {'originality': 8, 'feasibility': 6, 'clarity'...
                                ...                        
286487    {'originality': 4, 'feasibility': 6, 'clarity'...
286488    {'originality': 5, 'feasibility': 6, 'clarity'...
286489    {'originality': 8, 'feasibility': 7, 'clarity'...
286490    {'originality': 7, 'feasibility': 5, 'clarity'...
286491    {'originality': 6, 'feasibility': 5, 'clarity'...
Name: parsed_scores, Length: 286492, dtype: object

In [11]:
df.columns

Index(['keywords', 'idea_model', 'critic_model', 'idea', 'raw_critique',
       'parsed_scores', 'critique_reasoning', 'full_response',
       'first_was_rejected', 'first_reject_response', 'idea_length_in_char',
       'idea_length_in_words', 'scores', 'originality', 'feasibility',
       'clarity', 'fluency', 'avg', 'full_response_length'],
      dtype='object')

In [12]:
df['idea'].unique().__len__()

95738

In [13]:
df.__len__()

286492

# CO2 emission of generating ideas

In [14]:
from tqdm import tqdm

def count_tokens_by_idea_model(df):
    # 创建一个字典来存储结果
    token_counts = {}
    
    # 获取所有唯一的idea_model
    unique_models = df['idea_model'].unique()
    
    # 使用tqdm显示模型处理进度
    for model in tqdm(unique_models, desc="处理模型"):
        # 获取该模型生成的所有idea
        model_ideas = df[df['idea_model'] == model]['full_response']
        
        # 使用set获取唯一的idea
        unique_ideas = set(model_ideas)
        
        # 计算所有唯一idea的token总数，使用tqdm显示每个模型内的idea处理进度
        total_tokens = 0
        for idea in tqdm(unique_ideas, desc=f"处理{model}的idea", leave=False):
            try:
                total_tokens += count_tokens_from_local(idea)
            except:
                print('idea error:',idea)
        
        # 存储结果
        token_counts[model] = total_tokens
    
    return token_counts

# 使用函数计算各个idea_model的token总数
result = count_tokens_by_idea_model(df)
print(result)

处理模型:  10%|▉         | 4/41 [00:16<01:44,  2.82s/it]

idea error: nan


处理模型: 100%|██████████| 41/41 [01:45<00:00,  2.58s/it]

{'qwen/qwq-32b': 3272395, 'google/gemini-2.0-flash-exp:free': 289855, 'google/gemini-2.0-flash-thinking-exp:free': 268822, 'deepseek/deepseek-chat': 339689, 'deepseek/deepseek-r1-distill-llama-70b': 1054313, 'qwen/qwq-32b-preview': 3774662, 'anthropic/claude-3.7-sonnet:thinking': 4993782, 'google/gemini-2.0-pro-exp-02-05:free': 315840, 'deepseek/deepseek-r1': 1642147, 'anthropic/claude-3.5-haiku-20241022': 278554, 'deepseek/deepseek-r1-distill-qwen-32b': 1429143, 'openai/o1': 240612, 'openai/gpt-4.5-preview': 287003, 'mistralai/mistral-large-2411': 292152, 'openai/o3-mini-high': 232967, 'amazon/nova-lite-v1': 222522, 'sammcj/qwen2.5-dracarys2-72b:Q4_K_M': 261955, 'meta-llama/llama-3.1-405b-instruct': 317736, 'openai/o3-mini': 223555, 'openai/gpt-4o-2024-11-20': 322091, 'openai/gpt-4o-mini': 268069, 'google/gemma-2-27b-it': 215656, 'mistralai/mistral-small-24b-instruct-2501': 349875, 'openai/gpt-4-turbo': 300866, 'mistralai/mistral-small': 295047, 'openai/o1-mini': 292354, 'meta-llama/l




In [15]:
from pprint import pprint

In [16]:
pprint(result)

{'amazon/nova-lite-v1': 222522,
 'amazon/nova-pro-v1': 198872,
 'anthropic/claude-3-opus': 287743,
 'anthropic/claude-3.5-haiku-20241022': 278554,
 'anthropic/claude-3.5-sonnet': 223678,
 'anthropic/claude-3.7-sonnet': 304098,
 'anthropic/claude-3.7-sonnet:thinking': 4993782,
 'deepseek/deepseek-chat': 339689,
 'deepseek/deepseek-r1': 1642147,
 'deepseek/deepseek-r1-distill-llama-70b': 1054313,
 'deepseek/deepseek-r1-distill-qwen-32b': 1429143,
 'google/gemini-2.0-flash-exp:free': 289855,
 'google/gemini-2.0-flash-lite-001': 294225,
 'google/gemini-2.0-flash-thinking-exp:free': 268822,
 'google/gemini-2.0-pro-exp-02-05:free': 315840,
 'google/gemini-pro-1.5': 207097,
 'google/gemma-2-27b-it': 215656,
 'meta-llama/llama-3.1-405b-instruct': 317736,
 'meta-llama/llama-3.1-70b-instruct': 331842,
 'meta-llama/llama-3.3-70b-instruct': 131481,
 'microsoft/phi-4': 396819,
 'mistralai/mistral-large-2411': 292152,
 'mistralai/mistral-small': 295047,
 'mistralai/mistral-small-24b-instruct-2501': 

# CO2 emission of critic LLMs

In [17]:
from tqdm import tqdm
import numpy as np

def count_tokens_by_critic_model(df):
    # 创建一个字典来存储结果
    token_counts = {}
    
    # 获取所有唯一的critic_model
    unique_critic_models = df['critic_model'].unique()
    
    # 使用tqdm显示critic模型处理进度
    for critic_model in tqdm(unique_critic_models, desc="处理critic模型"):
        # 筛选该critic模型的所有行
        critic_rows = df[df['critic_model'] == critic_model]
        
        # 计算该critic模型使用的总token数
        total_tokens = 0
        
        # 遍历该critic模型的每一行
        for _, row in tqdm(critic_rows.iterrows(), desc=f"处理{critic_model}的critique", leave=False, total=len(critic_rows)):
            try:
                # 处理raw_critique
                if not pd.isna(row['raw_critique']):
                    total_tokens += count_tokens_from_local(row['raw_critique'])
                
                # 处理critique_reasoning (可能为NaN)
                if not pd.isna(row['critique_reasoning']):
                    total_tokens += count_tokens_from_local(row['critique_reasoning'])
            except Exception as e:
                print(f'Error processing row: {e}')
                print(f'raw_critique: {row["raw_critique"][:100]}...')
                print(f'critique_reasoning: {row["critique_reasoning"][:100] if not pd.isna(row["critique_reasoning"]) else "NaN"}...')
        
        # 存储结果
        token_counts[critic_model] = total_tokens
    
    return token_counts

# 使用函数计算各个critic_model的token总数
critic_token_results = count_tokens_by_critic_model(df)
print(critic_token_results)

处理critic模型:   0%|          | 0/10 [00:00<?, ?it/s]

处理critic模型: 100%|██████████| 10/10 [06:24<00:00, 38.46s/it]

{'openai/o3-mini-high': 2799760, 'anthropic/claude-3.7-sonnet:thinking': 25938526, 'openai/gpt-4.5-preview': 3611186, 'qwen/qwen-max': 3472270, 'google/gemini-2.0-flash-thinking-exp:free': 3220243, 'anthropic/claude-3.5-sonnet': 4158124, 'google/gemini-2.0-pro-exp-02-05:free': 3576721, 'deepseek/deepseek-chat': 3925279, 'deepseek/deepseek-r1': 14263413, 'qwen/qwq-32b': 19476097}





In [18]:
pprint(critic_token_results)

{'anthropic/claude-3.5-sonnet': 4158124,
 'anthropic/claude-3.7-sonnet:thinking': 25938526,
 'deepseek/deepseek-chat': 3925279,
 'deepseek/deepseek-r1': 14263413,
 'google/gemini-2.0-flash-thinking-exp:free': 3220243,
 'google/gemini-2.0-pro-exp-02-05:free': 3576721,
 'openai/gpt-4.5-preview': 3611186,
 'openai/o3-mini-high': 2799760,
 'qwen/qwen-max': 3472270,
 'qwen/qwq-32b': 19476097}


In [19]:


# i# 假设我们添加一个是否开源的信息字典
open_source_models = {
    'gemini-pro-1.5': False,
    'qwq-32b-preview': True,
    'qwq-32b': True,
    'o1-preview': False,
    'claude-3.5-sonnet': False,
    'claude-3.7-sonnet': False,
    'gemini-2.0-flash-exp': False,
    'qwen2.5-dracarys2-72b': True,
    'nova-pro-v1': False,
    'gpt-4o-2024-11-20': False,
    'mistral-large-2411': True,
    'llama-3.1-nemotron-70b-instruct': True,
    'qwen-2.5-coder-32b-instruct': True,
    'llama-3.1-405b-instruct': True,
    'o1-mini': False,
    'qwen-2.5-72b-instruct': True,
    'claude-3.5-haiku': False,
    'step-2-16k': False,
    'grok-2-1212': False,
    'gpt-4o-mini': False,
    'deepseek-chat': True,

    'deepseek-r1': True,
    'deepseek-r1-distill-llama-70b': True,

    'minimax-01': True,
    'mistral-nemo': True,
    'phi-4': True,
    'claude-3-opus': False,

    'llama-3.3-70b-instruct': True,

    'QwQ-32B-Preview-IdeaWhiz-v1': True,
    'gemini-2.0-flash-001': False,
    
    'mistral-small':True,
    
    'gemini-2.0-flash-thinking-exp':False,
    
    'deepseek-r1-distill-qwen-32b':True,
    'gemini-2.0-flash-lite-001':False,
    'codestral-2501':True,
    
    'qwen-max':False,
    'qwen-plus':False,
    'o3-mini':False,
    
    'o3-mini-high':False,
    "gemma-2-27b-it":True,
    "mistral-small-24b-instruct-2501":True,

    # small models
    'mistral-small': True,
    'qwen-2.5-7b-instruct':True,
    'phi-3-mini-128k-instruct':True,
    'llama-3.1-8b-instruct':True,
    'nova-micro-v1':False,
    'gemma-2-9b-it':True,
    'command-r-08-2024':True,
    'gemini-2.0-pro-exp-02-05':False,
    'o1':False,
    'llama-3.1-70b-instruct':True,
    'step-2-16k-202411':False,
    'gpt-4.5-preview':False,
    'claude-3.5-haiku-20241022':False,
    'gpt-4-turbo':False,
    'nova-lite-v1':False,
    'google/gemini-2.0-flash-exp:free':False,
    'anthropic/claude-3.7-sonnet':False,
    'openai/o1':False,
    'openai/o3-mini':False,
    'openai/o1-mini':False,
    'openai/gpt-4o-2024-11-20':False,
    'deepseek/deepseek-r1-distill-llama-70b':True,
    'google/gemini-pro-1.5':False,
    'x-ai/grok-2-1212':False,
    'google/gemini-2.0-flash-lite-001':False,
    'sammcj/qwen2.5-dracarys2-72b:Q4_K_M':True,
    'meta-llama/llama-3.1-405b-instruct':True,
    'qwen/qwen-2.5-72b-instruct':True,
    'openai/gpt-4-turbo':False,
    'meta-llama/llama-3.3-70b-instruct':True,
    'anthropic/claude-3-opus':False,
    'mistralai/mistral-large-2411':True,
    'qwen/qwen-2.5-coder-32b-instruct':True,
    'deepseek/deepseek-r1-distill-qwen-32b':True,
    'meta-llama/llama-3.1-70b-instruct':True,
    'amazon/nova-pro-v1':False,
    'anthropic/claude-3.5-haiku-20241022':False,
    'mistralai/mistral-small-24b-instruct-2501':True,
    'microsoft/phi-4':True,
    'openai/gpt-4o-mini':False,
    'qwen/qwq-32b-preview':True,
    'amazon/nova-lite-v1':False,
    'qwen/qwen-2.5-7b-instruct':True,
    'mistralai/mistral-small':True,
    'google/gemma-2-27b-it':True,
    'anthropic/claude-3.7-sonnet:thinking':False,
    'openai/o3-mini-high':False,
    'openai/gpt-4.5-preview':False,
    'qwen/qwq-32b':True,
    'deepseek/deepseek-r1':True,
    'google/gemini-2.0-flash-thinking-exp:free':False,
    'google/gemini-2.0-pro-exp-02-05:free':False,
    'qwen/qwen-max':False,
    'deepseek/deepseek-chat':True,
    'anthropic/claude-3.5-sonnet':False,
    'claude-3.7-sonnet:thinking':False,
    
}

In [20]:
from tqdm import tqdm

def clean_model_name(name):
    # Handle paths by removing the part before "/"
    if '/' in name:
        name = name.split('/', 1)[1]  # Split only once, take the second part
    
    # Handle specific suffixes
    if ':' in name:
        base_name, suffix = name.split(':', 1)
        # Only remove if the suffix is of a specific type
        if suffix in ['Q4_K_M', 'free']:
            return base_name
        # Otherwise keep the full name, including colon and suffix
        return name
    
    return name

def calculate_combined_emissions_table(idea_token_counts, critic_token_counts, model_params_dict, open_source_models):
    # Create dictionaries to store results
    idea_emissions = {}
    critic_emissions = {}
    all_models = set(list(idea_token_counts.keys()) + list(critic_token_counts.keys()))
    
    # Calculate energy and emissions for idea models
    for model_name, token_count in tqdm(idea_token_counts.items(), desc="Calculating Idea Model Environmental Impact"):
        if model_name in model_params_dict:
            # Get model parameters
            active = model_params_dict[model_name]['activated_params']
            total = model_params_dict[model_name]['total_params']
            
            # Calculate energy and emissions per 100 output tokens
            res = ecologits_calculator_per100_output_token(active, total)
            energy_Wh_per100 = res[0]
            GHG_emissions_per100 = res[1]
            
            # Calculate total energy and emissions (convert Wh to kWh and gCO2eq to kgCO2eq)
            total_energy_kWh = ((token_count / 100) * energy_Wh_per100) / 1000
            total_GHG_emissions_kg = ((token_count / 100) * GHG_emissions_per100) / 1000
            
            # Clean the model name
            clean_name = clean_model_name(model_name)
            
            # Save results
            idea_emissions[model_name] = {
                'clean_name': clean_name,
                'token_count': token_count,
                'energy_kWh': total_energy_kWh,
                'GHG_emissions_kg': total_GHG_emissions_kg
            }
        else:
            print(f"% Warning: Parameters for model '{model_name}' not found")
    
    # Calculate energy and emissions for critic models
    for model_name, token_count in tqdm(critic_token_counts.items(), desc="Calculating Critic Model Environmental Impact"):
        if model_name in model_params_dict:
            # Get model parameters
            active = model_params_dict[model_name]['activated_params']
            total = model_params_dict[model_name]['total_params']
            
            # Calculate energy and emissions per 100 output tokens
            res = ecologits_calculator_per100_output_token(active, total)
            energy_Wh_per100 = res[0]
            GHG_emissions_per100 = res[1]
            
            # Calculate total energy and emissions (convert Wh to kWh and gCO2eq to kgCO2eq)
            total_energy_kWh = ((token_count / 100) * energy_Wh_per100) / 1000
            total_GHG_emissions_kg = ((token_count / 100) * GHG_emissions_per100) / 1000
            
            # Clean the model name
            clean_name = clean_model_name(model_name)
            
            # Save results
            critic_emissions[model_name] = {
                'clean_name': clean_name,
                'token_count': token_count,
                'energy_kWh': total_energy_kWh,
                'GHG_emissions_kg': total_GHG_emissions_kg
            }
        else:
            print(f"% Warning: Parameters for model '{model_name}' not found")
    
    # Start LaTeX table
    print("\\begin{table}[htbp]")
    print("\\centering")
    print("\\caption{Energy Consumption and Carbon Emissions by Model: As Idea Generator and Critic}")
    # Add resizebox command to scale the table to 0.8\textwidth
    print("\\resizebox{0.8\\textwidth}{!}{%")
    # Make sure to include makecell package in the preamble: \usepackage{makecell}
    # Make sure to include graphicx package in the preamble: \usepackage{graphicx}
    print("\\begin{tabular}{lrrrrrr}")
    print("\\toprule")
    print("\\multirow{2}{*}{Model Name} & \\multicolumn{2}{c}{As Idea Model} & \\multicolumn{2}{c}{As Critic Model} & \\multicolumn{2}{c}{Total} \\\\")
    print("\\cmidrule(lr){2-3} \\cmidrule(lr){4-5} \\cmidrule(lr){6-7}")
    print("& \\makecell{Energy\\\\(kWh)} & \\makecell{Emissions\\\\(kgCO$_2$eq.)} & \\makecell{Energy\\\\(kWh)} & \\makecell{Emissions\\\\(kgCO$_2$eq.)} & \\makecell{Energy\\\\(kWh)} & \\makecell{Emissions\\\\(kgCO$_2$eq.)} \\\\")
    print("\\midrule")
    
    # Create a dictionary containing all models for combined results
    combined_results = {}
    
    for model in all_models:
        if model in model_params_dict:
            clean_name = clean_model_name(model) if model in idea_emissions or model in critic_emissions else "Unknown Model"
            
            # Check if the model is open source
            is_open_source = open_source_models.get(model, True)  # Default to True if not in the dictionary
            
            # Get Idea model data (if available)
            idea_energy = idea_emissions[model]['energy_kWh'] if model in idea_emissions else 0
            idea_emissions_val = idea_emissions[model]['GHG_emissions_kg'] if model in idea_emissions else 0
            
            # Get Critic model data (if available)
            critic_energy = critic_emissions[model]['energy_kWh'] if model in critic_emissions else 0
            critic_emissions_val = critic_emissions[model]['GHG_emissions_kg'] if model in critic_emissions else 0
            
            # Calculate totals
            total_energy = idea_energy + critic_energy
            total_emissions_val = idea_emissions_val + critic_emissions_val
            
            # Store results for sorting
            combined_results[model] = {
                'clean_name': clean_name,
                'is_open_source': is_open_source,
                'idea_energy': idea_energy,
                'idea_emissions': idea_emissions_val,
                'critic_energy': critic_energy,
                'critic_emissions': critic_emissions_val,
                'total_energy': total_energy,
                'total_emissions': total_emissions_val
            }
    
    # Create a list of tuples with (clean_name, original_name) for sorting
    model_name_pairs = [(data['clean_name'], original_name) for original_name, data in combined_results.items()]
    
    # Sort by cleaned name and print results
    for clean_name, original_name in sorted(model_name_pairs, key=lambda x: x[0].lower()):
        data = combined_results[original_name]
        
        # Format values: replace 0.00 with "-"
        idea_energy_str = f"{data['idea_energy']:.2f}" if data['idea_energy'] > 0 else "-"
        idea_emissions_str = f"{data['idea_emissions']:.2f}" if data['idea_emissions'] > 0 else "-"
        critic_energy_str = f"{data['critic_energy']:.2f}" if data['critic_energy'] > 0 else "-"
        critic_emissions_str = f"{data['critic_emissions']:.2f}" if data['critic_emissions'] > 0 else "-"
        total_energy_str = f"{data['total_energy']:.2f}" if data['total_energy'] > 0 else "-"
        total_emissions_str = f"{data['total_emissions']:.2f}" if data['total_emissions'] > 0 else "-"
        
        # Add asterisk to non-open source models
        display_name = clean_name
        if not data['is_open_source']:
            display_name = f"{clean_name}$^*$"
        
        # Print results in LaTeX format
        print(f"\\texttt{{{display_name}}} & {idea_energy_str} & {idea_emissions_str} & {critic_energy_str} & {critic_emissions_str} & {total_energy_str} & {total_emissions_str} \\\\")
    
    # Calculate total energy and emissions
    total_idea_energy = sum(data['idea_energy'] for data in combined_results.values())
    total_idea_emissions = sum(data['idea_emissions'] for data in combined_results.values())
    total_critic_energy = sum(data['critic_energy'] for data in combined_results.values())
    total_critic_emissions = sum(data['critic_emissions'] for data in combined_results.values())
    grand_total_energy = total_idea_energy + total_critic_energy
    grand_total_emissions = total_idea_emissions + total_critic_emissions
    
    # Format total values
    total_idea_energy_str = f"{total_idea_energy:.2f}" if total_idea_energy > 0 else "-"
    total_idea_emissions_str = f"{total_idea_emissions:.2f}" if total_idea_emissions > 0 else "-"
    total_critic_energy_str = f"{total_critic_energy:.2f}" if total_critic_energy > 0 else "-"
    total_critic_emissions_str = f"{total_critic_emissions:.2f}" if total_critic_emissions > 0 else "-"
    grand_total_energy_str = f"{grand_total_energy:.2f}" if grand_total_energy > 0 else "-"
    grand_total_emissions_str = f"{grand_total_emissions:.2f}" if grand_total_emissions > 0 else "-"
    
    # Print totals
    print("\\midrule")
    print(f"Total & {total_idea_energy_str} & {total_idea_emissions_str} & {total_critic_energy_str} & {total_critic_emissions_str} & {grand_total_energy_str} & {grand_total_emissions_str} \\\\")
    print("\\bottomrule")
    print("\\end{tabular}")
    # Close the resizebox command
    print("}")
    print("\\label{tab:combined_emissions}")
    
    # Add note about non-open source models in the table note
    print("\\caption*{\\footnotesize{$^*$Non-open models: environmental impact values are rough estimates and may not be accurate.}}")
    
    print("\\end{table}")
    
    return combined_results


combined_results = calculate_combined_emissions_table(
    idea_token_counts=result, 
    critic_token_counts=critic_token_results, 
    model_params_dict=model_params_dict,
    open_source_models=open_source_models
)

Calculating Idea Model Environmental Impact: 100%|██████████| 41/41 [00:00<00:00, 105500.90it/s]
Calculating Critic Model Environmental Impact: 100%|██████████| 10/10 [00:00<00:00, 88674.50it/s]

\begin{table}[htbp]
\centering
\caption{Energy Consumption and Carbon Emissions by Model: As Idea Generator and Critic}
\resizebox{0.8\textwidth}{!}{%
\begin{tabular}{lrrrrrr}
\toprule
\multirow{2}{*}{Model Name} & \multicolumn{2}{c}{As Idea Model} & \multicolumn{2}{c}{As Critic Model} & \multicolumn{2}{c}{Total} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
& \makecell{Energy\\(kWh)} & \makecell{Emissions\\(kgCO$_2$eq.)} & \makecell{Energy\\(kWh)} & \makecell{Emissions\\(kgCO$_2$eq.)} & \makecell{Energy\\(kWh)} & \makecell{Emissions\\(kgCO$_2$eq.)} \\
\midrule
\texttt{claude-3-opus$^*$} & 83.73 & 51.22 & - & - & 83.73 & 51.22 \\
\texttt{claude-3.5-haiku-20241022$^*$} & 1.52 & 0.93 & - & - & 1.52 & 0.93 \\
\texttt{claude-3.5-sonnet$^*$} & 12.17 & 7.43 & 226.20 & 138.05 & 238.37 & 145.48 \\
\texttt{claude-3.7-sonnet$^*$} & 16.54 & 10.10 & - & - & 16.54 & 10.10 \\
\texttt{claude-3.7-sonnet:thinking$^*$} & 271.66 & 165.79 & 1411.06 & 861.16 & 1682.72 & 1026.95 \\
\texttt{dee


