# **The model parameters were obtained using Optuna.**
 
1. Added length features such as 'response_a_len' and 'response_b_len'; they are valuable. Accuracy increased by 0.01.

2. I think simple classification models are not effective for this problem; the following perspectives should also be considered:
    (1) Accuracy of content: check whether the answer accurately and correctly answers the question.
    (2) Integrity of content: evaluate whether the answer comprehensively covers all aspects of the question.
    (3) Standardization of language: evaluate whether the language of the answer is clear, accurate, and appropriate, and whether there are grammar or spelling errors.
    
    So, this code is for reference only. Please consider other, more suitable methods.

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.ensemble import VotingClassifier
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the competition's train and test splits from the Kaggle input directory.
train, test = (
    pd.read_parquet(path)
    for path in (
        "/kaggle/input/wsdm-cup-multilingual-chatbot-arena/train.parquet",
        "/kaggle/input/wsdm-cup-multilingual-chatbot-arena/test.parquet",
    )
)
print("Read dataset completed")


#train['prompt_len']=train['prompt'].apply(len)
#train['response_a_len']=train['response_a'].apply(len)
#train['response_b_len']=train['response_b'].apply(len)
#train['response_a_len/prompt_len']=train['response_a_len']/train['prompt_len']
#train['response_b_len/prompt_len']=train['response_b_len']/train['prompt_len']
#train['response_b_len/response_a_len']=train['response_b_len']/train['response_a_len']

#test['prompt_len']=test['prompt'].apply(len)
#test['response_a_len']=test['response_a'].apply(len)
#test['response_b_len']=test['response_b'].apply(len)
#test['response_a_len/prompt_len']=test['response_a_len']/test['prompt_len']
#test['response_b_len/prompt_len']=test['response_b_len']/test['prompt_len']
#test['response_b_len/response_a_len']=test['response_b_len']/test['response_a_len']

In [None]:
# Build a pointwise training set: one row per (prompt, response) pair whose
# 0/1 `winner` label says whether that response won its battle.
#
# Previously only the winning response of each battle was kept and labelled by
# its side (model_a -> 0, model_b -> 1). The classifier therefore never saw a
# losing response, while the prediction step scores response_a and response_b
# separately and compares the two results as if 1 meant "wins". Keeping both
# responses with label 1 = won makes that comparison meaningful.
decided = train[train['winner'].isin(['model_a', 'model_b'])]


def _response_rows(battles, side):
    """Return `battles` reduced to the response of `side` ('a' or 'b').

    The other side's response column is dropped, the kept one is renamed to
    'response', and 'winner' becomes 1 where `side` won and 0 otherwise.
    All remaining columns are passed through unchanged.
    """
    other = 'b' if side == 'a' else 'a'
    # drop() returns a new frame, so the assignment below never writes into a
    # filtered slice of the caller's data (no inplace mutation of a view).
    rows = battles.drop(columns=[f'response_{other}']).rename(
        columns={f'response_{side}': 'response'}
    )
    rows['winner'] = (battles['winner'] == f'model_{side}').astype(int)
    return rows


df_model_a = _response_rows(decided, 'a')
df_model_b = _response_rows(decided, 'b')

# ignore_index=True gives the stacked frame a fresh 0..n-1 index.
train = pd.concat([df_model_a, df_model_b], ignore_index=True)

# Character-length features of the prompt and the response.
train['prompt_len'] = train['prompt'].apply(len)
train['response_len'] = train['response'].apply(len)
# Treat an empty prompt as length 1 so the ratio stays finite instead of
# becoming inf/NaN; for non-empty prompts the value is unchanged.
train['response_len/prompt_len'] = (
    train['response_len'] / train['prompt_len'].clip(lower=1)
)

#print(train.head())

In [None]:
def _single_response_view(frame, keep, drop):
    """Return a copy of `frame` holding only one of the two responses.

    Column `drop` is removed, column `keep` is renamed to 'response', and the
    three length features are added: 'prompt_len', 'response_len' and
    'response_len/prompt_len'.
    """
    view = frame.drop(columns=[drop]).rename(columns={keep: 'response'})
    view['prompt_len'] = view['prompt'].apply(len)
    view['response_len'] = view['response'].apply(len)
    # Treat an empty prompt as length 1 so the ratio stays finite instead of
    # becoming inf/NaN; for non-empty prompts the value is unchanged.
    view['response_len/prompt_len'] = (
        view['response_len'] / view['prompt_len'].clip(lower=1)
    )
    return view


# One frame per candidate response, with identical column layout, so each
# response can be scored on its own.
test_a = _single_response_view(test, keep='response_a', drop='response_b')
test_b = _single_response_view(test, keep='response_b', drop='response_a')


In [None]:
def _char_tfidf():
    """Return a fresh character n-gram (word-boundary) TF-IDF vectorizer capped at 1000 features."""
    return TfidfVectorizer(analyzer='char_wb', max_features=1000)


# One vectorizer per text column; columns not listed here are dropped by the
# ColumnTransformer default.
preprocessor = ColumnTransformer(
    transformers=[
        ('prompt_feats', _char_tfidf(), 'prompt'),
        ('response', _char_tfidf(), 'response'),
    ]
)
print("TfidfVectorizer is woking...")

# Vocabulary and IDF weights are learned on the training frame only and then
# reused unchanged for both test views.
train_tfidf = preprocessor.fit_transform(train)
test_a_tfidf, test_b_tfidf = (
    preprocessor.transform(frame) for frame in (test_a, test_b)
)


In [None]:
######################################################################
# Turn each TF-IDF matrix into a dense DataFrame and prepend the three
# length features.
num_features = train_tfidf.shape[1]
new_columns = [f"tfidf{i+1}" for i in range(num_features)]


def _with_length_features(frame, matrix):
    """Return the length features of `frame` followed by the dense TF-IDF columns.

    `matrix` is the vectorizer output for `frame`, row for row. pd.concat with
    axis=1 aligns on index labels, so the length features are re-indexed to
    0..n-1 first; otherwise a `frame` with a non-default index would be
    misaligned against the freshly built TF-IDF frame and yield NaN rows.
    """
    lengths = frame[
        ['prompt_len', 'response_len', 'response_len/prompt_len']
    ].reset_index(drop=True)
    dense = pd.DataFrame(matrix.toarray(), columns=new_columns)
    return pd.concat([lengths, dense], axis=1)


train_tfidf = _with_length_features(train, train_tfidf)
test_a_tfidf = _with_length_features(test_a, test_a_tfidf)
test_b_tfidf = _with_length_features(test_b, test_b_tfidf)

print("Feature engineering is completed")
######################################################################


In [None]:
# Feature matrix and target column used to fit the ensemble.
X, y = train_tfidf, train['winner']

In [None]:
# Hyper-parameters tuned with Optuna, one dict per gradient-boosting library.
lgb_param = {
    'num_leaves': 72,
    'max_depth': 20,
    'learning_rate': 0.04011765418908751,
    'n_estimators': 244,
    'reg_alpha': 2.2150749155666984,
    'reg_lambda': 19.795375399670494,
    'random_state': 42,
    'verbose': -1,
}
# `min_data_in_leaf` and `verbose` are LightGBM parameter names that XGBoost
# does not recognise, so they had no effect on the fitted model here.
# `min_data_in_leaf` is dropped (the trained model is unchanged) and the
# silencing intent of `verbose` is expressed with XGBoost's own `verbosity`.
xgb_param = {
    'n_estimators': 454,
    'learning_rate': 0.06466238370778891,
    'max_depth': 2,
    'reg_lambda': 27.280518858342674,
    'random_state': 42,
    'verbosity': 0,
}
cat_param = {
    'depth': 7,
    'learning_rate': 0.014180537144799797,
    'n_estimators': 1980,
    'reg_lambda': 0.6903912535122932,
    'random_state': 42,
    'verbose': 0,
}

# Unfitted base estimators; they are fitted together by the voting ensemble.
lgb_model = lgb.LGBMClassifier(**lgb_param)
xgb_model = xgb.XGBClassifier(**xgb_param)
cat_model = CatBoostClassifier(**cat_param)

In [None]:
print("Training model is working...")
# Equal-weight soft-voting ensemble over the three boosted-tree models:
# the class probabilities of the base estimators are averaged.
models = [
    ('lgb', lgb_model),
    ('xgb', xgb_model),
    ('cat', cat_model),
]
weights = [1] * len(models)
voting_clf = VotingClassifier(
    estimators=models,
    voting='soft',
    weights=weights,
).fit(X, y)

# Accuracy on the same rows the ensemble was fitted on (not a held-out score).
y_pred = voting_clf.predict(X)
accuracy = accuracy_score(y_true=y, y_pred=y_pred)
print(f"Accuracy of train Dataset: {accuracy:.4f}")


In [None]:
print("Prediction is being executed...")
# Score each response on its own with the soft-voting probability of class 1
# instead of the hard 0/1 label. With hard labels the comparison below ties
# whenever both responses receive the same class, and every tie fell through
# to 'model_b'. Probabilities order the pair the same way whenever the hard
# labels differ (label 1 means probability > 0.5) and additionally break
# those ties.
positive_col = list(voting_clf.classes_).index(1)
y_pred_a = voting_clf.predict_proba(test_a_tfidf)[:, positive_col]
y_pred_b = voting_clf.predict_proba(test_b_tfidf)[:, positive_col]

# Same decision rule as before: response_a is reported as the winner when its
# class-1 score is strictly larger; otherwise (including exact ties) response_b.
y_pred_labels = np.where(y_pred_a > y_pred_b, 'model_a', 'model_b')

In [None]:
# Pair every test id with its predicted winner and write the submission file.
submission = test[['id']].assign(winner=y_pred_labels)
submission.to_csv('submission.csv', index=False)