# Who's Introvert?

## Install Dependencies & Import Libs

In [1]:
# Data Processing
import polars as pl
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly

# ML & DL
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer
from sklearn.model_selection import GridSearchCV
import xgboost as xgb

In [2]:
RANDOM_SEED = 2212550

def seed_everything(seed=RANDOM_SEED):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

## Data Screening

In [3]:
train = pl.read_csv("/kaggle/input/playground-series-s5e7/train.csv")
test = pl.read_csv("/kaggle/input/playground-series-s5e7/test.csv")

labels = train["Personality"]
train = train.drop("Personality")

train.head(5)

id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency
i64,f64,str,f64,f64,str,f64,f64
0,0.0,"""No""",6.0,4.0,"""No""",15.0,5.0
1,1.0,"""No""",7.0,3.0,"""No""",10.0,8.0
2,6.0,"""Yes""",1.0,0.0,,3.0,0.0
3,3.0,"""No""",7.0,3.0,"""No""",11.0,5.0
4,1.0,"""No""",4.0,4.0,"""No""",13.0,


In [4]:
train.shape

(18524, 8)

In [5]:
labels

Personality
str
"""Extrovert"""
"""Extrovert"""
"""Introvert"""
"""Extrovert"""
"""Extrovert"""
…
"""Extrovert"""
"""Extrovert"""
"""Introvert"""
"""Introvert"""


In [6]:
test.head(5)

id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency
i64,f64,str,f64,f64,str,f64,f64
18524,3.0,"""No""",7.0,4.0,"""No""",6.0,
18525,,"""Yes""",0.0,0.0,"""Yes""",5.0,1.0
18526,3.0,"""No""",5.0,6.0,"""No""",15.0,9.0
18527,3.0,"""No""",4.0,4.0,"""No""",5.0,6.0
18528,9.0,"""Yes""",1.0,2.0,"""Yes""",1.0,1.0


In [7]:
dataset = pl.concat([train, test])
dataset.shape

(24699, 8)

## Fill Missing Values 🔩

In [8]:
dataset.describe()

statistic,id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency
str,f64,f64,str,f64,f64,str,f64,f64
"""count""",24699.0,23084.0,"""22208""",23122.0,22767.0,"""23118""",23295.0,23027.0
"""null_count""",0.0,1615.0,"""2491""",1577.0,1932.0,"""1581""",1404.0,1672.0
"""mean""",12349.0,3.132559,,5.270824,4.042693,,7.999657,4.993833
"""std""",7130.131485,2.99923,,2.754491,2.058194,,4.215721,2.876184
"""min""",0.0,0.0,"""No""",0.0,0.0,"""No""",0.0,0.0
"""25%""",6175.0,1.0,,3.0,3.0,,5.0,3.0
"""50%""",12349.0,2.0,,5.0,4.0,,8.0,5.0
"""75%""",18524.0,4.0,,8.0,6.0,,12.0,7.0
"""max""",24698.0,11.0,"""Yes""",10.0,7.0,"""Yes""",15.0,10.0


We may need to label encode first.

In [9]:
for col in dataset.columns:
    if str(dataset[col].dtype) == "String":
        print(dataset[col].unique())

shape: (3,)
Series: 'Stage_fear' [str]
[
	"No"
	null
	"Yes"
]
shape: (3,)
Series: 'Drained_after_socializing' [str]
[
	"Yes"
	"No"
	null
]


In [10]:
def encode_category_label(df):
    # encode category features by LabelEncoder
    for col in df.columns:
        if str(df[col].dtype) == "String":
            encoder = LabelEncoder()
            encoded_col= pl.Series(encoder.fit_transform(df[col].to_numpy()))
            df = df.with_columns(encoded_col.alias(col))
    return df

dataset = encode_category_label(dataset)
dataset.head(5)

id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency
i64,f64,i64,f64,f64,i64,f64,f64
0,0.0,0,6.0,4.0,0,15.0,5.0
1,1.0,0,7.0,3.0,0,10.0,8.0
2,6.0,1,1.0,0.0,2,3.0,0.0
3,3.0,0,7.0,3.0,0,11.0,5.0
4,1.0,0,4.0,4.0,0,13.0,


In [11]:
def fill_NaN_values(df):
    # fill NaN values for numeric features
    imputer = KNNImputer()
    imputed_df = imputer.fit_transform(df.to_numpy())
    df = pl.DataFrame(imputed_df, schema=df.columns)
    return df

dataset = fill_NaN_values(dataset)
dataset

id,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency
f64,f64,f64,f64,f64,f64,f64,f64
0.0,0.0,0.0,6.0,4.0,0.0,15.0,5.0
1.0,1.0,0.0,7.0,3.0,0.0,10.0,8.0
2.0,6.0,1.0,1.0,0.0,2.0,3.0,0.0
3.0,3.0,0.0,7.0,3.0,0.0,11.0,5.0
4.0,1.0,0.0,4.0,4.0,0.0,13.0,5.0
…,…,…,…,…,…,…,…
24694.0,3.0,0.0,5.0,5.0,0.0,9.0,6.0
24695.0,8.0,1.0,2.0,1.0,1.0,0.0,0.0
24696.0,2.0,0.0,4.0,3.0,0.0,9.0,7.0
24697.0,3.0,0.0,4.0,4.0,0.0,11.0,9.0


## Model Training 🤖

In [12]:
X = dataset[:18524].to_numpy()
y = labels.to_numpy()

In [13]:
rf_model = RandomForestClassifier(random_state=RANDOM_SEED)

param_grid = {
    'n_estimators': [50, 100, 200],       
    'max_depth': [None, 10, 20, 30],       
    'min_samples_split': [2, 5, 10],         
    'min_samples_leaf': [1, 2, 4],         
    'bootstrap': [True, False]
}

# 创建GridSearchCV对象（5折交叉验证）
grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    cv=4,                
    n_jobs=-1,         
    scoring='f1',
    verbose=5
)

# 执行网格搜索
grid_search.fit(X, y)

Fitting 4 folds for each of 216 candidates, totalling 864 fits


Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   2.8s
[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=nan total time=   5.0s
[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=50;, score=nan total time=   1.3s
[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100;, score=nan total time=   2.6s
[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=200;, score=nan total time=   4.9s
[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=10, n_estimators=50;, score=nan total time=   1.3s
[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1,

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

[CV 3/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   2.7s
[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=nan total time=   5.1s
[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=50;, score=nan total time=   1.3s
[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100;, score=nan total time=   2.5s
[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=200;, score=nan total time=   4.9s
[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=10, n_estimators=50;, score=nan total time=   1.3s
[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1,

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   2.7s
[CV 2/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=nan total time=   5.0s
[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=50;, score=nan total time=   1.3s
[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100;, score=nan total time=   2.5s
[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=200;, score=nan total time=   4.9s
[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=10, n_estimators=50;, score=nan total time=   1.3s
[CV 1/4] END bootstrap=True, max_depth=None, min_samples_leaf=1,

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   2.8s
[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=nan total time=   5.1s
[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=50;, score=nan total time=   1.3s
[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100;, score=nan total time=   2.6s
[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=200;, score=nan total time=   5.0s
[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=10, n_estimators=50;, score=nan total time=   1.3s
[CV 4/4] END bootstrap=True, max_depth=None, min_samples_leaf=1,

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil


[CV 4/4] END bootstrap=True, max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=50;, score=nan total time=   1.3s
[CV 4/4] END bootstrap=True, max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=100;, score=nan total time=   3.0s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=1, min_samples_split=10, n_estimators=200;, score=nan total time=   5.4s
[CV 4/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 4/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=100;, score=nan total time=   2.5s
[CV 4/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=200;, score=nan total time=   4.9s
[CV 1/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=100;, score=nan total time=   2.4s
[CV 1/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil


[CV 1/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=100;, score=nan total time=   2.5s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=200;, score=nan total time=   4.9s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=50;, score=nan total time=   1.3s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=100;, score=nan total time=   2.4s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=200;, score=nan total time=   4.9s
[CV 3/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_spl

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil


[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=50;, score=nan total time=   1.3s
[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=100;, score=nan total time=   2.5s
[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=2, n_estimators=200;, score=nan total time=   4.8s
[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=50;, score=nan total time=   1.3s
[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=100;, score=nan total time=   2.4s
[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=5, n_estimators=200;, score=nan total time=   4.8s
[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   1.3s
[CV 2/4] END bootstrap=True, max_depth=20, min_samples_leaf=2, min_samples_sp

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil


[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=100;, score=nan total time=   3.8s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   6.9s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=50;, score=nan total time=   1.7s
[CV 4/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=50;, score=nan total time=   1.7s
[CV 4/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100;, score=nan total time=   3.4s
[CV 4/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=200;, score=nan total time=   6.8s
[CV 4/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=50;, score=nan total time=   1.7s
[CV 4/4] END bootstrap=False, max_depth=None, min_sampl

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil


[CV 4/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   2.8s
[CV 4/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=100;, score=nan total time=   3.9s
[CV 4/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   7.0s
[CV 2/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100;, score=nan total time=   3.3s
[CV 2/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=200;, score=nan total time=   6.6s
[CV 2/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=50;, score=nan total time=   1.7s
[CV 2/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=100;, score=nan total time=   3.3s
[CV 2/4] END bootstrap=False, max_depth=None, min_sam

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil


[CV 3/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=50;, score=nan total time=   2.1s
[CV 3/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=100;, score=nan total time=   4.5s
[CV 3/4] END bootstrap=False, max_depth=None, min_samples_leaf=2, min_samples_split=10, n_estimators=200;, score=nan total time=   6.9s
[CV 3/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=50;, score=nan total time=   1.7s
[CV 3/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100;, score=nan total time=   3.3s
[CV 3/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=200;, score=nan total time=   6.8s
[CV 3/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=50;, score=nan total time=   1.7s
[CV 3/4] END bootstrap=False, max_depth=None, min_samp

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil


[CV 2/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=50;, score=nan total time=   1.7s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=100;, score=nan total time=   3.3s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=2, n_estimators=200;, score=nan total time=   6.5s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=50;, score=nan total time=   1.7s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=100;, score=nan total time=   3.3s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=5, n_estimators=200;, score=nan total time=   6.8s
[CV 1/4] END bootstrap=False, max_depth=None, min_samples_leaf=4, min_samples_split=10, n_estimators=50;, score=nan total time=   1.8s
[CV 3/4] END bootstrap=False, max_depth=None, min_sample

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1146, in f1_score
    return fbeta_score(
           ^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/sklearn/metrics/_classification.py", line 1287, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

In [14]:
print("Best Params:", grid_search.best_params_)
print("Best F1-Score:", grid_search.best_score_)

Best Params: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Best F1-Score: nan


In [15]:
model = grid_search.best_estimator_
y_pred = model.predict(dataset[18524:].to_numpy())
y_pred

array(['Extrovert', 'Introvert', 'Extrovert', ..., 'Extrovert',
       'Extrovert', 'Introvert'], dtype=object)

## Submission

In [16]:
submission = pl.read_csv('/kaggle/input/playground-series-s5e7/sample_submission.csv')
submission = submission.with_columns(pl.Series(y_pred).alias('Personality'))
submission

id,Personality
i64,str
18524,"""Extrovert"""
18525,"""Introvert"""
18526,"""Extrovert"""
18527,"""Extrovert"""
18528,"""Introvert"""
…,…
24694,"""Extrovert"""
24695,"""Introvert"""
24696,"""Extrovert"""
24697,"""Extrovert"""


In [17]:
submission.write_csv('submission.csv')