In [1]:
import pandas as pd
import numpy as np
from m6_feats_comb import *
from m7_utils import *
from m5_models import *
from m3_model_params import lgb_params_2 as lgbm_params
from m3_model_params import xgb_params_2 as xgb_params
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import PowerTransformer, MinMaxScaler, StandardScaler
from m3_model_params import non_important_feats

In [2]:
INPUT_DIR = 'kaggle/input/linking-writing-processes-to-writing-quality'
FEAT_STORE_DIR = 'feat_store_combined'
train_logs = pd.read_csv(f'{INPUT_DIR}/train_logs.csv')
train_scores = pd.read_csv(f'{INPUT_DIR}/train_scores.csv')
test_logs = pd.read_csv(f'{INPUT_DIR}/test_logs.csv')
ss_df = pd.read_csv(f'{INPUT_DIR}/sample_submission.csv')

logs = pd.concat([train_logs, test_logs], axis=0)

In [3]:
train_ids = train_logs.id
test_ids = test_logs.id

logs = pd.concat([train_logs, test_logs], axis=0)
logs = normalise_up_down_times(logs)

train_logs = normalise_up_down_times(train_logs)
test_logs = normalise_up_down_times(test_logs)

preprocessor = Preprocessor(seed=42)
train_feats = preprocessor.make_feats(train_logs)
test_feats = preprocessor.make_feats(test_logs)

essays = getEssays(logs)
sent_feats = compute_sentence_aggregations(essays)
par_feats = compute_paragraph_aggregations(essays)
word_feats = create_word_length_features(essays, 'essay', 'id', 'essay_words')

train_sent = sent_feats[sent_feats['id'].isin(train_ids)]
train_par = par_feats[par_feats['id'].isin(train_ids)]
train_words = word_feats[word_feats['id'].isin(train_ids)]
test_sent = sent_feats[sent_feats['id'].isin(test_ids)]
test_par = par_feats[par_feats['id'].isin(test_ids)]
test_words = word_feats[word_feats['id'].isin(test_ids)]

train_vector = countvectorize_one_one(train_logs)
test_vector = countvectorize_one_one(test_logs)

train_feats = train_feats.merge(train_sent, how='left', on='id')
train_feats = train_feats.merge(train_par, how='left', on='id')
train_feats = train_feats.merge(train_words, how='left', on='id')
train_feats = pd.concat([train_feats, train_vector], axis=1)

test_feats = test_feats.merge(test_sent, on='id', how='left')
test_feats = test_feats.merge(test_par, on='id', how='left')
test_feats = test_feats.merge(test_words, on='id', how='left')
test_feats = pd.concat([test_feats, test_vector], axis=1)

train_feats.to_pickle('feat_store_hybrid/train_super.pkl')
test_feats.to_pickle('feat_store_hybrid/test_feats.pkl')

Engineering time data
Engineering cursor position data
Engineering word count data
Engineering statistical summaries for features


100%|██████████| 33/33 [01:25<00:00,  2.58s/it, column=word_count_change100, method=kurt]         


Engineering activity counts data


100%|██████████| 2471/2471 [00:00<00:00, 14506.76it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering event counts data


100%|██████████| 2471/2471 [00:00<00:00, 13732.50it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)
100%|██████████| 2471/2471 [00:00<00:00, 12198.90it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering text change counts data


100%|██████████| 2471/2471 [00:00<00:00, 12580.71it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering punctuation counts data


100%|██████████| 2471/2471 [00:00<00:00, 12888.03it/s]


Engineering input words data


  feats['word_time_ratio'] = feats['word_count_max'] / feats['up_time_max']
  feats['word_event_ratio'] = feats['word_count_max'] / feats['event_id_max']
  feats['event_time_ratio'] = feats['event_id_max']  / feats['up_time_max']
  feats['idle_time_ratio'] = feats['action_time_gap1_sum'] / feats['up_time_max']


Engineering ratios data
Engineering time data
Engineering cursor position data
Engineering word count data
Engineering statistical summaries for features


100%|██████████| 33/33 [00:01<00:00, 24.08it/s, column=word_count_change100, method=kurt]         


Engineering activity counts data


100%|██████████| 3/3 [00:00<00:00, 50533.78it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering event counts data


100%|██████████| 3/3 [00:00<00:00, 40201.00it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)
100%|██████████| 3/3 [00:00<00:00, 36900.04it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering text change counts data


100%|██████████| 3/3 [00:00<00:00, 34285.86it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering punctuation counts data


100%|██████████| 3/3 [00:00<00:00, 24338.32it/s]
  feats['word_time_ratio'] = feats['word_count_max'] / feats['up_time_max']
  feats['word_event_ratio'] = feats['word_count_max'] / feats['event_id_max']
  feats['event_time_ratio'] = feats['event_id_max']  / feats['up_time_max']
  feats['idle_time_ratio'] = feats['action_time_gap1_sum'] / feats['up_time_max']


Engineering input words data
Engineering ratios data


100%|██████████| 2474/2474 [00:04<00:00, 580.04it/s]
  word_len_feats = diverse_stats(pd.Series(word_lengths), scope)
100%|██████████| 2471/2471 [00:04<00:00, 570.17it/s]
100%|██████████| 3/3 [00:00<00:00, 3350.98it/s]


In [11]:
train_feats = pd.read_pickle('feat_store_hybrid/train_super.pkl')
test_feats = pd.read_pickle('feat_store_hybrid/test_feats.pkl')
train_feats.fillna(-10e10, inplace=True)

train_feats.isna().sum()[train_feats.isna().sum() > 0]

Series([], dtype: int64)

In [15]:
train_feats.isna().sum()[train_feats.isna().sum() > 0]

Series([], dtype: int64)

In [20]:
train_ids = train_logs.id
test_ids = test_logs.id

#0.6197 (super), 0.620938 (pause), 0.620495 (StandardScaler), 0.621384 (MinMax), 0.619713(yeo-johnson), 0.619367(super with yeo-johnson)
# Super 0.608303, 0.610934 6,5
# feats = pd.read_pickle('feat_store_hybrid/super_feats.pkl') 
# feats = preprocess_feats(feats, PowerTransformer('yeo-johnson'))
#train_feats = train_feats.merge(train_scores, on='id', how='left')

# train_feats = feats[feats['id'].isin(train_ids)]
# test_feats = feats[feats['id'].isin(test_ids)]

train_feats = pd.read_pickle('feat_store_hybrid/train_super.pkl')
test_feats = pd.read_pickle('feat_store_hybrid/test_feats.pkl')

train_feats = train_feats.drop(columns=non_important_feats)
test_feats = test_feats.drop(columns=test_feats.columns.intersection(non_important_feats))

# train_feats = preprocess_feats(train_feats, PowerTransformer('yeo-johnson'))
train_feats = train_feats.merge(train_scores, on='id', how='left')

train_feats.replace([np.inf, -np.inf], np.nan, inplace=True)
test_feats.replace([np.inf, -np.inf], np.nan, inplace=True)

train_feats.fillna(-10e6, inplace=True)
test_feats.fillna(-10e6, inplace=True)

# After merging and preprocessing
print("Check for NaNs after merging:")
print(train_feats.isna().sum().sort_values(ascending=False).head(10))
print(test_feats.isna().sum().sort_values(ascending=False).head(10))

non_numeric_cols = train_feats.select_dtypes(exclude=[np.number]).columns
print("Non-numeric columns:", non_numeric_cols)

# Now check for infinities on numeric columns
print("Check for Infinities on Numeric Columns:")
print(np.isinf(train_feats.select_dtypes(include=[np.number])).sum().sort_values(ascending=False).head(10))

# Data types
print("Data types:")
print(train_feats.dtypes.value_counts())
print(test_feats.dtypes.value_counts())

target_col = ['score']
drop_cols = ['id']
train_cols = [col for col in train_feats.columns if col not in target_col + drop_cols]
# Ensure correct columns are used
print("Columns used for training:")
print(train_cols)

n_repeats=5
n_splits=6

ridge_params = {'alpha': 1.0}  # Example parameter for Ridge
_,_,_,_ = ridge_cv_pipeline(train_feats, test_feats, ridge_params, seed=42, n_repeats=n_repeats, n_splits=n_splits)

Check for NaNs after merging:
id                     0
down_event_10_count    0
up_event_4_count       0
up_event_3_count       0
up_event_1_count       0
up_event_0_count       0
down_event_14_count    0
down_event_13_count    0
down_event_12_count    0
down_event_11_count    0
dtype: int64
id                     0
down_event_10_count    0
down_event_8_count     0
down_event_7_count     0
down_event_6_count     0
down_event_5_count     0
down_event_4_count     0
down_event_3_count     0
down_event_2_count     0
down_event_1_count     0
dtype: int64
Non-numeric columns: Index(['id'], dtype='object')
Check for Infinities on Numeric Columns:
event_id_max           0
down_event_10_count    0
up_event_4_count       0
up_event_3_count       0
up_event_1_count       0
up_event_0_count       0
down_event_14_count    0
down_event_13_count    0
down_event_12_count    0
down_event_11_count    0
dtype: int64
Data types:
float64    296
int64       51
object       1
dtype: int64
float64    306
int6

ValueError: Input X contains NaN.
Ridge does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [3]:
train_ids = train_logs.id
test_ids = test_logs.id

#0.6197 (super), 0.620938 (pause), 0.620495 (StandardScaler), 0.621384 (MinMax), 0.619713(yeo-johnson), 0.619367(super with yeo-johnson)
# Super 0.608303, 0.610934 6,5
# feats = pd.read_pickle('feat_store_hybrid/super_feats.pkl') 
# feats = preprocess_feats(feats, PowerTransformer('yeo-johnson'))
#train_feats = train_feats.merge(train_scores, on='id', how='left')

# train_feats = feats[feats['id'].isin(train_ids)]
# test_feats = feats[feats['id'].isin(test_ids)]

train_feats = pd.read_pickle('feat_store_hybrid/train_super.pkl')
test_feats = pd.read_pickle('feat_store_hybrid/test_feats.pkl')

train_feats = train_feats.drop(columns=non_important_feats)
test_feats = test_feats.drop(columns=test_feats.columns.intersection(non_important_feats))

# train_feats = preprocess_feats(train_feats, PowerTransformer('yeo-johnson'))
train_feats = train_feats.merge(train_scores, on='id', how='left')

n_repeats=5
n_splits=6

_, oof_1, rmse, model1 = xgb_cv_pipeline(train_feats=train_feats, 
                                        test_feats=test_feats, 
                                        xgb_params=xgb_params, 
                                        seed=42, 
                                        n_repeats=n_repeats, 
                                        n_splits=n_splits)
                                        
_, oof_2, rmse, model1 = cv_pipeline(train_feats, 
                                     test_feats, 
                                     lgbm_params, 
                                     lgbm_params['boosting_type'],
                                     seed = 42,
                                     n_repeats= n_repeats,
                                     n_splits = n_splits)

blend = pd.concat([oof_1, oof_2], axis=0)
blend_scores = blend.groupby(['id','score'])['prediction'].mean().reset_index()
blend_rmse = mean_squared_error(blend_scores['score'], blend_scores['prediction'], squared=False)
print(f'Blend RMSE {blend_rmse:.6f}')

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


Iterations: 100%|██████████| 5/5 [00:54<00:00, 10.87s/it]


XGB Average RMSE over 30 folds: 0.605156
LGBM Average RMSE over 30 folds: 0.610728
Blend RMSE 0.605737


In [33]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.inspection import permutation_importance

def run_lgb_model_(model, X_train, y_train, X_valid, y_valid, X_test, boosting_type):
    if boosting_type != 'dart':
        model.fit(X_train, y_train, 
                  eval_set=[(X_valid, y_valid)], 
                  callbacks=[lgb.early_stopping(250, first_metric_only=True, verbose=False)])
    else:
        model.fit(X_train, y_train)  # No early stopping for DART

    valid_predictions = model.predict(X_valid, num_iteration=model.best_iteration_)
    test_predictions = model.predict(X_test, num_iteration=model.best_iteration_)
    return valid_predictions, test_predictions

def run_lgb_cv_(train_feats, test_feats, train_cols, target_col, lgb_params, boosting_type, seed, n_repeats, n_splits):
    oof_results = pd.DataFrame(columns=['id', 'score', 'prediction'])

    X = train_feats[train_cols]
    y = train_feats[target_col].values
    X_test = test_feats[train_cols]

    models = []
    for i in range(n_repeats):
        skf = KFold(n_splits=n_splits, shuffle=True, random_state=seed + i)

        for train_idx, valid_idx in skf.split(X, y):
            X_train, y_train = X.iloc[train_idx], y[train_idx]
            X_valid, y_valid = X.iloc[valid_idx], y[valid_idx]

            model_lgb = lgb.LGBMRegressor(**lgb_params, verbose=-1, random_state=seed)
            valid_preds_lgb, test_preds_lgb = run_lgb_model_(model = model_lgb,
                                               X_train=X_train, y_train=y_train, 
                                               X_valid=X_valid, y_valid=y_valid, 
                                               X_test=X_test, boosting_type=boosting_type)
        
            tmp_df = train_feats.loc[valid_idx][['id','score']]
            tmp_df['prediction'] = valid_preds_lgb
            oof_results = pd.concat([oof_results, tmp_df])
            models.append(model_lgb)

    avg_preds = oof_results.groupby(['id', 'score'])['prediction'].mean().reset_index()
    rmse = mean_squared_error(avg_preds['score'], avg_preds['prediction'], squared=False)
    print(f"LGBM Average RMSE over {n_repeats * n_splits} folds: {rmse:.6f}")
    
    return models, oof_results, rmse

def cv_pipeline_(train_feats, test_feats, lgb_params, boosting_type, seed=42, n_repeats=5, n_splits=10):
    target_col = 'score'
    drop_cols = ['id']
    train_cols = [col for col in train_feats.columns if col not in [target_col] + drop_cols]

    missing_cols = [col for col in train_cols if col not in test_feats.columns]
    missing_cols_df = pd.DataFrame({col: np.nan for col in missing_cols}, index=test_feats.index)
    test_feats = pd.concat([test_feats, missing_cols_df], axis=1)

    train_feats.replace([np.inf, -np.inf], np.nan, inplace=True)
    test_feats.replace([np.inf, -np.inf], np.nan, inplace=True)

    models, oof_preds, rmse = run_lgb_cv_(train_feats=train_feats, test_feats=test_feats, 
                                         train_cols=train_cols, target_col=target_col, 
                                         lgb_params=lgb_params, boosting_type=boosting_type,
                                         seed=seed, n_repeats=n_repeats, n_splits=n_splits)

    # Calculate permutation feature importance
    importance_scores = np.zeros((len(models), len(train_cols)))
    for i, model in enumerate(models):
        result = permutation_importance(model, train_feats[train_cols], train_feats[target_col], n_repeats=30, random_state=seed)
        importance_scores[i, :] = result.importances_mean

    importance_scores_mean = np.mean(importance_scores, axis=0)
    feature_importance = pd.DataFrame({'feature': train_cols, 'importance': importance_scores_mean})
    feature_importance.sort_values(by='importance', ascending=False, inplace=True)

    print("Number of models:", len(models))
    print("Train columns:", train_cols)

    # Calculate permutation feature importance
    importance_scores = np.zeros((len(models), len(train_cols)))
    for i, model in enumerate(models):
        print(f"Calculating importance for model {i+1}/{len(models)}")
        result = permutation_importance(model, train_feats[train_cols], train_feats[target_col], n_repeats=10, random_state=seed)
        importance_scores[i, :] = result.importances_mean

    if np.all(importance_scores == 0):
        print("Warning: All importance scores are zero. Check your data and model.")

    importance_scores_mean = np.mean(importance_scores, axis=0)
    feature_importance = pd.DataFrame({'feature': train_cols, 'importance': importance_scores_mean})
    feature_importance.sort_values(by='importance', ascending=False, inplace=True)

    return models, oof_preds, rmse, feature_importance

_,_,_,feature_importance = cv_pipeline_(train_feats, test_feats, lgbm_params, 'gbdt')

LGBM Average RMSE over 50 folds: 0.608614
Number of models: 50
Train columns: ['event_id_max', 'up_time_max', 'action_time_max', 'action_time_min', 'action_time_mean', 'action_time_std', 'action_time_quantile', 'action_time_sem', 'action_time_sum', 'action_time_skew', 'action_time_kurt', 'activity_nunique', 'down_event_nunique', 'up_event_nunique', 'text_change_nunique', 'cursor_position_nunique', 'cursor_position_max', 'cursor_position_quantile', 'cursor_position_sem', 'cursor_position_mean', 'word_count_nunique', 'word_count_max', 'word_count_quantile', 'word_count_sem', 'word_count_mean', 'action_time_gap1_max', 'action_time_gap1_min', 'action_time_gap1_mean', 'action_time_gap1_std', 'action_time_gap1_quantile', 'action_time_gap1_sem', 'action_time_gap1_sum', 'action_time_gap1_skew', 'action_time_gap1_kurt', 'cursor_position_change1_max', 'cursor_position_change1_mean', 'cursor_position_change1_std', 'cursor_position_change1_quantile', 'cursor_position_change1_sem', 'cursor_position

In [40]:
non_important_feats = feature_importance[feature_importance['importance']==0]['feature']

In [43]:
non_important_feats

373                              tok_21
372                              tok_20
366                              tok_14
3                       action_time_min
367                              tok_15
375                              tok_23
368                              tok_16
374                              tok_22
369                              tok_17
376                              tok_24
377                              tok_25
370                              tok_18
371                              tok_19
258                   up_event_12_count
37     cursor_position_change1_quantile
95          word_count_change3_quantile
261                   up_event_15_count
256                   up_event_10_count
248                    up_event_2_count
245                 down_event_15_count
120         word_count_change5_quantile
313                      low_sent_count
112    cursor_position_change5_quantile
92               word_count_change3_max
351                     essay_words_min


In [41]:
train_feats = train_feats.drop(columns=non_important_feats)

In [42]:
train_feats

Unnamed: 0,id,event_id_max,up_time_max,action_time_max,action_time_mean,action_time_std,action_time_quantile,action_time_sem,action_time_sum,action_time_skew,...,tok_5,tok_6,tok_7,tok_8,tok_9,tok_10,tok_11,tok_12,tok_13,score
0,001519c8,-0.412654,0.297256,0.649055,0.708224,0.832047,0.602728,1.013468,-0.076341,0.510733,...,-0.511288,-0.177651,0.660481,0.136887,-0.418590,-1.324192,-1.207260,-0.637806,-0.380843,3.5
1,0022f953,-0.503278,0.139794,0.417773,0.550464,-0.113848,0.718889,0.094719,-0.232795,0.381025,...,-1.269252,-1.538362,-2.665536,-0.739307,-0.664690,-1.324192,0.891026,1.498801,-0.380843,3.5
2,0042269b,0.653824,0.175623,0.891881,0.131551,0.656133,-0.099564,0.378357,0.682566,0.779134,...,0.762865,1.424855,1.668483,-0.252833,1.443518,-0.184352,0.891026,-0.637806,-0.380843,6.0
3,0059420b,-1.501620,-1.492521,-0.423774,0.923781,1.143871,0.525154,1.627881,-0.950668,-0.780595,...,-0.167792,-2.473972,-2.207593,-1.041033,-1.365132,-0.184352,1.630053,-0.637806,-0.380843,2.0
4,0075873a,-0.435189,-0.570642,-0.596296,1.003290,0.128298,1.257973,0.301895,0.036145,-0.927886,...,-0.913036,-0.919497,-1.561731,-0.252833,0.416839,0.426843,-1.207260,-0.637806,-0.380843,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,ffb8c745,0.957832,0.183501,0.972627,0.278844,0.178234,0.641475,-0.336100,1.082875,1.308845,...,0.061690,-0.177651,-0.247576,-1.041033,-0.418590,1.584498,-1.207260,1.498801,-0.380843,3.5
2467,ffbef7e5,-0.372480,0.216784,-0.021411,-0.713232,-1.214167,-0.652955,-0.976094,-0.724162,0.320733,...,0.338687,0.833085,-0.710961,-1.041033,-1.978150,0.426843,-0.038395,-0.637806,-0.380843,4.0
2468,ffccd6fd,-0.013636,0.849759,-0.879874,-1.020584,0.174176,-1.052774,0.148528,-0.575474,-0.500749,...,-1.402204,-1.105123,-0.391202,-1.407041,-0.209522,-1.324192,-1.207260,1.498801,-0.380843,1.5
2469,ffec5b38,0.112226,-0.965479,0.183245,-0.401310,-0.151052,-0.454518,-0.254160,-0.131240,0.248387,...,0.926199,-0.177651,0.014538,0.750972,1.050112,1.385238,1.500539,-0.637806,2.624682,5.0


In [32]:
feature_importance[~feature_importance['importance'].isna()]

Unnamed: 0,feature,importance


In [3]:
train_ids = train_logs.id
test_ids = test_logs.id

logs = pd.concat([train_logs, test_logs], axis=0)
logs = normalise_up_down_times(logs)

preprocessor = Preprocessor(seed=42)
feats = preprocessor.make_feats(logs)
nan_cols = feats.columns[feats[feats['id'].isin(train_ids)].isna().any()].tolist()
feats = feats.drop(columns=nan_cols)

essays = getEssays(logs)
sent_feats = compute_sentence_aggregations(essays)
par_feats = compute_paragraph_aggregations(essays)
word_feats = create_word_length_features(essays, 'essay', 'id', 'essay_words')
vector_feats = countvectorize_one_one(logs)

feats = feats.merge(sent_feats, how='left', on='id')
feats = feats.merge(par_feats, how='left', on='id')
feats = feats.merge(word_feats, how='left', on='id')
feats = pd.concat([feats, vector_feats], axis=1)

feats.to_pickle('feat_store_hybrid/super_feats.pkl')

Engineering time data
Engineering cursor position data
Engineering word count data
Engineering statistical summaries for features


100%|██████████| 33/33 [01:27<00:00,  2.65s/it, column=word_count_change100, method=kurt]         


Engineering activity counts data


100%|██████████| 2474/2474 [00:00<00:00, 13157.46it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering event counts data


100%|██████████| 2474/2474 [00:00<00:00, 12394.92it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)
100%|██████████| 2474/2474 [00:00<00:00, 12763.46it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering text change counts data


100%|██████████| 2474/2474 [00:00<00:00, 13661.11it/s]
  result = getattr(ufunc, method)(*inputs, **kwargs)


Engineering punctuation counts data


100%|██████████| 2474/2474 [00:00<00:00, 13841.46it/s]


Engineering input words data
Engineering ratios data


  feats['word_time_ratio'] = feats['word_count_max'] / feats['up_time_max']
  feats['word_event_ratio'] = feats['word_count_max'] / feats['event_id_max']
  feats['event_time_ratio'] = feats['event_id_max']  / feats['up_time_max']
  feats['idle_time_ratio'] = feats['action_time_gap1_sum'] / feats['up_time_max']
100%|██████████| 2474/2474 [00:04<00:00, 599.09it/s]
  word_len_feats = diverse_stats(pd.Series(word_lengths), scope)
100%|██████████| 2474/2474 [00:03<00:00, 618.64it/s]
