In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_score

from scipy.stats import zscore

In [None]:
# Read the training and test tables from the working directory.
all_data, test_data = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Start from an empty frame that borrows only the column names of the sample
# submission; the columns are filled in later in the notebook.
submission_columns = pd.read_csv('sample_submission.csv').columns
submission = pd.DataFrame(columns=submission_columns)

In [None]:
# Feature matrix: the three coordinate columns, kept as a DataFrame.
X = all_data.loc[:, ['x', 'y', 'z']]

# One NumPy target vector per predicted quantity, in the order u_x, u_y, u_z, p.
y_x, y_y, y_z, y_p = (
    all_data[target_name].values for target_name in ('u_x', 'u_y', 'u_z', 'p')
)

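### Drop outliers

Rows with outlying feature values are filtered out with a z-score threshold before the models are fitted.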

In [None]:
def filter_data(X, y, thresh = 3):
    """Drop the rows of ``X`` (and the matching entries of ``y``) that hold an outlier.

    Z-scores are computed per column of ``X``. A row counts as an outlier when
    the absolute z-score of at least one of its features is ``thresh`` or more.

    Parameters
    ----------
    X : pandas.DataFrame or 2-D numpy.ndarray
        Numeric feature table, one row per sample.
    y : array-like
        Targets aligned positionally with the rows of ``X``; must accept a
        boolean mask as index.
    thresh : float, default 3
        Absolute z-score at or above which a value is treated as an outlier.

    Returns
    -------
    tuple
        ``(X[mask], y[mask])`` where ``mask`` selects the retained rows.
    """
    z_scores = np.abs(zscore(np.asarray(X, dtype=float)))
    # A row must be within the threshold in *every* feature to be kept; keeping
    # it when merely one feature is within range would let outliers through.
    # NaN z-scores (zero-variance column) compare False against ``>=``, so a
    # constant feature never marks rows as outliers.
    has_outlier = (z_scores >= thresh).any(axis=1)
    mask = ~has_outlier
    return X[mask], y[mask]

### CatBoost Regressor

In [None]:
from catboost import CatBoostRegressor
from ipywidgets import interact  
import ipywidgets as widgets
import optuna

In [None]:
# Seed used for both the Optuna sampler and CatBoost so that a fresh
# "Restart & Run All" reproduces the same search and the same models.
# NOTE(review): 0 is believed to match CatBoost's default random_seed - confirm.
SEED = 0

# Settings shared by the tuning runs AND the final fit, so the final model is
# trained with the same configuration the hyper-parameters were tuned under.
FIXED_PARAMS = {
    "loss_function": "RMSE",
    "task_type": "CPU",
    "thread_count": 16,
    "verbose": 0,
    "bootstrap_type": "MVS",
    "random_seed": SEED,
}
TUNING_ITERATIONS = 1000
FINAL_ITERATIONS = 10000

# Best parameter set per target, in the order the targets are processed.
PARAMS = []
# NOTE(review): targets are paired with the submission columns purely by
# position - confirm sample_submission.csv has exactly these four columns
# in the order u_x, u_y, u_z, p (an extra id column would shift the pairing).
for column, y in zip(submission.columns, [y_x, y_y, y_z, y_p]):
    print(column + '\n')

    # Outlier filtering does not depend on the trial, so do it once per target
    # rather than once per Optuna trial.
    X_train, y_train = filter_data(X, y)

    def objective(trial):
        """Return the mean 5-fold cross-validated MAE for one sampled configuration."""
        params = {
            **FIXED_PARAMS,
            "iterations": TUNING_ITERATIONS,
            "learning_rate": trial.suggest_float("learning_rate", 5e-3, 1e-2, log=True),
            "depth": trial.suggest_int("depth", 3, 8),
            "subsample": trial.suggest_float("subsample", 0.5, 1),
            "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 20),
        }
        model = CatBoostRegressor(**params)
        # cross_val_score returns negated MAE; flip the sign so lower is better.
        scores = cross_val_score(
            model, X_train, y_train, scoring='neg_mean_absolute_error', cv = 5
        )
        return -scores.mean()

    study = optuna.create_study(
        direction='minimize',
        sampler=optuna.samplers.TPESampler(seed=SEED),
    )
    study.optimize(objective, n_trials=20)

    # Final model: tuned values on top of the shared fixed settings.
    # NOTE(review): learning_rate was tuned with 1000 iterations but the final
    # fit uses 10000 - confirm this is intended.
    params = {**FIXED_PARAMS, **study.best_params, "iterations": FINAL_ITERATIONS}
    PARAMS.append(params)
    model = CatBoostRegressor(**params).fit(X_train, y_train)
    # Predict only from the columns the model was trained on; test.csv may
    # carry extra columns (e.g. an id) that must not be fed to the model.
    submission[column] = model.predict(test_data[list(X.columns)])

In [None]:
submission.to_csv('submission.csv', index=False)