In [32]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

import torch.nn as nn
import torch
from torch import optim

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/linking-writing-processes-to-writing-quality/sample_submission.csv
/kaggle/input/linking-writing-processes-to-writing-quality/test_logs.csv
/kaggle/input/linking-writing-processes-to-writing-quality/train_scores.csv
/kaggle/input/linking-writing-processes-to-writing-quality/train_logs.csv


# ***Competition Overview:***

**Dataset:** The competition dataset comprises approximately 5000 logs of user inputs. These logs are generated while users are composing essays.
**Task:** The goal of this competition is to predict the score that an essay received based on the log of user inputs. The scores are on a scale of 0 to 6, indicating the quality or effectiveness of the essay.
**File and Field Information:**
The competition provides a CSV file called train_logs.csv, which contains the following fields:

* id: This is a unique identifier for each essay.
* event_id: An index indicating the order of events in the log, ordered chronologically.
* down_time: The time when the down event (e.g., keypress or mouse click) occurred, measured in milliseconds.
* up_time: The time when the up event (e.g., key release or mouse release) occurred, measured in milliseconds.
* action_time: The duration of the event, which is the difference between down_time and up_time.
* activity: The category of activity that the event belongs to. It can have values like "Nonproduction," "Input," "Remove/Cut," "Paste," "Replace," or "Move From [x1, y1] To [x2, y2]".
* down_event: The name of the event when the key or mouse is pressed.
* up_event: The name of the event when the key or mouse is released.
* text_change: The text that changed as a result of the event (if any). This field represents the alteration made to the essay text.
* cursor_position: The character index of the text cursor after the event.
* word_count: The word count of the essay after the event.

**Objective:**
Participants in this competition are tasked with using the provided log data to build a predictive model. This model should take the log events as input and predict the essay's score on the 0 to 6 scale.

In [33]:
df = pd.read_csv('/kaggle/input/linking-writing-processes-to-writing-quality/train_logs.csv')
df_score = pd.read_csv('/kaggle/input/linking-writing-processes-to-writing-quality/train_scores.csv')

ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [None]:
test = pd.read_csv('/kaggle/input/linking-writing-processes-to-writing-quality/test_logs.csv')

#### Activity 
* Nonproduction - The event does not alter the text in any way
* Input - The event adds text to the essay
* Remove/Cut - The event removes text from the essay
* Paste - The event changes the text through a paste input
* Replace - The event replaces a section of text with another string
* Move From [x1, y1] To [x2, y2] - The event moves a section of text spanning character index x1, y1 to a new location x2, y2

In [None]:
def move_from(x):
    if 'Move From' in x:
        return 'Move From'
    else:
        return x 

In [None]:
df.activity  = df['activity'].apply(lambda x : move_from(x))

In [None]:
test.activity  = test['activity'].apply(lambda x : move_from(x))

In [None]:
seq_len = 100
# Count the number of event_ids for each essay_id
counts = df.groupby('id').size().sort_values()
print(counts)
# Split the dataframe based on the condition
groups = []
current_group = []
current_min = counts.iloc[0]
current_max = current_min

for essay, count in counts.items():
    if (current_max - current_min) <= seq_len:
        current_group.append(essay)
        current_max = max(current_max, count)
    else:
        groups.append(current_group)
        current_group = [essay]
        current_min = count
        current_max = count

if current_group:
    groups.append(current_group)

# Split the dataframe into multiple dataframes based on the groups
dfs = [df[df['id'].isin(group)] for group in groups]

print(f"num buckets: {len(dfs)}")
# Print the resulting dataframes
for group_df in dfs:
    print(group_df)
    print("------")

# Creating Essay Dataset

In [74]:
from torch.utils.data import Dataset, DataLoader, random_split

def pad_collate_fn(batch):
    # Extract sequences and targets from the batch
    sequences, targets = zip(*batch)
    
    # Determine the maximum sequence length in this batch
    max_length = max([seq.size(0) for seq in sequences])
    
    # Initialize list to store padded sequences
    padded_sequences = []
    
    for seq in sequences:
        # Calculate the padding length for this sequence
        padding_length = max_length - seq.size(0)
        
        # Create a zero-filled tensor for padding
        padding_tensor = torch.zeros((padding_length, 4))
        
        # Append the padding to the original sequence
        padded_seq = torch.cat([seq, padding_tensor], dim=0)
        
        # Add the padded sequence to the list
        padded_sequences.append(padded_seq)
    
    # Convert lists of tensors into tensors
    return torch.stack(padded_sequences), torch.tensor(targets)

class EssayDataset(Dataset):
    def __init__(self, train_df, score_df):
        self.train_df = train_df
        self.score_df = score_df
        self.unique_ids = train_df['id'].unique()
        self.columns_to_return = ['action_time', 'activity', 'down_event', 'word_count']
        self.down_event_to_index = {event: idx for idx, event in enumerate(unique_down_events)}
        self.unidentified_de_index = self.down_event_to_index.get('Unidentified', len(unique_down_events))
        self.activity_to_index =  {act: idx for idx, act in enumerate(unique_activities)}
        self.unidentified_a_index = self.activity_to_index.get('Unidentified', len(unique_activities))
        self.score_to_index =  {score: idx for idx, score in enumerate(unique_scores)}
        


    def __len__(self):
        return len(self.unique_ids)

    def __getitem__(self, idx):
        essay_id = self.unique_ids[idx]
        essay_data = self.train_df[self.train_df['id'] == essay_id]
        essay_data = essay_data.sort_values(by='event_id')[self.columns_to_return]
        
        # Normalize and replace categories with indices
        essay_data['action_time'] = (essay_data['action_time'] - essay_data['action_time'].mean()) / essay_data['action_time'].std()
        essay_data['word_count'] = (essay_data['word_count'] - essay_data['word_count'].mean()) / essay_data['word_count'].std()
        essay_data['down_event'] = essay_data['down_event'].map(self.down_event_to_index).fillna(self.unidentified_de_index).astype(int)
        essay_data['activity'] = essay_data['activity'].map(self.activity_to_index).fillna(self.unidentified_a_index).astype(int)
        
        essay_score = self.score_to_index[self.score_df[self.score_df['id'] == essay_id]['score'].values[0]]
        essay_tensor = torch.Tensor(essay_data.values)
        return essay_tensor, essay_score

In [75]:


unique_down_events = list(df['down_event'].unique())
unique_activities = list(df['activity'].unique())
unique_scores = list(df_score['score'].unique())
print(unique_scores)
unique_activities.append('Unidentified')
batch_size = 64
train_loaders = []
val_loaders = []
for bucket_df in dfs:
    dataset = EssayDataset(bucket_df, df_score)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=pad_collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=pad_collate_fn)
    train_loaders.append(train_loader)
    val_loaders.append(val_loader)

print(len(train_loaders))
print(len(val_loaders))

[3.5, 6.0, 2.0, 4.0, 4.5, 2.5, 5.0, 3.0, 1.5, 5.5, 1.0, 0.5]
73
73


# Define Input Embeddings


In [76]:
class InputEssayEmbeddings(nn.Module):
    def __init__(self, n_embed):
        super().__init__()

        # Embedding for the categorical columns
        self.embed_activity = nn.Embedding(len(unique_activities), (n_embed-2)//2)
        self.embed_down_event = nn.Embedding(len(unique_down_events), (n_embed-2)//2)
        
        # activities: 6
        # down_event: 131

    def forward(self, input_tensor):
        # Slice columns from the input tensor
        action_time_col = input_tensor[:, :, 0:1]  # [B, num_events, 1]
        activity_col = input_tensor[:, :, 1].long()  # [B, num_events]
        down_event_col = input_tensor[:, :, 2].long()    # [B, num_events]
        word_count_col = input_tensor[:, :, 3:4]  # [B, num_events, 1]

        # Pass through respective embedding layers
        embed_down_event = self.embed_down_event(down_event_col)  # [B, num_events, embed_dim_down_event]
        embed_activity = self.embed_activity(activity_col)        # [B, num_events, embed_dim_activity]
        
        # Concatenate them with the rest of the tensor
        out = torch.cat([action_time_col, embed_activity, embed_down_event, word_count_col], dim=2)
        return out # [B, num_events, n_embed]
    
class PositionalEncoding(nn.Module):
    def __init__(self, n_embed, seq_len):
        super().__init__()
        # Dict size
        self.emb = nn.Embedding(seq_len, n_embed)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)

    def forward(self, x, batched=False):
        """
        :param x: If using batching, should be [batch size, seq len, embedding dim]. Otherwise, [seq len, embedding dim]
        :return: a tensor of the same size with positional embeddings added in
        """
        # Second-to-last dimension will always be sequence length
        input_size = x.shape[-2]
        indices_to_embed = torch.tensor(np.asarray(range(0, input_size))).type(torch.LongTensor).to(self.device)
        if batched:
            # Use unsqueeze to form a [1, seq len, embedding dim] tensor -- broadcasting will ensure that this
            # gets added correctly across the batch
            emb_unsq = self.emb(indices_to_embed).unsqueeze(0)
            return x + emb_unsq
        else:
            return x + self.emb(indices_to_embed)

# Define Architecture Sub Components

In [77]:
class FeedFoward(nn.Module):
    def __init__(self, n_embed):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embed, 4 * n_embed),
            nn.ReLU(),
            nn.Linear(4 * n_embed, n_embed),
        )

    def forward(self, x):
        return self.net(x)
    
class Head(nn.Module):
    def __init__(self, seq_length, n_embed, num_heads, n_internal):
        super().__init__()
        self.K = nn.Linear(n_embed, n_internal)
        self.Q = nn.Linear(n_embed, n_internal)
        self.V = nn.Linear(n_embed, n_internal)
        self.w0 = nn.Linear(n_internal, n_embed // num_heads)
        

    def forward(self, input_vecs):
        keys = self.K(input_vecs) # B, L, d_internal
        d_k = keys.shape[-1]
        queries = self.Q(input_vecs) # B, L, d_internal
        value = self.V(input_vecs) # B, L, d_internal
        
        weights = torch.matmul(queries, keys.transpose(-2, -1)) * d_k**-0.5# L, L
        attention = torch.softmax(weights, dim=-1)

        logit = torch.matmul(attention , value) # B, L, d_internal
        logit = self.w0(logit)
        return logit

class MultiHeadAttention(nn.Module):

    def __init__(self, seq_length, n_embed, num_heads, n_internal):
        super().__init__()
        self.heads = nn.ModuleList([Head(seq_length, n_embed, num_heads, n_internal) for _ in range(num_heads)])
        
        
    def forward(self, input_vecs):
        cls_tokens = []
        for head in self.heads:
            head_out = head(input_vecs)  
            cls_tokens.append(head_out[:, 0])
        cls_tokens_cat = torch.cat(cls_tokens, dim=-1)
        #print(cls_tokens[0].shape)
        return cls_tokens_cat.unsqueeze(1) # B, n_embed

class MHAConvolution(nn.Module):
    def __init__(self, seq_length, n_embed, num_heads, n_internal, stride=1):
        super().__init__()
        self.stride = stride
        self.window_size = seq_length
        self.n_heads = num_heads
        self.cls_token = nn.Parameter(torch.randn(1, 1, n_embed)) 
        self.pos_embedding = PositionalEncoding(n_embed, seq_length)
        self.multi_head_attention = MultiHeadAttention( seq_length+1, n_embed, num_heads, n_internal)
        self.ffwd = FeedFoward(n_embed)
        self.ln1 = nn.LayerNorm(n_embed)
        self.ln2 = nn.LayerNorm(n_embed)

    def forward(self, input_vecs, batched=False): # B, long_seq_len, n_embed
        
        outputs = []
        #print(input_vecs.shape)
        for i in range(0, input_vecs.size(1) - self.window_size + 1, self.stride): # TODO: think about ways to downsample other than stride
            # prepend the cls token to the input
            local_window = input_vecs[:, i:i+self.window_size, :]  # [B, seq_length, embed_size]
            B, L, _ = local_window.size()
            cls_tokens_repeated = self.cls_token.repeat(B, 1, 1) # B, 1, n_embed
            local_window = self.pos_embedding(local_window, batched=batched)
            local_window_cls = torch.cat([cls_tokens_repeated, local_window], dim=1)
            attention_out = self.multi_head_attention(self.ln1(local_window_cls)) # B, 1, n_embed, a single cls vector cated from all heads
            attention_out += cls_tokens_repeated # residual for the cls_tokens
            out = attention_out + self.ffwd(self.ln2(attention_out))
            out = out.view(B, -1) # B, n_heads*n_embed
            outputs.append(out) 
        final = torch.stack(outputs, dim=1)
        #print(final.shape)
        return final # B, (long_seq_len-seq_len)/stride, n_embed
    

# Define the Model

In [78]:
class GlobalAveragingTransformer(nn.Module):
    def __init__(self, seq_len, n_embed, n_internal, num_heads, n_scores):
        super().__init__()
        self.seq_len = seq_len
        self.mha_conv = MHAConvolution(seq_len, n_embed, num_heads, n_internal, stride=seq_len//2)
        self.classifier = nn.Linear(n_embed, n_scores) # consider adding intermediat ffw layers
        self.embedding = InputEssayEmbeddings(n_embed)
        self.adaptive_pool = nn.AdaptiveAvgPool1d(1)
        
    def forward(self, input_tensor, batched=False):
        x = self.embedding(input_tensor)
        x = self.mha_conv(x) # B, down_sampled_len, n_embed
        x_permuted = x.permute(0, 2, 1) # B, n_embed, down_sampled_len
        x = self.adaptive_pool(x_permuted).squeeze(-1) # B, n_embed
        #print(x.shape)
        x = self.classifier(x) # B, n_scores
        if batched:
            return x
        else:
            return x.squeeze(0)

# Create Model

In [79]:
model = GlobalAveragingTransformer(100, 10, 5, 2, len(unique_scores))
device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device)

model.to(device)


GlobalAveragingTransformer(
  (mha_conv): MHAConvolution(
    (pos_embedding): PositionalEncoding(
      (emb): Embedding(100, 10)
    )
    (multi_head_attention): MultiHeadAttention(
      (heads): ModuleList(
        (0-1): 2 x Head(
          (K): Linear(in_features=10, out_features=5, bias=True)
          (Q): Linear(in_features=10, out_features=5, bias=True)
          (V): Linear(in_features=10, out_features=5, bias=True)
          (w0): Linear(in_features=5, out_features=5, bias=True)
        )
      )
    )
    (ffwd): FeedFoward(
      (net): Sequential(
        (0): Linear(in_features=10, out_features=40, bias=True)
        (1): ReLU()
        (2): Linear(in_features=40, out_features=10, bias=True)
      )
    )
    (ln1): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
    (ln2): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
  )
  (classifier): Linear(in_features=10, out_features=12, bias=True)
  (embedding): InputEssayEmbeddings(
    (embed_activity): Embedding(

# Run Training Loop

In [None]:
max_epochs = 40
scheduler_enabled = False

optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=10)
criterion = torch.nn.CrossEntropyLoss().to(device)
model.train()
dataset_size = len(train_loader)
print("Starting training loop")
print(len(train_loaders))
num_buckets = len(train_loaders)
for epoch in range(max_epochs):
    print(f"Training epoch {str(epoch)}")
    num_correct = 0
    num_samples = 0
    total_loss = 0.0
    i = 0
    for bucket_train_loader in train_loaders:
        #print(f"training bucket {i}...")
        train_loader = bucket_train_loader
        
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            logit = model.forward(batch_x, batched=True)
            #print(logit.shape)
            #print(batch_y.shape)
            loss = criterion(logit, batch_y)
            total_loss += loss
            #train_logger.add_scalar('loss', loss, epoch * dataset_size + i)
            _, pred_labels = torch.max(logit, dim=1)
            num_correct += (pred_labels == batch_y).sum().item()
            num_samples += batch_y.size(0)

            loss.backward()
            optimizer.step()
        #print(f"finished bucket {i} out of {num_buckets} with loss {total_loss/num_buckets}")
        i+=1
        

    acc = 100 * num_correct / num_samples
    print(f"training epoch {epoch} finished with acc {acc} and total loss {total_loss}")
    #train_logger.add_scalar('accuracy', acc, epoch * dataset_size + i - 1)
    
        
    model.eval()
    num_correct = 0
    num_samples = 0
    with torch.no_grad():
        i = 0
        for bucket_val_loader in val_loaders:
            #print(f"validating bucket {i}...")
            validation_data_loader = bucket_val_loader
            for batch_x, batch_y in validation_data_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                logits = model(batch_x, batched=True)
                #print(logits.shape)
                _, predicted = torch.max(logits, dim=1)
                num_samples += batch_y.size(0)
                num_correct += (predicted == batch_y).sum().item()
            i += 1
            if i > 10:
                break
    acc = 100 * num_correct / num_samples
    print(f"validation epoch {epoch} finished with acc {acc}")
    if scheduler_enabled:
        scheduler.step(acc)
    #valid_logger.add_scalar('accuracy', acc, epoch * dataset_size + i - 1)
    

Starting training loop
73
Training epoch 0
training epoch 0 finished with acc 5.295629820051414 and total loss 194.32501220703125
validation epoch 0 finished with acc 0.0
Training epoch 1
training epoch 1 finished with acc 14.55012853470437 and total loss 181.71363830566406
validation epoch 1 finished with acc 0.0
Training epoch 2
training epoch 2 finished with acc 15.886889460154242 and total loss 177.4959259033203
validation epoch 2 finished with acc 0.0
Training epoch 3


In [None]:
df_agg = df.groupby(['id','activity']).agg({
    'event_id': np.max,
    'action_time' : [np.mean, np.sum, np.min, np.max],
    'word_count' : np.max,
    'cursor_position' : np.max
}).reset_index()
df_agg.head(10)
#df_agg.colum1ns = [i+"_"+j for i,j in df_agg.columns]

In [None]:
test = test.groupby(['id','activity']).agg({
    'event_id': np.max,
    'action_time' : [np.mean, np.sum, np.min, np.max],
    'word_count' : np.max,
    'cursor_position' : np.max
}).reset_index()

test.columns = [i+"_"+j for i,j in test.columns]

In [None]:
feat = ['event_id_amax', 'action_time_mean',
       'action_time_sum', 'action_time_amin', 'action_time_amax',
       'word_count_amax', 'cursor_position_amax']
df_pvt = pd.pivot_table(df_agg, values =feat, index =['id_'],
                         columns =['activity_'], aggfunc = np.max).reset_index()

df_pvt.columns = [i+"_"+j for i,j in df_pvt.columns]

In [None]:
remaining_cols = list(set(feat) - set(test.columns))
if len(remaining_cols) != 0:
    for i in remaining_cols:
        test[i] = 0

In [None]:
test = pd.pivot_table(test, values = feat, index =['id_'],
                         columns =['activity_'], aggfunc = np.max).reset_index()

test.columns = [i+"_"+j for i,j in test.columns]

In [None]:
remaining_cols = list(set(df_pvt.columns) - set(test.columns))
if len(remaining_cols) != 0:
    for i in remaining_cols:
        test[i] = 0

In [None]:
test = test[df_pvt.columns]

In [None]:
test = test[df_pvt.columns]
df_pvt = df_pvt.rename(columns={'id__':'id'})
test = test.rename(columns={'id__':'id'})
df_pvt = df_pvt.merge(df_score, on = 'id', how = 'left')

In [None]:
sns.histplot(df_pvt['score'])
plt.figure(figsize=(15, 7))
sns.heatmap(df_pvt.drop(['id'],axis= 1).corr(), annot=False)

In [None]:
cols = ['action_time_amax_Input', 'action_time_amax_Move From',
       'action_time_amax_Nonproduction', 'action_time_amax_Paste',
       'action_time_amax_Remove/Cut', 'action_time_amax_Replace',
       'action_time_amin_Input', 'action_time_amin_Move From',
       'action_time_amin_Nonproduction', 'action_time_amin_Paste',
       'action_time_amin_Remove/Cut', 'action_time_amin_Replace',
       'action_time_mean_Input', 'action_time_mean_Move From',
       'action_time_mean_Nonproduction', 'action_time_mean_Paste',
       'action_time_mean_Remove/Cut', 'action_time_mean_Replace',
       'action_time_sum_Input', 'action_time_sum_Move From',
       'action_time_sum_Nonproduction', 'action_time_sum_Paste',
       'action_time_sum_Remove/Cut', 'action_time_sum_Replace',
       'cursor_position_amax_Input', 'cursor_position_amax_Move From',
       'cursor_position_amax_Nonproduction', 'cursor_position_amax_Paste',
       'cursor_position_amax_Remove/Cut', 'cursor_position_amax_Replace',
       'event_id_amax_Input', 'event_id_amax_Move From',
       'event_id_amax_Nonproduction', 'event_id_amax_Paste',
       'event_id_amax_Remove/Cut', 'event_id_amax_Replace',
       'word_count_amax_Input', 'word_count_amax_Move From',
       'word_count_amax_Nonproduction', 'word_count_amax_Paste',
       'word_count_amax_Remove/Cut', 'word_count_amax_Replace',]

In [None]:
for col in cols:
    print(f'Plots for {col}')
    plt.figure(figsize=(15, 5))

    plt.subplot(1,3, 1)  
    sns.boxplot(y=df_pvt[col], x=df_pvt['score'], color='#4082ed')
    plt.title("Scatterplot with score")

    plt.subplot(1, 3, 2)  
    sns.lineplot(y=df_pvt[col], x=df_pvt['score'], color='#40b9ed')
    plt.title("trend with score")

    plt.subplot(1, 3, 3)  
    sns.histplot(x=df_pvt[col], bins=50, kde=True, color='#40d3ed')
    plt.title(f"Histogram of {col}")

    plt.tight_layout() 
    plt.show()


# XGB model tuned using Optuna

In [None]:
X = df_pvt.drop(['id','score'], axis = 1)
y = df_pvt['score']

In [None]:
import optuna
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'reg:squarederror',  
        'eval_metric': 'rmse',  
        'booster': trial.suggest_categorical('booster', ['gbtree', 'gblinear', 'dart']),
        'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 10),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 0.9),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000)
    }

    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)

    y_pred = model.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)

    return rmse

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  

best_params = study.best_params
best_rmse = study.best_value

print(f'Best Parameters: {best_params}')
print(f'Best RMSE: {best_rmse}')

final_model = xgb.XGBRegressor(**best_params, random_state=42)
final_model.fit(X_train, y_train)

In [None]:
print('Train')
print(mean_squared_error(y_train,final_model.predict(X_train)))
print('Test')
print(mean_squared_error(y_test,final_model.predict(X_test)))

In [None]:
test_pred = final_model.predict(test.drop('id',axis = 1))
test = test[['id']]
test['score'] = test_pred

test.to_csv('submission.csv', index = False)