# Beat Saber Mapping Model v1

This notebook explores creating a model for generating custom Beat Saber maps using an iterative random forest approach.

Goals include:
- Load data from pickle files
- Process data into RF model format
- Train model
- Predict 
- Evaluate
- Write predictions to Beat Saber .dat files

In [24]:
import pandas as pd
import numpy as np
import pickle
from sklearn.multioutput import MultiOutputClassifier, ClassifierChain
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import os

In [3]:
with open('./level_df/1602_expert.pkl', 'rb') as f:
    df = pickle.load(f)

In [4]:
with open('./level_df/1023_expert.pkl', 'rb') as f:
    df2 = pickle.load(f)

In [5]:
df.columns

Index([               '_time',                      0,                      1,
                            2,                      3,                      4,
                            5,                      6,                      7,
                            8,                      9,                     10,
                           11,    'notes_lineIndex_0',    'notes_lineLayer_0',
               'notes_type_0', 'notes_cutDirection_0',    'notes_lineIndex_1',
          'notes_lineLayer_1',         'notes_type_1', 'notes_cutDirection_1'],
      dtype='object')

In [17]:
df2.head()

Unnamed: 0,_time,0,1,2,3,4,5,6,7,8,...,10,11,notes_lineIndex_0,notes_lineLayer_0,notes_type_0,notes_cutDirection_0,notes_lineIndex_1,notes_lineLayer_1,notes_type_1,notes_cutDirection_1
0,0.0,0.840129,0.988685,0.849094,0.711231,0.733086,0.620522,0.696874,0.647646,0.576001,...,0.601821,0.550767,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
1,1.0,0.647516,0.977041,0.760961,0.580212,0.758228,0.670512,0.427985,0.527901,0.662909,...,0.475355,0.497519,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
2,2.0,0.496205,1.0,0.627755,0.457174,0.549942,0.340731,0.199601,0.184958,0.315051,...,0.244849,0.22009,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
3,3.0,0.560961,1.0,0.642114,0.241092,0.282111,0.249536,0.179029,0.259913,0.42579,...,0.485574,0.382745,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
4,4.0,0.598664,1.0,0.552977,0.153704,0.260963,0.147146,0.095006,0.274012,0.370645,...,0.163096,0.169267,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0


In [6]:
X = df.iloc[:, 0: 13]

In [18]:
y = df[list(filter(lambda x: str(x).startswith('notes_type'), df.columns))]

In [19]:
y.head()

Unnamed: 0,notes_lineIndex_0,notes_lineLayer_0,notes_type_0,notes_cutDirection_0,notes_lineIndex_1,notes_lineLayer_1,notes_type_1,notes_cutDirection_1
0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
1,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
2,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
3,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
4,1.0,0.0,0.0,1.0,999.0,999.0,999.0,999.0


In [9]:
X.isna().sum()

_time    0
0        0
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        0
9        0
10       0
11       0
dtype: int64

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [21]:
multi = MultiOutputClassifier(RandomForestClassifier()).fit(X_train, y_train)

In [22]:
multi.predict(X_test)

array([[999., 999., 999., ...,   0.,   1.,   0.],
       [999., 999., 999., ..., 999., 999., 999.],
       [  0.,   0.,   0., ...,   0.,   1.,   0.],
       ...,
       [999., 999., 999., ...,   0.,   1.,   7.],
       [  1.,   0.,   0., ..., 999., 999., 999.],
       [999., 999., 999., ..., 999., 999., 999.]])

In [23]:
multi.score(X_test, y_test)

0.134185303514377

In [16]:
multi.get_params

<bound method BaseEstimator.get_params of MultiOutputClassifier(estimator=RandomForestClassifier(bootstrap=True,
                                                       ccp_alpha=0.0,
                                                       class_weight=None,
                                                       criterion='gini',
                                                       max_depth=None,
                                                       max_features='auto',
                                                       max_leaf_nodes=None,
                                                       max_samples=None,
                                                       min_impurity_decrease=0.0,
                                                       min_impurity_split=None,
                                                       min_samples_leaf=1,
                                                       min_samples_split=2,
                                                       min_we

In [27]:
filelist = [f for f in os.listdir('level_df')]
full = pd.DataFrame()
for f in filelist:
    if f.endswith('expert.pkl'):
        with open(f"./level_df/{f}", 'rb') as d:
            df = pickle.load(d)
        full = pd.concat([full, df], axis = 0, ignore_index = True)
    else:
        continue

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  import sys


In [28]:
full.shape

(315169, 25)

In [49]:
full.dropna(subset = list(full.iloc[:, 0:13].columns), axis = 0, inplace = True)

In [50]:
full.fillna(999, inplace = True)

In [61]:
X = full.iloc[:, 0: 13]
y = full[list(filter(lambda x: str(x).startswith('notes'), full.columns))]

In [63]:
y.head()

Unnamed: 0,notes_lineIndex_0,notes_lineLayer_0,notes_type_0,notes_cutDirection_0,notes_lineIndex_1,notes_lineLayer_1,notes_type_1,notes_cutDirection_1,notes_lineIndex_3,notes_lineLayer_3,notes_type_3,notes_cutDirection_3
0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
1,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
2,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
3,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
4,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0


In [62]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [55]:
multifull = MultiOutputClassifier(RandomForestClassifier()).fit(X_train, y_train)

In [56]:
multifull.score(X_test, y_test)

0.2997694081475788

In [64]:
chain = ClassifierChain(RandomForestClassifier(), order = [2, 0, 1, 3, 6, 4, 5, 7, 10, 8, 9, 11])

In [65]:
chain.fit(X_train, y_train)

ClassifierChain(base_estimator=RandomForestClassifier(bootstrap=True,
                                                      ccp_alpha=0.0,
                                                      class_weight=None,
                                                      criterion='gini',
                                                      max_depth=None,
                                                      max_features='auto',
                                                      max_leaf_nodes=None,
                                                      max_samples=None,
                                                      min_impurity_decrease=0.0,
                                                      min_impurity_split=None,
                                                      min_samples_leaf=1,
                                                      min_samples_split=2,
                                                      min_weight_fraction_leaf=0.0,
                               

In [72]:
preds = pd.DataFrame(chain.predict(X_test), columns = y.columns)

In [82]:
preds

Unnamed: 0,notes_lineIndex_0,notes_lineLayer_0,notes_type_0,notes_cutDirection_0,notes_lineIndex_1,notes_lineLayer_1,notes_type_1,notes_cutDirection_1,notes_lineIndex_3,notes_lineLayer_3,notes_type_3,notes_cutDirection_3
0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
1,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
2,1.0,0.0,0.0,0.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
3,999.0,999.0,999.0,999.0,3.0,0.0,1.0,1.0,999.0,999.0,999.0,999.0
4,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
...,...,...,...,...,...,...,...,...,...,...,...,...
78055,1.0,0.0,0.0,1.0,2.0,0.0,1.0,1.0,999.0,999.0,999.0,999.0
78056,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
78057,1.0,0.0,0.0,1.0,2.0,0.0,1.0,1.0,999.0,999.0,999.0,999.0
78058,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
