In [None]:
# default_exp series.train

# series.train
> Methods for training a `RandomForestClassifier` from `scikit-learn` to classify MRI series types.

In [None]:
#export
from dicomtools.basics import *
from sklearn.ensemble import RandomForestClassifier

# Seed NumPy's global random number generator with a fixed value at import time.
np.random.seed(42)

In [None]:
#export
def train_setup(df):
    """Preprocess `df`, attach extracted labels, and split on the 'unknown' sequence label.

    Rows whose `seq_label` equals 'unknown' become the test set; every other row
    goes to the training set. Both frames are independent copies with a fresh
    0..n-1 index.

    Returns `(train, test, y, y_names)`, where `y` and `y_names` are the integer
    codes and unique values produced by `pd.factorize` on `train['seq_label']`.
    """
    prepped = preprocess(df)
    # Only these three label columns are attached to the preprocessed frame.
    label_cols = extract_labels(prepped)[['plane', 'contrast', 'seq_label']]
    labelled = prepped.join(label_cols)

    is_unknown = labelled['seq_label'] == 'unknown'
    train = labelled.loc[~is_unknown].copy().reset_index(drop=True)
    test = labelled.loc[is_unknown].copy().reset_index(drop=True)

    codes, uniques = pd.factorize(train['seq_label'])
    return train, test, codes, uniques


In [None]:
#export
def train_fit(train, y, features, fname='model-run.skl', n_jobs=2, random_state=0, **rf_kwargs):
    """Train a Random Forest classifier on `train[features]` and `y`, then save to `fname` and return.

    train:        DataFrame holding the training rows.
    y:            target values passed to `fit` alongside `train[features]`.
    features:     column selection applied to `train` to build the feature matrix.
    fname:        destination handed to `dump` for the fitted model.
    n_jobs:       forwarded to `RandomForestClassifier` (default 2, as before).
    random_state: forwarded to `RandomForestClassifier` (default 0, as before).
    **rf_kwargs:  any further `RandomForestClassifier` keyword arguments
                  (e.g. `n_estimators`, `max_depth`, `class_weight`).

    With only the original four arguments, the behaviour is unchanged.
    """
    clf = RandomForestClassifier(n_jobs=n_jobs, random_state=random_state, **rf_kwargs)
    clf.fit(train[features], y)
    # Persist the fitted estimator before handing it back to the caller.
    dump(clf, fname)
    return clf
