In [1]:
%matplotlib notebook
import json
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import itertools
from IPython.core.debugger import set_trace
# library
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
import collections
import random
import re
from natsort import natsorted

# Check cuda.is_available ?

In [2]:
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("cuda_available : {}, device : {}".format(cuda_available, device))

cuda_available : True, device : cuda:0


In [3]:
class ParkinsonDataset(Dataset):
    def __init__(self, 
                 task_type,
                 train_or_test,
                 split_ratio=(0.8, 0.2),
                 UDysRS_rating_file = './UDysRS_UPDRS_Export/UDysRS.txt',
                 UPDRS_rating_file = './UDysRS_UPDRS_Export/UPDRS.txt',
                 CAPSIT_rating_file = './UDysRS_UPDRS_Export/CAPSIT.txt',
                 sn_file = './UDysRS_UPDRS_Export/sn_numbers.txt',
                 maxlen=600,
                 seed=5
                ):
        
        if task_type not in ['typeA', 'typeB']:
            raise ValueError('Task type is not valid [ typeA | typeB ]')
        
        self.task_type = task_type
        self.maxlen = maxlen

        # support 2 type of tasks ( A : "Communication/Drinking Tasks",
        #                           B : "Leg Agility Task" )
        trajectory_files = ['./UDysRS_UPDRS_Export/Communication_all_export.txt', 
                            './UDysRS_UPDRS_Export/Drinking_all_export.txt']  if task_type=='typeA' else \
                              ['./UDysRS_UPDRS_Export/LA_split_all_export.txt']

        
        def load_data(file_path):
            with open(file_path, 'r') as infile:
                data = json.load(infile)
            return data

        # input data
        traj_data = {}
        for traj_file in trajectory_files:
            data = load_data(traj_file)
            traj_data.update(data)
        
        # target data
        rating_data = {k:load_data(v) for k,v in zip(['UDysRS',
                                                      'UPDRS',
                                                      'CAPSIT'],
                                                      [UDysRS_rating_file,
                                                       UPDRS_rating_file,
                                                       CAPSIT_rating_file])}

        # map : trial number -> subject name
        self.sn_map = load_data(sn_file)

        ## preprocess input data (trajectory)
        input_data = collections.defaultdict(list)
        
        if task_type=='typeB':
            for key,meta_dict in traj_data.items():
                
                all_body_parts = meta_dict['position'].keys()
                
                tmp_dict = collections.defaultdict(list)

                part_active = list(filter(lambda x: x.endswith('_act'), all_body_parts))
                part_rst = list(filter(lambda x: x.endswith('_rst'), all_body_parts))

                # add active data first
                for part in part_active:
                    pure_part = part.split('_')[0]
                    tmp_dict[pure_part] += meta_dict['position'][part]
                    
                # append rst data
                for part in part_rst:
                    pure_part = part.split('_')[0]
                    tmp_dict[pure_part] += meta_dict['position'][part]
                    
                
                # replace with modified tmp_dict
                traj_data[key]['position'] = tmp_dict
        
        
        for key,meta_dict in traj_data.items():
            n_joints = len(meta_dict['position'].keys())
            
            # time-major
            time_series_data = np.asanyarray(list(meta_dict['position'].values())).transpose(1,0,2)
            
            # normalization
#             time_series_data[:,:,0] /= 640.0
#             time_series_data[:,:,1] /= 480.0
                        
            time_series_data = time_series_data.reshape(-1, n_joints*2)  # vectorisze; e.g. seq of position vectors
            
            input_data["sample_id"].append(key)
            input_data["trajectory"].append(time_series_data.tolist())        


            
        ## preprocess target data

        # prepare empty data
        target_data = collections.defaultdict(list)
        all_trials = list(self.sn_map.keys())
        
        # for "UDysRS"
        target_data["trial_nbr"] = all_trials
        for column_name in ['Communication', 'Drinking', 'Higher']:
            for part in [ "Neck", 
                          "Right arm/shoulder", "Left arm/shoulder",
                          "Trunk",
                          "Right leg/hip", "Left leg/hip" ]:
                target_data["UDysRS" + '_' + column_name + '_' + part] = [ np.nan ] * len(target_data["trial_nbr"])

        # for "UPDRS"
        target_data["UPDRS_Total"] = [ np.nan ] * len(target_data["trial_nbr"])            
        
        
        RaitingItem2Name = { 
                            k: v for k,v in zip(['3.1', '3.10', '3.4', '3.5', '3.6', '3.8', '3.9'],
                                                ['SPEECH', 'GAIT', 'FINGER TAPPING', 'HAND MOVEMENTS', 
                                                 'PRONATION-SUPINATION MOVEMENTS OFHANDS', 'LEG AGILITY', 'ARISING FROM CHAIR']
                                               )
                          }
        
        # for CAPSIT
        for column_name in rating_data['CAPSIT'].keys():
            for part in [ "Neck", 
                          "Trunk", 
                          "Upper limb right","Upper limb left",
                          "Lower limb right", "Lower limb left" ]:
                target_data["CAPSIT" + '_' + RaitingItem2Name[column_name] + '_' + part] = [ np.nan ] * len(target_data["trial_nbr"])

        
        # part 1 : 'UDysRS'            
        for column_name, meta_dict in rating_data['UDysRS'].items():
            trial_nbrs = meta_dict.keys()
            for trial_nbr in trial_nbrs:
                try:
                    ix = all_trials.index(trial_nbr)
                except ValueError:
                    # if trial_nbr is not found in all_trials, Skip
                    continue
                    
                for p_ix, part in enumerate([ "Neck", 
                                              "Right arm/shoulder", "Left arm/shoulder",
                                              "Trunk",
                                              "Right leg/hip", "Left leg/hip"]):
                    target_data["UDysRS" + '_' + column_name + '_' + part][ix] = meta_dict[trial_nbr][p_ix]

                
        
            
        # part 2 : 'UPDRS'
        for trial_nbr, val in rating_data['UPDRS']['Total'].items():
            try:
                ix = all_trials.index(trial_nbr)
            except ValueError:
                # if trial_nbr is not found in all_trials, Skip!
                continue

            target_data["UPDRS_Total"][ix] = val

            
        # part 3 : 'CAPSIT'
        for column_name, meta_dict in rating_data['CAPSIT'].items():
            trial_nbrs = meta_dict.keys()
            for trial_nbr in trial_nbrs:
                try:
                    ix = all_trials.index(trial_nbr)
                except ValueError:
                    # if trial_nbr is not found in all_trials, Skip
                    continue
                    
                for p_ix, part in enumerate([ "Neck", 
                                              "Trunk", 
                                             "Upper limb right","Upper limb left",
                                              "Lower limb right", "Lower limb left" ]):
                    target_data["CAPSIT" + '_' + RaitingItem2Name[column_name] + '_' + part][ix] = meta_dict[trial_nbr][p_ix]
        

        # input data frame
        input_df = pd.DataFrame(data=input_data).fillna(0)

        # integratged target data frame
        self.target_df = target_df = pd.DataFrame(data=target_data).fillna(0)        
        
        # valid target indices
        valid_indices = self.target_df[self.target_df[self.target_columns]!=0][self.target_columns].dropna().index
        valid_trial_nbrs = self.target_df['trial_nbr'].iloc[valid_indices].values
        
        if self.task_type=='typeA':
            regex = lambda x: '^{}-.*$'.format(x)
        elif self.task_type=='typeB':
            regex = lambda x: '^{}$'.format(x)
            
        valid_sample_id_regex = '|'.join([regex(x) for x in valid_trial_nbrs])
        input_df = input_df[input_df.sample_id.str.contains(valid_sample_id_regex)]
        target_df = target_df.iloc[valid_indices]
                
        sorted_sample_ids = natsorted(input_df.sample_id.values)
        
        # set random seed, to consistency of performance
        np.random.seed(seed)
        
        # shuffle before split
        input_df = input_df.iloc[np.random.permutation(len(input_df))]
        target_df = target_df.iloc[np.random.permutation(len(target_df))]
        
        if train_or_test=='train':
            self.input_df = input_df[input_df.sample_id.isin(sorted_sample_ids[:int(len(input_df)*split_ratio[0])])]
            self.target_df = target_df
            
        elif train_or_test=='test':
            self.input_df = input_df[input_df.sample_id.isin(sorted_sample_ids[int(len(input_df)*split_ratio[0]):])]
            self.target_df = target_df


    @property
    def list_of_ratings(self):
        if self.task_type == 'typeA':
            list_of_ratings = ['UDysRS']
        elif self.task_type == 'typeB':
#             list_of_ratings = ['UPDRS', 'CAPSIT']
            list_of_ratings = ['UPDRS']
            
        return list_of_ratings
    
    @property
    def target_columns(self):
        _target_columns = []
        for rating_name in self.list_of_ratings:
            # filter columns by name
            _target_columns += list(filter(lambda x: x.startswith(rating_name), self.target_df.columns))

        return _target_columns
    
    def __len__(self):
        return len(self.input_df)
    
    def __getitem__(self, idx):
        sample_id = self.input_df.iloc[idx].sample_id
        trial_nbr = re.split("\s|-", sample_id)[0]  
        
        input_data = self.input_df[self.input_df.sample_id==sample_id].iloc[:, 1:].values[0]
        target_data = self.target_df[self.target_df.trial_nbr==trial_nbr][self.target_columns].values[0]

        input_data = np.asanyarray(list(np.asanyarray(x) for x in input_data)).squeeze(axis=0)
        target_data = np.asanyarray(list(np.asanyarray(x) for x in target_data))
                
        # zero padding
        input_data = np.pad(input_data, ((0,self.maxlen-len(input_data)),(0,0)),
                                               'constant', constant_values=0).transpose(1,0)

        sample = { 'keypoints_seq': torch.tensor(input_data, dtype=torch.float32),
                   'targets': torch.tensor(target_data, dtype=torch.float32)
        }
        
        return sample

In [4]:
mydataset = { x : ParkinsonDataset(task_type='typeB', train_or_test=x) \
                        for x in ['train', 'test'] }

dataloader = { x : DataLoader(mydataset[x],
                        batch_size=5,
                        shuffle=True,
                        num_workers=4) \
                    for x in ['train', 'test'] }

# Visualize patient distribution

In [5]:
patient_distribution = collections.defaultdict(None)
patient_distribution['train'] = list(re.split("\s|-", x)[0] for x in  mydataset['train'].input_df.sample_id.values)
patient_distribution['test'] = list(re.split("\s|-", x)[0] for x in  mydataset['test'].input_df.sample_id.values)
venn2([set(patient_distribution['train']), set(patient_distribution['test'])], set_labels=('Train', 'Test'))

<IPython.core.display.Javascript object>

<matplotlib_venn._common.VennDiagram at 0x7f8802c79cc0>

# Define DNN

In [6]:
class Conv1d(nn.Conv1d):
    def __init__(self, in_channels,
                       out_channels,
                       kernel_size,
                       stride=1,
                       padding=0,
                       dilation=1,
                       groups=1,
                       bias=True,
                       padding_type='same'):
        
        super(Conv1d, self).__init__(in_channels,
                                     out_channels,
                                     kernel_size,
                                     stride,
                                     padding,
                                     dilation,
                                     groups,
                                     bias)
        
        self.padding_type = padding_type
    
    def forward(self, x):
        _, _, input_length = x.size()
        
        if self.padding_type == 'same':
            padding_need = int((input_length * (self.stride[0]-1) + self.kernel_size[0] - self.stride[0]) / 2)
            
        return F.conv1d(x, self.weight, self.bias, self.stride, 
                        padding_need, self.dilation, self.groups)

In [7]:
class ResidualBlock(nn.Module):
    def __init__(self, C_in, C_out, pool, highway=True):
        super(ResidualBlock, self).__init__()
        self.pool = pool
        self.highway = highway
                
        stride = 1
        
        if C_in != C_out:
            C = C_out
        else:
            C = C_in = C_out
            
        if pool:
            # input dimension matchig
            self.conv_1x1_matching = Conv1d(C_in, C, kernel_size=1, stride=1, padding_type='same')
            self.bn_1x1_matching = nn.BatchNorm1d(C)

            # for pooling of residual path
            stride = 2
            self.conv_2x1_pool = Conv1d(C_in, C, kernel_size=1, stride=2, padding_type='same')
            self.bn_2x1_pool= nn.BatchNorm1d(C)
                
        # conv_1x1_a : reduce number of channels by factor of 4 (output_channel = C/4)
        self.conv_1x1_a = Conv1d(C, int(C/4), kernel_size=1, stride=stride, padding_type='same')
        self.bn_1x1_a = nn.BatchNorm1d(int(C/4))
        
        # conv_3x3_b : more wide receptive field (output_channel = C/4)
        self.conv_3x3_b = Conv1d(int(C/4), int(C/4), kernel_size=3, stride=1, padding_type='same')
        self.bn_3x3_b = nn.BatchNorm1d(int(C/4))
        
        # conv_1x1_c : recover org channel C (output_channel = C)
        self.conv_1x1_c = Conv1d(int(C/4), C, kernel_size=1, stride=1, padding_type='same')
        self.bn_1x1_c = nn.BatchNorm1d(C)
        
        if highway:
            # conv_1x1_g : gating for highway network
            self.conv_1x1_g = Conv1d(C, C, kernel_size=1, stride=1, padding_type='same')
        
        # output
        self.bn_1x1_out = nn.BatchNorm1d(C)
        
    
    def forward(self, x):
        '''
            x : size = (batch, C, maxlen)
        '''
        
        res = x
        
        if self.pool:
            # input dimension matching with 1x1 conv
            x = self.conv_1x1_matching(x)
            x = self.bn_1x1_matching(x)
            
            # pooling of residual path
            res = self.conv_2x1_pool(res)
            res = self.bn_2x1_pool(res)
        
        # 1x1_a (C/4)
        x = self.conv_1x1_a(x)
        x = self.bn_1x1_a(x)
        x = F.relu(x)
        
        # 3x3_b (C/4)
        x = self.conv_3x3_b(x)
        x = self.bn_3x3_b(x)
        x = F.relu(x)
        
        # 1x1_c (C)
        x = self.conv_1x1_c(x)
        x = self.bn_1x1_c(x)
        
        if self.highway:
            # gating mechanism from "highway network"
            
            # gating factors controll intensity between x and f(x)
            # gating = 1.0 (short circuit) --> output is identity (same as initial input)
            # gating = 0.0 (open circuit)--> output is f(x) (case of non-residual network)
            gating = F.sigmoid(self.conv_1x1_g(x))
            
            # apply gating mechanism
            x = gating * res + (1.0 - gating) * F.relu(x)

            
        else:
            # normal residual ops (addition)
            x = F.relu(x) + res

            
        x = self.bn_1x1_out(x)
        x = F.relu(x)
        
        return x

In [8]:
class View(nn.Module):
    def __init__(self, *shape):
        super(View, self).__init__()
        self.shape = shape
    def forward(self, x):
        return x.view(*self.shape)

class GAP(nn.Module):
    def __init__(self):
        super(GAP, self).__init__()
    def forward(self, x):
        '''
            x : size = (B, C, L)
        '''
        return torch.mean(x, 2)
        
        
class Net(nn.Module):
    def __init__(self, input_size, target_size, num_layers = [3,4,6], num_filters = [64,128,128]):
        super(Net, self).__init__()
        
        self.input_size = input_size
        self.target_size = target_size
        
        def res_blocks(residual_blocks, num_layers, num_filters, block_ix, pool_first_layer=True):
            block_layers = num_layers[block_ix]

            for i in range(block_layers):
                # default values
                pool = False
                block_filters = num_filters[block_ix]
                
                C_in = C_out = block_filters
                
                if pool_first_layer and i==0:
                    pool = True
                if i==0 and block_ix > 0:
                    C_in = num_filters[block_ix-1]
                    
                print(f"layer : {i}, block : {block_ix}, C_in/C_out : {C_in}/{C_out}")
                residual_blocks.append(ResidualBlock(C_in=C_in, C_out=C_out,pool=pool, highway=True))
                
        residual_blocks = []

        for i in range(len(num_layers)):
            pool_first_layer = True
            if i == 0:
                pool_first_layer = False
            res_blocks(residual_blocks, num_layers=num_layers, num_filters=num_filters, block_ix=i,
                       pool_first_layer=pool_first_layer)
                
        self.model = nn.Sequential(nn.Conv1d(input_size, num_filters[0], kernel_size=7, stride=2),
                                   nn.BatchNorm1d(num_filters[0]),
                                   nn.ReLU(),
                                   nn.MaxPool1d(kernel_size=(3,), stride=2,),
                                   nn.Conv1d(num_filters[0], num_filters[0], kernel_size=3, stride=1),
                                   nn.BatchNorm1d(num_filters[0]),
                                   nn.ReLU(),
                                   nn.MaxPool1d(kernel_size=(3,), stride=2,),                                   
                                   *residual_blocks,
                                   nn.Conv1d(num_filters[-1], target_size, kernel_size=1, stride=1),
                                   GAP(),
#                                    View(-1,735),
#                                    nn.Linear(735, num_filters[-1]),
#                                    nn.ReLU(),
#                                    nn.Dropout(),
#                                    nn.Linear(num_filters[-1], num_filters[-1]),
#                                    nn.ReLU(),
#                                    nn.Dropout(),
#                                    nn.Linear(num_filters[-1], target_size)
                                   )
        
    def forward(self, x):
        '''
            x : size = (batch, input_size, maxlen)
        '''
        return self.model(x)

net = Net(input_size=np.array(mydataset['train'].input_df.trajectory.values[0]).shape[1],
          target_size=len(mydataset['train'].target_columns),
          num_layers = [3,4,6], num_filters = [64,128,128])

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net)
else:
    print("Single GPU mode")
    
net.to(device)

layer : 0, block : 0, C_in/C_out : 64/64
layer : 1, block : 0, C_in/C_out : 64/64
layer : 2, block : 0, C_in/C_out : 64/64
layer : 0, block : 1, C_in/C_out : 64/128
layer : 1, block : 1, C_in/C_out : 128/128
layer : 2, block : 1, C_in/C_out : 128/128
layer : 3, block : 1, C_in/C_out : 128/128
layer : 0, block : 2, C_in/C_out : 128/128
layer : 1, block : 2, C_in/C_out : 128/128
layer : 2, block : 2, C_in/C_out : 128/128
layer : 3, block : 2, C_in/C_out : 128/128
layer : 4, block : 2, C_in/C_out : 128/128
layer : 5, block : 2, C_in/C_out : 128/128
Single GPU mode


Net(
  (model): Sequential(
    (0): Conv1d(12, 64, kernel_size=(7,), stride=(2,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool1d(kernel_size=(3,), stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv1d(64, 64, kernel_size=(3,), stride=(1,))
    (5): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool1d(kernel_size=(3,), stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): ResidualBlock(
      (conv_1x1_a): Conv1d(64, 16, kernel_size=(1,), stride=(1,))
      (bn_1x1_a): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv_3x3_b): Conv1d(16, 16, kernel_size=(3,), stride=(1,))
      (bn_3x3_b): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv_1x1_c): Conv1d(16, 64, kernel_size=(1,), stride=(1,))
      (bn_1x1_c): BatchNorm1d(64, eps=1e-05, momentum=0.1, a

In [9]:
# define criterion
criterion = nn.MSELoss()
# criterion = nn.L1Loss()

history = {'train': [],
           'test': []}

import torch.optim as optim
from torch.optim import lr_scheduler

# Observe that all parameters are being optimized
optimizer = optim.Adam(net.parameters(), lr=1e-4)

epoch_loss = {'train': 0.0, 'test': 0.0}
kk = []
for epoch in range(1,301):
    for phase in ['train', 'test']:
        if phase=='train':
            net.train()
        elif phase=='test':
            net.eval()
        
        running_loss = 0.0

        for idx, batch_item in enumerate(dataloader[phase]):
            input, target = batch_item['keypoints_seq'].to(device), batch_item['targets'].to(device)
            
            optimizer.zero_grad()
            
            with torch.set_grad_enabled(phase=='train'):
                # feed data to network
                output = net(input)

                # compute loss
                loss = criterion(output, target)
                
                if phase=='train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item()
            
        steps_per_epoch = int(len(dataloader[phase].dataset)/dataloader[phase].batch_size)
        avg_loss = running_loss / steps_per_epoch
                
        epoch_loss[phase] += avg_loss
        
        if epoch % 10 == 0:
            print('=================={}========================'.format(phase.upper()))
            print('EPOCH : {}, AVG_MSE : {:.4f}'.format(epoch, epoch_loss[phase] / 10))
            history[phase].append(epoch_loss[phase] / 10)
            
            # init epoch_loss at its own phase
            epoch_loss[phase] = 0.0
                
# plot learning curve
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111)
ax.plot(history['train'], label='train', color='b')
ax.plot(history['test'], label='test', color='orange')

ax.set_title('Learning Curve')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE')
plt.legend()

print("Saving learning curve...")
fig.savefig('learning.png', dpi=fig.dpi)



EPOCH : 10, AVG_MSE : 722.7170
EPOCH : 10, AVG_MSE : 453.9300
EPOCH : 20, AVG_MSE : 698.5519
EPOCH : 20, AVG_MSE : 425.4425
EPOCH : 30, AVG_MSE : 671.1780
EPOCH : 30, AVG_MSE : 394.0985
EPOCH : 40, AVG_MSE : 644.2852
EPOCH : 40, AVG_MSE : 371.4489
EPOCH : 50, AVG_MSE : 616.5718
EPOCH : 50, AVG_MSE : 356.9995
EPOCH : 60, AVG_MSE : 587.3325
EPOCH : 60, AVG_MSE : 337.1651
EPOCH : 70, AVG_MSE : 555.8164
EPOCH : 70, AVG_MSE : 314.4113
EPOCH : 80, AVG_MSE : 522.2116
EPOCH : 80, AVG_MSE : 298.5041
EPOCH : 90, AVG_MSE : 486.1060
EPOCH : 90, AVG_MSE : 272.4496
EPOCH : 100, AVG_MSE : 448.3581
EPOCH : 100, AVG_MSE : 243.8236
EPOCH : 110, AVG_MSE : 412.2804
EPOCH : 110, AVG_MSE : 214.2014
EPOCH : 120, AVG_MSE : 376.2774
EPOCH : 120, AVG_MSE : 200.1546
EPOCH : 130, AVG_MSE : 343.6897
EPOCH : 130, AVG_MSE : 176.6292
EPOCH : 140, AVG_MSE : 311.3741
EPOCH : 140, AVG_MSE : 153.7593
EPOCH : 150, AVG_MSE : 281.3922
EPOCH : 150, AVG_MSE : 142.5377
EPOCH : 160, AVG_MSE : 253.6171
EPOCH : 160, AVG_MSE : 118

<IPython.core.display.Javascript object>

Saving learning curve...


In [10]:
_inp, _target = mydataset['test'][3]['keypoints_seq'][None,:,:].to(device), mydataset['test'][3]['targets'][None,:].to(device)

In [11]:
net.eval()
_pred = net(_inp)

print(f'_pred : {_pred}, _target : {_target}')

_pred : tensor([[17.1628]], device='cuda:0', grad_fn=<MeanBackward0>), _target : tensor([[8.3333]], device='cuda:0')


# Random Forest

In [12]:
X = {'train': [], 'test': []}
y = {'train': [], 'test': []}

for phase in ['train', 'test']:
    i = 0
    while True:
        try:
            X[phase].append(mydataset[phase][i]['keypoints_seq'].numpy().flatten())
            y[phase].append(mydataset[phase][i]['targets'].numpy())
            i += 1
        except:
            break

In [13]:
from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor(max_depth=2, n_estimators=50, random_state=0)  
X_train, y_train, X_test, y_test = np.asanyarray(X['train']), np.asanyarray(y['train']).flatten(), \
                                    np.asanyarray(X['test']), np.asanyarray(y['test']).flatten()

regressor.fit(X_train, y_train)  
y_pred = regressor.predict(X_test)  

In [14]:
from sklearn import metrics

print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))  
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))  

Mean Absolute Error: 6.657898356658984
Mean Squared Error: 60.33751950677265
Root Mean Squared Error: 7.767722929325727
