In [11]:
import numpy as np
import pandas as pd
import os

# Walk the input directory tree and print the full path of every file found,
# as a quick check of which data files are available to the notebook.
for dirname, _, filenames in os.walk('../kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

../kaggle/input\DogInfo.xlsx
../kaggle/input\DogMoveData.csv


In [17]:
import matplotlib.pyplot as plt
import seaborn as sns
import sys, os
import random
import gc
%matplotlib inline

from scipy import stats
from scipy.stats import norm, skew
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, GroupKFold, TimeSeriesSplit, GridSearchCV
from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier, RandomForestRegressor, RandomForestClassifier
import lightgbm as lgb
import xgboost as xgb
from sklearn.metrics import *

# Silence every warning category, but only when sys.warnoptions is empty, so
# that explicitly requested warning options (e.g. -W flags) stay in effect.
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

from IPython.display import display

# Raise pandas' display limits so wide/long frames and long cell values are
# shown with less truncation. Every option is addressed by its fully qualified
# key: abbreviated keys such as 'max_colwidth' are resolved by pattern matching
# and can become ambiguous if pandas adds a similarly named option.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 600)


def set_seed(seed=2021):
    """Seed the global random number generators for reproducible runs.

    Seeds Python's ``random`` module and NumPy's legacy global generator, then
    stores the seed (as a string) in the ``PYTHONHASHSEED`` environment variable.

    Note: ``PYTHONHASHSEED`` is read by the interpreter at start-up, so setting
    it here does not change string hashing in the already-running kernel; it is
    only visible to processes that inherit this environment.

    Args:
        seed: Value handed to both seeding functions and exported to the
            environment. Defaults to 2021.
    """
    # Order matters only if a seeder raises: stdlib first, then NumPy.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    os.environ.update(PYTHONHASHSEED=str(seed))
set_seed()


def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the smallest dtype that holds them.

    Columns whose dtype is one of int16/int32/int64/float16/float32/float64 are
    inspected; every other column (strings, int8, unsigned ints, categoricals,
    ...) is left untouched. Integer columns are cast to the narrowest of
    int8/int16/int32/int64 whose range contains the column's min and max
    (limits inclusive). Float columns are cast to float16 or float32 when the
    min and max fit, otherwise to float64.

    Caveats:
        * ``df`` is modified in place; the same object is returned.
        * The float check only looks at the value *range*. float16 and float32
          carry fewer significant digits, so downcast values may be rounded.

    Args:
        df: DataFrame to shrink (mutated in place).
        verbose: When true, print the final memory usage in Mb and the
            percentage saved.

    Returns:
        The same ``df`` object, with downcast columns.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Candidate target dtypes, narrowest first.
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: a value equal to the dtype's limit still fits.
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            # No candidate matches when min/max are NaN (empty column); the
            # column then keeps its current dtype.
        else:
            # Fall back to float64 when neither narrower type covers the range
            # (this also covers NaN/inf min or max, whose comparisons fail).
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the percentage against a zero-byte starting footprint.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df


In [19]:
# Read the dog movement CSV into a DataFrame, print its (rows, columns) shape,
# and display the first 20 rows as the cell output.
data = pd.read_csv('../kaggle/input/DogMoveData.csv')
print(data.shape)
data.head(20)

(10611068, 20)


Unnamed: 0,DogID,TestNum,t_sec,ABack_x,ABack_y,ABack_z,ANeck_x,ANeck_y,ANeck_z,GBack_x,GBack_y,GBack_z,GNeck_x,GNeck_y,GNeck_z,Task,Behavior_1,Behavior_2,Behavior_3,PointEvent
0,16,1,0.0,0.041504,0.938965,-0.015137,-0.067871,-0.510254,-0.93457,-17.639161,-22.766115,7.44629,-7.934571,6.347657,13.427735,<undefined>,<undefined>,<undefined>,<undefined>,<undefined>
1,16,1,0.01,0.041992,0.941895,-0.02002,-0.128906,-0.494141,-0.913086,-15.075685,-11.413575,4.821778,-3.90625,4.394532,16.540528,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
2,16,1,0.02,0.040527,0.939453,-0.004395,-0.158691,-0.480469,-0.911133,-12.207032,-0.12207,2.807617,-0.488281,-1.953125,26.794435,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
3,16,1,0.03,0.021484,0.946289,0.007813,-0.12207,-0.486816,-0.880371,-9.46045,7.995606,1.586914,1.159668,-5.67627,38.08594,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
4,16,1,0.04,-0.000977,0.951172,0.033691,-0.053711,-0.5,-0.807129,-8.361817,14.587403,-1.037598,4.577637,4.089356,41.503909,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
5,16,1,0.05,-0.006348,0.955078,0.038574,0.019043,-0.463867,-0.749023,-9.826661,19.653322,-5.493164,12.939454,20.81299,32.53174,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
6,16,1,0.06,0.001953,0.959473,0.035156,0.045898,-0.376953,-0.78125,-11.657716,15.869142,-7.751465,22.70508,29.968264,14.038087,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
7,16,1,0.07,0.013672,0.958984,0.030273,0.028809,-0.307617,-0.941406,-13.671876,5.859375,-8.361817,23.925783,19.470216,-5.920411,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
8,16,1,0.08,0.02832,0.941895,0.04541,-0.014648,-0.349121,-1.100098,-16.113282,-1.220703,-8.483887,12.084962,-8.300782,-20.202638,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
9,16,1,0.09,0.028809,0.934082,0.06543,-0.061035,-0.480469,-1.129395,-18.859864,-0.183105,-7.8125,-3.845215,-37.475588,-24.475099,<undefined>,Synchronization,<undefined>,<undefined>,<undefined>
