# Train, Validate and Test

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import cv2
import dlib

In [37]:
def train_validate_test_split(df, train_set=0.65, validate_set=0.25, seed=None):
    """Randomly partition the rows of ``df`` into train, validation and test subsets.

    Rows are shuffled once and then cut into three consecutive slices, so the
    subsets are disjoint and together contain every row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose rows are split. Rows are selected by position, so the
        index may hold any labels (it does not have to be ``0..n-1``).
    train_set : float, default 0.65
        Fraction of rows placed in the training subset.
    validate_set : float, default 0.25
        Fraction of rows placed in the validation subset. Whatever remains
        after the training and validation slices becomes the test subset.
    seed : int, array-like of ints or None, default None
        Seed for the shuffle. The same seed always yields the same split;
        ``None`` draws a fresh, non-reproducible seed.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, validate, test)``, each keeping the original index labels.

    Notes
    -----
    Subset sizes are truncated towards zero (``int(fraction * n_rows)``), so
    any rounding remainder ends up in the test subset. If the two fractions
    sum to more than 1, the validation slice is cut short and the test subset
    is empty.
    """
    # A private generator keeps the caller's global NumPy random state intact;
    # for a given seed it produces the same stream as np.random.seed(seed) would.
    rng = np.random.RandomState(seed)

    n_rows = len(df)
    # Shuffle row *positions*, not index labels: .iloc below is positional, so
    # permuting labels would break on any index other than 0..n-1.
    shuffled_positions = rng.permutation(n_rows)

    train_end = int(train_set * n_rows)
    validate_end = int(validate_set * n_rows) + train_end

    train = df.iloc[shuffled_positions[:train_end]]
    validate = df.iloc[shuffled_positions[train_end:validate_end]]
    test = df.iloc[shuffled_positions[validate_end:]]
    return train, validate, test

In [38]:
# Seed the global NumPy generator so the demo frame is identical on every run.
np.random.seed([3, 30000])

# 30 rows x 10 columns of random samples, with columns labelled A through J.
demo_values = np.random.rand(30, 10)
demo_columns = list("ABCDEFGHIJ")
df = pd.DataFrame(demo_values, columns=demo_columns)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,0.356062,0.972819,0.127206,0.059672,0.304422,0.098176,0.534043,0.259867,0.519322,0.672175
1,0.123008,0.411324,0.232764,0.987009,0.551233,0.278758,0.539014,0.038833,0.428988,0.62117
2,0.325049,0.204925,0.48153,0.226049,0.991371,0.704538,0.323845,0.032657,0.778413,0.390295
3,0.649613,0.354221,0.78642,0.138962,0.543617,0.263715,0.019357,0.187854,0.274841,0.01911
4,0.033685,0.770225,0.11104,0.602867,0.9742,0.607366,0.79979,0.482907,0.066861,0.821987
5,0.392121,0.242875,0.050188,0.290004,0.991458,0.271394,0.538572,0.601179,0.709812,0.540433
6,0.632539,0.498768,0.92466,0.335474,0.452512,0.631555,0.004602,0.619657,0.388716,0.343246
7,0.212397,0.667286,0.749431,0.930457,0.977694,0.894602,0.034282,0.478286,0.429454,0.134967
8,0.351397,0.180367,0.738633,0.0624,0.967385,0.512193,0.780293,0.270865,0.068433,0.548205
9,0.231236,0.336135,0.265208,0.649615,0.362679,0.469544,0.42528,0.862093,0.300196,0.016413


# Training set

In [34]:
# A fixed seed makes the split, and every table shown below, reproducible
# across kernel restarts; without it each run produces a different partition.
SPLIT_SEED = 42
train, validate, test = train_validate_test_split(df, seed=SPLIT_SEED)

# The three subsets must partition the frame: no row lost, none duplicated.
assert len(train) + len(validate) + len(test) == len(df)

train

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
26,0.946306,0.627498,0.986997,0.491455,0.169515,0.393517,0.225337,0.909298,0.514153,0.306531
22,0.760082,0.245039,0.007045,0.641896,0.135339,0.360389,0.362649,0.727421,0.355682,0.669057
17,0.562799,0.434189,0.910732,0.896135,0.694787,0.213491,0.182328,0.50828,0.604616,0.485409
23,0.796716,0.284385,0.460068,0.674684,0.646419,0.235929,0.695303,0.914632,0.019687,0.582629
21,0.151029,0.686263,0.459787,0.813916,0.847332,0.081672,0.671289,0.435203,0.774258,0.719451
18,0.564887,0.650185,0.050166,0.816718,0.995958,0.488206,0.62783,0.498282,0.50508,0.484644
20,0.030578,0.349024,0.524954,0.316168,0.988404,0.760005,0.291313,0.875176,0.911333,0.446213
24,0.743633,0.152152,0.888558,0.338995,0.19105,0.095361,0.691593,0.995834,0.093918,0.975088
2,0.325049,0.204925,0.48153,0.226049,0.991371,0.704538,0.323845,0.032657,0.778413,0.390295
13,0.757713,0.771774,0.974347,0.712754,0.511195,0.183454,0.290975,0.41531,0.232968,0.98721


# Validation set

In [35]:
# Display the validation subset produced by the split cell above.
validate

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
28,0.759422,0.789671,0.422417,0.992893,0.413756,0.152996,0.874635,0.27187,0.510598,0.449328
19,0.284056,0.114819,0.453894,0.282198,0.322593,0.78732,0.902087,0.453151,0.544505,0.212847
5,0.392121,0.242875,0.050188,0.290004,0.991458,0.271394,0.538572,0.601179,0.709812,0.540433
3,0.649613,0.354221,0.78642,0.138962,0.543617,0.263715,0.019357,0.187854,0.274841,0.01911
12,0.322427,0.084969,0.212872,0.146675,0.250443,0.227235,0.885838,0.22598,0.65046,0.527838
8,0.351397,0.180367,0.738633,0.0624,0.967385,0.512193,0.780293,0.270865,0.068433,0.548205
16,0.123577,0.775841,0.806348,0.568887,0.193866,0.272568,0.493805,0.33524,0.222863,0.729882


# Testing set

In [36]:
# Display the test subset: the rows left over after the train and validation slices.
test

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
6,0.632539,0.498768,0.92466,0.335474,0.452512,0.631555,0.004602,0.619657,0.388716,0.343246
25,0.285137,0.843791,0.362935,0.989206,0.044064,0.582896,0.034247,0.955109,0.504912,0.142034
4,0.033685,0.770225,0.11104,0.602867,0.9742,0.607366,0.79979,0.482907,0.066861,0.821987
29,0.819412,0.720154,0.881496,0.692804,0.26216,0.550339,0.603688,0.158471,0.766021,0.875905
