# train_test_split

In [None]:
train_test_split(input_relation: str, 
                 cursor = None, 
                 test_size: float = 0.33, 
                 schema_writing: str = "")

Creates a temporary table and two views that can be used to evaluate a model. The table contains all the data of the input relation plus a boolean column, test, which indicates whether each row belongs to the test set or to the training set.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">input_relation</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>Input Relation.</td> </tr>
    <tr> <td><div class="param_name">cursor</div></td> <td><div class="type">DBcursor</div></td> <td><div class = "yes">&#10003;</div></td> <td>Vertica DB cursor.</td> </tr>
    <tr> <td><div class="param_name">test_size</div></td> <td><div class="type">float</div></td> <td><div class = "yes">&#10003;</div></td> <td>Proportion of the input relation assigned to the test set; the remaining rows form the training set.</td> </tr>
    <tr> <td><div class="param_name">schema_writing</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Schema used to write the main relation.</td> </tr>
</table>

### Returns

<b>tuple</b> : (name of the train view, name of the test view)

### Example

In [10]:
from vertica_ml_python.learn.model_selection import train_test_split
train_test_split("public.iris")

('"public".VERTICA_ML_PYTHON_SPLIT_iris_67_TRAIN',
 '"public".VERTICA_ML_PYTHON_SPLIT_iris_33_TEST')

In [11]:
from vertica_ml_python import vDataFrame
vDataFrame('"public".VERTICA_ML_PYTHON_SPLIT_iris_67_TRAIN')

0,1,2,3,4,5,6
,PetalLengthCm,SepalWidthCm,test,SepalLengthCm,Species,PetalWidthCm
0.0,1.10,3.00,False,4.30,Iris-setosa,0.10
1.0,1.40,2.90,False,4.40,Iris-setosa,0.20
2.0,1.30,3.00,False,4.40,Iris-setosa,0.20
3.0,1.30,3.20,False,4.40,Iris-setosa,0.20
4.0,1.30,2.30,False,4.50,Iris-setosa,0.30
,...,...,...,...,...,...


<object>  Name: VERTICA_ML_PYTHON_SPLIT_iris_67_TRAIN, Number of rows: 110, Number of columns: 6

In [12]:
from vertica_ml_python import vDataFrame
vDataFrame('"public".VERTICA_ML_PYTHON_SPLIT_iris_33_TEST')

0,1,2,3,4,5,6
,test,SepalLengthCm,Species,PetalWidthCm,PetalLengthCm,SepalWidthCm
0.0,True,4.60,Iris-setosa,0.30,1.40,3.40
1.0,True,4.70,Iris-setosa,0.20,1.60,3.20
2.0,True,4.80,Iris-setosa,0.20,1.60,3.10
3.0,True,4.90,Iris-versicolor,1.00,3.30,2.40
4.0,True,4.90,Iris-virginica,1.70,4.50,2.50
,...,...,...,...,...,...


<object>  Name: VERTICA_ML_PYTHON_SPLIT_iris_33_TEST, Number of rows: 40, Number of columns: 6