# Balance

In [None]:
# Call signature of Balance, shown for reference (types and defaults as listed in the parameter table).
Balance(name: str,  # name of the view to create
        input_relation: str,  # relation the view is built from
        y: str,  # response column
        cursor = None,  # optional Vertica DB cursor
        method: str = "hybrid",  # balancing method: "hybrid", "over" or "under"
        ratio: float = 0.5)  # desired majority/minority ratio; no effect with "hybrid"

Creates a view of the input relation in which the classes of the response column (<code>y</code>) are balanced.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">name</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>Name of the view.</td> </tr>
    <tr> <td><div class="param_name">input_relation</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>Relation used to create the view.</td> </tr>
    <tr> <td><div class="param_name">y</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>Response column.</td> </tr>
    <tr> <td><div class="param_name">cursor</div></td> <td><div class="type">DBcursor</div></td> <td><div class = "yes">&#10003;</div></td> <td>Vertica DB cursor.</td> </tr>
    <tr> <td><div class="param_name">method</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Method used to do the balancing. <br><ul><li><b>hybrid</b> : Performs over-sampling and under-sampling on different classes so each class is equally represented.</li><li><b>over</b> : Over-samples all classes except the majority class, towards the majority class's cardinality.</li> <li><b>under</b> : Under-samples all classes except the minority class, towards the minority class's cardinality.</li></ul></td> </tr>
    <tr> <td><div class="param_name">ratio</div></td> <td><div class="type">float</div></td> <td><div class = "yes">&#10003;</div></td> <td>The desired ratio between the majority class and the minority class. This value has no effect when used with balance method 'hybrid'.</td> </tr>
</table>

### Returns

<b>vDataFrame</b> : vDataFrame of the created view

### Example

In [32]:
# Load the Titanic example dataset through the library's dataset helper
# and print a preview of the resulting object.
from vertica_ml_python.learn.datasets import load_titanic
titanic = load_titanic()
print(titanic)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,survived,boat,ticket,embarked,home.dest,sibsp,fare,sex,body,pclass,age,name,cabin,parch
0.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2
1.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2
2.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2
3.0,0,,112050,S,"Belfast, NI",0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0
4.0,0,,PC 17609,C,"Montevideo, Uruguay",0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14


In [35]:
from vertica_ml_python.learn.preprocessing import Balance

# Under-sample "public.titanic" on the response column "survived", asking for
# a majority/minority ratio of 1. No cursor is passed, so the default (None)
# applies. The call is the cell's last expression, so its return value is
# what the notebook displays.
Balance(
    name="public.titanic_balanced",
    input_relation="public.titanic",
    y="survived",
    method="under",
    ratio=1,
)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,survived,boat,ticket,embarked,home.dest,sibsp,fare,sex,body,pclass,age,name,cabin,parch
0.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2
1.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2
2.0,0,,PC 17757,C,"New York, NY",1,227.52500,male,124,1,47.000,"Astor, Col. John Jacob",C62 C64,0
3.0,0,,PC 17318,S,"New York, NY",0,25.92500,male,,1,,"Baumann, Mr. John D",,0
4.0,0,,PC 17558,C,"Montreal, PQ",0,247.52080,male,,1,24.000,"Baxter, Mr. Quigg Edmond",B58 B60,1
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic_balanced, Number of rows: 914, Number of columns: 14