# vDataFrame.get_dummies

In [None]:
vDataFrame.get_dummies(columns: list = [],
                       max_cardinality: int = 12, 
                       prefix_sep: str = "_", 
                       drop_first: bool = True, 
                       use_numbers_as_suffix: bool = False)

Encodes the vcolumns using the One-Hot Encoding algorithm.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">columns</div></td> <td><div class="type">list</div></td> <td><div class = "yes">&#10003;</div></td> <td>List of the vcolumns used to train the One-Hot Encoding model. If empty, only the vcolumns having a cardinality lower than 'max_cardinality' will be used.</td> </tr>
    <tr> <td><div class="param_name">max_cardinality</div></td> <td><div class="type">int</div></td> <td><div class = "yes">&#10003;</div></td> <td>Cardinality threshold used to determine if the vcolumn will be taken into account during the encoding. This parameter is used only if the parameter 'columns' is empty.</td> </tr>
    <tr> <td><div class="param_name">prefix_sep</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Prefix delimiter of the dummies.</td> </tr>
    <tr> <td><div class="param_name">drop_first</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>Drops the first dummy to avoid the creation of correlated features.</td> </tr>
    <tr> <td><div class="param_name">use_numbers_as_suffix</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>Uses numbers as suffixes instead of the vcolumns' categories.</td> </tr>
</table>

### Returns

<b>vDataFrame</b> : self

### Example

In [34]:
from vertica_ml_python import vDataFrame

# Restrict the churn relation to the five vcolumns used throughout the examples.
kept_vcolumns = ["InternetService", "PaymentMethod", "gender", "Contract", "churn"]
churn = vDataFrame("public.churn").select(kept_vcolumns)
print(churn)

0,1,2,3,4,5
,InternetService,PaymentMethod,gender,Contract,Churn
0.0,DSL,Mailed check,Female,One year,False
1.0,DSL,Mailed check,Male,Month-to-month,False
2.0,Fiber optic,Electronic check,Male,Month-to-month,True
3.0,Fiber optic,Electronic check,Male,Month-to-month,True
4.0,Fiber optic,Mailed check,Female,Month-to-month,True
,...,...,...,...,...


<object>  Name: churn, Number of rows: 7043, Number of columns: 5


In [33]:
# Default call: 'columns' is empty, so the vcolumns to encode are picked with
# 'max_cardinality' (default 12). The method returns self, i.e. the dummies
# are added to `churn` itself.
churn.get_dummies()

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,InternetService,PaymentMethod,gender,Contract,Churn,InternetService_DSL,InternetService_Fiber optic,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,gender_Female,Contract_Month-to-month,Contract_One year,Churn_False
0.0,DSL,Mailed check,Female,One year,False,1,0,0,0,0,1,0,1,1
1.0,DSL,Mailed check,Male,Month-to-month,False,1,0,0,0,0,0,1,0,1
2.0,Fiber optic,Electronic check,Male,Month-to-month,True,0,1,0,0,1,0,1,0,0
3.0,Fiber optic,Electronic check,Male,Month-to-month,True,0,1,0,0,1,0,1,0,0
4.0,Fiber optic,Mailed check,Female,Month-to-month,True,0,1,0,0,0,1,1,0,0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: churn, Number of rows: 7043, Number of columns: 14

In [35]:
# Number as suffix
# get_dummies returns self, so the previous example already added its dummies
# to `churn`. Rebuild the 5-column selection first so that this cell reproduces
# the output below when the notebook is run from top to bottom.
churn = vDataFrame("public.churn").select(["InternetService", "PaymentMethod", "gender", "Contract", "churn"])
churn.get_dummies(use_numbers_as_suffix = True)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,InternetService,PaymentMethod,gender,Contract,Churn,InternetService_0,InternetService_1,PaymentMethod_0,PaymentMethod_1,PaymentMethod_2,gender_0,Contract_0,Contract_1,Churn_0
0.0,DSL,Mailed check,Female,One year,False,1,0,0,0,0,1,0,1,1
1.0,DSL,Mailed check,Male,Month-to-month,False,1,0,0,0,0,0,1,0,1
2.0,Fiber optic,Electronic check,Male,Month-to-month,True,0,1,0,0,1,0,1,0,0
3.0,Fiber optic,Electronic check,Male,Month-to-month,True,0,1,0,0,1,0,1,0,0
4.0,Fiber optic,Mailed check,Female,Month-to-month,True,0,1,0,0,0,1,1,0,0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: churn, Number of rows: 7043, Number of columns: 14

### See Also

<table id="seealso">
    <tr><td><a href="../../vcolumn-methods/decode">vDataFrame[].decode</a></td> <td>Encodes the vcolumn using a user-defined encoding.</td></tr>
    <tr><td><a href="../../vcolumn-methods/discretize">vDataFrame[].discretize</a></td> <td>Discretizes the vcolumn.</td></tr>
    <tr><td><a href="../../vcolumn-methods/get_dummies">vDataFrame[].get_dummies</a></td> <td>Computes the vcolumns resulting from One-Hot Encoding.</td></tr>
    <tr><td><a href="../../vcolumn-methods/label_encode">vDataFrame[].label_encode</a></td> <td>Encodes the vcolumn using the Label Encoding.</td></tr>
    <tr><td><a href="../../vcolumn-methods/mean_encode">vDataFrame[].mean_encode</a></td> <td>Encodes the vcolumn using the Mean Encoding of a response.</td></tr>
</table>