# vDataFrame[].fillna

In [None]:
vDataFrame[].fillna(val = None,
                    method: str = "auto",
                    by: list = [],
                    order_by: list = [],
                    print_info: bool = True)

Fills the missing elements of the vcolumn using specific rules.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">val</div></td> <td><div class="type">int / float / str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Value used to impute the vcolumn.</td> </tr>
    <tr> <td><div class="param_name">method</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Method used to impute the missing values.<br>
                                                    <ul>
                                                        <li><b>auto :</b> Mean for the numerical and Mode for the categorical vcolumns.</li>
                                                        <li><b>bfill :</b> Back Propagation of the next element (Constant Interpolation).</li>
                                                        <li><b>ffill :</b> Forward Propagation of the previous element (Constant Interpolation).</li>
                                                        <li><b>mean :</b> Average.</li>
                                                        <li><b>median :</b> Median.</li>
                                                        <li><b>mode :</b> Mode (most frequent element).</li>
                                                        <li><b>0ifnull :</b> 0 when the vcolumn is null, 1 otherwise.</li></ul></td> </tr>
    <tr> <td><div class="param_name">by</div></td> <td><div class="type">list</div></td> <td><div class = "yes">&#10003;</div></td> <td>vcolumns used in the partition.</td> </tr>
    <tr> <td><div class="param_name">order_by</div></td> <td><div class="type">list</div></td> <td><div class = "yes">&#10003;</div></td> <td>List of the vcolumns used to sort the data when using time series (TS) methods such as bfill and ffill.</td> </tr>
    <tr> <td><div class="param_name">print_info</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>If set to True, displays all the filling information.</td> </tr>
</table>

### Returns

<b>vDataFrame</b> : self.parent

### Example

In [48]:
# Load the Titanic example dataset provided by vertica_ml_python's
# learn.datasets module and print a preview of the resulting object.
from vertica_ml_python.learn.datasets import load_titanic
titanic = load_titanic()
print(titanic)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,survived,boat,ticket,embarked,home.dest,sibsp,fare,sex,body,pclass,age,name,cabin,parch
0.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2
1.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2
2.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2
3.0,0,,112050,S,"Belfast, NI",0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0
4.0,0,,PC 17609,C,"Montevideo, Uruguay",0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14


In [46]:
# Show the 'age' vcolumn before imputation.
print(titanic["age"])
# Impute the missing ages with the average age computed within each
# (pclass, sex) partition.
# NOTE(review): "avg" is not in the method list documented in the
# Parameters table (which names "mean"); presumably an accepted alias --
# confirm.
titanic["age"].fillna(method = "avg", by = ["pclass", "sex"])

0,1
,age
0.0,2.000
1.0,30.000
2.0,25.000
3.0,39.000
4.0,71.000
,...


<object>  Name: age, Number of rows: 1234, dtype: numeric(6,3)
237 element(s) was/were filled


0,1
,age
0.0,36.0
1.0,31.0
2.0,21.0
3.0,50.0
4.0,45.0
,...


<object>  Name: age, Number of rows: 1234, dtype: numeric(6,3)

In [49]:
# Show the 'age' vcolumn before imputation.
# NOTE(review): the recorded output reports 237 filled elements, the same
# count as the "avg" example, so 'titanic' was presumably reloaded with
# load_titanic() before this cell was run -- confirm, and reload before
# re-running so that 'age' still contains missing values.
print(titanic["age"])
# Impute the missing ages with the median age computed within each
# (pclass, sex) partition.
titanic["age"].fillna(method = "median", by = ["pclass", "sex"])

0,1
,age
0.0,2.000
1.0,30.000
2.0,25.000
3.0,39.000
4.0,71.000
,...


<object>  Name: age, Number of rows: 1234, dtype: numeric(6,3)
237 element(s) was/were filled


0,1
,age
0.0,36.0
1.0,36.0
2.0,36.0
3.0,36.0
4.0,36.0
,...


<object>  Name: age, Number of rows: 1234, dtype: float

In [50]:
# Show the 'embarked' vcolumn before imputation.
print(titanic["embarked"])
# Impute the missing values with the mode (the most frequent value).
# The trailing ["embarked"] selects the vcolumn from fillna's return value,
# so the imputed column is what the cell outputs.
titanic["embarked"].fillna(method = "mode")["embarked"]

0,1
,embarked
0.0,S
1.0,S
2.0,S
3.0,S
4.0,C
,...


<object>  Name: embarked, Number of rows: 1234, dtype: varchar(20)
2 element(s) was/were filled


0,1
,embarked
0.0,S
1.0,S
2.0,S
3.0,S
4.0,C
,...


<object>  Name: embarked, Number of rows: 1234, dtype: varchar(20)

In [51]:
# Show the 'boat' vcolumn before imputation.
print(titanic["boat"])
# As the missing values form the most frequent category, one idea is to
# merge all the non-null categories into a single one (category 1) and
# to impute the missing values with 0. The '0ifnull' method follows this
# process.
titanic["boat"].fillna(method = "0ifnull")

0,1
,boat
0.0,
1.0,
2.0,
3.0,
4.0,
,...


<object>  Name: boat, Number of rows: 1234, dtype: varchar(100)
795 element(s) was/were filled


0,1
,boat
0.0,0
1.0,0
2.0,0
3.0,0
4.0,0
,...


<object>  Name: boat, Number of rows: 1234, dtype: bool

### See Also

<table id="seealso">
    <tr><td><a href="../dropna">vDataFrame[].dropna</a></td> <td>Drops the vcolumn missing values.</td></tr>
</table>