# vDataFrame[].fill_outliers

In [None]:
vDataFrame[].fill_outliers(method: str = "winsorize",
                           threshold: float = 4.0, 
                           use_threshold: bool = True,
                           alpha: float = 0.05)

Fills the vcolumn's outliers using the input method.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">method</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Method used to fill the vcolumn outliers.<br>
                                                    <ul>
                                                        <li><b>mean :</b> Replaces the upper and lower outliers by their respective average. </li>
                                                        <li><b>null :</b> Replaces the outliers by the NULL value.</li>
                                                        <li><b>winsorize :</b> Clips the vcolumn. If 'use_threshold' is set to False, the lower bound is quantile(alpha) and the upper bound is quantile(1-alpha); otherwise, the bounds are the values corresponding to the lower and upper Z-Scores (-threshold and +threshold).</li>
                                                        </ul></td> </tr>
    <tr> <td><div class="param_name">threshold</div></td> <td><div class="type">float</div></td> <td><div class = "yes">&#10003;</div></td> <td>Uses the Gaussian distribution to define the outliers. After normalizing the data (Z-Score), if the absolute value of the record is greater than the threshold it will be considered as an outlier.</td> </tr>
    <tr> <td><div class="param_name">use_threshold</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>Uses the threshold instead of the 'alpha' parameter.</td> </tr>
    <tr> <td><div class="param_name">alpha</div></td> <td><div class="type">float</div></td> <td><div class = "yes">&#10003;</div></td> <td>Number representing the outliers threshold. Values less than quantile(alpha) or greater than quantile(1-alpha) will be filled.</td> </tr>
</table>

### Returns

<b>vDataFrame</b> : self.parent

### Example

In [99]:
from vertica_ml_python.learn.datasets import load_market
market = load_market().filter("Price < 0.7")
print(market.head(20))

294 element(s) was/were filtered


0,1,2,3
,Form,Price,Name
0.0,Frozen,0.5104657455,Apples
1.0,Frozen,0.537867915537,Apples
2.0,Ready to drink,0.6311325278,Apples
3.0,Fresh,0.5494172928,Bananas
4.0,Fresh,0.566983414531,Bananas
5.0,Fresh green cabbage,0.579208394258,Cabbage
6.0,Fresh green cabbage,0.6238712291,Cabbage
7.0,Fresh,0.520793672,Cantaloupe
8.0,Fresh,0.535873776106,Cantaloupe


<object>  Name: market, Number of rows: 20, Number of columns: 3


In [94]:
# All the outliers (abs(ZSCORE) > 1.5) will be replaced by the NULL values
market["Price"].fill_outliers(method = "null",
                              threshold = 1.5,
                              use_threshold = True)

0,1,2,3
,Form,Price,Name
0.0,Frozen,,Apples
1.0,Frozen,0.537867915537,Apples
2.0,Ready to drink,0.6311325278,Apples
3.0,Fresh,0.5494172928,Bananas
4.0,Fresh,0.566983414531,Bananas
5.0,Fresh green cabbage,0.579208394258,Cabbage
6.0,Fresh green cabbage,0.6238712291,Cabbage
7.0,Fresh,0.520793672,Cantaloupe
8.0,Fresh,0.535873776106,Cantaloupe


<object>  Name: market, Number of rows: 20, Number of columns: 3

In [96]:
# All the outliers (abs(ZSCORE) > 1.5) will be replaced by the lower or
# upper bound, i.e. the values having a ZSCORE of -1.5 and 1.5
market["Price"].fill_outliers(method = "winsorize",
                              threshold = 1.5,
                              use_threshold = True)

0,1,2,3
,Price,Name,Form
0.0,0.5104657455,Apples,Frozen
1.0,0.537867915537,Apples,Frozen
2.0,0.6311325278,Apples,Ready to drink
3.0,0.5494172928,Bananas,Fresh
4.0,0.566983414531,Bananas,Fresh
5.0,0.579208394258,Cabbage,Fresh green cabbage
6.0,0.6238712291,Cabbage,Fresh green cabbage
7.0,0.520793672,Cantaloupe,Fresh
8.0,0.535873776106,Cantaloupe,Fresh


<object>  Name: market, Number of rows: 20, Number of columns: 3

In [100]:
# All the outliers (values > quantile(0.8) or < quantile(0.2)) will be 
# replaced by the nearest of the two quantiles
market["Price"].fill_outliers(method = "winsorize",
                              alpha = 0.2,
                              use_threshold = False)

0,1,2,3
,Form,Price,Name
0.0,Frozen,0.5328577552848,Apples
1.0,Frozen,0.537867915537,Apples
2.0,Ready to drink,0.6311325278,Apples
3.0,Fresh,0.5494172928,Bananas
4.0,Fresh,0.566983414531,Bananas
5.0,Fresh green cabbage,0.579208394258,Cabbage
6.0,Fresh green cabbage,0.6238712291,Cabbage
7.0,Fresh,0.5328577552848,Cantaloupe
8.0,Fresh,0.535873776106,Cantaloupe


<object>  Name: market, Number of rows: 20, Number of columns: 3

### See Also

<table id="seealso">
    <tr><td><a href="../drop_outliers">vDataFrame[].drop_outliers</a></td> <td>Drops the vcolumn outliers.</td></tr>
    <tr><td><a href="../../main-methods/outliers">vDataFrame.outliers</a></td> <td>Computes the vDataFrame Global Outliers.</td></tr>
</table>