# vDataFrame[].drop_outliers

In [None]:
vDataFrame[].drop_outliers(threshold: float = 4.0,
                           use_threshold: bool = True, 
                           alpha: float = 0.05)

Drops the rows in which the vcolumn value is an outlier.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">threshold</div></td> <td><div class="type">float</div></td> <td><div class = "yes">&#10003;</div></td> <td>Uses the Gaussian distribution to define the outliers. After normalizing the data (Z-Score), a record is considered an outlier if its absolute value is greater than the threshold.</td> </tr>
    <tr> <td><div class="param_name">use_threshold</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>If set to True, uses the 'threshold' parameter instead of the 'alpha' parameter.</td> </tr>
    <tr> <td><div class="param_name">alpha</div></td> <td><div class="type">float</div></td> <td><div class = "yes">&#10003;</div></td> <td>Number representing the outliers threshold, used when 'use_threshold' is set to False. Values less than quantile(alpha) or greater than quantile(1-alpha) will be dropped.</td> </tr>
</table>

### Returns

<b>vDataFrame</b> : self.parent

### Example

In [9]:
# Load the Titanic dataset as a vDataFrame and print a preview of it
# (the summary line below the preview reports the row and column counts).
from vertica_ml_python.learn.datasets import load_titanic
titanic = load_titanic()
print(titanic)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,fare,sex,body,pclass,age,name,cabin,parch,survived,boat,ticket,embarked,home.dest,sibsp
0.0,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
1.0,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
2.0,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
3.0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0,0,,112050,S,"Belfast, NI",0
4.0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0,0,,PC 17609,C,"Montevideo, Uruguay",0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14


In [10]:
# Drop the rows whose 'fare' has an absolute Z-score greater than 3.0
# (use_threshold keeps its default value, so 'threshold' is the criterion).
titanic["fare"].drop_outliers(threshold = 3.0)

38 element(s) was/were filtered


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,fare,sex,body,pclass,age,name,cabin,parch,survived,boat,ticket,embarked,home.dest,sibsp
0.0,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
1.0,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
2.0,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
3.0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0,0,,112050,S,"Belfast, NI",0
4.0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0,0,,PC 17609,C,"Montevideo, Uruguay",0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1196, Number of columns: 14

In [11]:
# Drop the rows whose 'age' is less than the first decile or greater than
# the last decile (alpha = 0.1 -> quantile(0.1) and quantile(0.9)).
# NOTE: the row counts in the outputs (1196 -> 783) show that this filter is
# applied on top of the previous 'fare' filter.
titanic["age"].drop_outliers(use_threshold = False, alpha = 0.1)

413 element(s) was/were filtered


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,fare,sex,body,pclass,age,name,cabin,parch,survived,boat,ticket,embarked,home.dest,sibsp
0.0,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
1.0,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
2.0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0,0,,112050,S,"Belfast, NI",0
3.0,75.24170,male,,1,36.000,"Beattie, Mr. Thomson",C6,0,0,A,13050,C,"Winnipeg, MN",0
4.0,26.00000,male,148,1,25.000,"Birnbaum, Mr. Jakob",,0,0,,13905,C,"San Francisco, CA",0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 783, Number of columns: 14

### See Also

<table id="seealso">
    <tr><td><a href="../fill_outliers">vDataFrame[].fill_outliers</a></td> <td>Fills the vcolumn outliers.</td></tr>
    <tr><td><a href="../../main-methods/outliers">vDataFrame.outliers</a></td> <td>Adds a new vcolumn labeled with 0 and 1 (1 meaning global outlier).</td></tr>
</table>