# classifier.predict

<b>Definition for Multiclass Classifier:</b>

In [None]:
classifier.predict(vdf,
                   name: str = "",
                   cutoff: float = -1,
                   pos_label = None)

<b>Definition for Binary Classifier:</b>

In [None]:
classifier.predict(vdf,
                   name: str = "",
                   cutoff: float = -1)

Predicts using the input relation. The prediction is added to the input vDataFrame as a new vcolumn.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">vdf</div></td> <td><div class="type">vDataFrame </div></td> <td><div class = "no">&#10060;</div></td> <td>Object used to insert the prediction as a vcolumn.</td> </tr>
    <tr> <td><div class="param_name">name</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Name of the added vcolumn. If empty, a name will be generated.</td> </tr>
    <tr> <td><div class="param_name">cutoff</div></td> <td><div class="type">float</div></td> <td><div class = "yes">&#10003;</div></td> <td>Probability cutoff used to accept the tested category as the prediction. If the value is not between 0 and 1, the class probability is returned instead.</td> </tr>
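    <tr> <td><div class="param_name">pos_label</div></td> <td><div class="type">int / float / str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Label of the class to test (multiclass classifiers only). If it is not specified, the global prediction (the predicted class) is returned.</td> </tr>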
</table>

### Returns

<b>vDataFrame</b> : the input object.


### Example

In [30]:
# Build a vDataFrame on the "public.iris" relation and print a preview of it.
from vertica_ml_python import vDataFrame
iris = vDataFrame("public.iris")
print(iris)

0,1,2,3,4,5
,SepalLengthCm,Species,PetalWidthCm,PetalLengthCm,SepalWidthCm
0.0,4.30,Iris-setosa,0.10,1.10,3.00
1.0,4.40,Iris-setosa,0.20,1.40,2.90
2.0,4.40,Iris-setosa,0.20,1.30,3.00
3.0,4.40,Iris-setosa,0.20,1.30,3.20
4.0,4.50,Iris-setosa,0.30,1.30,2.30
,...,...,...,...,...


<object>  Name: iris, Number of rows: 150, Number of columns: 5


In [32]:
# Multiclass Classification
from vertica_ml_python.learn.ensemble import RandomForestClassifier
# Random forest classifier named "public.RF_iris".
model = RandomForestClassifier(name = "public.RF_iris",
                               n_estimators = 20,
                               max_features = "auto",
                               max_leaf_nodes = 32, 
                               sample = 0.7,
                               max_depth = 3,
                               min_samples_leaf = 5,
                               min_info_gain = 0.0,
                               nbins = 32)
# Train on the two petal measurements to predict the "Species" column.
model.fit("public.iris", ["PetalLengthCm", "PetalWidthCm"], "Species")
# Global Prediction: no pos_label is given, so the predicted class label
# is added to iris as the vcolumn "iris_prediction".
model.predict(iris, name = "iris_prediction")

0,1,2,3,4,5,6
,SepalLengthCm,Species,PetalWidthCm,PetalLengthCm,SepalWidthCm,iris_prediction
0.0,4.30,Iris-setosa,0.10,1.10,3.00,Iris-setosa
1.0,4.40,Iris-setosa,0.20,1.40,2.90,Iris-setosa
2.0,4.40,Iris-setosa,0.20,1.30,3.00,Iris-setosa
3.0,4.40,Iris-setosa,0.20,1.30,3.20,Iris-setosa
4.0,4.50,Iris-setosa,0.30,1.30,2.30,Iris-setosa
,...,...,...,...,...,...


<object>  Name: iris, Number of rows: 150, Number of columns: 6

In [33]:
# Prediction of class Iris-versicolor: pos_label selects the tested class and
# the result is added to iris as the vcolumn "iris_versicolor_prediction".
model.predict(iris, 
              name = "iris_versicolor_prediction", 
              pos_label = "Iris-versicolor")

0,1,2,3,4,5,6,7
,SepalLengthCm,Species,PetalWidthCm,PetalLengthCm,SepalWidthCm,iris_prediction,iris_versicolor_prediction
0.0,4.30,Iris-setosa,0.10,1.10,3.00,Iris-setosa,0
1.0,4.40,Iris-setosa,0.20,1.40,2.90,Iris-setosa,0
2.0,4.40,Iris-setosa,0.20,1.30,3.00,Iris-setosa,0
3.0,4.40,Iris-setosa,0.20,1.30,3.20,Iris-setosa,0
4.0,4.50,Iris-setosa,0.30,1.30,2.30,Iris-setosa,0
,...,...,...,...,...,...,...


<object>  Name: iris, Number of rows: 150, Number of columns: 7

In [31]:
# Build a vDataFrame on the "public.titanic" relation and print a preview of it.
titanic = vDataFrame("public.titanic")
print(titanic)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,survived,boat,ticket,embarked,home.dest,sibsp,fare,sex,body,pclass,age,name,cabin,parch
0.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2
1.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2
2.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2
3.0,0,,112050,S,"Belfast, NI",0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0
4.0,0,,PC 17609,C,"Montevideo, Uruguay",0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14


In [34]:
# Binary Classification
# RandomForestClassifier is the class imported in the multiclass example cell;
# `model` is rebound here to a new classifier named "public.RF_titanic".
model = RandomForestClassifier(name = "public.RF_titanic",
                               n_estimators = 20,
                               max_features = "auto",
                               max_leaf_nodes = 32, 
                               sample = 0.7,
                               max_depth = 3,
                               min_samples_leaf = 5,
                               min_info_gain = 0.0,
                               nbins = 32)
# Train on age, fare and sex to predict the "survived" column.
model.fit("public.titanic", ["age", "fare", "sex"], "survived")
# Probability of Positive class 1: cutoff keeps its default (-1), which is not
# between 0 and 1, so the class probability is added as "survived_prediction".
model.predict(titanic, name = "survived_prediction")

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
,survived,boat,ticket,embarked,home.dest,sibsp,fare,sex,body,pclass,age,name,cabin,parch,survived_prediction
0.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2,0.897084
1.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2,0.253554
2.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2,0.929709
3.0,0,,112050,S,"Belfast, NI",0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0,0.151686
4.0,0,,PC 17609,C,"Montevideo, Uruguay",0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0,0.267721
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 15