# vDataFrame

In [None]:
# Constructor signature, shown for reference only (this cell is not meant to be run).
vDataFrame(input_relation: str,
           cursor = None,
           dsn: str = "",
           usecols: list = [],
           schema: str = "",
           empty: bool = False)

Python object that keeps track of all user modifications in order to generate the correct SQL code. It sends SQL queries to the Vertica DB, which aggregates the data and returns the final result. For each column of the relation, vDataFrame creates a Virtual Column (vcolumn) that stores the column alias and all the user transformations. Thus, vDataFrame allows easy data preparation and exploration without modifying the data.
<br><br>
<center><img src='../../img/vertica-ml-python.png' width="80%"></center>

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">input_relation</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>Relation (View, Table or Temporary Table) used to create the object. To get a specific schema relation, your string must include both relation and schema: 'schema.relation' or '"schema"."relation"'. You can also use the 'schema' parameter to be less ambiguous. In this case input_relation must only be the relation name (it must not include a schema).</td> </tr>
    <tr> <td><div class="param_name">cursor</div></td> <td><div class="type">DBcursor</div></td> <td><div class = "yes">&#10003;</div></td> <td>Vertica DB cursor. <br>For a cursor designed by Vertica, search for vertica_python. <br>For ODBC, search for pyodbc.<br>For JDBC, search for jaydebeapi.<br>Check out utilities.vHelp function, it may help you.</td> </tr>
    <tr> <td><div class="param_name">dsn</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Database DSN. OS file including the DB credentials. Vertica ML Python will try to create a vertica_python cursor first. If the library is not found, it will try to create a pyodbc cursor. Check out the utilities.vHelp function, it may help you.</td> </tr>
    <tr> <td><div class="param_name">usecols</div></td> <td><div class="type">list</div></td> <td><div class = "yes">&#10003;</div></td> <td>List of columns used to create the object. As Vertica is a columnar DB, including fewer columns is easy and makes the process faster. Do not hesitate to leave out columns you do not need.</td> </tr>
    <tr> <td><div class="param_name">schema</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Relation schema. It can be used to be less ambiguous and makes it possible to use schema and relation names that contain dots '.'.</td> </tr>
    <tr> <td><div class="param_name">empty</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>If set to True, the created object will be empty. It can be used to create customized vDataFrame without going through the initialization check.</td> </tr>
</table>

### Attributes

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">_VERTICA_ML_PYTHON_VARIABLES_</div></td> <td><div class="type">dict</div></td> <td>Dictionary containing all the vDataFrame attributes.<br>
                                                    <ul>
                                                        <li><b>allcols_ind, int :</b> Int used to optimize the SQL code generation.</li>
                                                        <li><b>columns, list :</b> List of the vcolumns names.</li>
                                                        <li><b>count, int :</b> Number of elements of the vDataFrame (catalog).</li>
                                                        <li><b>cursor, DBcursor :</b> Vertica Database cursor.</li>
                                                        <li><b>dsn, str :</b> Vertica Database DSN.</li>
                                                        <li><b>exclude_columns, list :</b> columns to exclude from the final relation.</li>
                                                        <li><b>history, list :</b> vDataFrame history (user modifications).</li>
                                                        <li><b>input_relation, str :</b> Name of the vDataFrame.</li>
                                                        <li><b>main_relation, str :</b> Relation used to build the vDataFrame (first floor).</li>
                                                        <li><b>order_by, dict :</b> Dictionary of all the rules to sort the vDataFrame.</li>
                                                        <li><b>query_on, bool :</b> If set to True, all the queries will be printed.</li>
                                                        <li><b>saving, list :</b> List used to reconstruct the vDataFrame.</li>
                                                        <li><b>schema, str :</b> Schema of the input relation.</li>
                                                        <li><b>schema_writing, str :</b> Schema used to create temporary tables when needed.</li>
                                                        <li><b>time_on, bool :</b> If set to True, the elapsed time of every query will be printed.</li>
                                                        <li><b>where, list :</b> List of all the rules to filter the vDataFrame.</li></ul></td> </tr>
    <tr> <td><div class="param_name">vcolumns</div></td> <td><div class="type">vcolumn</div></td> <td>Each vcolumn of the vDataFrame is accessible by entering its name between brackets. For example, to access "myVC", you can write vDataFrame["myVC"].</td> </tr>
</table>

### Example

In [2]:
from vertica_ml_python import vDataFrame

# Build the vDataFrame from a fully qualified name: both the schema and
# the relation are given inside 'input_relation'.
vDataFrame(input_relation='"public"."titanic"')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,fare,sex,body,pclass,age,name,cabin,parch,survived,boat,ticket,embarked,home.dest,sibsp
0.0,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
1.0,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
2.0,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
3.0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0,0,,112050,S,"Belfast, NI",0
4.0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0,0,,PC 17609,C,"Montevideo, Uruguay",0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14

In [3]:
# Build the vDataFrame by passing the schema separately through the
# 'schema' parameter; 'input_relation' then holds only the relation name.
vDataFrame(schema='public', input_relation='titanic')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,survived,boat,ticket,embarked,home.dest,sibsp,fare,sex,body,pclass,age,name,cabin,parch
0.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2
1.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2
2.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2
3.0,0,,112050,S,"Belfast, NI",0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0
4.0,0,,PC 17609,C,"Montevideo, Uruguay",0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14

In [4]:
# Restrict the vDataFrame to a subset of the relation's columns by
# listing them in 'usecols'.
vDataFrame(
    input_relation='titanic',
    schema='public',
    usecols=["age", "survived"],
)

0,1,2
,age,survived
0.0,2.000,0
1.0,30.000,0
2.0,25.000,0
3.0,39.000,0
4.0,71.000,0
,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 2

In [6]:
# Build the vDataFrame by naming a DSN instead of supplying a cursor.
vDataFrame(dsn="VerticaDSN", input_relation='"public"."titanic"')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,fare,sex,body,pclass,age,name,cabin,parch,survived,boat,ticket,embarked,home.dest,sibsp
0.0,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
1.0,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
2.0,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2,0,,113781,S,"Montreal, PQ / Chesterville, ON",1
3.0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0,0,,112050,S,"Belfast, NI",0
4.0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0,0,,PC 17609,C,"Montevideo, Uruguay",0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14

In [7]:
# Build the vDataFrame on top of an explicitly created database cursor.
from vertica_ml_python.connections.connect import vertica_cursor

cursor = vertica_cursor("VerticaDSN")
vDataFrame(cursor=cursor, input_relation='"public"."titanic"')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,survived,boat,ticket,embarked,home.dest,sibsp,fare,sex,body,pclass,age,name,cabin,parch
0.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,2.000,"Allison, Miss. Helen Loraine",C22 C26,2
1.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,male,135,1,30.000,"Allison, Mr. Hudson Joshua Creighton",C22 C26,2
2.0,0,,113781,S,"Montreal, PQ / Chesterville, ON",1,151.55000,female,,1,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",C22 C26,2
3.0,0,,112050,S,"Belfast, NI",0,0.00000,male,,1,39.000,"Andrews, Mr. Thomas Jr",A36,0
4.0,0,,PC 17609,C,"Montevideo, Uruguay",0,49.50420,male,22,1,71.000,"Artagaveytia, Mr. Ramon",,0
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic, Number of rows: 1234, Number of columns: 14