# read_csv

In [None]:
read_csv(path: str,
         cursor = None, 
         schema: str = 'public', 
         table_name: str = '', 
         sep: str = ',', 
         header: bool = True,
         header_names: list = [],
         na_rep: str = '', 
         quotechar: str = '"', 
         escape: str = '\\', 
         genSQL: bool = False,
         parse_n_lines: int = -1,
         insert: bool = False)

Ingests a CSV file using flex tables.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">path</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>Absolute path where the CSV file is located.</td> </tr>
    <tr> <td><div class="param_name">cursor</div></td> <td><div class="type">DBcursor</div></td> <td><div class = "yes">&#10003;</div></td> <td>Vertica DB cursor.</td> </tr>
    <tr> <td><div class="param_name">schema</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Schema where the CSV file will be ingested.</td> </tr>
    <tr> <td><div class="param_name">table_name</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Final relation name.</td> </tr>
    <tr> <td><div class="param_name">sep</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Column separator.</td> </tr>
    <tr> <td><div class="param_name">header</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>If set to False, the parameter 'header_names' will be used to name the different columns.</td> </tr>
    <tr> <td><div class="param_name">header_names</div></td> <td><div class="type">list</div></td> <td><div class = "yes">&#10003;</div></td> <td>List of the column names.</td> </tr>
    <tr> <td><div class="param_name">na_rep</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Missing values representation.</td> </tr>
    <tr> <td><div class="param_name">quotechar</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Character used to enclose the string values.</td> </tr>
    <tr> <td><div class="param_name">escape</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Character used to escape special characters in the file (emitted as ESCAPE AS in the generated COPY statement).</td> </tr>
    <tr> <td><div class="param_name">genSQL</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>If set to True, the SQL code used to create the final table will be generated but not executed. It is a good way to change the final relation types or to customize the data ingestion.</td> </tr>
    <tr> <td><div class="param_name">parse_n_lines</div></td> <td><div class="type">int</div></td> <td><div class = "yes">&#10003;</div></td> <td>If this parameter is greater than 0, a new file of 'parse_n_lines' lines will be created and ingested first to identify the data types. It will then be dropped and the entire file will be ingested. The data type identification will be less precise, but this parameter can make the process faster if the file is large.</td> </tr>
    <tr> <td><div class="param_name">insert</div></td> <td><div class="type">bool</div></td> <td><div class = "yes">&#10003;</div></td> <td>If set to True, the data will be ingested into the input relation. Make sure that your file has a header corresponding to the names of the relation's columns; otherwise, the ingestion will not work.</td> </tr>
</table>

### Returns

<b>vDataFrame</b> : The vDataFrame of the relation.

### Example

In [1]:
from vertica_ml_python.utilities import read_csv

# Generate the SQL needed to create and load the table.
# With genSQL = True the statements are produced but not executed.
read_csv(
    "titanic.csv",
    schema="public",
    table_name="titanic_dataset",
    sep=",",
    na_rep="",
    quotechar='"',
    genSQL=True,
)

CREATE TABLE "public"."titanic_dataset"("pclass" Integer, "survived" Integer, "name" Varchar(164), "sex" Varchar(20), "age" Numeric(6,3), "sibsp" Integer, "parch" Integer, "ticket" Varchar(36), "fare" Numeric(10,5), "cabin" Varchar(30), "embarked" Varchar(20), "boat" Varchar(100), "body" Integer, "home.dest" Varchar(100));
COPY "public"."titanic_dataset"("pclass", "survived", "name", "sex", "age", "sibsp", "parch", "ticket", "fare", "cabin", "embarked", "boat", "body", "home.dest") FROM {} DELIMITER ',' NULL '' ENCLOSED BY '"' ESCAPE AS '\' SKIP 1;


In [2]:
# Generate the SQL needed to create the table, identifying the data types
# from a 100-line extract of the file (parse_n_lines = 100).
read_csv(
    "titanic.csv",
    schema="public",
    table_name="titanic_dataset",
    sep=",",
    na_rep="",
    quotechar='"',
    genSQL=True,
    parse_n_lines=100,
)

CREATE TABLE "public"."titanic_dataset"("pclass" Integer, "survived" Integer, "name" Varchar(130), "sex" Varchar(20), "age" Numeric(5,3), "sibsp" Integer, "parch" Integer, "ticket" Varchar(22), "fare" Numeric(10,5), "cabin" Varchar(22), "embarked" Varchar(20), "boat" Varchar(100), "body" Integer, "home.dest" Varchar(92));
COPY "public"."titanic_dataset"("pclass", "survived", "name", "sex", "age", "sibsp", "parch", "ticket", "fare", "cabin", "embarked", "boat", "body", "home.dest") FROM {} DELIMITER ',' NULL '' ENCLOSED BY '"' ESCAPE AS '\' SKIP 1;


In [3]:
# Ingest the CSV file: the table is created and its vDataFrame is returned.
read_csv(
    "titanic.csv",
    schema="public",
    table_name="titanic_dataset",
    sep=",",
    na_rep="",
    quotechar='"',
    parse_n_lines=100,
)

The table "public"."titanic_dataset" has been successfully created.


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,survived,cabin,boat,home.dest,embarked,age,name,sibsp,fare,sex,body,pclass,ticket,parch
0.0,1,B5,2,"St Louis, MO",S,29.000,"Allen, Miss. Elisabeth Walton",0,211.33750,female,,1,24160,0
1.0,1,C22 C26,11,"Montreal, PQ / Chesterville, ON",S,0.920,"Allison, Master. Hudson Trevor",1,151.55000,male,,1,113781,2
2.0,0,C22 C26,,"Montreal, PQ / Chesterville, ON",S,2.000,"Allison, Miss. Helen Loraine",1,151.55000,female,,1,113781,2
3.0,0,C22 C26,,"Montreal, PQ / Chesterville, ON",S,30.000,"Allison, Mr. Hudson Joshua Creighton",1,151.55000,male,135,1,113781,2
4.0,0,C22 C26,,"Montreal, PQ / Chesterville, ON",S,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",1,151.55000,female,,1,113781,2
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic_dataset, Number of rows: 1234, Number of columns: 14

In [4]:
# Insert the content of a file into an already existing table (insert = True).
read_csv(
    "titanic.csv",
    schema="public",
    table_name="titanic_dataset",
    sep=",",
    na_rep="",
    quotechar='"',
    parse_n_lines=100,
    insert=True,
)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
,fare,sex,body,pclass,ticket,parch,survived,cabin,boat,home.dest,embarked,age,name,sibsp
0.0,211.33750,female,,1,24160,0,1,B5,2,"St Louis, MO",S,29.000,"Allen, Miss. Elisabeth Walton",0
1.0,151.55000,male,,1,113781,2,1,C22 C26,11,"Montreal, PQ / Chesterville, ON",S,0.920,"Allison, Master. Hudson Trevor",1
2.0,151.55000,female,,1,113781,2,0,C22 C26,,"Montreal, PQ / Chesterville, ON",S,2.000,"Allison, Miss. Helen Loraine",1
3.0,151.55000,male,135,1,113781,2,0,C22 C26,,"Montreal, PQ / Chesterville, ON",S,30.000,"Allison, Mr. Hudson Joshua Creighton",1
4.0,151.55000,female,,1,113781,2,0,C22 C26,,"Montreal, PQ / Chesterville, ON",S,25.000,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",1
,...,...,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: titanic_dataset, Number of rows: 2468, Number of columns: 14

### See Also

<table id="seealso">
    <tr><td><a href="../read_json">read_json</a></td> <td>Ingests a JSON file into the Vertica DB.</td></tr>
</table>