# vDataFrame.regexp

In [None]:
vDataFrame.regexp(column: str,
                  pattern: str,
                  method: str = "substr", 
                  position: int = 1,
                  occurrence: int = 1,
                  replacement: str = '',
                  return_position : int = 0,
                  name: str = "")

Computes a new vcolumn based on regular expressions.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">column</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>Input vcolumn used to compute the regular expression.</td> </tr>
    <tr> <td><div class="param_name">pattern</div></td> <td><div class="type">str</div></td> <td><div class = "no">&#10060;</div></td> <td>The regular expression.</td> </tr>
    <tr> <td><div class="param_name">method</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Method used to compute the regular expressions.<br>
                                                    <ul>
                                                        <li><b>count :</b> Returns the number of times a regular expression matches each element of the input vcolumn. </li>
                                                        <li><b>ilike :</b> Returns True if the vcolumn element contains a match for the case-insensitive regular expression.</li>
                                                        <li><b>instr :</b> Returns the starting or ending position in a vcolumn element where a regular expression matches.</li>
                                                        <li><b>like :</b> Returns True if the vcolumn element matches the regular expression.</li>
                                                        <li><b>not_ilike :</b> Returns True if the vcolumn element does not match the case-insensitive regular expression.</li>
                                                        <li><b>not_like :</b> Returns True if the vcolumn element does not contain a match for the regular expression.</li>
                                                        <li><b>replace :</b> Replaces all occurrences of a substring that match a regular expression with another substring.</li>
                                                        <li><b>substr :</b> Returns the substring that matches a regular expression within a vcolumn.</li></ul></td> </tr>
    <tr> <td><div class="param_name">position</div></td> <td><div class="type">int</div></td> <td><div class = "yes">&#10003;</div></td> <td>The number of characters from the start of the string where the function should start searching for matches.</td> </tr>
    <tr> <td><div class="param_name">occurrence</div></td> <td><div class="type">int</div></td> <td><div class = "yes">&#10003;</div></td> <td>Controls which occurrence of a pattern match in the string to return.</td> </tr>
    <tr> <td><div class="param_name">replacement</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>The string to replace matched substrings.</td> </tr>
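    <tr> <td><div class="param_name">return_position</div></td> <td><div class="type">int</div></td> <td><div class = "yes">&#10003;</div></td> <td>Sets the position within the string to return. With 0 (default), the position of the first character of the match is returned; with 1, the position of the first character after the end of the match is returned.</td> </tr>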
    <tr> <td><div class="param_name">name</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>New feature name. If empty, a name will be generated.</td> </tr>
</table>

### Returns

<b>vDataFrame</b> : self

### Example

In [8]:
from vertica_ml_python import *
filmtv_movies = vDataFrame("filmtv_movies")
print(filmtv_movies.head(1))

0,1,2,3,4,5,6,7,8,9,10,11,12
,actors,avg_vote,description,genre,director,votes,year,notes,title,filmtv_id,duration,country
0.0,"Mickey Rourke, Steve Guttenberg, Ellen Barkin, Daniel Stern, Kevin Bacon, Timothy Daly, Paul Reiser, Kelle Kipp, Colette Blonigan",7.20,"Five boys from Baltimore are in the habit of meeting periodically for dinner and playing tricks together. One of them, Boogie, is in trouble because of his passion for betting: he owes a gangster two thousand dollars and to find them he keeps betting and losing. The others have marital or sex-related problems, and everyone has a few jokes.",Comedy,Barry Levinson,15,1982.00,A cast of will be famous for Levinson's directorial debut. Very bitter and very well written: jokes are seen as a manifestation of immaturity rather than carefree; Rourke enhances his charge of beautiful darkness. Guttenberg is the good guy in the group.,Diner,18,95,United States
,...,...,...,...,...,...,...,...,...,...,...,...


<object>  Name: filmtv_movies, Number of rows: 53397, Number of columns: 12


In [9]:
# Retrieving the second actor
filmtv_movies.regexp(column = "actors", 
                     pattern = "[^,]+", 
                     method = "substr",
                     occurrence = 2,
                     name = "actor2").select(["actors", 
                                              "actor2"])

0,1,2
,actors,actor2
0.0,"Mickey Rourke, Steve Guttenberg, Ellen Barkin, Daniel Stern, Kevin Bacon, Timothy Daly, Paul Reiser, Kelle Kipp, Colette Blonigan",Steve Guttenberg
1.0,"Gian Maria Volonté, Irene Papas, Gabriele Ferzetti, Salvo Randone, Laura Nucci, Mario Scaccia, Luigi Pistilli, Leopoldo Trieste",Irene Papas
2.0,"Alan Steel, Mary Arden, Sergio Ciani, Ivano Davoli, Giovanna Galletti, Aïché Nana, Charlie Charun, Gilberto Mazzi",Mary Arden
3.0,"George Hilton, Ennio Girolami, Marta Padovan, Alfonso De La Vega, Venancio Muro, Alfonso Rojas, Luis Marin",Ennio Girolami
4.0,"Patsy Kensit, Stéphane Freiss, Mouss Diouf, Anne-Marie Pisani, Joseph Momo, Jean-Marc Truong, An Luu",Stéphane Freiss
,...,...


<object>  Name: filmtv_movies, Number of rows: 53397, Number of columns: 2

In [11]:
# Computing the number of actors (number of comma separators + 1)
filmtv_movies.regexp(column = "actors", 
                     pattern = ",", 
                     method = "count",
                     name = "nb_actors")
filmtv_movies["nb_actors"].add(1)
filmtv_movies.select(["actors", "nb_actors"])

0,1,2
,actors,nb_actors
0.0,"Mickey Rourke, Steve Guttenberg, Ellen Barkin, Daniel Stern, Kevin Bacon, Timothy Daly, Paul Reiser, Kelle Kipp, Colette Blonigan",9
1.0,"Gian Maria Volonté, Irene Papas, Gabriele Ferzetti, Salvo Randone, Laura Nucci, Mario Scaccia, Luigi Pistilli, Leopoldo Trieste",8
2.0,"Alan Steel, Mary Arden, Sergio Ciani, Ivano Davoli, Giovanna Galletti, Aïché Nana, Charlie Charun, Gilberto Mazzi",8
3.0,"George Hilton, Ennio Girolami, Marta Padovan, Alfonso De La Vega, Venancio Muro, Alfonso Rojas, Luis Marin",7
4.0,"Patsy Kensit, Stéphane Freiss, Mouss Diouf, Anne-Marie Pisani, Joseph Momo, Jean-Marc Truong, An Luu",7
,...,...


<object>  Name: filmtv_movies, Number of rows: 53397, Number of columns: 2

### See Also

<table id="seealso">
    <tr><td><a href="../eval">vDataFrame.eval</a></td> <td>Evaluates a customized expression.</td></tr>
</table>