# vDataFrame.join

In [None]:
vDataFrame.join(input_relation, 
                on: dict = {},
                how: str = 'natural',
                expr1: list = ['*'],
                expr2: list = ['*'])

Joins the vDataFrame with another vDataFrame or an input relation.

### Parameters

<table id="parameters">
    <tr> <th>Name</th> <th>Type</th> <th>Optional</th> <th>Description</th> </tr>
    <tr> <td><div class="param_name">input_relation</div></td> <td><div class="type">str / vDataFrame</div></td> <td><div class = "no">&#10060;</div></td> <td>Relation used to do the merging.</td> </tr>
    <tr> <td><div class="param_name">on</div></td> <td><div class="type">dict</div></td> <td><div class = "yes">&#10003;</div></td> <td>Dictionary of the join keys. It must have the following form: {"relationA_key1": "relationB_key1", ..., "relationA_keyk": "relationB_keyk"}, where relationA is the current vDataFrame and relationB is the input relation or the input vDataFrame.</td> </tr>
    <tr> <td><div class="param_name">how</div></td> <td><div class="type">str</div></td> <td><div class = "yes">&#10003;</div></td> <td>Join Type.<br>
                                                    <ul>
                                                        <li><b>left :</b> Left Join.</li>
                                                        <li><b>right :</b> Right Join.</li>
                                                        <li><b>cross :</b> Cross Join.</li>
                                                        <li><b>full :</b> Full Outer Join.</li>
                                                        <li><b>natural :</b> Natural Join.</li>
                                                        <li><b>inner :</b> Inner Join. </li>
                                                        </ul></td> </tr>
    <tr> <td><div class="param_name">expr1</div></td> <td><div class="type">list</div></td> <td><div class = "yes">&#10003;</div></td> <td>List of the columns to select from the current vDataFrame, written as pure SQL expressions. Aliases can also be given: 'column' and 'column AS my_new_alias' are both valid. Aliases are recommended to keep track of the different features and to avoid ambiguous names. </td> </tr>
    <tr> <td><div class="param_name">expr2</div></td> <td><div class="type">list</div></td> <td><div class = "yes">&#10003;</div></td> <td>List of the columns to select from the input relation, written as pure SQL expressions. Aliases can also be given: 'column' and 'column AS my_new_alias' are both valid. Aliases are recommended to keep track of the different features and to avoid ambiguous names. </td> </tr>
</table>

### Returns

<b>vDataFrame</b> : object result of the join.

### Example

In [13]:
# Import only the class used on this page rather than everything the
# package exports, so it is clear where `vDataFrame` comes from.
from vertica_ml_python import vDataFrame

# Build one vDataFrame per relation used by the join examples below.
flights = vDataFrame("public.flights")
airports = vDataFrame("public.airports")
airlines = vDataFrame("public.airlines")

# Preview each relation before joining them.
print(flights)
print(airports)
print(airlines)

0,1,2,3,4,5,6
,departure_delay,origin_airport,scheduled_departure,airline,destination_airport,arrival_delay
0.0,-9,11433,2015-10-01 10:09:00,EV,10135,-2
1.0,-3,10397,2015-10-01 10:27:00,EV,10135,-14
2.0,-4,13930,2015-10-01 13:57:00,EV,10135,6
3.0,-3,11433,2015-10-01 14:02:00,EV,10135,-8
4.0,0,10397,2015-10-01 14:44:00,EV,10135,-1
,...,...,...,...,...,...


<object>  Name: flights, Number of rows: 4068736, Number of columns: 6


0,1,2,3,4,5,6,7
,COUNTRY,IATA_CODE,AIRPORT,LATITUDE,CITY,STATE,LONGITUDE
0.0,USA,ABE,Lehigh Valley International Airport,40.652360,Allentown,PA,-75.440400
1.0,USA,ABI,Abilene Regional Airport,32.411320,Abilene,TX,-99.681900
2.0,USA,ABQ,Albuquerque International Sunport,35.040220,Albuquerque,NM,-106.609190
3.0,USA,ABR,Aberdeen Regional Airport,45.449060,Aberdeen,SD,-98.421830
4.0,USA,ABY,Southwest Georgia Regional Airport,31.535520,Albany,GA,-84.194470
,...,...,...,...,...,...,...


<object>  Name: airports, Number of rows: 322, Number of columns: 7


0,1,2
,AIRLINE,IATA_CODE
0.0,American Airlines Inc.,AA
1.0,Alaska Airlines Inc.,AS
2.0,JetBlue Airways,B6
3.0,Delta Air Lines Inc.,DL
4.0,Atlantic Southeast Airlines,EV
,...,...


<object>  Name: airlines, Number of rows: 14, Number of columns: 2


In [14]:
# Cross join: pair every airport with every airport (itself included).
# No `on` keys are passed; both sides expose the same column names, so
# each selected column is aliased to keep the result unambiguous.
airports.join(
    airports,
    how="cross",
    expr1=[
        "IATA_CODE AS airport1",
        "LATITUDE AS airport1_latitude",
        "LONGITUDE AS airport1_longitude",
    ],
    expr2=[
        "IATA_CODE AS airport2",
        "LATITUDE AS airport2_latitude",
        "LONGITUDE AS airport2_longitude",
    ],
)

0,1,2,3,4,5,6
,airport1,airport1_latitude,airport1_longitude,airport2,airport2_latitude,airport2_longitude
0.0,ABE,40.652360,-75.440400,ABE,40.652360,-75.440400
1.0,ABE,40.652360,-75.440400,ABI,32.411320,-99.681900
2.0,ABE,40.652360,-75.440400,ABQ,35.040220,-106.609190
3.0,ABE,40.652360,-75.440400,ABR,45.449060,-98.421830
4.0,ABE,40.652360,-75.440400,ABY,31.535520,-84.194470
,...,...,...,...,...,...


<object>  Name: join, Number of rows: 103684, Number of columns: 6

In [16]:
# Left join: match flights.airline against airlines.IATA_CODE, select
# every column of `flights` and add the airline's full name under the
# alias `airline_long`.
flights.join(
    airlines,
    on={"airline": "IATA_CODE"},
    how="left",
    expr1=["*"],
    expr2=["AIRLINE AS airline_long"],
)

0,1,2,3,4,5,6,7
,destination_airport,scheduled_departure,departure_delay,arrival_delay,origin_airport,airline,airline_long
0.0,ABE,2015-08-16 20:12:00,14,5,DTW,EV,Atlantic Southeast Airlines
1.0,ABE,2015-08-17 10:07:00,29,27,DTW,EV,Atlantic Southeast Airlines
2.0,ABE,2015-08-17 10:25:00,19,10,ATL,EV,Atlantic Southeast Airlines
3.0,ABE,2015-08-17 14:00:00,4,61,ORD,EV,Atlantic Southeast Airlines
4.0,ABE,2015-08-17 14:12:00,-5,-17,DTW,EV,Atlantic Southeast Airlines
,...,...,...,...,...,...,...


<object>  Name: join, Number of rows: 4068736, Number of columns: 7

### See Also

<table id="seealso">
    <tr><td><a href="../append">vDataFrame.append</a></td> <td>Merges the vDataFrame with another relation.</td></tr>
    <tr><td><a href="../groupby">vDataFrame.groupby</a></td> <td>Aggregates the vDataFrame.</td></tr>
    <tr><td><a href="../sort">vDataFrame.sort</a></td> <td>Sorts the vDataFrame.</td></tr>
</table>