##### DataFrame Styling Code
This code adds background colors and bold text to the displayed DataFrames.

In [1]:
def style_df(dataframe, key_cols, left_cols, right_cols, calc_cols,
             bold_col='item 🔑', bold_values=('R ruler', 'L lamp')):
    """Build the per-cell CSS grid for ``Styler.apply(..., axis=None)``.

    Parameters
    ----------
    dataframe : pd.DataFrame
        The frame being styled.
    key_cols, left_cols, right_cols : collection of column labels
        Columns shaded lightgreen, lightyellow and lightblue respectively.
        A column listed in more than one group takes the first match in
        that order.
    calc_cols : collection of column labels
        Accepted so a shared keyword mapping can be splatted into the call;
        this function never reads it.
    bold_col : column label, default 'item 🔑'
        Column whose matching cells are rendered bold. Skipped when the
        frame has no such column.
    bold_values : iterable, default ('R ruler', 'L lamp')
        Values in ``bold_col`` that get ``font-weight: bold``.

    Returns
    -------
    pd.DataFrame
        Same index and columns as ``dataframe``; each cell holds a CSS
        declaration string ('' for unstyled cells).
    """
    styles = pd.DataFrame('', index=dataframe.index, columns=dataframe.columns)

    for col in dataframe.columns:
        if col in key_cols:
            styles[col] = "background-color: lightgreen"
        elif col in left_cols:
            styles[col] = "background-color: lightyellow"
        elif col in right_cols:
            styles[col] = "background-color: lightblue"

    if bold_col in dataframe.columns:
        # A lone string would otherwise be split into characters by list().
        if isinstance(bold_values, str):
            bold_values = [bold_values]
        is_bold = dataframe[bold_col].isin(list(bold_values)).tolist()
        # Assign positionally from plain lists: a label-based `.loc[...] +=`
        # per row appends the suffix repeatedly when index labels repeat.
        styles[bold_col] = [
            css + "; font-weight: bold" if bold else css
            for css, bold in zip(styles[bold_col].tolist(), is_bold)
        ]

    return styles

# Column groups splatted into style_df as keyword arguments by every styled cell.
cols = dict(
    key_cols=['item 🔑', 'item_x', 'item_y'],
    left_cols=['n_sold'],
    right_cols=['price'],
    calc_cols=[],
)

<br/>  

<br/>  

In [2]:
%load_ext cudf.pandas
import pandas as pd

# <i>Number Sold</i>

In [3]:
# Left-hand table: units sold per item.
sales = pd.DataFrame(
    data={
        'n_sold': [3, 4, 1, 0],
        'item 🔑': ['pen', 'pencil', 'eraser', 'L lamp'],
    }
)
# Render with the shared column colouring (axis=None hands style_df the whole frame).
sales.style.apply(style_df, axis=None, **cols)

Unnamed: 0,n_sold,item 🔑
0,3,pen
1,4,pencil
2,1,eraser
3,0,L lamp


# <i>Item Prices</i>

In [5]:
# Right-hand table: unit price per item.
prices = pd.DataFrame(
    data={
        'item 🔑': ['pen', 'pencil', 'eraser', 'R ruler'],
        'price': [3.0, 1.0, 0.5, 2.0],
    }
)
# Render with the shared column colouring, floats shown to two decimals.
(
    prices.style
    .apply(style_df, axis=None, **cols)
    .format(precision=2)
)

Unnamed: 0,item 🔑,price
0,pen,3.0
1,pencil,1.0
2,eraser,0.5
3,R ruler,2.0


<br/>  

<br/>  

<br/>  

# left 
## left 🔑s only

In [6]:
# Left join: keep every row of `sales`, attach `price` where the key matches.
(
    sales
    .merge(right=prices, how='left', on='item 🔑')
    .style
    .apply(style_df, axis=None, **cols)
    .format(precision=2)
)

Unnamed: 0,n_sold,item 🔑,price
0,3,pen,3.0
1,4,pencil,1.0
2,1,eraser,0.5
3,0,L lamp,


<br/>  

<br/>  

# right
## right 🔑s only

In [7]:
# Right join: keep every row of `prices`, attach `n_sold` where the key matches.
(
    sales
    .merge(right=prices, how='right', on='item 🔑')
    .style
    .apply(style_df, axis=None, **cols)
    .format(precision=2)
)

Unnamed: 0,item 🔑,price,n_sold
0,pen,3.0,3.0
1,pencil,1.0,4.0
2,eraser,0.5,1.0
3,R ruler,2.0,


<br/>  

<br/>  

# outer
## all 🔑s included

In [8]:
# Outer join: keep the keys from both tables.
(
    sales
    .merge(right=prices, how='outer', on='item 🔑')
    .style
    .apply(style_df, axis=None, **cols)
    .format(precision=2)
)

Unnamed: 0,n_sold,item 🔑,price
0,0.0,L lamp,
1,,R ruler,2.0
2,1.0,eraser,0.5
3,3.0,pen,3.0
4,4.0,pencil,1.0


<br/>  

<br/>  

# inner (default)
## intersecting 🔑s only

In [9]:
# `how` is deliberately omitted: merge falls back to its default, an inner join.
(
    sales
    .merge(right=prices, on='item 🔑')
    .style
    .apply(style_df, axis=None, **cols)
    .format(precision=2)
)

Unnamed: 0,n_sold,item 🔑,price
0,3,pen,3.0
1,4,pencil,1.0
2,1,eraser,0.5


<br/>  

<br/>  

---

# pandas merge NVIDIA RAPIDS acceleration

### random float data

In [10]:
import numpy as np
import pandas as pd
# Echo the module object so the cell output shows which pandas implementation is loaded.
pd

<module 'pandas' (ModuleAccelerator(fast=cudf, slow=pandas))>

In [11]:
# Benchmark frame: N rows by M columns of uniform random floats from np.random.rand.
# NOTE(review): at float64 this is roughly N * M * 8 bytes, about 4 GB — confirm the target device has room.
N, M = 10_000_000, 50
df = pd.DataFrame(np.random.rand(N, M))
# Show three randomly chosen rows as a sanity check; no seed is set, so they differ per run.
df.sample(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
7309947,0.737505,0.784523,0.326411,0.492196,0.239522,0.668334,0.490824,0.865769,0.026066,0.033399,...,0.029629,0.000375,0.056149,0.24746,0.341366,0.600834,0.288556,0.942583,0.134555,0.015289
8288236,0.073213,0.274211,0.861512,0.942729,0.175512,0.229811,0.435995,0.83264,0.119527,0.464138,...,0.941912,0.920293,0.104182,0.066676,0.395821,0.68303,0.771834,0.432414,0.743455,0.201548
2986337,0.966779,0.171261,0.919467,0.633235,0.724229,0.209623,0.292998,0.736985,0.110211,0.680585,...,0.388662,0.330595,0.949537,0.009096,0.267847,0.227156,0.097145,0.152026,0.822549,0.196654


<br/>  

<br/>  

# <u>left</u>
## left 🔑s
# <span style="color: green;">x 91 speedup</span> 🔥
#### RAPIDS OFF - 16.8 s
#### RAPIDS ON  - 0.184 s

In [12]:
%%time

# Self-join on the first five columns (labels 0-4), keeping every left-hand row.
left_df = df.merge(right=df, how='left', on=[*range(5)])
# Only the timing matters here, so drop the reference straight away.
del left_df

CPU times: user 38.1 ms, sys: 64.5 ms, total: 103 ms
Wall time: 120 ms


<br/>  

<br/>  

# <u>right</u>
## right 🔑s
# <span style="color: green;">x 202 speedup</span> 🔥🔥
#### RAPIDS OFF - 16.7 s
#### RAPIDS ON  - 0.0827 s

In [13]:
%%time

# Self-join on the first five columns (labels 0-4), keeping every right-hand row.
right_df = df.merge(right=df, how='right', on=[*range(5)])
# Only the timing matters here, so drop the reference straight away.
del right_df

CPU times: user 38.9 ms, sys: 38.2 ms, total: 77.2 ms
Wall time: 92.2 ms


<br/>  

<br/>  

# <u>outer</u> 
## all 🔑s 
# <span style="color: green;">x 162 speedup</span> 🔥🔥
#### RAPIDS OFF - 36.6 s
#### RAPIDS ON  - 0.226 s

In [14]:
%%time

# Self-join on the first five columns (labels 0-4), keeping keys from both sides.
outer_df = df.merge(right=df, how='outer', on=[*range(5)])
# Only the timing matters here, so drop the reference straight away.
del outer_df

CPU times: user 84.4 ms, sys: 120 ms, total: 205 ms
Wall time: 249 ms


<br/>  

<br/>  

# <u>inner</u> 
## intersecting 🔑s
# <span style="color: green;">x 194 speedup</span> 🔥🔥
#### RAPIDS OFF - 2:23
#### RAPIDS ON  - 0.139 s

In [15]:
%%time

# Self-join on the first five columns (labels 0-4), keeping only keys present on both sides.
inner_df = df.merge(right=df, how='inner', on=[*range(5)])
# Only the timing matters here, so drop the reference straight away.
del inner_df

CPU times: user 33.8 ms, sys: 36.5 ms, total: 70.3 ms
Wall time: 87.2 ms


<br/>  

<br/>  