## Combining Pandas Objects

In [4]:
import pandas as pd
import numpy as np


## Introduction

## Appending new rows to DataFrames

### How to do it...

In [5]:
names = pd.read_csv('datasets/names.csv')
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2


In [6]:
new_data_list = ['Aria', 1]
names.loc[4] = new_data_list
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1


In [7]:
names.loc['five'] = ['Zach', 3]
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1
five,Zach,3


In [8]:
names.loc[len(names)] = {'Name':'Zayd', 'Age':2}
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1
five,Zach,3
6,Zayd,2


In [9]:
names.loc[len(names)] = pd.Series({'Age':32, 'Name':'Dean'})
names

Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1
five,Zach,3
6,Zayd,2
7,Dean,32


In [10]:
names = pd.read_csv('datasets/names.csv')
names.append({'Name':'Aria', 'Age':1}, ignore_index=True)

  names.append({'Name':'Aria', 'Age':1}, ignore_index=True)


Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1


In [11]:
names.append({'Name':'Aria', 'Age':1}, ignore_index=True)

  names.append({'Name':'Aria', 'Age':1}, ignore_index=True)


Unnamed: 0,Name,Age
0,Cornelia,70
1,Abbas,69
2,Penelope,4
3,Niko,2
4,Aria,1


In [12]:
names.index = ['Canada', 'Canada', 'USA', 'USA']
names

Unnamed: 0,Name,Age
Canada,Cornelia,70
Canada,Abbas,69
USA,Penelope,4
USA,Niko,2


In [13]:
s = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))
s

Name    Zach
Age        3
Name: 4, dtype: object

In [14]:
names.append(s)

  names.append(s)


Unnamed: 0,Name,Age
Canada,Cornelia,70
Canada,Abbas,69
USA,Penelope,4
USA,Niko,2
4,Zach,3


In [15]:
s1 = pd.Series({'Name': 'Zach', 'Age': 3}, name=len(names))
s2 = pd.Series({'Name': 'Zayd', 'Age': 2}, name='USA')
names.append([s1, s2])

  names.append([s1, s2])


Unnamed: 0,Name,Age
Canada,Cornelia,70
Canada,Abbas,69
USA,Penelope,4
USA,Niko,2
4,Zach,3
USA,Zayd,2


In [16]:
bball_16 = pd.read_csv('datasets/baseball16.csv')
bball_16

Unnamed: 0,playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,...,RBI,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP
0,altuvjo01,2016,1,HOU,AL,161,640,108,216,42,...,96.0,30.0,10.0,60,70.0,11.0,7.0,3.0,7.0,15.0
1,bregmal01,2016,1,HOU,AL,49,201,31,53,13,...,34.0,2.0,0.0,15,52.0,0.0,0.0,0.0,1.0,1.0
2,castrja01,2016,1,HOU,AL,113,329,41,69,16,...,32.0,2.0,1.0,45,123.0,0.0,1.0,1.0,0.0,9.0
3,correca01,2016,1,HOU,AL,153,577,76,158,36,...,96.0,13.0,3.0,75,139.0,5.0,5.0,0.0,3.0,12.0
4,gattiev01,2016,1,HOU,AL,128,447,58,112,19,...,72.0,2.0,1.0,43,127.0,6.0,4.0,0.0,5.0,12.0
5,gomezca01,2016,1,HOU,AL,85,295,27,62,16,...,29.0,13.0,2.0,21,100.0,2.0,4.0,3.0,0.0,11.0
6,gonzama01,2016,1,HOU,AL,141,484,55,123,26,...,51.0,12.0,6.0,22,118.0,1.0,5.0,6.0,1.0,16.0
7,gourryu01,2016,1,HOU,AL,36,130,13,34,7,...,15.0,1.0,1.0,5,12.0,0.0,1.0,0.0,1.0,7.0
8,kempto01,2016,1,HOU,AL,59,120,15,26,4,...,7.0,2.0,1.0,14,27.0,0.0,0.0,1.0,1.0,5.0
9,marisja01,2016,1,HOU,AL,118,287,40,60,18,...,21.0,10.0,5.0,16,83.0,0.0,3.0,4.0,1.0,4.0


In [17]:
data_dict = bball_16.iloc[0].to_dict()
data_dict

{'playerID': 'altuvjo01',
 'yearID': 2016,
 'stint': 1,
 'teamID': 'HOU',
 'lgID': 'AL',
 'G': 161,
 'AB': 640,
 'R': 108,
 'H': 216,
 '2B': 42,
 '3B': 5,
 'HR': 24,
 'RBI': 96.0,
 'SB': 30.0,
 'CS': 10.0,
 'BB': 60,
 'SO': 70.0,
 'IBB': 11.0,
 'HBP': 7.0,
 'SH': 3.0,
 'SF': 7.0,
 'GIDP': 15.0}

In [18]:
new_data_dict = {k: '' if isinstance(v, str) else
    np.nan for k, v in data_dict.items()}
new_data_dict

{'playerID': '',
 'yearID': nan,
 'stint': nan,
 'teamID': '',
 'lgID': '',
 'G': nan,
 'AB': nan,
 'R': nan,
 'H': nan,
 '2B': nan,
 '3B': nan,
 'HR': nan,
 'RBI': nan,
 'SB': nan,
 'CS': nan,
 'BB': nan,
 'SO': nan,
 'IBB': nan,
 'HBP': nan,
 'SH': nan,
 'SF': nan,
 'GIDP': nan}

### There's more...

In [19]:
random_data = []
for i in range(1000):   # doctest: +SKIP
    d = dict()
    for k, v in data_dict.items():
        if isinstance(v, str):
            d[k] = np.random.choice(list('abcde'))
        else:
            d[k] = np.random.randint(10)
    random_data.append(pd.Series(d, name=i + len(bball_16)))
random_data[0]

playerID    a
yearID      0
stint       7
teamID      b
lgID        e
G           3
AB          9
R           7
H           8
2B          9
3B          9
HR          7
RBI         8
SB          5
CS          6
BB          4
SO          6
IBB         1
HBP         9
SH          9
SF          1
GIDP        1
Name: 16, dtype: object

## Concatenating multiple DataFrames together

### How to do it...

In [20]:
stocks_2016 = pd.read_csv('datasets/stocks_2016.csv',
    index_col='Symbol')
stocks_2017 = pd.read_csv('datasets/stocks_2017.csv',
    index_col='Symbol')

In [21]:
stocks_2016

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,80,95,110
TSLA,50,80,130
WMT,40,55,70


In [22]:
stocks_2017

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,50,120,140
GE,100,30,40
IBM,87,75,95
SLB,20,55,85
TXN,500,15,23
TSLA,100,100,300


In [23]:
s_list = [stocks_2016, stocks_2017]
pd.concat(s_list)

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,80,95,110
TSLA,50,80,130
WMT,40,55,70
AAPL,50,120,140
GE,100,30,40
IBM,87,75,95
SLB,20,55,85
TXN,500,15,23
TSLA,100,100,300


In [24]:
pd.concat(s_list, keys=['2016', '2017'],
   names=['Year', 'Symbol'])  

Unnamed: 0_level_0,Unnamed: 1_level_0,Shares,Low,High
Year,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,AAPL,80,95,110
2016,TSLA,50,80,130
2016,WMT,40,55,70
2017,AAPL,50,120,140
2017,GE,100,30,40
2017,IBM,87,75,95
2017,SLB,20,55,85
2017,TXN,500,15,23
2017,TSLA,100,100,300


In [25]:
pd.concat(s_list, keys=['2016', '2017'],
    axis='columns', names=['Year', None])    

Year,2016,2016,2016,2017,2017,2017
Unnamed: 0_level_1,Shares,Low,High,Shares,Low,High
Symbol,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AAPL,80.0,95.0,110.0,50.0,120.0,140.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0
WMT,40.0,55.0,70.0,,,
GE,,,,100.0,30.0,40.0
IBM,,,,87.0,75.0,95.0
SLB,,,,20.0,55.0,85.0
TXN,,,,500.0,15.0,23.0


In [32]:
pd.concat(s_list, join='inner', keys=['2016', '2017'],
    axis='columns', names=['Year', None])

Year,2016,2016,2016,2017,2017,2017
Unnamed: 0_level_1,Shares,Low,High,Shares,Low,High
Symbol,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AAPL,80,95,110,50,120,140
TSLA,50,80,130,100,100,300


### There's more...

In [27]:
stocks_2016.append(stocks_2017)

  stocks_2016.append(stocks_2017)


Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,80,95,110
TSLA,50,80,130
WMT,40,55,70
AAPL,50,120,140
GE,100,30,40
IBM,87,75,95
SLB,20,55,85
TXN,500,15,23
TSLA,100,100,300


## Understanding the differences between concat, join, and merge

### How to do it...

In [36]:
from IPython.display import display_html
years = (2016, 2017, 2018)
stock_tables = [pd.read_csv(f'datasets/stocks_{year}.csv', index_col='Symbol') for year in years]

stocks_2016, stocks_2017, stocks_2018 = stock_tables
# stocks_2016
stock_tables

[        Shares  Low  High
 Symbol                   
 AAPL        80   95   110
 TSLA        50   80   130
 WMT         40   55    70,
         Shares  Low  High
 Symbol                   
 AAPL        50  120   140
 GE         100   30    40
 IBM         87   75    95
 SLB         20   55    85
 TXN        500   15    23
 TSLA       100  100   300,
         Shares  Low  High
 Symbol                   
 AAPL        40  135   170
 AMZN         8  900  1125
 TSLA        50  220   400]

In [35]:
stocks_2016

Unnamed: 0_level_0,Shares,Low,High
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,80,95,110
TSLA,50,80,130
WMT,40,55,70


In [39]:
pd.concat(stock_tables, keys=[2016, 2017, 2018])

Unnamed: 0_level_0,Unnamed: 1_level_0,Shares,Low,High
Unnamed: 0_level_1,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,AAPL,80,95,110
2016,TSLA,50,80,130
2016,WMT,40,55,70
2017,AAPL,50,120,140
2017,GE,100,30,40
2017,IBM,87,75,95
2017,SLB,20,55,85
2017,TXN,500,15,23
2017,TSLA,100,100,300
2018,AAPL,40,135,170


In [29]:
pd.concat(dict(zip(years, stock_tables)), axis='columns')

Unnamed: 0_level_0,2016,2016,2016,2017,2017,2017,2018,2018,2018
Unnamed: 0_level_1,Shares,Low,High,Shares,Low,High,Shares,Low,High
Symbol,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
AAPL,80.0,95.0,110.0,50.0,120.0,140.0,40.0,135.0,170.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0,50.0,220.0,400.0
WMT,40.0,55.0,70.0,,,,,,
GE,,,,100.0,30.0,40.0,,,
IBM,,,,87.0,75.0,95.0,,,
SLB,,,,20.0,55.0,85.0,,,
TXN,,,,500.0,15.0,23.0,,,
AMZN,,,,,,,8.0,900.0,1125.0


In [42]:
stocks_2016.join(stocks_2017, lsuffix='_2016',
    rsuffix='_2017', how='outer')

Unnamed: 0_level_0,Shares_2016,Low_2016,High_2016,Shares_2017,Low_2017,High_2017
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,80.0,95.0,110.0,50.0,120.0,140.0
GE,,,,100.0,30.0,40.0
IBM,,,,87.0,75.0,95.0
SLB,,,,20.0,55.0,85.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0
TXN,,,,500.0,15.0,23.0
WMT,40.0,55.0,70.0,,,


In [45]:
other = [stocks_2017.add_suffix('_2017'),
    stocks_2018.add_suffix('_2018')]
stocks_2016.add_suffix('_2016').join(other, how='outer')

Unnamed: 0_level_0,Shares_2016,Low_2016,High_2016,Shares_2017,Low_2017,High_2017,Shares_2018,Low_2018,High_2018
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAPL,80.0,95.0,110.0,50.0,120.0,140.0,40.0,135.0,170.0
TSLA,50.0,80.0,130.0,100.0,100.0,300.0,50.0,220.0,400.0
WMT,40.0,55.0,70.0,,,,,,
GE,,,,100.0,30.0,40.0,,,
IBM,,,,87.0,75.0,95.0,,,
SLB,,,,20.0,55.0,85.0,,,
TXN,,,,500.0,15.0,23.0,,,
AMZN,,,,,,,8.0,900.0,1125.0


In [49]:
stock_join = stocks_2016.add_suffix('_2016').join(other,
    how='outer')
stock_concat = pd.concat(dict(zip(years,stock_tables)),
    axis='columns')
level_1 = stock_concat.columns.get_level_values(1)
level_0 = stock_concat.columns.get_level_values(0).astype(str)
stock_concat.columns = level_1 + '_' + level_0
stock_join.equals(stock_concat)

True

In [50]:
stocks_2016.merge(stocks_2017, left_index=True,
    right_index=True)

Unnamed: 0_level_0,Shares_x,Low_x,High_x,Shares_y,Low_y,High_y
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,80,95,110,50,120,140
TSLA,50,80,130,100,100,300


In [34]:
step1 = stocks_2016.merge(stocks_2017, left_index=True,
    right_index=True, how='outer',
    suffixes=('_2016', '_2017'))
stock_merge = step1.merge(stocks_2018.add_suffix('_2018'),
    left_index=True, right_index=True,
    how='outer')
stock_concat.equals(stock_merge)

False

In [51]:
names = ['prices', 'transactions']
food_tables = [pd.read_csv(f'datasets/food_{name}.csv') for name in names]
food_prices, food_transactions = food_tables
food_prices

Unnamed: 0,item,store,price,Date
0,pear,A,0.99,2017
1,pear,B,1.99,2017
2,peach,A,2.99,2017
3,peach,B,3.49,2017
4,banana,A,0.39,2017
5,banana,B,0.49,2017
6,steak,A,5.99,2017
7,steak,B,6.99,2017
8,steak,B,4.99,2015


In [36]:
food_transactions

Unnamed: 0,custid,item,store,quantity
0,1,pear,A,5
1,1,banana,A,10
2,2,steak,B,3
3,2,pear,B,1
4,2,peach,B,2
5,2,steak,B,1
6,2,coconut,B,4


In [37]:
food_transactions.merge(food_prices, on=['item', 'store'])    

Unnamed: 0,custid,item,store,quantity,price,Date
0,1,pear,A,5,0.99,2017
1,1,banana,A,10,0.39,2017
2,2,steak,B,3,6.99,2017
3,2,steak,B,3,4.99,2015
4,2,steak,B,1,6.99,2017
5,2,steak,B,1,4.99,2015
6,2,pear,B,1,1.99,2017
7,2,peach,B,2,3.49,2017


In [38]:
food_transactions.merge(food_prices.query('Date == 2017'),
    how='left')

Unnamed: 0,custid,item,store,quantity,price,Date
0,1,pear,A,5,0.99,2017.0
1,1,banana,A,10,0.39,2017.0
2,2,steak,B,3,6.99,2017.0
3,2,pear,B,1,1.99,2017.0
4,2,peach,B,2,3.49,2017.0
5,2,steak,B,1,6.99,2017.0
6,2,coconut,B,4,,


In [52]:
food_prices_join = food_prices.query('Date == 2017').set_index(['item', 'store'])
food_prices_join    

Unnamed: 0_level_0,Unnamed: 1_level_0,price,Date
item,store,Unnamed: 2_level_1,Unnamed: 3_level_1
pear,A,0.99,2017
pear,B,1.99,2017
peach,A,2.99,2017
peach,B,3.49,2017
banana,A,0.39,2017
banana,B,0.49,2017
steak,A,5.99,2017
steak,B,6.99,2017


In [53]:
food_transactions.join(food_prices_join, on=['item', 'store'])

Unnamed: 0,custid,item,store,quantity,price,Date
0,1,pear,A,5,0.99,2017.0
1,1,banana,A,10,0.39,2017.0
2,2,steak,B,3,6.99,2017.0
3,2,pear,B,1,1.99,2017.0
4,2,peach,B,2,3.49,2017.0
5,2,steak,B,1,6.99,2017.0
6,2,coconut,B,4,,


In [55]:
pd.concat([food_transactions.set_index(['item', 'store']),
           food_prices.set_index(['item', 'store'])],
          ) #  axis='columns'

Unnamed: 0_level_0,Unnamed: 1_level_0,custid,quantity,price,Date
item,store,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
pear,A,1.0,5.0,,
banana,A,1.0,10.0,,
steak,B,2.0,3.0,,
pear,B,2.0,1.0,,
peach,B,2.0,2.0,,
steak,B,2.0,1.0,,
coconut,B,2.0,4.0,,
pear,A,,,0.99,2017.0
pear,B,,,1.99,2017.0
peach,A,,,2.99,2017.0


### How it works...

### There's more...

In [None]:
# import glob
# df_list = []
# for filename in glob.glob('datasets/gas prices/*.csv'):
#     df_list.append(pd.read_csv(filename, index_col='Week',
#     parse_dates=['Week']))
# gas = pd.concat(df_list, axis='columns')
# gas

## Connecting to SQL databases

### How to do it...

In [60]:
from sqlalchemy import create_engine
engine = create_engine('sqlite:///datasets/chinook.db')

In [61]:
tracks = pd.read_sql_table('tracks', engine)
tracks

Unnamed: 0,TrackId,Name,AlbumId,MediaTypeId,GenreId,Composer,Milliseconds,Bytes,UnitPrice
0,1,For Those About To Rock (We Salute You),1,1,1,"Angus Young, Malcolm Young, Brian Johnson",343719,11170334,0.99
1,2,Balls to the Wall,2,2,1,,342562,5510424,0.99
2,3,Fast As a Shark,3,2,1,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",230619,3990994,0.99
3,4,Restless and Wild,3,2,1,"F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...",252051,4331779,0.99
4,5,Princess of the Dawn,3,2,1,Deaffy & R.A. Smith-Diesel,375418,6290521,0.99
...,...,...,...,...,...,...,...,...,...
3498,3499,Pini Di Roma (Pinien Von Rom) \ I Pini Della V...,343,2,24,,286741,4718950,0.99
3499,3500,"String Quartet No. 12 in C Minor, D. 703 ""Quar...",344,2,24,Franz Schubert,139200,2283131,0.99
3500,3501,"L'orfeo, Act 3, Sinfonia (Orchestra)",345,2,24,Claudio Monteverdi,66639,1189062,0.99
3501,3502,"Quintet for Horn, Violin, 2 Violas, and Cello ...",346,2,24,Wolfgang Amadeus Mozart,221331,3665114,0.99


In [62]:
(pd.read_sql_table('genres', engine)
     .merge(tracks[['GenreId', 'Milliseconds']],
            on='GenreId', how='left') 
     .drop('GenreId', axis='columns')
)

Unnamed: 0,Name,Milliseconds
0,Rock,343719
1,Rock,342562
2,Rock,230619
3,Rock,252051
4,Rock,375418
...,...,...
3498,Classical,286741
3499,Classical,139200
3500,Classical,66639
3501,Classical,221331


In [63]:
(pd.read_sql_table('genres', engine)
     .merge(tracks[['GenreId', 'Milliseconds']],
            on='GenreId', how='left') 
     .drop('GenreId', axis='columns')
     .groupby('Name')
     ['Milliseconds']
     .mean()
     .pipe(lambda s_: pd.to_timedelta(s_, unit='ms'))
     .dt.floor('s')
     .sort_values()
)

Name
Rock And Roll        0 days 00:02:14
Opera                0 days 00:02:54
Hip Hop/Rap          0 days 00:02:58
Easy Listening       0 days 00:03:09
Bossa Nova           0 days 00:03:39
R&B/Soul             0 days 00:03:40
World                0 days 00:03:44
Pop                  0 days 00:03:49
Latin                0 days 00:03:52
Alternative & Punk   0 days 00:03:54
Soundtrack           0 days 00:04:04
Reggae               0 days 00:04:07
Alternative          0 days 00:04:24
Blues                0 days 00:04:30
Rock                 0 days 00:04:43
Jazz                 0 days 00:04:51
Classical            0 days 00:04:53
Heavy Metal          0 days 00:04:57
Electronica/Dance    0 days 00:05:02
Metal                0 days 00:05:09
Comedy               0 days 00:26:25
TV Shows             0 days 00:35:45
Drama                0 days 00:42:55
Science Fiction      0 days 00:43:45
Sci Fi & Fantasy     0 days 00:48:31
Name: Milliseconds, dtype: timedelta64[ns]

In [64]:
cust = pd.read_sql_table('customers', engine,
    columns=['CustomerId','FirstName',
    'LastName'])
invoice = pd.read_sql_table('invoices', engine,
    columns=['InvoiceId','CustomerId'])
ii = pd.read_sql_table('invoice_items', engine,
    columns=['InvoiceId', 'UnitPrice', 'Quantity'])
(cust
    .merge(invoice, on='CustomerId') 
    .merge(ii, on='InvoiceId')
)

Unnamed: 0,CustomerId,FirstName,LastName,InvoiceId,UnitPrice,Quantity
0,1,Luís,Gonçalves,98,1.99,1
1,1,Luís,Gonçalves,98,1.99,1
2,1,Luís,Gonçalves,121,0.99,1
3,1,Luís,Gonçalves,121,0.99,1
4,1,Luís,Gonçalves,121,0.99,1
...,...,...,...,...,...,...
2235,59,Puja,Srivastava,284,0.99,1
2236,59,Puja,Srivastava,284,0.99,1
2237,59,Puja,Srivastava,284,0.99,1
2238,59,Puja,Srivastava,284,0.99,1


In [65]:
(cust
    .merge(invoice, on='CustomerId') 
    .merge(ii, on='InvoiceId')
    .assign(Total=lambda df_:df_.Quantity * df_.UnitPrice)
    .groupby(['CustomerId', 'FirstName', 'LastName'])
    ['Total']
    .sum()
    .sort_values(ascending=False) 
)

CustomerId  FirstName  LastName    
6           Helena     Holý            49.62
26          Richard    Cunningham      47.62
57          Luis       Rojas           46.62
45          Ladislav   Kovács          45.62
46          Hugh       O'Reilly        45.62
37          Fynn       Zimmermann      43.62
24          Frank      Ralston         43.62
28          Julia      Barnett         43.62
25          Victor     Stevens         42.62
7           Astrid     Gruber          42.62
44          Terhi      Hämäläinen      41.62
43          Isabelle   Mercier         40.62
5           František  Wichterlová     40.62
48          Johannes   Van der Berg    40.62
20          Dan        Miller          39.62
42          Wyatt      Girard          39.62
34          João       Fernandes       39.62
22          Heather    Leacock         39.62
1           Luís       Gonçalves       39.62
17          Jack       Smith           39.62
3           François   Tremblay        39.62
4           Bjørn  

### How it works...

### There's more...

In [66]:
sql_string1 = '''
SELECT
    Name,
    time(avg(Milliseconds) / 1000, 'unixepoch') as avg_time
FROM (
      SELECT
          g.Name,
          t.Milliseconds
      FROM
          genres as g
      JOIN
          tracks as t on
          g.genreid == t.genreid
     )
GROUP BY Name
ORDER BY avg_time'''
pd.read_sql_query(sql_string1, engine)

Unnamed: 0,Name,avg_time
0,Rock And Roll,00:02:14
1,Opera,00:02:54
2,Hip Hop/Rap,00:02:58
3,Easy Listening,00:03:09
4,Bossa Nova,00:03:39
5,R&B/Soul,00:03:40
6,World,00:03:44
7,Pop,00:03:49
8,Latin,00:03:52
9,Alternative & Punk,00:03:54


In [67]:
sql_string2 = '''
   SELECT
         c.customerid,
         c.FirstName,
         c.LastName,
         sum(ii.quantity * ii.unitprice) as Total
   FROM
        customers as c
   JOIN
        invoices as i
        on c.customerid = i.customerid
   JOIN
       invoice_items as ii
       on i.invoiceid = ii.invoiceid
   GROUP BY
       c.customerid, c.FirstName, c.LastName
   ORDER BY
       Total desc'''

In [68]:
pd.read_sql_query(sql_string2, engine)

Unnamed: 0,CustomerId,FirstName,LastName,Total
0,6,Helena,Holý,49.62
1,26,Richard,Cunningham,47.62
2,57,Luis,Rojas,46.62
3,45,Ladislav,Kovács,45.62
4,46,Hugh,O'Reilly,45.62
5,37,Fynn,Zimmermann,43.62
6,24,Frank,Ralston,43.62
7,28,Julia,Barnett,43.62
8,25,Victor,Stevens,42.62
9,7,Astrid,Gruber,42.62
