In [9]:
import pandas as pd
import numpy as np

In [10]:
# Load the sales extract, parsing the `date` column into datetime64 values.
sales = pd.read_csv(
    'sales_082020.csv',
    parse_dates=['date'],
)

# Preview the first rows.
sales.head()

Unnamed: 0,buyer_id,buyer_country,seller_id,seller_country,product_code,category,brand,currency,price,date
0,qzpjsi9t0o,GB,6oufpaot,GB,ydecnydr6obf,Dresses,,GBP,19.11,2020-08-03
1,o79pns1qwo,GB,50xki2yg,GB,38chj507o6h2,Outerwear,Adidas Originals,GBP,15.92,2020-08-09
2,sjgbjdkhqx,GB,50xki2yg,GB,i9ynyxj5tdp8,Tops - Mens,,GBP,6.37,2020-08-07
3,o79pns1qwo,GB,s0p322hh,GB,t50xe7moye8v,Tops - Mens,Berghaus,GBP,25.27,2020-08-09
4,o79pns1qwo,GB,dpkx192v,GB,hp4r9bjq68af,Bottoms - Womens,,GBP,8.9,2020-08-09


In [11]:
# Summarise the sales frame: column dtypes, non-null counts and memory usage.
sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237671 entries, 0 to 237670
Data columns (total 10 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   buyer_id        237671 non-null  object        
 1   buyer_country   237671 non-null  object        
 2   seller_id       237671 non-null  object        
 3   seller_country  237671 non-null  object        
 4   product_code    237671 non-null  object        
 5   category        237655 non-null  object        
 6   brand           119330 non-null  object        
 7   currency        237671 non-null  object        
 8   price           237671 non-null  float64       
 9   date            237671 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(8)
memory usage: 18.1+ MB


In [12]:
# Report the frame's dimensions in a sentence.
n_rows, n_cols = sales.shape
print(f"{n_rows} rows & {n_cols} columns in this dataset.")

237671 rows & 10 columns in this dataset.


In [5]:
# Distinct currency codes in which sales were priced, in order of first appearance.
sales['currency'].unique()

array(['GBP', 'USD', 'AUD', 'EUR', 'CAD', 'NZD', 'HKD', 'CHF', 'SEK',
       'DKK', 'ILS', 'PLN', 'THB', 'MXN', 'JPY', 'NOK', 'SGD', 'MYR',
       'TWD', 'HUF', 'CZK', 'BRL'], dtype=object)

In [13]:
# Count the distinct seller ids and report the figure in a sentence.
n_sellers = sales['seller_id'].nunique()
print(f"{n_sellers} sellers in dataset.")

114227 sellers in dataset.


In [14]:
# Load the exchange-rate table, parsing the `date` column into datetime64 values.
currency_rates = pd.read_csv(
    'currency_rates_082020_EUR.csv',
    parse_dates=['date'],
)

# Preview the first rows.
currency_rates.head()

Unnamed: 0,currency,date,rate
0,EUR,2020-08-01,1.0
1,EUR,2020-08-02,1.0
2,EUR,2020-08-03,1.0
3,EUR,2020-08-04,1.0
4,EUR,2020-08-05,1.0


In [15]:
# Summarise the rates frame: column dtypes, non-null counts and memory usage.
currency_rates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   currency  220 non-null    object        
 1   date      220 non-null    datetime64[ns]
 2   rate      220 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 5.3+ KB


In [16]:
# Distinct currency codes covered by the rates table, in order of first appearance.
currency_rates['currency'].unique()

array(['EUR', 'GBP', 'USD', 'AUD', 'CAD', 'NZD', 'HKD', 'CHF', 'SEK',
       'DKK', 'ILS', 'PLN', 'THB', 'MXN', 'JPY', 'NOK', 'SGD', 'MYR',
       'TWD', 'HUF', 'CZK', 'BRL'], dtype=object)

In [17]:
# Attach to each sale the exchange rate for its currency on its sale date.
# how='left' keeps every sale. validate='many_to_one' makes pandas raise MergeError if the
# rates table ever holds more than one row per (currency, date); without it such duplicates
# would silently duplicate sales rows and inflate every total computed from this frame.
sales_and_currency = sales.merge(
    currency_rates,
    how='left',
    on=['currency', 'date'],
    validate='many_to_one',
)

# A sale without a matching rate gets rate=NaN, and NaN values are skipped by .sum(),
# so fail loudly here instead of silently under-reporting revenue later.
assert sales_and_currency['rate'].notna().all(), \
    "some sales have no exchange rate for their currency/date"

sales_and_currency.head()


Unnamed: 0,buyer_id,buyer_country,seller_id,seller_country,product_code,category,brand,currency,price,date,rate
0,qzpjsi9t0o,GB,6oufpaot,GB,ydecnydr6obf,Dresses,,GBP,19.11,2020-08-03,0.89935
1,o79pns1qwo,GB,50xki2yg,GB,38chj507o6h2,Outerwear,Adidas Originals,GBP,15.92,2020-08-09,0.90278
2,sjgbjdkhqx,GB,50xki2yg,GB,i9ynyxj5tdp8,Tops - Mens,,GBP,6.37,2020-08-07,0.90286
3,o79pns1qwo,GB,s0p322hh,GB,t50xe7moye8v,Tops - Mens,Berghaus,GBP,25.27,2020-08-09,0.90278
4,o79pns1qwo,GB,dpkx192v,GB,hp4r9bjq68af,Bottoms - Womens,,GBP,8.9,2020-08-09,0.90278


In [18]:
# Distinct currency codes that appear in sales, e.g. to compare with the currencies in the rates table.
sales['currency'].unique()

array(['GBP', 'USD', 'AUD', 'EUR', 'CAD', 'NZD', 'HKD', 'CHF', 'SEK',
       'DKK', 'ILS', 'PLN', 'THB', 'MXN', 'JPY', 'NOK', 'SGD', 'MYR',
       'TWD', 'HUF', 'CZK', 'BRL'], dtype=object)

In [19]:
# Express every price in euros by multiplying it with the inverse of the merged `rate`.
# NOTE(review): this presumes `rate` is quoted as units of the sale currency per 1 EUR — confirm against the rates source.
euro_per_unit = 1 / sales_and_currency['rate']
sales_and_currency['price_€'] = sales_and_currency['price'] * euro_per_unit

In [20]:
# Sum the euro prices and print the total with thousands separators and no decimals.
total_eur = sales_and_currency['price_€'].sum()
print("Total sales are €{:,.0f}.".format(total_eur))

Total sales are €6,471,511.


In [21]:
# Euro revenue per brand, highest first, rounded to two decimals.
# Rows with a missing brand are left out because groupby drops NaN keys by default.
(
    sales_and_currency
    .groupby('brand')[['price_€']]
    .sum()
    .sort_values('price_€', ascending=False)
    .round(2)
)

Unnamed: 0_level_0,price_€
brand,Unnamed: 1_level_1
Nike,385022.41
Adidas,114211.54
American Vintage,89939.91
Brandy Melville,87735.66
Dr. Martens,81491.62
...,...
butter LONDON,5.46
Ann Demeulemeester,5.26
Popular Sports,4.45
360 Cashmere,2.85


In [22]:
# Brand with the highest total euro revenue.
# idxmax() returns the index label of the largest per-brand sum directly, so there is no
# need to sort every brand just to read the first label. Rows with a missing brand are
# excluded because groupby drops NaN keys by default.
(
    sales_and_currency
    .groupby('brand')['price_€']
    .sum()
    .idxmax()
)

'Nike'

In [23]:
# Five categories with the most rows that carry a brand.
# NOTE(review): count() tallies only non-null `brand` values, so unbranded sales are not counted here — confirm that is intended.
(
    sales_and_currency
    .groupby('category')[['brand']]
    .count()
    .sort_values('brand', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,brand
category,Unnamed: 1_level_1
Tops - Womens,23844
Tops - Mens,20942
Bottoms - Womens,18252
Shoes,16532
Dresses,10604


In [24]:
# Count the distinct product codes among the merged sales rows.
sales_and_currency['product_code'].nunique()

226311

In [25]:
# Jewellery sales that carry no brand.
is_jewellery = sales_and_currency['category'].eq('Jewellery')
has_no_brand = sales_and_currency['brand'].isna()
sales_and_currency[is_jewellery & has_no_brand]

Unnamed: 0,buyer_id,buyer_country,seller_id,seller_country,product_code,category,brand,currency,price,date,rate,price_€
23,94lh9gu3bu,US,dhsa2q6o,US,1m846u2l2dyl,Jewellery,,USD,16.29,2020-08-03,1.176250,13.849097
48,u8da411658,AU,qn7p4cv2,AU,dd4d4p2h4us2,Jewellery,,AUD,12.52,2020-08-07,1.646397,7.604484
49,9exo58j839,AU,qn7p4cv2,AU,6nogqtmgfjsn,Jewellery,,AUD,8.48,2020-08-09,1.646306,5.150926
50,9exo58j839,AU,qn7p4cv2,AU,w5ixs8y20z3n,Jewellery,,AUD,5.58,2020-08-09,1.646306,3.389406
51,9exo58j839,AU,qn7p4cv2,AU,g5usn39z8b7u,Jewellery,,AUD,5.37,2020-08-09,1.646306,3.261848
...,...,...,...,...,...,...,...,...,...,...,...,...
237545,rabu7nz8gn,US,rv5nmiwq,US,ikfosml5pjas,Jewellery,,USD,21.19,2020-08-05,1.187255,17.847893
237568,ytjobx2wzg,GB,qib5sh0l,GB,tn00dvezdp43,Jewellery,,GBP,1.15,2020-08-05,0.904500,1.271421
237569,ytjobx2wzg,GB,qib5sh0l,GB,eykndnmcb4g6,Jewellery,,GBP,1.15,2020-08-05,0.904500,1.271421
237616,p0jol2ieus,US,4pi8qok2,US,0ipj5zmes1cu,Jewellery,,USD,18.67,2020-08-08,1.178700,15.839484


In [26]:
# Number of distinct products among Jewellery sales that carry no brand.
unbranded_jewellery = (
    sales_and_currency['category'].eq('Jewellery')
    & sales_and_currency['brand'].isna()
)
sales_and_currency.loc[unbranded_jewellery, 'product_code'].nunique()

10931