<h3>Import modules and packages</h3>

In [1]:
import pandas as pd

<h3>Load raw data</h3>

In [5]:
# Read the three raw CSV files (categories, products, suppliers) into DataFrames.
category_df, product_df, supplier_df = map(
    pd.read_csv,
    ('../data/categories.csv', '../data/products.csv', '../data/suppliers.csv'),
)

# Report the (rows, columns) shape of each loaded dataset, one per line.
print(
    f'category_df = {category_df.shape}',
    f'product_df = {product_df.shape}',
    f'supplier_df = {supplier_df.shape}',
    sep='\n',
)

category_df = (8, 4)
product_df = (77, 10)
supplier_df = (29, 12)


In [7]:
# Preview four randomly chosen category rows (no random_state, so rows vary per run).
category_df.sample(n=4)

Unnamed: 0,categoryID,categoryName,description,picture
5,6,Meat/Poultry,Prepared meats,0x151C2F00020000000D000E0014002100FFFFFFFF4269...
1,2,Condiments,Sweet and savory sauces relishes spreads and s...,0x151C2F00020000000D000E0014002100FFFFFFFF4269...
7,8,Seafood,Seaweed and fish,0x151C2F00020000000D000E0014002100FFFFFFFF4269...
3,4,Dairy Products,Cheeses,0x151C2F00020000000D000E0014002100FFFFFFFF4269...


In [8]:
# Preview four randomly chosen product rows (no random_state, so rows vary per run).
product_df.sample(n=4)

Unnamed: 0,productID,productName,supplierID,categoryID,quantityPerUnit,unitPrice,unitsInStock,unitsOnOrder,reorderLevel,discontinued
74,75,Rhönbräu Klosterbier,12,1,24 - 0.5 l bottles,7.75,125,0,25,0
61,62,Tarte au sucre,29,3,48 pies,49.3,17,0,0,0
45,46,Spegesild,21,8,4 - 450 g glasses,12.0,95,0,0,0
11,12,Queso Manchego La Pastora,5,4,10 - 500 g pkgs.,38.0,86,0,0,0


In [9]:
# Preview four randomly chosen supplier rows (no random_state, so rows vary per run).
supplier_df.sample(n=4)

Unnamed: 0,supplierID,companyName,contactName,contactTitle,address,city,region,postalCode,country,phone,fax,homePage
15,16,Bigfoot Breweries,Cheryl Saylor,Regional Account Rep.,3400 - 8th Avenue Suite 210,Bend,OR,97101,USA,(503) 555-9931,,
20,21,Lyngbysild,Niels Petersen,Sales Manager,Lyngbysild Fiskebakken 10,Lyngby,,2800,Denmark,43844108,43844115,
25,26,Pasta Buttini s.r.l.,Giovanni Giudici,Order Administrator,Via dei Gelsomini 153,Salerno,,84100,Italy,(089) 6547665,(089) 6547667,
11,12,Plutzer Lebensmittelgroßmärkte AG,Martin Bein,International Marketing Mgr.,Bogenallee 51,Frankfurt,,60439,Germany,(069) 992755,,Plutzer (on the World Wide Web)#http://www.mic...


<h3>Combine dataframes</h3>

In [11]:
# Attach the category attributes to every product via categoryID.
# how='left' keeps every product row even when its categoryID has no match in
# category_df, so a missing category surfaces as NaN in a later isna() check
# instead of the row being dropped silently by the default inner join.
# validate='many_to_one' makes pandas raise MergeError if categoryID is
# duplicated in category_df, which would otherwise duplicate product rows.
product_category_df = pd.merge(
    product_df,
    category_df,
    on='categoryID',
    how='left',
    validate='many_to_one',
)
# Show the first merged row as a quick sanity check.
product_category_df.head(1)

Unnamed: 0,productID,productName,supplierID,categoryID,quantityPerUnit,unitPrice,unitsInStock,unitsOnOrder,reorderLevel,discontinued,categoryName,description,picture
0,1,Chai,1,1,10 boxes x 20 bags,18.0,39,0,10,0,Beverages,Soft drinks coffees teas beers and ales,0x151C2F00020000000D000E0014002100FFFFFFFF4269...


In [14]:
# List the column labels of the merged product/category frame.
product_category_df.columns

Index(['productID', 'productName', 'supplierID', 'categoryID',
       'quantityPerUnit', 'unitPrice', 'unitsInStock', 'unitsOnOrder',
       'reorderLevel', 'discontinued', 'categoryName', 'description',
       'picture'],
      dtype='object')

In [15]:
# Count missing values per column of the product/category frame.
product_category_null_mask = product_category_df.isna()
product_category_null_mask.sum()

productID          0
productName        0
supplierID         0
categoryID         0
quantityPerUnit    0
unitPrice          0
unitsInStock       0
unitsOnOrder       0
reorderLevel       0
discontinued       0
categoryName       0
description        0
picture            0
dtype: int64

In [16]:
# Look at two randomly chosen supplier rows (no random_state, so rows vary per run).
supplier_df.sample(n=2)

Unnamed: 0,supplierID,companyName,contactName,contactTitle,address,city,region,postalCode,country,phone,fax,homePage
7,8,Specialty Biscuits Ltd.,Peter Wilson,Sales Representative,29 King's Way,Manchester,,M14 GSD,UK,(161) 555-4448,,
8,9,PB Knäckebröd AB,Lars Peterson,Sales Agent,Kaloadagatan 13,Göteborg,,S-345 67,Sweden,031-987 65 43,031-987 65 91,


In [18]:
# Left-join the supplier details onto the product/category frame via supplierID,
# keeping every row of product_category_df.
product_category_supplier_df = product_category_df.merge(
    supplier_df, how='left', on='supplierID'
)
# Preview four randomly chosen rows of the combined frame.
product_category_supplier_df.sample(n=4)

Unnamed: 0,productID,productName,supplierID,categoryID,quantityPerUnit,unitPrice,unitsInStock,unitsOnOrder,reorderLevel,discontinued,...,contactName,contactTitle,address,city,region,postalCode,country,phone,fax,homePage
35,36,Inlagd Sill,17,8,24 - 250 g jars,19.0,112,0,20,0,...,Michael Björn,Sales Representative,Brovallavägen 231,Stockholm,,S-123 45,Sweden,08-123 45 67,,
52,53,Perth Pasties,24,6,48 pieces,32.8,0,0,0,1,...,Wendy Mackenzie,Sales Representative,170 Prince Edward Parade Hunter's Hill,Sydney,NSW,2042,Australia,(02) 555-5914,(02) 555-4873,G'day Mate (on the World Wide Web)#http://www....
19,20,Sir Rodney's Marmalade,8,3,30 gift boxes,81.0,40,0,0,0,...,Peter Wilson,Sales Representative,29 King's Way,Manchester,,M14 GSD,UK,(161) 555-4448,,
18,19,Teatime Chocolate Biscuits,8,3,10 boxes x 12 pieces,9.2,25,0,5,0,...,Peter Wilson,Sales Representative,29 King's Way,Manchester,,M14 GSD,UK,(161) 555-4448,,


In [19]:
# Count missing values per column of the fully merged frame.
missing_per_column = product_category_supplier_df.isna().sum()
missing_per_column

productID           0
productName         0
supplierID          0
categoryID          0
quantityPerUnit     0
unitPrice           0
unitsInStock        0
unitsOnOrder        0
reorderLevel        0
discontinued        0
categoryName        0
description         0
picture             0
companyName         0
contactName         0
contactTitle        0
address             0
city                0
region             51
postalCode          0
country             0
phone               0
fax                46
homePage           59
dtype: int64

<h3>Fill null values</h3>

In [20]:
# Fill the missing values in region, fax and homePage with the placeholder
# "Unknown". fillna is the dedicated pandas API for missing values and treats
# NaN, None and pd.NA alike, whereas replace() keyed on pd.NA depends on how
# the missing value happens to be stored. One call over all three columns also
# removes the copy-pasted statements.
columns_with_missing_values = ["region", "fax", "homePage"]
product_category_supplier_df[columns_with_missing_values] = (
    product_category_supplier_df[columns_with_missing_values].fillna("Unknown")
)

# Re-count missing values per column to confirm none remain.
product_category_supplier_df.isna().sum()

productID          0
productName        0
supplierID         0
categoryID         0
quantityPerUnit    0
unitPrice          0
unitsInStock       0
unitsOnOrder       0
reorderLevel       0
discontinued       0
categoryName       0
description        0
picture            0
companyName        0
contactName        0
contactTitle       0
address            0
city               0
region             0
postalCode         0
country            0
phone              0
fax                0
homePage           0
dtype: int64