In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
product_data = pd.read_csv('jumia_products_raw.csv')
product_data.head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,AILYONS FK-0301 Stainless Steel 1.8L Electric ...,KSh 599,KSh 628,5%,/ailyons-fk-0301-stainless-steel-1.8l-electric...
1,"Vitron HTC4388FS - 43"" Smart Android Frameles...","KSh 19,499","KSh 28,599",32%,/vitron-htc4388fs-43-smart-android-frameless-t...
2,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,"KSh 1,022","KSh 1,460",30%,/nivea-radiant-beauty-advanced-care-lotion-for...
3,"Vitron HTC3200S 32"" Smart Frameless Android LE...","KSh 11,599","KSh 18,069",36%,/vitron-htc3200s-32-smart-frameless-android-le...
4,NIVEA UV Face Shine Control Cream SPF 50 - 50ml,KSh 849,"KSh 1,300",35%,/nivea-uv-face-shine-control-cream-spf-50-50ml...
5,NIVEA Perfect & Radiant Even Tone Day And Nigh...,KSh 849,"KSh 1,800",53%,/nivea-perfect-radiant-even-tone-day-and-night...
6,"NIVEA Pearl & Beauty Anti-Perspirant Rollon, 4...",KSh 692,"KSh 1,040",33%,/nivea-pearl-beauty-anti-perspirant-rollon-48h...
7,RichRipple LED Display Bluetooth Headsets 9D S...,KSh 469,KSh 920,49%,/richripple-led-display-bluetooth-headsets-9d-...
8,Starlink Standard Actuated Kit,"KSh 39,500","KSh 100,000",61%,/starlink-standard-actuated-kit-190334730.html
9,Garnier Even & Matte Vitamin C Booster Serum ...,"KSh 1,099","KSh 1,750",37%,/garnier-even-matte-vitamin-c-booster-serum-30...


In [3]:
product_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1886 entries, 0 to 1885
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Item name      1886 non-null   object
 1   Current price  1886 non-null   object
 2   Old price      1886 non-null   object
 3   Discount       1886 non-null   object
 4   Link           1886 non-null   object
dtypes: object(5)
memory usage: 73.8+ KB


## Data cleaning

In [4]:
# changin discount column to float
def change_disc_col(df):
    df['Discount'] = df['Discount'].str.replace('%','')
    df['Discount'] = df['Discount'].astype(float)
    return df.info()

change_disc_col(product_data)



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1886 entries, 0 to 1885
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1886 non-null   object 
 1   Current price  1886 non-null   object 
 2   Old price      1886 non-null   object 
 3   Discount       1886 non-null   float64
 4   Link           1886 non-null   object 
dtypes: float64(1), object(4)
memory usage: 73.8+ KB


In [5]:
# filtering out prices that have a range
ranged_items = product_data[product_data['Current price'].str.contains('-') | product_data['Old price'].str.contains('-')].copy()
# removing the rows from the original dataset
new_product_data = product_data.drop(ranged_items.index)
new_product_data

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,AILYONS FK-0301 Stainless Steel 1.8L Electric ...,KSh 599,KSh 628,5.0,/ailyons-fk-0301-stainless-steel-1.8l-electric...
1,"Vitron HTC4388FS - 43"" Smart Android Frameles...","KSh 19,499","KSh 28,599",32.0,/vitron-htc4388fs-43-smart-android-frameless-t...
2,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,"KSh 1,022","KSh 1,460",30.0,/nivea-radiant-beauty-advanced-care-lotion-for...
3,"Vitron HTC3200S 32"" Smart Frameless Android LE...","KSh 11,599","KSh 18,069",36.0,/vitron-htc3200s-32-smart-frameless-android-le...
4,NIVEA UV Face Shine Control Cream SPF 50 - 50ml,KSh 849,"KSh 1,300",35.0,/nivea-uv-face-shine-control-cream-spf-50-50ml...
...,...,...,...,...,...
1881,Garnier Brightening Night Serum With 10% Pure ...,"KSh 3,229","KSh 3,399",5.0,/garnier-brightening-night-serum-with-10-pure-...
1882,500in1 Mini Classic Games Double Game Player G...,"KSh 1,805","KSh 2,318",22.0,/generic-500in1-mini-classic-games-double-game...
1883,New 2023 Super Bright 500W UFO Shape LED Solar...,"KSh 9,000","KSh 14,500",38.0,/generic-new-2023-super-bright-500w-ufo-shape-...
1884,Flip Mobile Phone Double SIM Large Font Large ...,"KSh 3,970","KSh 5,700",30.0,/generic-flip-mobile-phone-double-sim-large-fo...


In [6]:
ranged_items

Unnamed: 0,Item name,Current price,Old price,Discount,Link
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,"KSh 1,059 - KSh 1,099","KSh 1,371 - KSh 1,913",43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...
78,Rugged Mommy Jeans Ladies Denim Jeans,"KSh 1,389 - KSh 1,489","KSh 1,500 - KSh 1,600",13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,"KSh 1,089 - KSh 1,289","KSh 1,500",27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...
82,Fashion Official Turkey Mens Longsleeve Shirt ...,"KSh 789 - KSh 1,189","KSh 1,500 - KSh 1,599",47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...
90,Fashion Navy Blue Official Mens Longsleeve Shi...,KSh 889 - KSh 929,"KSh 949 - KSh 1,500",41.0,/fashion-navy-blue-official-mens-longsleeve-sh...
...,...,...,...,...,...
1808,Fashion Sacha Buttercup Setting Powder,KSh 285 - KSh 500,KSh 500,43.0,/sacha-buttercup-setting-powder-fashion-mpg126...
1836,Alcohol Is Coming Hip Flask 210 ML Stainless S...,"KSh 1,900 - KSh 2,470","KSh 2,000 - KSh 2,600",5.0,/alcohol-is-coming-hip-flask-210-ml-stainless-...
1838,"Name Plate Small, Medium & Big",KSh 760 - KSh 950,"KSh 800 - KSh 1,000",5.0,/generic-name-plate-small-medium-big-161811911...
1855,Curtains Turquoise Blue 2Pcs 1.5M Each + FREE ...,"KSh 3,499 - KSh 4,499","KSh 4,700 - KSh 6,000",30.0,/generic-curtains-turquoise-blue-2pcs-1.5m-eac...


In [7]:
split_values = ranged_items['Current price'].str.split('-', expand=True)
ranged_items.loc[:, 'price_start_value'] = split_values[0].str.strip()
ranged_items.loc[:, 'price_end_value'] = split_values[1].str.strip()
ranged_items['price_end_value'].fillna(ranged_items['price_start_value'], inplace=True)

ranged_items

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ranged_items['price_end_value'].fillna(ranged_items['price_start_value'], inplace=True)


Unnamed: 0,Item name,Current price,Old price,Discount,Link,price_start_value,price_end_value
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,"KSh 1,059 - KSh 1,099","KSh 1,371 - KSh 1,913",43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,"KSh 1,059","KSh 1,099"
78,Rugged Mommy Jeans Ladies Denim Jeans,"KSh 1,389 - KSh 1,489","KSh 1,500 - KSh 1,600",13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,"KSh 1,389","KSh 1,489"
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,"KSh 1,089 - KSh 1,289","KSh 1,500",27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,"KSh 1,089","KSh 1,289"
82,Fashion Official Turkey Mens Longsleeve Shirt ...,"KSh 789 - KSh 1,189","KSh 1,500 - KSh 1,599",47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,KSh 789,"KSh 1,189"
90,Fashion Navy Blue Official Mens Longsleeve Shi...,KSh 889 - KSh 929,"KSh 949 - KSh 1,500",41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,KSh 889,KSh 929
...,...,...,...,...,...,...,...
1808,Fashion Sacha Buttercup Setting Powder,KSh 285 - KSh 500,KSh 500,43.0,/sacha-buttercup-setting-powder-fashion-mpg126...,KSh 285,KSh 500
1836,Alcohol Is Coming Hip Flask 210 ML Stainless S...,"KSh 1,900 - KSh 2,470","KSh 2,000 - KSh 2,600",5.0,/alcohol-is-coming-hip-flask-210-ml-stainless-...,"KSh 1,900","KSh 2,470"
1838,"Name Plate Small, Medium & Big",KSh 760 - KSh 950,"KSh 800 - KSh 1,000",5.0,/generic-name-plate-small-medium-big-161811911...,KSh 760,KSh 950
1855,Curtains Turquoise Blue 2Pcs 1.5M Each + FREE ...,"KSh 3,499 - KSh 4,499","KSh 4,700 - KSh 6,000",30.0,/generic-curtains-turquoise-blue-2pcs-1.5m-eac...,"KSh 3,499","KSh 4,499"


In [8]:
split_values = ranged_items['Old price'].str.split('-', expand=True)
ranged_items.loc[:, 'old_price_start_value'] = split_values[0].str.strip()
ranged_items.loc[:, 'old_price_end_value'] = split_values[1].str.strip()
ranged_items['old_price_end_value'].fillna(ranged_items['old_price_start_value'], inplace=True)

ranged_items

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ranged_items['old_price_end_value'].fillna(ranged_items['old_price_start_value'], inplace=True)


Unnamed: 0,Item name,Current price,Old price,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,"KSh 1,059 - KSh 1,099","KSh 1,371 - KSh 1,913",43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,"KSh 1,059","KSh 1,099","KSh 1,371","KSh 1,913"
78,Rugged Mommy Jeans Ladies Denim Jeans,"KSh 1,389 - KSh 1,489","KSh 1,500 - KSh 1,600",13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,"KSh 1,389","KSh 1,489","KSh 1,500","KSh 1,600"
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,"KSh 1,089 - KSh 1,289","KSh 1,500",27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,"KSh 1,089","KSh 1,289","KSh 1,500","KSh 1,500"
82,Fashion Official Turkey Mens Longsleeve Shirt ...,"KSh 789 - KSh 1,189","KSh 1,500 - KSh 1,599",47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,KSh 789,"KSh 1,189","KSh 1,500","KSh 1,599"
90,Fashion Navy Blue Official Mens Longsleeve Shi...,KSh 889 - KSh 929,"KSh 949 - KSh 1,500",41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,KSh 889,KSh 929,KSh 949,"KSh 1,500"
...,...,...,...,...,...,...,...,...,...
1808,Fashion Sacha Buttercup Setting Powder,KSh 285 - KSh 500,KSh 500,43.0,/sacha-buttercup-setting-powder-fashion-mpg126...,KSh 285,KSh 500,KSh 500,KSh 500
1836,Alcohol Is Coming Hip Flask 210 ML Stainless S...,"KSh 1,900 - KSh 2,470","KSh 2,000 - KSh 2,600",5.0,/alcohol-is-coming-hip-flask-210-ml-stainless-...,"KSh 1,900","KSh 2,470","KSh 2,000","KSh 2,600"
1838,"Name Plate Small, Medium & Big",KSh 760 - KSh 950,"KSh 800 - KSh 1,000",5.0,/generic-name-plate-small-medium-big-161811911...,KSh 760,KSh 950,KSh 800,"KSh 1,000"
1855,Curtains Turquoise Blue 2Pcs 1.5M Each + FREE ...,"KSh 3,499 - KSh 4,499","KSh 4,700 - KSh 6,000",30.0,/generic-curtains-turquoise-blue-2pcs-1.5m-eac...,"KSh 3,499","KSh 4,499","KSh 4,700","KSh 6,000"


In [9]:
ranged_items.drop(['Current price', 'Old price'], axis = 1, inplace=True)
ranged_items

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,"KSh 1,059","KSh 1,099","KSh 1,371","KSh 1,913"
78,Rugged Mommy Jeans Ladies Denim Jeans,13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,"KSh 1,389","KSh 1,489","KSh 1,500","KSh 1,600"
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,"KSh 1,089","KSh 1,289","KSh 1,500","KSh 1,500"
82,Fashion Official Turkey Mens Longsleeve Shirt ...,47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,KSh 789,"KSh 1,189","KSh 1,500","KSh 1,599"
90,Fashion Navy Blue Official Mens Longsleeve Shi...,41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,KSh 889,KSh 929,KSh 949,"KSh 1,500"
...,...,...,...,...,...,...,...
1808,Fashion Sacha Buttercup Setting Powder,43.0,/sacha-buttercup-setting-powder-fashion-mpg126...,KSh 285,KSh 500,KSh 500,KSh 500
1836,Alcohol Is Coming Hip Flask 210 ML Stainless S...,5.0,/alcohol-is-coming-hip-flask-210-ml-stainless-...,"KSh 1,900","KSh 2,470","KSh 2,000","KSh 2,600"
1838,"Name Plate Small, Medium & Big",5.0,/generic-name-plate-small-medium-big-161811911...,KSh 760,KSh 950,KSh 800,"KSh 1,000"
1855,Curtains Turquoise Blue 2Pcs 1.5M Each + FREE ...,30.0,/generic-curtains-turquoise-blue-2pcs-1.5m-eac...,"KSh 3,499","KSh 4,499","KSh 4,700","KSh 6,000"


In [10]:
# removing KSh string
columns_to_change = ['price_start_value','price_end_value','old_price_start_value','old_price_end_value']
ranged_items[columns_to_change] = ranged_items[columns_to_change].replace('KSh', '', regex=True)
ranged_items.head(5)

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,1059,1099,1371,1913
78,Rugged Mommy Jeans Ladies Denim Jeans,13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,1389,1489,1500,1600
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,1089,1289,1500,1500
82,Fashion Official Turkey Mens Longsleeve Shirt ...,47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,789,1189,1500,1599
90,Fashion Navy Blue Official Mens Longsleeve Shi...,41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,889,929,949,1500


In [11]:
# removing comma
columns_to_change = ['price_start_value','price_end_value','old_price_start_value','old_price_end_value']
ranged_items[columns_to_change] = ranged_items[columns_to_change].replace(',', '', regex=True)
ranged_items.head(5)

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,1059,1099,1371,1913
78,Rugged Mommy Jeans Ladies Denim Jeans,13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,1389,1489,1500,1600
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,1089,1289,1500,1500
82,Fashion Official Turkey Mens Longsleeve Shirt ...,47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,789,1189,1500,1599
90,Fashion Navy Blue Official Mens Longsleeve Shi...,41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,889,929,949,1500


In [12]:
# converting the column to float
columns_to_change = ['price_start_value','price_end_value','old_price_start_value','old_price_end_value']
ranged_items[columns_to_change] = ranged_items[columns_to_change].astype(float)
ranged_items.info()

<class 'pandas.core.frame.DataFrame'>
Index: 165 entries, 45 to 1873
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Item name              165 non-null    object 
 1   Discount               165 non-null    float64
 2   Link                   165 non-null    object 
 3   price_start_value      165 non-null    float64
 4   price_end_value        165 non-null    float64
 5   old_price_start_value  165 non-null    float64
 6   old_price_end_value    165 non-null    float64
dtypes: float64(5), object(2)
memory usage: 10.3+ KB


In [13]:
ranged_items['current_price'] = (ranged_items['price_start_value'] + ranged_items['price_end_value'])/2
ranged_items['old_price'] = (ranged_items['old_price_start_value'] + ranged_items['old_price_end_value'])/2

ranged_items

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value,current_price,old_price
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,1059.0,1099.0,1371.0,1913.0,1079.0,1642.0
78,Rugged Mommy Jeans Ladies Denim Jeans,13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,1389.0,1489.0,1500.0,1600.0,1439.0,1550.0
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,1089.0,1289.0,1500.0,1500.0,1189.0,1500.0
82,Fashion Official Turkey Mens Longsleeve Shirt ...,47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,789.0,1189.0,1500.0,1599.0,989.0,1549.5
90,Fashion Navy Blue Official Mens Longsleeve Shi...,41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,889.0,929.0,949.0,1500.0,909.0,1224.5
...,...,...,...,...,...,...,...,...,...
1808,Fashion Sacha Buttercup Setting Powder,43.0,/sacha-buttercup-setting-powder-fashion-mpg126...,285.0,500.0,500.0,500.0,392.5,500.0
1836,Alcohol Is Coming Hip Flask 210 ML Stainless S...,5.0,/alcohol-is-coming-hip-flask-210-ml-stainless-...,1900.0,2470.0,2000.0,2600.0,2185.0,2300.0
1838,"Name Plate Small, Medium & Big",5.0,/generic-name-plate-small-medium-big-161811911...,760.0,950.0,800.0,1000.0,855.0,900.0
1855,Curtains Turquoise Blue 2Pcs 1.5M Each + FREE ...,30.0,/generic-curtains-turquoise-blue-2pcs-1.5m-eac...,3499.0,4499.0,4700.0,6000.0,3999.0,5350.0


In [14]:
ranged_items.head()

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value,current_price,old_price
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,1059.0,1099.0,1371.0,1913.0,1079.0,1642.0
78,Rugged Mommy Jeans Ladies Denim Jeans,13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,1389.0,1489.0,1500.0,1600.0,1439.0,1550.0
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,1089.0,1289.0,1500.0,1500.0,1189.0,1500.0
82,Fashion Official Turkey Mens Longsleeve Shirt ...,47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,789.0,1189.0,1500.0,1599.0,989.0,1549.5
90,Fashion Navy Blue Official Mens Longsleeve Shi...,41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,889.0,929.0,949.0,1500.0,909.0,1224.5


In [15]:
ranged_items.drop(columns = ['price_start_value','price_end_value','old_price_start_value', 'old_price_end_value'], inplace=True)
ranged_items.head()

Unnamed: 0,Item name,Discount,Link,current_price,old_price
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...,1079.0,1642.0
78,Rugged Mommy Jeans Ladies Denim Jeans,13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...,1439.0,1550.0
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...,1189.0,1500.0
82,Fashion Official Turkey Mens Longsleeve Shirt ...,47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...,989.0,1549.5
90,Fashion Navy Blue Official Mens Longsleeve Shi...,41.0,/fashion-navy-blue-official-mens-longsleeve-sh...,909.0,1224.5


In [16]:
ranged_items = ranged_items[['Item name','current_price','old_price','Discount','Link']]
ranged_items.rename(columns={'current_price':'Current price','old_price':'Old price'}, inplace=True)
ranged_items.head()

Unnamed: 0,Item name,Current price,Old price,Discount,Link
45,Fashion Men Shoes Sneakers Skateboarding Shoes...,1079.0,1642.0,43.0,/fashion-men-shoes-sneakers-skateboarding-shoe...
78,Rugged Mommy Jeans Ladies Denim Jeans,1439.0,1550.0,13.0,/generic-rugged-mommy-jeans-ladies-denim-jeans...
79,Fashion 6PCs Cutest Kids Comfy Pure Cotton Bo...,1189.0,1500.0,27.0,/fashion-6pcs-cutest-kids-comfy-pure-cotton-bo...
82,Fashion Official Turkey Mens Longsleeve Shirt ...,989.0,1549.5,47.0,/official-turkey-mens-longsleeve-shirt-slim-fi...
90,Fashion Navy Blue Official Mens Longsleeve Shi...,909.0,1224.5,41.0,/fashion-navy-blue-official-mens-longsleeve-sh...


In [17]:
new_product_data.head()

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,AILYONS FK-0301 Stainless Steel 1.8L Electric ...,KSh 599,KSh 628,5.0,/ailyons-fk-0301-stainless-steel-1.8l-electric...
1,"Vitron HTC4388FS - 43"" Smart Android Frameles...","KSh 19,499","KSh 28,599",32.0,/vitron-htc4388fs-43-smart-android-frameless-t...
2,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,"KSh 1,022","KSh 1,460",30.0,/nivea-radiant-beauty-advanced-care-lotion-for...
3,"Vitron HTC3200S 32"" Smart Frameless Android LE...","KSh 11,599","KSh 18,069",36.0,/vitron-htc3200s-32-smart-frameless-android-le...
4,NIVEA UV Face Shine Control Cream SPF 50 - 50ml,KSh 849,"KSh 1,300",35.0,/nivea-uv-face-shine-control-cream-spf-50-50ml...


In [18]:
columns = ['Current price', 'Old price']
new_product_data[columns] = new_product_data[columns].replace('KSh', '', regex=True)
new_product_data[columns] = new_product_data[columns].replace(',', '',regex=True)
new_product_data.head()

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,AILYONS FK-0301 Stainless Steel 1.8L Electric ...,599,628,5.0,/ailyons-fk-0301-stainless-steel-1.8l-electric...
1,"Vitron HTC4388FS - 43"" Smart Android Frameles...",19499,28599,32.0,/vitron-htc4388fs-43-smart-android-frameless-t...
2,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,1022,1460,30.0,/nivea-radiant-beauty-advanced-care-lotion-for...
3,"Vitron HTC3200S 32"" Smart Frameless Android LE...",11599,18069,36.0,/vitron-htc3200s-32-smart-frameless-android-le...
4,NIVEA UV Face Shine Control Cream SPF 50 - 50ml,849,1300,35.0,/nivea-uv-face-shine-control-cream-spf-50-50ml...


In [19]:
new_product_data[columns] = new_product_data[columns].astype(float)
new_product_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1721 entries, 0 to 1885
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1721 non-null   object 
 1   Current price  1721 non-null   float64
 2   Old price      1721 non-null   float64
 3   Discount       1721 non-null   float64
 4   Link           1721 non-null   object 
dtypes: float64(3), object(2)
memory usage: 80.7+ KB


In [20]:
products = pd.concat([new_product_data,ranged_items], axis=0)
products.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1886 entries, 0 to 1873
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1886 non-null   object 
 1   Current price  1886 non-null   float64
 2   Old price      1886 non-null   float64
 3   Discount       1886 non-null   float64
 4   Link           1886 non-null   object 
dtypes: float64(3), object(2)
memory usage: 88.4+ KB


Saving the cleaned data as csv file

In [26]:
products.to_csv('jumia_products_clean.csv', index=False)

## Exploratory Data Analysis(EDA)

In [21]:
products.head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,AILYONS FK-0301 Stainless Steel 1.8L Electric ...,599.0,628.0,5.0,/ailyons-fk-0301-stainless-steel-1.8l-electric...
1,"Vitron HTC4388FS - 43"" Smart Android Frameles...",19499.0,28599.0,32.0,/vitron-htc4388fs-43-smart-android-frameless-t...
2,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,1022.0,1460.0,30.0,/nivea-radiant-beauty-advanced-care-lotion-for...
3,"Vitron HTC3200S 32"" Smart Frameless Android LE...",11599.0,18069.0,36.0,/vitron-htc3200s-32-smart-frameless-android-le...
4,NIVEA UV Face Shine Control Cream SPF 50 - 50ml,849.0,1300.0,35.0,/nivea-uv-face-shine-control-cream-spf-50-50ml...
5,NIVEA Perfect & Radiant Even Tone Day And Nigh...,849.0,1800.0,53.0,/nivea-perfect-radiant-even-tone-day-and-night...
6,"NIVEA Pearl & Beauty Anti-Perspirant Rollon, 4...",692.0,1040.0,33.0,/nivea-pearl-beauty-anti-perspirant-rollon-48h...
7,RichRipple LED Display Bluetooth Headsets 9D S...,469.0,920.0,49.0,/richripple-led-display-bluetooth-headsets-9d-...
8,Starlink Standard Actuated Kit,39500.0,100000.0,61.0,/starlink-standard-actuated-kit-190334730.html
9,Garnier Even & Matte Vitamin C Booster Serum ...,1099.0,1750.0,37.0,/garnier-even-matte-vitamin-c-booster-serum-30...


In [22]:
products.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1886 entries, 0 to 1873
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1886 non-null   object 
 1   Current price  1886 non-null   float64
 2   Old price      1886 non-null   float64
 3   Discount       1886 non-null   float64
 4   Link           1886 non-null   object 
dtypes: float64(3), object(2)
memory usage: 88.4+ KB


In [23]:
# 10 most expensive products
products.sort_values(by = 'Current price', ascending=False).head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
1310,"Branding Combo Kit :24"" Vinyl Cutter Plotter +...",100999.0,129999.0,22.0,/generic-branding-combo-kit-24-vinyl-cutter-pl...
1846,Smartisan Nut Pro 2 5.99 Inch 6GB RAM 128GB RO...,96839.0,193677.0,50.0,/smartisan-nut-pro-2-5.99-inch-6gb-ram-128gb-r...
1362,LCDTouch Screen For Sony Xperia 5 IV,57895.0,82719.0,30.0,/generic-lcdtouch-screen-for-sony-xperia-5-iv-...
1263,Wireless Air Compression Leg Massager Recharge...,51994.0,89661.0,42.0,/generic-wireless-air-compression-leg-massager...
677,"LG 43UP7750 - 43"" 4K UHD Smart Frameless LED T...",49499.0,89999.0,45.0,/lg-43up7750-43-4k-uhd-smart-frameless-led-tv-...
1871,8 In 1 Combo Heat Press Machine Coffee Magic M...,49499.0,64999.0,24.0,/generic-8-in-1-combo-heat-press-machine-coffe...
1058,3840 X 2160 UHD 40 Pin LCD Screen And Digitize...,48260.0,80395.0,40.0,/generic-3840-x-2160-uhd-40-pin-lcd-screen-and...
1770,NEO Gleam Modern Led Ceiling Lihgts For Living...,42772.0,73704.0,42.0,/generic-neo-gleam-modern-led-ceiling-lihgts-f...
8,Starlink Standard Actuated Kit,39500.0,100000.0,61.0,/starlink-standard-actuated-kit-190334730.html
613,7000 Lumens Android 4.4 HD 1080P Bluetooth Wif...,36991.0,73874.0,50.0,/generic-7000-lumens-android-4.4-hd-1080p-blue...


In [24]:
# 10 cheapest products
products.sort_values(by = 'Current price', ascending=True).head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
1180,Nail Sticker Detachable Waterproof PET Nail Ar...,74.0,148.0,50.0,/generic-nail-sticker-detachable-waterproof-pe...
1415,2Pcs/Set Adjustable Couple Rings-Golden,81.0,159.0,49.0,/generic-2pcsset-adjustable-couple-rings-golde...
1129,12Pcs Cable Protector Solid Color Flexible Sil...,95.0,190.0,50.0,/generic-12pcs-cable-protector-solid-color-fle...
1070,Kitchen Bathroom Sink Sewer Strainer Filter Ne...,114.0,224.0,49.0,/generic-kitchen-bathroom-sink-sewer-strainer-...
343,Travel Power Adaptor With UK Socket Plug,116.0,200.0,42.0,/generic-travel-power-adaptor-with-uk-socket-p...
632,Fashion Finger Ring Geometric Shape Wedding ...,119.5,179.0,34.0,/fashion-finger-ring-geometric-shape-wedding-c...
717,ABS Reed Strength 2.5 For Alto Eb Saxophone Sa...,120.0,228.0,47.0,/generic-abs-reed-strength-2.5-for-alto-eb-sax...
1079,6 Pairs 12 PCS 3.8mm Soft Silicone In-Ear Earp...,120.0,227.0,47.0,/generic-6-pairs-12-pcs-3.8mm-soft-silicone-in...
1137,10Pcs Nail Art Sanding Buffer Buffing Block Po...,125.0,250.0,50.0,/generic-10pcs-nail-art-sanding-buffer-buffing...
996,Pull Fuel Tank Pointer To Full Hellaflush Refl...,126.0,193.0,35.0,/generic-pull-fuel-tank-pointer-to-full-hellaf...


In [25]:
# 10 prices with the highest discount
products.sort_values(by = 'Discount', ascending=False).head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
1645,"Fashion African Small Curly Wig, Female Short ...",1199.0,4100.0,71.0,/fashion-african-small-curly-wig-female-short-...
104,Fashion ELEGANT Men's PURE Leather Loafer Sh...,1214.0,3099.0,67.0,/fashion-elegant-mens-pure-leather-loafer-shoe...
1231,Kitchen Organizer Supplies Fridge Storage Rack...,350.0,999.0,65.0,/generic-kitchen-organizer-supplies-fridge-sto...
1354,Flangesio Popular Flip Flops Men Shoes High Qu...,2340.0,6500.0,64.0,/flangesio-popular-flip-flops-men-shoes-high-q...
8,Starlink Standard Actuated Kit,39500.0,100000.0,61.0,/starlink-standard-actuated-kit-190334730.html
310,Fashion Large Capacity Butterfly Tote Bag Wome...,650.0,1500.0,57.0,/fashion-large-capacity-butterfly-tote-bag-wom...
449,Geemy Professional Hair/Shaving Machine/kinyozi,1300.0,3000.0,57.0,/geemy-professional-hairshaving-machinekinyozi...
1640,Outdoor Solar Post Gate Light,2400.0,5500.0,56.0,/generic-outdoor-solar-post-gate-light-7241176...
245,Fashion Eyeshadow Eye Shadow Pans Palette Eye-...,429.0,982.0,56.0,/fashion-eyeshadow-eye-shadow-pans-palette-eye...
1825,"Tripod Stand For Camera/Phone,Up To 168CM",2449.0,5580.0,56.0,/long-neck-car-phone-holder-universal-mpg32808...
