In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
product_data = pd.read_csv('jumia_products.csv')
product_data.head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,NIVEA Perfect & Radiant Even Tone Day And Nigh...,KSh 999,"KSh 1,560",36%,/nivea-perfect-radiant-even-tone-day-and-night...
1,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,KSh 949,"KSh 1,460",35%,/nivea-radiant-beauty-advanced-care-lotion-for...
2,NIVEA Nourishing Cocoa Body Lotion With Cocoa ...,"KSh 1,174","KSh 1,302",10%,/nivea-nourishing-cocoa-body-lotion-with-cocoa...
3,"NIVEA Pearl & Beauty Anti-Perspirant Rollon, 4...",KSh 728,"KSh 1,040",30%,/nivea-pearl-beauty-anti-perspirant-rollon-48h...
4,NIVEA MEN Deep Antibacterial Anti-Perspirant R...,KSh 728,"KSh 1,040",30%,/nivea-men-deep-antibacterial-anti-perspirant-...
5,NIVEA Pearl & Beauty Black Pearl Fine Fragranc...,KSh 728,"KSh 1,040",30%,/nivea-pearl-beauty-black-pearl-fine-fragrance...
6,NIVEA Perfect & Radiant Luminous630 Anti Dark ...,"KSh 3,647","KSh 5,610",35%,/nivea-perfect-radiant-luminous630-anti-dark-m...
7,NIVEA Perfect & Radiant Even Tone Day Cream SP...,KSh 585,KSh 780,25%,/nivea-perfect-radiant-even-tone-day-cream-spf...
8,NIVEA Q10 Power Anti-Wrinkle Day Cream 50ml & ...,"KSh 2,496","KSh 3,840",35%,/nivea-q10-power-anti-wrinkle-day-cream-50ml-n...
9,Epson EcoTank L3250 A4 WIRELESS Printer (All-i...,"KSh 33,999","KSh 38,000",11%,/epson-ecotank-l3250-a4-wireless-printer-all-i...


In [3]:
product_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1705 entries, 0 to 1704
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Item name      1705 non-null   object
 1   Current price  1705 non-null   object
 2   Old price      1705 non-null   object
 3   Discount       1705 non-null   object
 4   Link           1705 non-null   object
dtypes: object(5)
memory usage: 66.7+ KB


## Data cleaning

In [4]:
# changin discount column to float
def change_disc_col(df):
    df['Discount'] = df['Discount'].str.replace('%','')
    df['Discount'] = df['Discount'].astype(float)
    return df.info()

change_disc_col(product_data)



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1705 entries, 0 to 1704
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1705 non-null   object 
 1   Current price  1705 non-null   object 
 2   Old price      1705 non-null   object 
 3   Discount       1705 non-null   float64
 4   Link           1705 non-null   object 
dtypes: float64(1), object(4)
memory usage: 66.7+ KB


In [5]:
# filtering out prices that have a range
ranged_items = product_data[product_data['Current price'].str.contains('-') | product_data['Old price'].str.contains('-')].copy()
# removing the rows from the original dataset
new_product_data = product_data.drop(ranged_items.index)
new_product_data

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,NIVEA Perfect & Radiant Even Tone Day And Nigh...,KSh 999,"KSh 1,560",36.0,/nivea-perfect-radiant-even-tone-day-and-night...
1,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,KSh 949,"KSh 1,460",35.0,/nivea-radiant-beauty-advanced-care-lotion-for...
2,NIVEA Nourishing Cocoa Body Lotion With Cocoa ...,"KSh 1,174","KSh 1,302",10.0,/nivea-nourishing-cocoa-body-lotion-with-cocoa...
3,"NIVEA Pearl & Beauty Anti-Perspirant Rollon, 4...",KSh 728,"KSh 1,040",30.0,/nivea-pearl-beauty-anti-perspirant-rollon-48h...
4,NIVEA MEN Deep Antibacterial Anti-Perspirant R...,KSh 728,"KSh 1,040",30.0,/nivea-men-deep-antibacterial-anti-perspirant-...
...,...,...,...,...,...
1700,MK TV Guard Voltage Protector For Digital Smar...,"KSh 1,150","KSh 2,000",43.0,/mk-tv-guard-voltage-protector-for-digital-sma...
1701,Kinbar High Quality Alarm Padlock Siren Alarm ...,"KSh 1,999","KSh 3,500",43.0,/kinbar-high-quality-alarm-padlock-siren-alarm...
1702,HOMMY 3D CARPETS,"KSh 4,000","KSh 6,000",33.0,/hommy-3d-carpets-123400646.html
1703,Canon GI-490 Black Ink Bottle +FREE EXECUTIVE PEN,"KSh 1,999","KSh 3,000",33.0,/canon-gi-490-black-ink-bottle-free-executive-...


In [6]:
ranged_items

Unnamed: 0,Item name,Current price,Old price,Discount,Link
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,"KSh 1,860","KSh 2,695 - KSh 3,576",48.0,/fashion-2024-mens-casual-high-top-shoes-runni...
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,KSh 320 - KSh 923,"KSh 320 - KSh 1,200",46.0,/kojie-san-skin-lightening-soap-original-3pcs-...
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,KSh 454 - KSh 499,KSh 789,42.0,/generic-crease-guard-shoe-protector-sneakers-...
94,Fashion Beautiful Girls Back To School Shoes L...,"KSh 1,020 - KSh 1,200","KSh 1,900 - KSh 2,500",56.0,/fashion-beautiful-girls-back-to-school-shoes-...
95,1Pcs Brown Curtain WITHOUT Sheers,"KSh 990 - KSh 2,560","KSh 1,000 - KSh 2,900",12.0,/generic-1pcs-brown-curtain-without-sheers-152...
...,...,...,...,...,...
1675,Always Stainless Steel Double Wall Insulated V...,"KSh 1,889 - KSh 2,398","KSh 2,500 - KSh 3,800",37.0,/stainless-steel-double-wall-insulated-vacuum-...
1677,Always Unbreakable Vacuum Thermos Flask IN Sta...,"KSh 1,770 - KSh 2,350","KSh 2,300 - KSh 4,000",43.0,/always-unbreakable-vacuum-thermos-flask-in-st...
1680,Waterproof Matress Protector And Cover,"KSh 2,275 - KSh 2,650","KSh 4,000 - KSh 5,200",50.0,/generic-waterproof-matress-protector-and-cove...
1681,Kamisafe Emergency Backup LED Lamp,"KSh 1,399 - KSh 2,965","KSh 1,700 - KSh 3,500",43.0,/kamisafe-emergency-backup-led-lamp-29653629.html


In [7]:
split_values = ranged_items['Current price'].str.split('-', expand=True)
ranged_items.loc[:, 'price_start_value'] = split_values[0].str.strip()
ranged_items.loc[:, 'price_end_value'] = split_values[1].str.strip()
ranged_items['price_end_value'].fillna(ranged_items['price_start_value'], inplace=True)

ranged_items

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ranged_items['price_end_value'].fillna(ranged_items['price_start_value'], inplace=True)


Unnamed: 0,Item name,Current price,Old price,Discount,Link,price_start_value,price_end_value
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,"KSh 1,860","KSh 2,695 - KSh 3,576",48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,"KSh 1,860","KSh 1,860"
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,KSh 320 - KSh 923,"KSh 320 - KSh 1,200",46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,KSh 320,KSh 923
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,KSh 454 - KSh 499,KSh 789,42.0,/generic-crease-guard-shoe-protector-sneakers-...,KSh 454,KSh 499
94,Fashion Beautiful Girls Back To School Shoes L...,"KSh 1,020 - KSh 1,200","KSh 1,900 - KSh 2,500",56.0,/fashion-beautiful-girls-back-to-school-shoes-...,"KSh 1,020","KSh 1,200"
95,1Pcs Brown Curtain WITHOUT Sheers,"KSh 990 - KSh 2,560","KSh 1,000 - KSh 2,900",12.0,/generic-1pcs-brown-curtain-without-sheers-152...,KSh 990,"KSh 2,560"
...,...,...,...,...,...,...,...
1675,Always Stainless Steel Double Wall Insulated V...,"KSh 1,889 - KSh 2,398","KSh 2,500 - KSh 3,800",37.0,/stainless-steel-double-wall-insulated-vacuum-...,"KSh 1,889","KSh 2,398"
1677,Always Unbreakable Vacuum Thermos Flask IN Sta...,"KSh 1,770 - KSh 2,350","KSh 2,300 - KSh 4,000",43.0,/always-unbreakable-vacuum-thermos-flask-in-st...,"KSh 1,770","KSh 2,350"
1680,Waterproof Matress Protector And Cover,"KSh 2,275 - KSh 2,650","KSh 4,000 - KSh 5,200",50.0,/generic-waterproof-matress-protector-and-cove...,"KSh 2,275","KSh 2,650"
1681,Kamisafe Emergency Backup LED Lamp,"KSh 1,399 - KSh 2,965","KSh 1,700 - KSh 3,500",43.0,/kamisafe-emergency-backup-led-lamp-29653629.html,"KSh 1,399","KSh 2,965"


In [8]:
split_values = ranged_items['Old price'].str.split('-', expand=True)
ranged_items.loc[:, 'old_price_start_value'] = split_values[0].str.strip()
ranged_items.loc[:, 'old_price_end_value'] = split_values[1].str.strip()
ranged_items['old_price_end_value'].fillna(ranged_items['old_price_start_value'], inplace=True)

ranged_items

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ranged_items['old_price_end_value'].fillna(ranged_items['old_price_start_value'], inplace=True)


Unnamed: 0,Item name,Current price,Old price,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,"KSh 1,860","KSh 2,695 - KSh 3,576",48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,"KSh 1,860","KSh 1,860","KSh 2,695","KSh 3,576"
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,KSh 320 - KSh 923,"KSh 320 - KSh 1,200",46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,KSh 320,KSh 923,KSh 320,"KSh 1,200"
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,KSh 454 - KSh 499,KSh 789,42.0,/generic-crease-guard-shoe-protector-sneakers-...,KSh 454,KSh 499,KSh 789,KSh 789
94,Fashion Beautiful Girls Back To School Shoes L...,"KSh 1,020 - KSh 1,200","KSh 1,900 - KSh 2,500",56.0,/fashion-beautiful-girls-back-to-school-shoes-...,"KSh 1,020","KSh 1,200","KSh 1,900","KSh 2,500"
95,1Pcs Brown Curtain WITHOUT Sheers,"KSh 990 - KSh 2,560","KSh 1,000 - KSh 2,900",12.0,/generic-1pcs-brown-curtain-without-sheers-152...,KSh 990,"KSh 2,560","KSh 1,000","KSh 2,900"
...,...,...,...,...,...,...,...,...,...
1675,Always Stainless Steel Double Wall Insulated V...,"KSh 1,889 - KSh 2,398","KSh 2,500 - KSh 3,800",37.0,/stainless-steel-double-wall-insulated-vacuum-...,"KSh 1,889","KSh 2,398","KSh 2,500","KSh 3,800"
1677,Always Unbreakable Vacuum Thermos Flask IN Sta...,"KSh 1,770 - KSh 2,350","KSh 2,300 - KSh 4,000",43.0,/always-unbreakable-vacuum-thermos-flask-in-st...,"KSh 1,770","KSh 2,350","KSh 2,300","KSh 4,000"
1680,Waterproof Matress Protector And Cover,"KSh 2,275 - KSh 2,650","KSh 4,000 - KSh 5,200",50.0,/generic-waterproof-matress-protector-and-cove...,"KSh 2,275","KSh 2,650","KSh 4,000","KSh 5,200"
1681,Kamisafe Emergency Backup LED Lamp,"KSh 1,399 - KSh 2,965","KSh 1,700 - KSh 3,500",43.0,/kamisafe-emergency-backup-led-lamp-29653629.html,"KSh 1,399","KSh 2,965","KSh 1,700","KSh 3,500"


In [9]:
ranged_items.drop(['Current price', 'Old price'], axis = 1, inplace=True)
ranged_items

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,"KSh 1,860","KSh 1,860","KSh 2,695","KSh 3,576"
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,KSh 320,KSh 923,KSh 320,"KSh 1,200"
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,42.0,/generic-crease-guard-shoe-protector-sneakers-...,KSh 454,KSh 499,KSh 789,KSh 789
94,Fashion Beautiful Girls Back To School Shoes L...,56.0,/fashion-beautiful-girls-back-to-school-shoes-...,"KSh 1,020","KSh 1,200","KSh 1,900","KSh 2,500"
95,1Pcs Brown Curtain WITHOUT Sheers,12.0,/generic-1pcs-brown-curtain-without-sheers-152...,KSh 990,"KSh 2,560","KSh 1,000","KSh 2,900"
...,...,...,...,...,...,...,...
1675,Always Stainless Steel Double Wall Insulated V...,37.0,/stainless-steel-double-wall-insulated-vacuum-...,"KSh 1,889","KSh 2,398","KSh 2,500","KSh 3,800"
1677,Always Unbreakable Vacuum Thermos Flask IN Sta...,43.0,/always-unbreakable-vacuum-thermos-flask-in-st...,"KSh 1,770","KSh 2,350","KSh 2,300","KSh 4,000"
1680,Waterproof Matress Protector And Cover,50.0,/generic-waterproof-matress-protector-and-cove...,"KSh 2,275","KSh 2,650","KSh 4,000","KSh 5,200"
1681,Kamisafe Emergency Backup LED Lamp,43.0,/kamisafe-emergency-backup-led-lamp-29653629.html,"KSh 1,399","KSh 2,965","KSh 1,700","KSh 3,500"


In [10]:
# removing KSh string
columns_to_change = ['price_start_value','price_end_value','old_price_start_value','old_price_end_value']
ranged_items[columns_to_change] = ranged_items[columns_to_change].replace('KSh', '', regex=True)
ranged_items.head(5)

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,1860,1860,2695,3576
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,320,923,320,1200
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,42.0,/generic-crease-guard-shoe-protector-sneakers-...,454,499,789,789
94,Fashion Beautiful Girls Back To School Shoes L...,56.0,/fashion-beautiful-girls-back-to-school-shoes-...,1020,1200,1900,2500
95,1Pcs Brown Curtain WITHOUT Sheers,12.0,/generic-1pcs-brown-curtain-without-sheers-152...,990,2560,1000,2900


In [11]:
# removing comma
columns_to_change = ['price_start_value','price_end_value','old_price_start_value','old_price_end_value']
ranged_items[columns_to_change] = ranged_items[columns_to_change].replace(',', '', regex=True)
ranged_items.head(5)

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,1860,1860,2695,3576
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,320,923,320,1200
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,42.0,/generic-crease-guard-shoe-protector-sneakers-...,454,499,789,789
94,Fashion Beautiful Girls Back To School Shoes L...,56.0,/fashion-beautiful-girls-back-to-school-shoes-...,1020,1200,1900,2500
95,1Pcs Brown Curtain WITHOUT Sheers,12.0,/generic-1pcs-brown-curtain-without-sheers-152...,990,2560,1000,2900


In [12]:
# converting the column to float
columns_to_change = ['price_start_value','price_end_value','old_price_start_value','old_price_end_value']
ranged_items[columns_to_change] = ranged_items[columns_to_change].astype(float)
ranged_items.info()

<class 'pandas.core.frame.DataFrame'>
Index: 203 entries, 59 to 1683
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Item name              203 non-null    object 
 1   Discount               203 non-null    float64
 2   Link                   203 non-null    object 
 3   price_start_value      203 non-null    float64
 4   price_end_value        203 non-null    float64
 5   old_price_start_value  203 non-null    float64
 6   old_price_end_value    203 non-null    float64
dtypes: float64(5), object(2)
memory usage: 12.7+ KB


In [13]:
ranged_items['current_price'] = (ranged_items['price_start_value'] + ranged_items['price_end_value'])/2
ranged_items['old_price'] = (ranged_items['old_price_start_value'] + ranged_items['old_price_end_value'])/2

ranged_items

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value,current_price,old_price
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,1860.0,1860.0,2695.0,3576.0,1860.0,3135.5
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,320.0,923.0,320.0,1200.0,621.5,760.0
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,42.0,/generic-crease-guard-shoe-protector-sneakers-...,454.0,499.0,789.0,789.0,476.5,789.0
94,Fashion Beautiful Girls Back To School Shoes L...,56.0,/fashion-beautiful-girls-back-to-school-shoes-...,1020.0,1200.0,1900.0,2500.0,1110.0,2200.0
95,1Pcs Brown Curtain WITHOUT Sheers,12.0,/generic-1pcs-brown-curtain-without-sheers-152...,990.0,2560.0,1000.0,2900.0,1775.0,1950.0
...,...,...,...,...,...,...,...,...,...
1675,Always Stainless Steel Double Wall Insulated V...,37.0,/stainless-steel-double-wall-insulated-vacuum-...,1889.0,2398.0,2500.0,3800.0,2143.5,3150.0
1677,Always Unbreakable Vacuum Thermos Flask IN Sta...,43.0,/always-unbreakable-vacuum-thermos-flask-in-st...,1770.0,2350.0,2300.0,4000.0,2060.0,3150.0
1680,Waterproof Matress Protector And Cover,50.0,/generic-waterproof-matress-protector-and-cove...,2275.0,2650.0,4000.0,5200.0,2462.5,4600.0
1681,Kamisafe Emergency Backup LED Lamp,43.0,/kamisafe-emergency-backup-led-lamp-29653629.html,1399.0,2965.0,1700.0,3500.0,2182.0,2600.0


In [14]:
ranged_items.head()

Unnamed: 0,Item name,Discount,Link,price_start_value,price_end_value,old_price_start_value,old_price_end_value,current_price,old_price
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,1860.0,1860.0,2695.0,3576.0,1860.0,3135.5
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,320.0,923.0,320.0,1200.0,621.5,760.0
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,42.0,/generic-crease-guard-shoe-protector-sneakers-...,454.0,499.0,789.0,789.0,476.5,789.0
94,Fashion Beautiful Girls Back To School Shoes L...,56.0,/fashion-beautiful-girls-back-to-school-shoes-...,1020.0,1200.0,1900.0,2500.0,1110.0,2200.0
95,1Pcs Brown Curtain WITHOUT Sheers,12.0,/generic-1pcs-brown-curtain-without-sheers-152...,990.0,2560.0,1000.0,2900.0,1775.0,1950.0


In [15]:
ranged_items.drop(columns = ['price_start_value','price_end_value','old_price_start_value', 'old_price_end_value'], inplace=True)
ranged_items.head()

Unnamed: 0,Item name,Discount,Link,current_price,old_price
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,48.0,/fashion-2024-mens-casual-high-top-shoes-runni...,1860.0,3135.5
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,46.0,/kojie-san-skin-lightening-soap-original-3pcs-...,621.5,760.0
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,42.0,/generic-crease-guard-shoe-protector-sneakers-...,476.5,789.0
94,Fashion Beautiful Girls Back To School Shoes L...,56.0,/fashion-beautiful-girls-back-to-school-shoes-...,1110.0,2200.0
95,1Pcs Brown Curtain WITHOUT Sheers,12.0,/generic-1pcs-brown-curtain-without-sheers-152...,1775.0,1950.0


In [16]:
ranged_items = ranged_items[['Item name','current_price','old_price','Discount','Link']]
ranged_items.rename(columns={'current_price':'Current price','old_price':'Old price'}, inplace=True)
ranged_items.head()

Unnamed: 0,Item name,Current price,Old price,Discount,Link
59,Fashion 2024 Mens Casual High-Top Shoes Runnin...,1860.0,3135.5,48.0,/fashion-2024-mens-casual-high-top-shoes-runni...
80,Kojic Acid Soap Body Skin Lightening Soap- 3pcs,621.5,760.0,46.0,/kojie-san-skin-lightening-soap-original-3pcs-...
83,Crease Guard Shoe Protector Sneakers Toe Caps ...,476.5,789.0,42.0,/generic-crease-guard-shoe-protector-sneakers-...
94,Fashion Beautiful Girls Back To School Shoes L...,1110.0,2200.0,56.0,/fashion-beautiful-girls-back-to-school-shoes-...
95,1Pcs Brown Curtain WITHOUT Sheers,1775.0,1950.0,12.0,/generic-1pcs-brown-curtain-without-sheers-152...


In [17]:
new_product_data.head()

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,NIVEA Perfect & Radiant Even Tone Day And Nigh...,KSh 999,"KSh 1,560",36.0,/nivea-perfect-radiant-even-tone-day-and-night...
1,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,KSh 949,"KSh 1,460",35.0,/nivea-radiant-beauty-advanced-care-lotion-for...
2,NIVEA Nourishing Cocoa Body Lotion With Cocoa ...,"KSh 1,174","KSh 1,302",10.0,/nivea-nourishing-cocoa-body-lotion-with-cocoa...
3,"NIVEA Pearl & Beauty Anti-Perspirant Rollon, 4...",KSh 728,"KSh 1,040",30.0,/nivea-pearl-beauty-anti-perspirant-rollon-48h...
4,NIVEA MEN Deep Antibacterial Anti-Perspirant R...,KSh 728,"KSh 1,040",30.0,/nivea-men-deep-antibacterial-anti-perspirant-...


In [18]:
columns = ['Current price', 'Old price']
new_product_data[columns] = new_product_data[columns].replace('KSh', '', regex=True)
new_product_data[columns] = new_product_data[columns].replace(',', '',regex=True)
new_product_data.head()

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,NIVEA Perfect & Radiant Even Tone Day And Nigh...,999,1560,36.0,/nivea-perfect-radiant-even-tone-day-and-night...
1,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,949,1460,35.0,/nivea-radiant-beauty-advanced-care-lotion-for...
2,NIVEA Nourishing Cocoa Body Lotion With Cocoa ...,1174,1302,10.0,/nivea-nourishing-cocoa-body-lotion-with-cocoa...
3,"NIVEA Pearl & Beauty Anti-Perspirant Rollon, 4...",728,1040,30.0,/nivea-pearl-beauty-anti-perspirant-rollon-48h...
4,NIVEA MEN Deep Antibacterial Anti-Perspirant R...,728,1040,30.0,/nivea-men-deep-antibacterial-anti-perspirant-...


In [19]:
new_product_data[columns] = new_product_data[columns].astype(float)
new_product_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1502 entries, 0 to 1704
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1502 non-null   object 
 1   Current price  1502 non-null   float64
 2   Old price      1502 non-null   float64
 3   Discount       1502 non-null   float64
 4   Link           1502 non-null   object 
dtypes: float64(3), object(2)
memory usage: 70.4+ KB


In [20]:
products = pd.concat([new_product_data,ranged_items], axis=0)
products.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1705 entries, 0 to 1683
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1705 non-null   object 
 1   Current price  1705 non-null   float64
 2   Old price      1705 non-null   float64
 3   Discount       1705 non-null   float64
 4   Link           1705 non-null   object 
dtypes: float64(3), object(2)
memory usage: 79.9+ KB


## Exploratory Data Analysis(EDA)

In [25]:
products.head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
0,NIVEA Perfect & Radiant Even Tone Day And Nigh...,999.0,1560.0,36.0,/nivea-perfect-radiant-even-tone-day-and-night...
1,NIVEA Radiant & Beauty Advanced Care Lotion Fo...,949.0,1460.0,35.0,/nivea-radiant-beauty-advanced-care-lotion-for...
2,NIVEA Nourishing Cocoa Body Lotion With Cocoa ...,1174.0,1302.0,10.0,/nivea-nourishing-cocoa-body-lotion-with-cocoa...
3,"NIVEA Pearl & Beauty Anti-Perspirant Rollon, 4...",728.0,1040.0,30.0,/nivea-pearl-beauty-anti-perspirant-rollon-48h...
4,NIVEA MEN Deep Antibacterial Anti-Perspirant R...,728.0,1040.0,30.0,/nivea-men-deep-antibacterial-anti-perspirant-...
5,NIVEA Pearl & Beauty Black Pearl Fine Fragranc...,728.0,1040.0,30.0,/nivea-pearl-beauty-black-pearl-fine-fragrance...
6,NIVEA Perfect & Radiant Luminous630 Anti Dark ...,3647.0,5610.0,35.0,/nivea-perfect-radiant-luminous630-anti-dark-m...
7,NIVEA Perfect & Radiant Even Tone Day Cream SP...,585.0,780.0,25.0,/nivea-perfect-radiant-even-tone-day-cream-spf...
8,NIVEA Q10 Power Anti-Wrinkle Day Cream 50ml & ...,2496.0,3840.0,35.0,/nivea-q10-power-anti-wrinkle-day-cream-50ml-n...
9,Epson EcoTank L3250 A4 WIRELESS Printer (All-i...,33999.0,38000.0,11.0,/epson-ecotank-l3250-a4-wireless-printer-all-i...


In [23]:
products.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1705 entries, 0 to 1683
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Item name      1705 non-null   object 
 1   Current price  1705 non-null   float64
 2   Old price      1705 non-null   float64
 3   Discount       1705 non-null   float64
 4   Link           1705 non-null   object 
dtypes: float64(3), object(2)
memory usage: 79.9+ KB


In [28]:
# 10 most expensive products
products.sort_values(by = 'Current price', ascending=False).head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
703,"Tecno Phantom V Flip 5G, 256GB + 8GB RAM (Sing...",97999.0,99999.0,2.0,/tecno-phantom-v-flip-5g-256gb-8gb-ram-single-...
429,"Ecoflow Portable Power Station RIVER 2 Pro, 76...",84999.0,106999.0,21.0,/ecoflow-portable-power-station-river-2-pro-76...
1097,Samsung Galaxy S21 Ultra 5G 6.8″ 128GB + 12GB ...,68900.0,78900.0,13.0,/galaxy-s21-ultra-5g-6.8-128gb-12gb-black-sams...
1157,Samsung Galaxy S21 Ultra 5G 6.8″ 128GB + 12GB ...,68900.0,78900.0,13.0,/galaxy-s21-ultra-5g-6.8-128gb-12gb-black-sams...
1340,"Oppo Reno10 5G, 6.7"", 8+256, 64MP, 3D Curved S...",56999.0,57999.0,2.0,/oppo-reno10-5g-6.7-8256-64mp-3d-curved-screen...
1258,"Oppo Reno 10 5G, 6.7'', 8GB RAM + 256GB, 64MP+...",54399.0,79999.0,32.0,/oppo-reno-10-5g-6.7-8gb-ram-256gb-64mp32gb8gb...
214,"Oppo Reno 10 5G, 6.7''l.. 8GB + 256GB, 64MP, (...",53500.0,70000.0,24.0,/reno-10-5g-6.7l..-8gb-256gb-64mp-dual-sim-500...
1186,"Samsung Galaxy S20 Ultra 6.9"" 5G 128GB SmartPh...",52900.0,82900.0,36.0,/galaxy-s20-ultra-6.9-5g-128gb-smartphones-bla...
639,"Vitron 55"" Inch Smart-Android,Bluetooth TV,FRA...",47999.0,90000.0,47.0,/55-inch-smart-androidbluetooth-tvframelessult...
1514,"Vitron 55"" UHD Frameless 4K Android TV+FREE AC...",47990.0,55000.0,13.0,/vitron-55-uhd-frameless-4k-android-tvfree-acc...


In [29]:
# 10 cheapest products
products.sort_values(by = 'Current price', ascending=True).head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
1422,Portable Mini OTG Adapter Type-C To USB Conver...,43.0,86.0,50.0,/generic-portable-mini-otg-adapter-type-c-to-u...
1103,2Pcs High Speed Male to Female Type-C to USB O...,51.0,102.0,50.0,/2pcs-high-speed-male-to-female-type-c-to-usb-...
1163,2Pcs High Speed Male to Female Type-C to USB O...,51.0,102.0,50.0,/2pcs-high-speed-male-to-female-type-c-to-usb-...
874,Type-C OTG Adapter Connector Fast Charging Alu...,62.0,124.0,50.0,/generic-type-c-otg-adapter-connector-fast-cha...
1606,Fashion Vintage Turtle Charm Handmade Woven An...,75.0,150.0,50.0,/fashion-vintage-turtle-charm-handmade-woven-a...
921,Fashion Moon Shape Star Pendant Alloy Luxury C...,78.0,156.0,50.0,/fashion-moon-shape-star-pendant-alloy-luxury-...
79,Fashion Women Faux Pearl Ear Studs Earrings We...,82.0,164.0,50.0,/fashion-women-faux-pearl-ear-studs-earrings-w...
906,Fashion Women Faux Pearl Ear Studs Earrings We...,82.0,164.0,50.0,/fashion-women-faux-pearl-ear-studs-earrings-w...
350,Fashion Love Heart Pendant Chain Necklace Wome...,83.0,165.0,50.0,/fashion-love-heart-pendant-chain-necklace-wom...
424,Silicone BBQ Sauce Oil Brush Handle Cake Butte...,90.0,180.0,50.0,/generic-silicone-bbq-sauce-oil-brush-handle-c...


In [31]:
# 10 prices with the highest discount
products.sort_values(by = 'Discount', ascending=False).head(10)

Unnamed: 0,Item name,Current price,Old price,Discount,Link
232,Winner Transfer Laser Light 5 Sheets Transfer ...,99.0,468.0,79.0,/winner-transfer-laser-light-5-sheets-transfer...
1502,Fashion Back To School Shoes -Quality Genuine ...,939.0,1825.0,70.0,/back-to-school-shoes-quality-genuine-leather-...
1550,FreshLook Beauty Eye Contact Lens,949.0,1500.0,60.0,/beauty-eye-contact-lens-freshlook-mpg420487.html
372,Fashion Men Glossy Casual Brogue Official Leat...,2540.0,5999.0,58.0,/fashion-men-glossy-casual-brogue-official-lea...
94,Fashion Beautiful Girls Back To School Shoes L...,1110.0,2200.0,56.0,/fashion-beautiful-girls-back-to-school-shoes-...
1510,L.A. Girl High Definition Concealer Pro Concea...,209.0,450.0,54.0,/l.a.-girl-high-definition-concealer-pro-conce...
461,Kids Fleece Blankets/Throw Blankets,699.0,1500.0,53.0,/generic-kids-fleece-blanketsthrow-blankets-15...
653,Oraimo Trimmer With 4 Guided Combs,1650.0,3499.0,53.0,/oraimo-trimmer-with-4-guided-combs-71964194.html
1166,"Safety Jogger Boots - Oil, H20, & Pierce Proof",2799.0,6000.0,53.0,/generic-safety-jogger-boots-oil-h20-pierce-pr...
1106,"Safety Jogger Boots - Oil, H20, & Pierce Proof",2799.0,6000.0,53.0,/generic-safety-jogger-boots-oil-h20-pierce-pr...
