# ECommerce Product Price Analysis

Objective: To predict the sales price of a product based on various features
Dataset: Scraped from Shopee
Product: Shin Ramen

## Import Libraries & Dataset

In [1]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

## Display all columns of the dataframe
pd.pandas.set_option('display.max_columns',None)

In [27]:
df = pd.read_csv('raw_shin ramen_shopee.csv')

# Show rows and columns
print(df.shape)

(500, 13)


In [28]:
df.head()

Unnamed: 0,prod_name,prod_desc,prod_price,prod_rating,prod_no_rating,prod_no_sold,sell_name,sell_rating,sell_no_products,sell_resprate,sell_resptime,sell_follower,sell_joined
0,Preferred+\n【Ready Stock 】 [Halal] NongShim Sh...,,RM12.50,5.0,5.0,421,49shopp,23.5k,411,98%,within hours,3 years ago,9.8k
1,Halal Nongshim Ramen (Shin Ramyun / Kimchi Ram...,Expiry date :\nShin ramyun - December 2021\nKi...,RM15.90 - RM48.00,4.9,4.9,68,newlineresources.online,2.1k,57,81%,within hours,12 months ago,1.2k
2,Halal Nongshim Shin Ramyun/Kimchi Ramen/ neogu...,"As part of the marketing strategies, Nongshim ...",RM63.50,,0.0,0,newlineresources.online,2.1k,57,81%,within hours,12 months ago,1.2k
3,Preferred+\nSHIN • KOREA KOREAN RAMEN POT ○ YE...,**Get Korean Spoon & Chopsticks at Only RM10 w...,RM13.80 - RM69.80,4.9,4.9,788,imshin.,25.2k,421,99%,within hours,5 years ago,25.4k
4,Preferred+\n【Ready Stock】 (HALAL) Korea Nongsh...,1. Nongshim Shin Ramyun Bowl (117g)\n- Octobe...,RM4.50,5.0,5.0,967,49shopp,23.5k,411,98%,within hours,3 years ago,9.8k


## Data Cleaning

In [29]:
# Make a list of features which has missing values
features_with_na = [features for features in df.columns if df[features].isnull().sum()>1]

# Print the feature name and percentage of missing values
for feature in features_with_na: 
    print(feature,np.round(df[feature].isnull().mean(),4), '% missing values')

prod_desc 0.004 % missing values
prod_rating 0.71 % missing values


In [30]:
# Find out the data types of each columns
df.dtypes

prod_name            object
prod_desc            object
prod_price           object
prod_rating         float64
prod_no_rating      float64
prod_no_sold         object
sell_name            object
sell_rating          object
sell_no_products     object
sell_resprate        object
sell_resptime        object
sell_follower        object
sell_joined          object
dtype: object

In [31]:
# Swap names for sell_follower with sell_joined
df[['sell_follower','sell_joined']] = df[['sell_joined','sell_follower']]

In [35]:
def value_to_float(x):
    if 'k' in x:
        return int(float(x.replace('k','')))*1000
    return int(x)

In [36]:
def col_value_to_int(df,col_lst):
    for col in col_lst:
        df[col] = df[col].apply(value_to_int)

In [37]:
col_list = ['sell_rating','sell_no_products','sell_follower']
col_value_to_float(df,col_list)

In [38]:
df.head()

Unnamed: 0,prod_name,prod_desc,prod_price,prod_rating,prod_no_rating,prod_no_sold,sell_name,sell_rating,sell_no_products,sell_resprate,sell_resptime,sell_follower,sell_joined
0,Preferred+\n【Ready Stock 】 [Halal] NongShim Sh...,,RM12.50,5.0,5.0,421,49shopp,23000,411,98%,within hours,9000,3 years ago
1,Halal Nongshim Ramen (Shin Ramyun / Kimchi Ram...,Expiry date :\nShin ramyun - December 2021\nKi...,RM15.90 - RM48.00,4.9,4.9,68,newlineresources.online,2000,57,81%,within hours,1000,12 months ago
2,Halal Nongshim Shin Ramyun/Kimchi Ramen/ neogu...,"As part of the marketing strategies, Nongshim ...",RM63.50,,0.0,0,newlineresources.online,2000,57,81%,within hours,1000,12 months ago
3,Preferred+\nSHIN • KOREA KOREAN RAMEN POT ○ YE...,**Get Korean Spoon & Chopsticks at Only RM10 w...,RM13.80 - RM69.80,4.9,4.9,788,imshin.,25000,421,99%,within hours,25000,5 years ago
4,Preferred+\n【Ready Stock】 (HALAL) Korea Nongsh...,1. Nongshim Shin Ramyun Bowl (117g)\n- Octobe...,RM4.50,5.0,5.0,967,49shopp,23000,411,98%,within hours,9000,3 years ago


In [50]:
a = 'RM1.00 - RM5.00'
a.split("-")

['RM1.00']

In [43]:
## Transform days/months/years 
# Assume a month has 30 days for all 
def to_days(x):
    x = x.lower()
    temp_list = x.split()
    if 'year' in x:
        return int(temp_list[0])*12*30
    elif 'month' in x:
        return int(temp_list[0])*30
    else:
        return int(temp_list[0])

In [44]:
df['sell_joined'] = df['sell_joined'].apply(to_days)

In [47]:
# Turn percentage into actual number
df['sell_resprate'] = df['sell_resprate'].str.rstrip('%').astype('float')/100.0

In [54]:
## Clean price column

# Remove the currency
df['prod_price'] = df['prod_price'].apply(lambda x: x.replace('RM',''))

# Only take the minimum price if got range
df['prod_price'] = df['prod_price'].apply(lambda x: int(x.split('-')[0])


Unnamed: 0,prod_name,prod_desc,prod_price,prod_rating,prod_no_rating,prod_no_sold,sell_name,sell_rating,sell_no_products,sell_resprate,sell_resptime,sell_follower,sell_joined
0,Preferred+\n【Ready Stock 】 [Halal] NongShim Sh...,,12.50,5.0,5.0,421,49shopp,23000,411,0.98,within hours,9000,1080
1,Halal Nongshim Ramen (Shin Ramyun / Kimchi Ram...,Expiry date :\nShin ramyun - December 2021\nKi...,15.90 - 48.00,4.9,4.9,68,newlineresources.online,2000,57,0.81,within hours,1000,360
2,Halal Nongshim Shin Ramyun/Kimchi Ramen/ neogu...,"As part of the marketing strategies, Nongshim ...",63.50,,0.0,0,newlineresources.online,2000,57,0.81,within hours,1000,360
3,Preferred+\nSHIN • KOREA KOREAN RAMEN POT ○ YE...,**Get Korean Spoon & Chopsticks at Only RM10 w...,13.80 - 69.80,4.9,4.9,788,imshin.,25000,421,0.99,within hours,25000,1800
4,Preferred+\n【Ready Stock】 (HALAL) Korea Nongsh...,1. Nongshim Shin Ramyun Bowl (117g)\n- Octobe...,4.50,5.0,5.0,967,49shopp,23000,411,0.98,within hours,9000,1080
