# Importing the libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw smartphone data from a CSV file located relative to the working directory
df = pd.read_csv('smartphones data.csv')

In [2]:
# Check the dataframe's dimensions as a (rows, columns) tuple
df.shape

(1020, 11)

In [3]:
# Preview the first 4 rows of the dataframe
df.head(4)

Unnamed: 0,model,price,rating,sim,processor,ram,battery,display,camera,card,os
0,OnePlus 11 5G,"₹54,999",89.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13
1,OnePlus Nord CE 2 Lite 5G,"₹19,989",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12
2,Samsung Galaxy A14 5G,"₹16,499",75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13
3,Motorola Moto G62 5G,"₹14,999",81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12


# Data cleaning

### Quality Issues

1. **model** - some brands are written differently, like OPPO, in the model column `consistency`
2. **price** - has an unnecessary '₹' symbol and commas between digits, and the Namotel phone at index 608 has a price of 99 `Done`
4. **rating** - missing values `completeness`
5. **processor** - has some incorrect values for some Samsung phones (rows 642,647,649,659,667,701,750,759,819,859,883,884,919,927,929,932,1002) `validity`
6. There is an iPod on row 756 `validity`
7. **memory** - incorrect values in rows (441,485,534,553,584,610,613,642,647,649,659,667,701,750,759,819,859,884,919,927,929,932,990,1002) `validity`
8. **battery** - incorrect values in rows(113,151,309,365,378,441,450,553,584,610,613,630,642,647,649,659,667,701,750,756,759,764,819,855,859,884,915,916,927,929,932,990,1002) `validity`
9. **display** - sometimes frequency is not available `completeness`
10. **display** - incorrect values in rows(378,441,450,553,584,610,613,630,642,647,649,659,667,701,750,759,764,819,859,884,915,916,927,929,932,990,1002) `validity`
11. certain phones are foldable and the info is scattered `validity`
12. **camera** - words like Dual, Triple and Quad are used to represent number of cameras and front and rear cameras are separated by '&'
13. **camera** - problem with rows (100,113,151,157,161,238,273,308,309,323,324,365,367,378,394,441,450,484,506,534,553,571,572,575,584,610,613,615,630,642,647,649,659,667,684,687,705,711,723,728,750,756,759,764,792,819,846,854,855,858,883,884,896,915,916,927,929,932,945,956,990,995,1002,1016) `validity`
14. **card** - sometimes contains info about os and camera `validity`
15. **os** - sometimes contains info about bluetooth and fm radio `validity`
16. **os** - issue with rows (324,378) `validity`
17. **os** - sometimes contains os version name like lollipop `consistency`
18. missing values in camera, card and os `completeness`
19. datatype of price and rating is incorrect `validity`

In [4]:
# let's remove the Rupee symbol and commas in price feature
def treat_price(value):
    """Convert a price such as '₹54,999' into an integer.

    Parameters
    ----------
    value : str or number
        A price string, optionally prefixed with a single currency symbol and
        optionally containing thousands-separator commas. Non-string values
        are handed straight to ``int()``, so applying the function to an
        already-converted column does not fail.

    Returns
    -------
    int
        The numeric price.

    Raises
    ------
    ValueError
        If no integer can be parsed from ``value`` (e.g. an empty string).
    """
    # Already-numeric input: nothing to strip.
    if not isinstance(value, str):
        return int(value)

    text = value.strip()
    # Drop the leading symbol only when there is one; slicing unconditionally
    # would eat the first digit of a price that has no currency prefix.
    if text and not text[0].isdigit():
        text = text[1:]
    return int(text.replace(",", ""))

# Convert every entry of the price column with treat_price
df['price'] = df['price'].apply(treat_price)

# Remove the row labelled 608, whose price of 99 is not a possible value.
# errors='ignore' means a repeated drop does not raise KeyError once the label is
# gone, and reassigning (rather than inplace=True) leaves the previous frame object untouched.
df = df.drop(index=608, errors='ignore')

In [8]:
# Preview the first 5 rows after price cleaning.
# NOTE(review): the saved output already shows Has_5G / Has_NFC, but the cells that create
# those columns (In [6], In [7]) sit further down — cell order looks stale; TODO re-run top to bottom.
df.head(5)

Unnamed: 0,model,price,rating,sim,processor,ram,battery,display,camera,card,os,Has_5G,Has_NFC
0,OnePlus 11 5G,54999,89.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi, NFC","Snapdragon 8 Gen2, Octa Core, 3.2 GHz Processor","12 GB RAM, 256 GB inbuilt",5000 mAh Battery with 100W Fast Charging,"6.7 inches, 1440 x 3216 px, 120 Hz Display wit...",50 MP + 48 MP + 32 MP Triple Rear & 16 MP Fron...,Memory Card Not Supported,Android v13,1,1
1,OnePlus Nord CE 2 Lite 5G,19989,81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 33W Fast Charging,"6.59 inches, 1080 x 2412 px, 120 Hz Display wi...",64 MP + 2 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,0
2,Samsung Galaxy A14 5G,16499,75.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Exynos 1330, Octa Core, 2.4 GHz Processor","4 GB RAM, 64 GB inbuilt",5000 mAh Battery with 15W Fast Charging,"6.6 inches, 1080 x 2408 px, 90 Hz Display with...",50 MP + 2 MP + 2 MP Triple Rear & 13 MP Front ...,"Memory Card Supported, upto 1 TB",Android v13,1,0
3,Motorola Moto G62 5G,14999,81.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Snapdragon 695, Octa Core, 2.2 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with Fast Charging,"6.55 inches, 1080 x 2400 px, 120 Hz Display wi...",50 MP + 8 MP + 2 MP Triple Rear & 16 MP Front ...,"Memory Card (Hybrid), upto 1 TB",Android v12,1,0
4,Realme 10 Pro Plus,24999,82.0,"Dual Sim, 3G, 4G, 5G, VoLTE, Wi-Fi","Dimensity 1080, Octa Core, 2.6 GHz Processor","6 GB RAM, 128 GB inbuilt",5000 mAh Battery with 67W Fast Charging,"6.7 inches, 1080 x 2412 px, 120 Hz Display wit...",108 MP + 8 MP + 2 MP Triple Rear & 16 MP Front...,Memory Card Not Supported,Android v13,1,0


Now we will create 3 features from the `sim` column: `Has_5G`, `Has_dual_Sim` and `Has_NFC`.

- NFC stands for Near Field Communication, a short-range wireless communication technology used in smartphones and other devices. NFC allows devices to exchange data with each other when they are close together, typically within a few centimeters. It operates at a frequency of 13.56 MHz and is designed for low-power communication at modest data rates (up to 424 kbit/s).

In [6]:
# Create the Has_5G feature: 1 when the `sim` description lists 5G among its
# comma-separated entries, else 0.
# Each entry is stripped before comparing, so the match does not depend on the
# exact spacing after the commas (a bare ' 5G' test misses '5G' or '  5G').
Has_5G = [
    int('5G' in [entry.strip() for entry in value.split(',')])
    for value in df['sim']
]

df['Has_5G'] = Has_5G

In [7]:
# Create the Has_NFC feature: 1 when the `sim` description lists NFC among its
# comma-separated entries, else 0.
# Each entry is stripped before comparing, so the match does not depend on the
# exact spacing after the commas (a bare ' NFC' test misses 'NFC' or '  NFC').
Has_NFC = [
    int('NFC' in [entry.strip() for entry in value.split(',')])
    for value in df['sim']
]

df['Has_NFC'] = Has_NFC

In [51]:
# Display the `network` variable.
# NOTE(review): `network` is not assigned in any cell visible above this one; presumably it
# was built in a removed or out-of-order cell — TODO restore its definition.
network

[[' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' VoLTE'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' VoLTE'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' VoLTE'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G', ' 4G', ' 5G'],
 [' 3G'

In [37]:
# Wrap `network` in a pandas Series and display it.
# NOTE(review): this rebinds the same name to a different kind of object, so the result
# depends on which cells ran before it — consider a separate name; TODO confirm intent.
network = pd.Series(network)
network

0       [ 3G,  4G,  5G]
1       [ 3G,  4G,  5G]
2       [ 3G,  4G,  5G]
3       [ 3G,  4G,  5G]
4       [ 3G,  4G,  5G]
             ...       
1014    [ 3G,  4G,  5G]
1015    [ 3G,  4G,  5G]
1016    [ 3G,  4G,  5G]
1017    [ 3G,  4G,  5G]
1018    [ 3G,  4G,  5G]
Length: 1019, dtype: object

## Data Assessing





### Tidiness Issues

1. **sim** - can be split into 3 cols has_5g, has_NFC, has_IR_Blaster
2. **ram** - can be split into 2 cols RAM and ROM
3. **processor** - can be split into processor name, cores and cpu speed.
4. **battery** - can be split into battery capacity, fast_charging_available
5. **display** - can be split into size, resolution_width, resolution_height and frequency
6. **camera** - can be split into front and rear camera
7. **card** - can be split into supported, extended_upto