In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
pd.set_option('max.columns',None)
import numpy as np
import warnings
warnings.filterwarnings('ignore')

### Loading Data

In [95]:
# Load the raw Carvago export and replace the spaces in the column names we
# refer to later with underscores.
data = pd.read_excel('Datasets/Modified_carvago_data.xlsx').rename(
    columns={
        'Car Name': 'Car_Name',
        'Kms driven': 'Kms_driven',
        'First registration': 'First_registration',
        'Fuel type': 'Fuel_type',
        'Drive type': 'Drive_type',
    }
)


### Initial Data Analysis

In [96]:
# (rows, columns) of the freshly loaded frame.
data.shape

(10020, 17)

In [97]:
# Column dtypes as inferred on load.
data.dtypes

Index                    int64
Car_Name                object
Kms_driven              object
First_registration      object
Power                   object
Transmission            object
Fuel_type               object
Vendor                  object
Ratings                float64
Location                object
Price in Euro           object
Price without Vat       object
With or without Tax     object
Drive_type              object
Features_1              object
Features_2              object
Features_3              object
dtype: object

In [98]:
# Number of missing values per column.
data.isna().sum()

Index                     0
Car_Name                  0
Kms_driven                0
First_registration        0
Power                     0
Transmission              0
Fuel_type               422
Vendor                    0
Ratings                3562
Location                  0
Price in Euro             1
Price without Vat         1
With or without Tax    4376
Drive_type             7430
Features_1              810
Features_2              951
Features_3             4564
dtype: int64

In [100]:
# Shapes of the rows whose Kms_driven ends with ' km' versus the rows where it does not.
data[data['Kms_driven'].str.endswith(' km')].shape,data[data['Kms_driven'].str.endswith(' km')==False].shape

((9975, 17), (45, 17))

In [101]:
# Show the rows whose Kms_driven does not end with ' km'.
data[data['Kms_driven'].str.endswith(' km')==False]

Unnamed: 0,Index,Car_Name,Kms_driven,First_registration,Power,Transmission,Fuel_type,Vendor,Ratings,Location,Price in Euro,Price without Vat,With or without Tax,Drive_type,Features_1,Features_2,Features_3
535,535,Volkswagen T-Roc TSI 110 kW,1/2022,110 kW,Automatic,Petrol,,Dealership,,Poland,23 249 €,19 214 €,without 21% VAT,,Parking assist system self-steering,Digital cockpit,
570,570,Hyundai i30 118 kW,1/2022,118 kW,Manual,Petrol,,Dealership,,Poland,21 399 €,17 685 €,without 21% VAT,,Heated front seats,Android auto,Bluetooth
591,591,Audi Q2 35 TFSI S 110 kW,1/2022,110 kW,Automatic,Petrol,,Dealership,,Poland,31 099 €,25 702 €,without 21% VAT,,Digital cockpit,Automatic A/C,
626,626,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,150 kW,Automatic,Diesel,4x4,Dealership,,Poland,49 349 €,40 784 €,without 21% VAT,,Adaptive cruise control,Bang & Olufsen audio,Bluetooth
633,633,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,150 kW,Automatic,Diesel,4x4,Dealership,,Poland,53 649 €,44 338 €,without 21% VAT,,Automatic A/C,Alarm,USB
777,777,Audi Q2 35 TFSI S 110 kW,1/2022,110 kW,Automatic,Petrol,,Dealership,,Poland,31 749 €,26 239 €,without 21% VAT,,Digital cockpit,Automatic A/C,
816,816,Audi A4 150 kW,1/2022,150 kW,Automatic,Petrol,4x4,Dealership,,Poland,47 049 €,38 883 €,without 21% VAT,,LED headlights,Digital cockpit,Navigation system
860,860,Hyundai i30 118 kW,1/2022,118 kW,Manual,Petrol,,Dealership,,Poland,22 099 €,18 264 €,without 21% VAT,,Heated front seats,Android auto,Bluetooth
1171,1171,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,150 kW,Automatic,Diesel,4x4,Dealership,,Poland,55 699 €,46 032 €,without 21% VAT,,LED headlights,Adaptive cruise control,Bang & Olufsen audio
1177,1177,BMW X1 110 kW,2/2022,110 kW,Automatic,Diesel,,Dealership,4.5,Italy,38 099 €,Not deductible,,,,,


### Handling misplaced values

In [102]:
            
def get_feature_name(value):
    """Guess which column a raw cell value belongs to from its textual shape.

    Parameters
    ----------
    value : object
        A single cell value. Only strings are classified.

    Returns
    -------
    str or None
        'First_registration', 'Power', 'Transmission', 'Fuel_type',
        'Drive_type' or 'Kms_driven' when the string matches that pattern;
        'none' when ``value`` is missing or not a string; ``None`` when a
        string matches no pattern.
    """
    # NaN never compares equal to anything, so the former `value != np.nan`
    # test was always True and a NaN cell crashed on the `in` checks below.
    # Test for "is this text at all" instead.
    if not isinstance(value, str):
        return 'none'

    if '/' in value:
        parts = value.split('/')
        if parts[0].isnumeric() and parts[1].isnumeric():
            return 'First_registration'
        # As before, a value with a slash that is not month/year is left
        # unclassified and is not tested against the patterns below.
        return None
    if 'kW' in value:
        return 'Power'
    if value in ['Automatic', 'Manual']:
        return 'Transmission'
    if value in ['Diesel','Petrol','Electric','CNG','Hydrogen','LPG','Other fuel type','Hybrid']:
        return 'Fuel_type'
    if value in ['2x4','4x4']:
        return 'Drive_type'
    if 'km' in value:
        return 'Kms_driven'
    return None

In [103]:
# Columns scanned, in this order, by the realignment loop below.
columns = ['Kms_driven','First_registration','Power','Transmission','Fuel_type','Drive_type']

In [104]:
# Shared scratch state for the realignment loop: the value currently being
# carried and the column name it was last classified as.
value = ''
name = ''


def update_value(val, f_name, idx):
    """Write ``val`` into ``data.loc[idx, f_name]`` and remember what it replaced.

    The previous content of the cell is stored in the module-level ``value``
    (as an empty string when the cell was missing) so the caller can go on to
    re-home it. Returns ``None``.
    """
    global value
    displaced = data.loc[idx, f_name]
    value = displaced if str(displaced) != 'nan' else ''
    data.loc[idx, f_name] = val


# Realign values that sit in the wrong column (e.g. a registration date stored
# under Kms_driven). For each row the columns in `columns` are walked left to
# right; a value classified as belonging elsewhere is written to that column
# and whatever it displaced becomes the next value to place.
#
# `i` is one row of data.values, so i[0] is the first column (Index) and is
# used as the .loc row label.
# NOTE(review): this assumes the Index column equals the frame's row labels - confirm.
# `value` and `name` are module-level, so they carry over between columns and rows.
for i in data.values:
    skip = 0
    for c in range(0,len(columns)):
        # Read the cell afresh at the first column, or after the previous
        # column was found to hold a correctly placed value.
        if c == 0 or skip==1:
            if str(data.loc[i[0],columns[c]])!='nan':
                value = str(data.loc[i[0],columns[c]])
                skip = 0
            elif str(data.loc[i[0],columns[c]])=='nan':
                # Empty cell: fill it only if the carried value was last
                # classified as this column; otherwise move to the next column.
                if name==columns[c]:
                    data.loc[i[0],columns[c]] = value
                else:
                    continue
                
        # Nothing is being carried (update_value displaced an empty cell).
        if len(value)==0:
            continue
        name = get_feature_name(value)
        # The value is already in the column it belongs to: re-read at the next column.
        if name == columns[c]:
            skip = 1
            continue
        v = value
        n = name
        #print(v,n,i[0])
        # Put the value where it belongs; update_value stores the displaced
        # content back into the global `value`.
        update_value(v,n,i[0])


In [105]:
# Same filter as before the realignment loop, to inspect the affected rows again.
data[data['Kms_driven'].str.endswith(' km')==False]

Unnamed: 0,Index,Car_Name,Kms_driven,First_registration,Power,Transmission,Fuel_type,Vendor,Ratings,Location,Price in Euro,Price without Vat,With or without Tax,Drive_type,Features_1,Features_2,Features_3
535,535,Volkswagen T-Roc TSI 110 kW,1/2022,1/2022,110 kW,Automatic,Petrol,Dealership,,Poland,23 249 €,19 214 €,without 21% VAT,,Parking assist system self-steering,Digital cockpit,
570,570,Hyundai i30 118 kW,1/2022,1/2022,118 kW,Manual,Petrol,Dealership,,Poland,21 399 €,17 685 €,without 21% VAT,,Heated front seats,Android auto,Bluetooth
591,591,Audi Q2 35 TFSI S 110 kW,1/2022,1/2022,110 kW,Automatic,Petrol,Dealership,,Poland,31 099 €,25 702 €,without 21% VAT,,Digital cockpit,Automatic A/C,
626,626,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,1/2022,150 kW,Automatic,Diesel,Dealership,,Poland,49 349 €,40 784 €,without 21% VAT,4x4,Adaptive cruise control,Bang & Olufsen audio,Bluetooth
633,633,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,1/2022,150 kW,Automatic,Diesel,Dealership,,Poland,53 649 €,44 338 €,without 21% VAT,4x4,Automatic A/C,Alarm,USB
777,777,Audi Q2 35 TFSI S 110 kW,1/2022,1/2022,110 kW,Automatic,Petrol,Dealership,,Poland,31 749 €,26 239 €,without 21% VAT,,Digital cockpit,Automatic A/C,
816,816,Audi A4 150 kW,1/2022,1/2022,150 kW,Automatic,Petrol,Dealership,,Poland,47 049 €,38 883 €,without 21% VAT,4x4,LED headlights,Digital cockpit,Navigation system
860,860,Hyundai i30 118 kW,1/2022,1/2022,118 kW,Manual,Petrol,Dealership,,Poland,22 099 €,18 264 €,without 21% VAT,,Heated front seats,Android auto,Bluetooth
1171,1171,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,1/2022,150 kW,Automatic,Diesel,Dealership,,Poland,55 699 €,46 032 €,without 21% VAT,4x4,LED headlights,Adaptive cruise control,Bang & Olufsen audio
1177,1177,BMW X1 110 kW,2/2022,2/2022,110 kW,Automatic,Diesel,Dealership,4.5,Italy,38 099 €,Not deductible,,,,,


### Cleaning Kms_driven

In [106]:
def _normalise_kms(raw):
    """Trim the text, drop a trailing ' km' unit and remove non-breaking spaces."""
    text = raw.strip()
    if text[-2:] == 'km':
        # Cut the two unit letters plus the character in front of them.
        text = text[:-3]
    return text.replace('\xa0', '').strip()


data['Kms_driven'] = data['Kms_driven'].apply(_normalise_kms)

In [107]:
# Rows whose Kms_driven is still not a number after cleaning.
# The former pattern r'[@#&$%+-/*]' contained the range '+-/', which also
# matches ',' and '.', so a numeric parse is used as the test instead.
data[pd.to_numeric(data['Kms_driven'], errors='coerce').isna()]

Unnamed: 0,Index,Car_Name,Kms_driven,First_registration,Power,Transmission,Fuel_type,Vendor,Ratings,Location,Price in Euro,Price without Vat,With or without Tax,Drive_type,Features_1,Features_2,Features_3
535,535,Volkswagen T-Roc TSI 110 kW,1/2022,1/2022,110 kW,Automatic,Petrol,Dealership,,Poland,23 249 €,19 214 €,without 21% VAT,,Parking assist system self-steering,Digital cockpit,
570,570,Hyundai i30 118 kW,1/2022,1/2022,118 kW,Manual,Petrol,Dealership,,Poland,21 399 €,17 685 €,without 21% VAT,,Heated front seats,Android auto,Bluetooth
591,591,Audi Q2 35 TFSI S 110 kW,1/2022,1/2022,110 kW,Automatic,Petrol,Dealership,,Poland,31 099 €,25 702 €,without 21% VAT,,Digital cockpit,Automatic A/C,
626,626,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,1/2022,150 kW,Automatic,Diesel,Dealership,,Poland,49 349 €,40 784 €,without 21% VAT,4x4,Adaptive cruise control,Bang & Olufsen audio,Bluetooth
633,633,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,1/2022,150 kW,Automatic,Diesel,Dealership,,Poland,53 649 €,44 338 €,without 21% VAT,4x4,Automatic A/C,Alarm,USB
777,777,Audi Q2 35 TFSI S 110 kW,1/2022,1/2022,110 kW,Automatic,Petrol,Dealership,,Poland,31 749 €,26 239 €,without 21% VAT,,Digital cockpit,Automatic A/C,
816,816,Audi A4 150 kW,1/2022,1/2022,150 kW,Automatic,Petrol,Dealership,,Poland,47 049 €,38 883 €,without 21% VAT,4x4,LED headlights,Digital cockpit,Navigation system
860,860,Hyundai i30 118 kW,1/2022,1/2022,118 kW,Manual,Petrol,Dealership,,Poland,22 099 €,18 264 €,without 21% VAT,,Heated front seats,Android auto,Bluetooth
1171,1171,Audi Q5 40 TDI Quattro S tronic 150 kW,1/2022,1/2022,150 kW,Automatic,Diesel,Dealership,,Poland,55 699 €,46 032 €,without 21% VAT,4x4,LED headlights,Adaptive cruise control,Bang & Olufsen audio
1177,1177,BMW X1 110 kW,2/2022,2/2022,110 kW,Automatic,Diesel,Dealership,4.5,Italy,38 099 €,Not deductible,,,,,


### Imputing Kms_driven with mean

In [108]:
# Parse once: anything that is not a number becomes NaN.
# The former pattern r'[@#&$%+-/*]' contained the range '+-/', which also
# matches ',' and '.', so valid decimals could be mistaken for bad rows.
kms_numeric = pd.to_numeric(data['Kms_driven'], errors='coerce')

# Rows to impute, and the mean of the valid readings (NaN is skipped by mean()).
selrows = data[kms_numeric.isna()]
mean_km = str(round(kms_numeric.mean(), 0))
mean_km

'29275.0'

In [109]:
# Check that no row already holds the mean as its value, using the computed
# mean_km rather than a hard-coded copy of it.
data.query("Kms_driven == @mean_km")

Unnamed: 0,Index,Car_Name,Kms_driven,First_registration,Power,Transmission,Fuel_type,Vendor,Ratings,Location,Price in Euro,Price without Vat,With or without Tax,Drive_type,Features_1,Features_2,Features_3


In [110]:
# Replace every non-numeric reading with the computed mean in one assignment.
# Using mean_km instead of a literal keeps this correct if the data changes.
data.loc[selrows.index, 'Kms_driven'] = mean_km
count = len(selrows.index)

print(count)

45


In [111]:
# All values are numeric strings by now; convert the column to float.
data['Kms_driven'] = data['Kms_driven'].astype(float)

### First_registration

In [112]:
# NOTE(review): '/' is part of this character class, so every value that
# contains a slash matches; the recorded result covers all 10020 rows, so this
# does not single out malformed entries.
data[data['First_registration'].str.contains(r'[@#&$%+-/*]')].shape

(10020, 17)

In [113]:
# Parse the 'month/year' strings into timestamps.
data['First_registration'] = pd.to_datetime(data['First_registration'],format='%m/%Y')

### Cleaning Power feature

In [114]:
def _strip_kw_suffix(raw):
    """Trim the text and drop a trailing ' kW' unit, leaving the bare figure."""
    text = raw.strip()
    if text[-2:] == 'kW':
        # Cut the two unit letters plus the character in front of them.
        text = text[:-3]
    return text.strip()


data['Power'] = data['Power'].apply(_strip_kw_suffix)

### Getting missing Power values from the car name

In [115]:
# Rows whose Power is purely alphabetic hold no usable figure. Take it from
# the car name instead, whose second-to-last token is the kW value
# (assumes names end in '<number> kW', as in the rows shown above).
# Rows are addressed by their real index label and by column name, not by
# position in .values, so this does not depend on column order or on the
# Index column matching the row labels.
alpha_power = data['Power'].str.isalpha()
for row_label, car_name in data.loc[alpha_power, 'Car_Name'].items():
    data.loc[row_label, 'Power'] = car_name.split()[-2]

In [116]:
# Verify that no purely alphabetic Power values remain.
data[data['Power'].str.isalpha()]

Unnamed: 0,Index,Car_Name,Kms_driven,First_registration,Power,Transmission,Fuel_type,Vendor,Ratings,Location,Price in Euro,Price without Vat,With or without Tax,Drive_type,Features_1,Features_2,Features_3


In [117]:
# Distinct Power strings before the integer cast.
data['Power'].unique()

array(['140', '147', '145', '80', '110', '240', '55', '115', '113', '143',
       '202', '85', '81', '100', '169', '177', '150', '75', '190', '49',
       '118', '210', '74', '155', '250', '117', '76', '86', '66', '92',
       '412', '50', '88', '152', '33', '180', '129', '221', '215', '93',
       '53', '195', '335', '128', '135', '146', '149', '73', '165', '125',
       '103', '184', '241', '127', '61', '96', '97', '90', '111', '163',
       '213', '84', '170', '196', '82', '121', '239', '114', '141', '132',
       '154', '200', '64', '133', '70', '142', '179', '87', '68', '182',
       '214', '107', '112', '48', '178', '191', '131', '120', '280', '78',
       '157', '259', '185', '235', '72', '320', '176', '134', '44', '186',
       '116', '294', '279', '410', '301', '62', '260', '430', '60', '77',
       '295', '309', '331', '208', '228', '245', '130', '54', '386', '51',
       '160', '270', '89', '105', '225', '71', '450', '162', '287', '217',
       '69', '230', '300', '275', '20

In [118]:
# Convert the cleaned Power strings (the 'kW' unit was stripped above) to integers.
data['Power'] = data['Power'].astype(int)

### Transmission

In [27]:
# Distinct Transmission values.
data['Transmission'].unique()

array(['Automatic', 'Manual'], dtype=object)

### Fuel_type

In [28]:
# Distinct Fuel_type values.
data['Fuel_type'].unique()

array(['Diesel', 'Hybrid', 'Petrol', 'Electric', 'LPG', 'Other fuel type',
       'CNG', 'Hydrogen'], dtype=object)

### Vendor

In [29]:
# Distinct Vendor values.
data['Vendor'].unique()

array(['Dealership', 'Used cars seller'], dtype=object)

### Deleting Ratings and index

In [119]:
# Remove the Ratings column (3562 missing values in the null counts above).
del data['Ratings']

In [120]:
# Discard the Index column and renumber the rows 0..n-1.
data.drop(columns=['Index'], inplace=True)
data.reset_index(drop=True, inplace=True)
data

Unnamed: 0,Car_Name,Kms_driven,First_registration,Power,Transmission,Fuel_type,Vendor,Location,Price in Euro,Price without Vat,With or without Tax,Drive_type,Features_1,Features_2,Features_3
0,BMW 520 d 140 kW,32125.0,2019-01-01,140,Automatic,Diesel,Dealership,Germany,32 899 €,27 189 €,without 21% VAT,,Parking assist system self-steering,LED headlights,
1,Kia Sorento 2.2 CRDi AWD 147 kW,63400.0,2018-07-01,147,Automatic,Diesel,Dealership,Germany,34 649 €,28 636 €,without 21% VAT,4x4,Ventilated front seats,Parking assist system self-steering,
2,Opel Grandland X 147 kW,2700.0,2020-06-01,147,Automatic,Hybrid,Dealership,Italy,35 349 €,29 214 €,without 21% VAT,4x4,LED headlights,Electric adjustable front seats,
3,BMW 320 d 140 kW,26145.0,2021-03-01,140,Automatic,Diesel,Dealership,Germany,39 349 €,32 520 €,without 21% VAT,,Parking assist system self-steering,LED headlights,
4,BMW 320 d Sport Line 140 kW,28199.0,2021-02-01,140,Automatic,Diesel,Dealership,Germany,40 099 €,33 140 €,without 21% VAT,,Parking assist system self-steering,LED headlights,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10015,BMW 340 Gran Turismo i 240 kW,47800.0,2018-11-01,240,Automatic,Petrol,Dealership,Germany,37 249 €,30 784 €,without 21% VAT,,LED headlights,Keyless entry,Electric adjustable front seats
10016,Volvo V60 Momentum Pro 120 kW,16500.0,2021-04-01,120,Automatic,Petrol,Dealership,Germany,37 249 €,30 784 €,without 21% VAT,,Heated rear seats,LED headlights,Digital cockpit
10017,Audi S3 2.0 TFSI Quattro 228 kW,33002.0,2018-07-01,228,Automatic,Petrol,Dealership,Germany,40 099 €,33 140 €,without 21% VAT,4x4,LED headlights,Keyless entry,Heated front seats
10018,Mercedes-Benz GLC 350 d 4Matic Coupe 190 kW,63000.0,2018-11-01,190,Automatic,Diesel,Used cars seller,Germany,42 549 €,35 164 €,without 21% VAT,4x4,LED headlights,Keyless entry,Electric adjustable front seats


### Cleaning Price_Euro

In [121]:
# Rename the price column, drop rows without a price and renumber the rows.
# Chaining returns a fresh frame rather than a boolean-filtered slice, so the
# column assignments in the following cells do not hit SettingWithCopy issues.
data = (
    data.rename(columns={"Price in Euro": "Price_Euro"})
    .dropna(subset=['Price_Euro'])
    .reset_index(drop=True)
)

In [122]:
# Work on a scratch copy of the price strings; Price_Euro is overwritten only after cleaning.
data['Temp_price'] = data['Price_Euro']

In [123]:
# Strip the trailing euro sign, surrounding whitespace and non-breaking
# thousands separators from every price that ends with the euro sign.
# This is vectorised rather than three scalar .loc writes per row, and it has
# no bare `except:` that would hide unrelated errors. Entries that are not
# strings or do not end with the euro sign are left unchanged (na=False).
ends_with_euro = data['Temp_price'].str.endswith('€', na=False)
data.loc[ends_with_euro, 'Temp_price'] = (
    data.loc[ends_with_euro, 'Temp_price']
    .str[:-1]
    .str.strip()
    .str.replace('\xa0', '', regex=False)
)

In [124]:
# Copy the cleaned strings back, convert them to float and drop the scratch column.
data['Price_Euro'] = data['Temp_price']
data['Price_Euro'] = data['Price_Euro'].astype(float)
del data['Temp_price']

### Deleting 'Price without Vat' and 'With or without Tax'

In [125]:
# Remove the two VAT-related columns.
data.drop(columns=['Price without Vat', 'With or without Tax'], inplace=True)

### Drive_type

In [126]:
# Distinct Drive_type values and the number of missing entries.
data['Drive_type'].unique(),data['Drive_type'].isna().sum()

(array([nan, '4x4'], dtype=object), 7134)

In [127]:
# Treat a missing drive type as '2x4'.
# Assign the result back: fillna(inplace=True) on a column selection is
# deprecated chained assignment and has no effect under pandas Copy-on-Write.
data['Drive_type'] = data['Drive_type'].fillna('2x4')

In [128]:
# Null counts, shape and dtypes after the cleaning steps so far.
data.isna().sum(),data.shape,data.dtypes

(Car_Name                 0
 Kms_driven               0
 First_registration       0
 Power                    0
 Transmission             0
 Fuel_type                0
 Vendor                   0
 Location                 0
 Price_Euro               0
 Drive_type               0
 Features_1             810
 Features_2             951
 Features_3            4563
 dtype: int64,
 (10019, 13),
 Car_Name                      object
 Kms_driven                   float64
 First_registration    datetime64[ns]
 Power                          int64
 Transmission                  object
 Fuel_type                     object
 Vendor                        object
 Location                      object
 Price_Euro                   float64
 Drive_type                    object
 Features_1                    object
 Features_2                    object
 Features_3                    object
 dtype: object)

### Handling Car Features

In [129]:
# One-hot encode each of the three feature slots; a missing slot gives an all-zero row.
f1 = pd.get_dummies(data['Features_1'])
f2 = pd.get_dummies(data['Features_2'])
f3 = pd.get_dummies(data['Features_3'])

features_dummy = pd.concat([f1, f2, f3], axis=1)

features_dummy.info()

# The same feature can appear in more than one slot, which gives duplicate
# column names; merge them by summing. groupby(..., axis=1) is deprecated,
# so group the transposed frame and transpose back.
features_same_name = features_dummy.T.groupby(level=0).sum().T

# Number of listed features per car (0 to 3), named up front so no rename of
# a column called 0 is needed after the concat.
features_score = (
    features_same_name.sum(axis=1).astype('int64').rename('features_score')
)

data = pd.concat([data, features_score], axis=1)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10019 entries, 0 to 10018
Columns: 224 entries, Adaptive cruise control to Xenon headlights
dtypes: uint8(224)
memory usage: 2.1 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10019 entries, 0 to 10018
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Car_Name            10019 non-null  object        
 1   Kms_driven          10019 non-null  float64       
 2   First_registration  10019 non-null  datetime64[ns]
 3   Power               10019 non-null  int64         
 4   Transmission        10019 non-null  object        
 5   Fuel_type           10019 non-null  object        
 6   Vendor              10019 non-null  object        
 7   Location            10019 non-null  object        
 8   Price_Euro          10019 non-null  float64       
 9   Drive_type          10019 non-null  object        
 10  Features_1          9209 non-nul

### Dropping car features

In [130]:
# The three raw feature slots are now summarised by features_score.
data.drop(columns=['Features_1', 'Features_2', 'Features_3'], inplace=True)

In [131]:
# Column overview after dropping the raw feature columns.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10019 entries, 0 to 10018
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   Car_Name            10019 non-null  object        
 1   Kms_driven          10019 non-null  float64       
 2   First_registration  10019 non-null  datetime64[ns]
 3   Power               10019 non-null  int64         
 4   Transmission        10019 non-null  object        
 5   Fuel_type           10019 non-null  object        
 6   Vendor              10019 non-null  object        
 7   Location            10019 non-null  object        
 8   Price_Euro          10019 non-null  float64       
 9   Drive_type          10019 non-null  object        
 10  features_score      10019 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(2), object(6)
memory usage: 861.1+ KB


### Handling Duplicates

In [132]:
# Rows that exactly repeat an earlier row (the first occurrence is not flagged).
is_repeat = data.duplicated()
duplicates = data.loc[is_repeat]
duplicates.shape

(89, 11)

In [133]:
# Preview the duplicated rows, ordered by car name.
duplicates.sort_values(by='Car_Name').head(10)

Unnamed: 0,Car_Name,Kms_driven,First_registration,Power,Transmission,Fuel_type,Vendor,Location,Price_Euro,Drive_type,features_score
4980,Abarth 595 Competizione 132 kW,6668.0,2021-03-01,132,Manual,Petrol,Dealership,Germany,29849.0,2x4,3
1099,Abarth 595 Competizione 132 kW,12922.0,2020-12-01,132,Manual,Petrol,Dealership,Germany,27649.0,2x4,3
8274,Alfa Romeo Giulia 140 kW,13200.0,2021-01-01,140,Automatic,Diesel,Dealership,Italy,43949.0,2x4,1
5664,Alfa Romeo Giulia 2.2 155 kW,19478.0,2020-08-01,155,Automatic,Diesel,Dealership,Italy,41699.0,4x4,3
3237,Alfa Romeo Giulia 2.2 155 kW,19478.0,2020-08-01,155,Automatic,Diesel,Dealership,Italy,41699.0,4x4,3
3704,Alfa Romeo Giulia 2.2 AT8 140 kW,26000.0,2021-08-01,140,Automatic,Diesel,Dealership,Italy,39649.0,2x4,3
8406,Alfa Romeo Giulia 2.2 AT8 140 kW,26000.0,2021-08-01,140,Automatic,Diesel,Dealership,Italy,39649.0,2x4,3
9087,Alfa Romeo Giulia 2.2 AT8 140 kW,22599.0,2020-01-01,140,Automatic,Diesel,Dealership,Italy,35999.0,2x4,3
8650,Alfa Romeo Giulietta 1.6 16V TCT 88 kW,46100.0,2019-05-01,88,Automatic,Diesel,Dealership,Italy,18449.0,2x4,1
9933,Alfa Romeo Stelvio 154 kW,51093.0,2020-07-01,154,Automatic,Diesel,Dealership,Italy,51699.0,2x4,3


In [134]:
# Take a real copy: plain assignment only creates a second name for the same
# frame, so in-place operations on temp_data would also modify `data`.
temp_data = data.copy()
temp_data.shape

(10019, 11)

In [135]:
# Drop exact duplicate rows in place, keeping the first occurrence.
# The index is not reset, so the labels of removed rows are left as gaps.
temp_data.drop_duplicates(keep='first',inplace=True)
temp_data.shape

(9930, 11)

In [139]:
# Build a Sweetviz EDA report with Price_Euro as the target and save it as HTML.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
import sweetviz as sv
sweet_report = sv.analyze(temp_data,target_feat='Price_Euro')
sweet_report.show_html('carsvago_EDA.html')

                                             |          | [  0%]   00:00 -> (? left)

Report carsvago_EDA.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


In [137]:
# Final column dtypes of the cleaned frame.
temp_data.dtypes

Car_Name                      object
Kms_driven                   float64
First_registration    datetime64[ns]
Power                          int64
Transmission                  object
Fuel_type                     object
Vendor                        object
Location                      object
Price_Euro                   float64
Drive_type                    object
features_score                 int64
dtype: object

#### Export to csv

In [138]:
# Write the cleaned data to CSV. With the default index=True the row index is
# written as an unnamed first column.
# NOTE(review): the index has gaps after drop_duplicates; consider index=False
# unless a downstream reader relies on that column.
temp_data.to_csv('Carsvago_cleaned_data.csv')

In [None]:
# value = '' 
# name = ''


# def update_value(val,f_name,idx):
#     print(data.loc[idx,f_name])
#     if str(data.loc[idx,f_name]) != 'nan':
#         global value
#         value = data.loc[idx,f_name]
#     else:
#         value = ''
#     data.loc[idx,f_name] = val
#     return



# #for i in data[data['Kms_driven'].str.endswith(' km')==False].head(1).values:
# #['Kms_driven','First_registration','Power','Transmission','Fuel_type','Drive_type']

# for i in data.values:
#     skip = 0
#     for c in range(0,len(columns)):
#         if c == 0 or skip==1:
#             value = str(data.loc[i[0],columns[c]])
#             skip = 0
#         if len(value)==0:
#             continue
#         name = get_feature_name(value)
#         if name == columns[c]:
#             skip = 1
#             continue
#         v = value
#         n = name
#         print(v,n,i[0])
#         update_value(v,n,i[0])