In [95]:
import pandas as pd
import numpy as np

# Q1. Pandas version

In [96]:
print(pd.__version__)

2.3.1


# Getting the data

In [97]:
df = pd.read_csv('https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv')

In [98]:
df.head()

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


# Q2. Records count

In [99]:
l = len(df)
print('Total Number of Records', l)

Total Number of Records 9704


# Q3. Fuel types

In [100]:
df.fuel_type.nunique()

2

# Q4. Missing values

In [101]:
#df.isnull().sum() or

miss_col = df.isnull().sum()
miss_col_val = miss_col[miss_col > 0]
print(len(miss_col_val))

4


# Q5. Max fuel efficiency

In [102]:
# using filter function 
df[df['origin'] == 'Asia']['fuel_efficiency_mpg'].max()

# or 
# #df.groupby('origin').fuel_efficiency_mpg.max()

np.float64(23.759122836520497)

# Question 6. mean value of horsepower 

In [103]:
# Step 1: mean value of horsepower
mean_before = df.horsepower.mean()
print(mean_before)

149.65729212983547


In [104]:

# Step 2: Most frequent value of horsepower
most_frequent= df.horsepower.mode()
# Step 3: Fill missing values with the most frequent value
# df['horsepower'] = df['horsepower'].fillna(most_frequent)
df.horsepower.fillna(most_frequent[0])
mean_after = df.horsepower.mean()
print(mean_after)
# Step 4: Has the mean changed?
print("Changed?", "Yes, it increased" if mean_after > mean_before 
      else "Yes, it decreased" if mean_after < mean_before 
      else "No")

149.65729212983547
Changed? No


# Q7. Sum of weights

In [132]:
# 1. Select all the cars from Asia
df_asia = df[df['origin'] == 'Asia']
df_asia

Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
8,250,1.0,174.0,2714.219310,10.3,2016,Asia,Diesel,Front-wheel drive,-1.0,16.823554
12,320,5.0,145.0,2783.868974,15.1,2010,Asia,Diesel,All-wheel drive,1.0,16.175820
14,200,6.0,160.0,3582.687368,14.9,2007,Asia,Diesel,All-wheel drive,0.0,11.871091
20,150,3.0,197.0,2231.808142,18.7,2011,Asia,Gasoline,Front-wheel drive,1.0,18.889083
21,160,4.0,133.0,2659.431451,,2016,Asia,Gasoline,Front-wheel drive,-1.0,16.077730
...,...,...,...,...,...,...,...,...,...,...,...
9688,260,4.0,,3948.404625,15.5,2018,Asia,Diesel,All-wheel drive,-1.0,11.054830
9692,180,3.0,188.0,3680.341381,18.0,2016,Asia,Gasoline,Front-wheel drive,1.0,11.711653
9693,280,2.0,148.0,2545.070139,15.6,2012,Asia,Diesel,All-wheel drive,0.0,17.202782
9698,180,1.0,131.0,3107.427820,13.2,2005,Asia,Gasoline,Front-wheel drive,-2.0,13.933716


In [134]:
# 2. Select only columns vehicle_weight and model_year
df_asia_subset = df_asia[['vehicle_weight', 'model_year']]

df_asia_subset

Unnamed: 0,vehicle_weight,model_year
8,2714.219310,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016
...,...,...
9688,3948.404625,2018
9692,3680.341381,2016
9693,2545.070139,2012
9698,3107.427820,2005


In [136]:
# 3. Select the first 7 values
df_asia_seven_row = df_asia_subset.head(7)
df_asia_seven_row

Unnamed: 0,vehicle_weight,model_year
8,2714.21931,2016
12,2783.868974,2010
14,3582.687368,2007
20,2231.808142,2011
21,2659.431451,2016
34,2844.227534,2014
38,3761.994038,2019


In [139]:
# 4. Get the underlying NumPy array. Let's call it X
X = df_asia_seven_row.values
#X = df_asia_seven_row.to_numpy()
X

array([[2714.21930965, 2016.        ],
       [2783.86897424, 2010.        ],
       [3582.68736772, 2007.        ],
       [2231.8081416 , 2011.        ],
       [2659.43145076, 2016.        ],
       [2844.22753389, 2014.        ],
       [3761.99403819, 2019.        ]])

In [140]:
# 5. Compute matrix-matrix multiplication between the transpose of X and X. To get the transpose, use X.T. Let's call the result XTX.
XTX = X.T.dot(X)
XTX

array([[62248334.33150762, 41431216.5073268 ],
       [41431216.5073268 , 28373339.        ]])

In [141]:
# 6. Invert XTX
XTX_inv = np.linalg.inv(XTX)
XTX_inv

array([[ 5.71497081e-07, -8.34509443e-07],
       [-8.34509443e-07,  1.25380877e-06]])

In [142]:
# 7. Create an array y with values 
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])
y

array([1100, 1300,  800,  900, 1000, 1100, 1200])

In [144]:
# 8. Multiply the inverse of XTX with the transpose of X, and then multiply the result by y. Call the result w
w = XTX_inv.dot(X.T).dot(y)
w

array([0.01386421, 0.5049067 ])

In [147]:
# 9.  Sum of all the elements of the result
print(w.sum())

0.5187709081074016
