In [1]:
import pandas as pd
import numpy as np

### Q1. Pandas version 

In [2]:
# Show the version string of the installed pandas package; as the last
# expression in the cell, its value is rendered as the cell output.
pd.__version__

'1.1.3'

### Getting the data

In [3]:
# Load the laptops dataset from the CSV file in the working directory,
# then preview the first rows.
csv_path = 'laptops.csv'
df = pd.read_csv(csv_path)
df.head()


Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


### Q2. Records count

In [11]:
# The number of records is the number of rows of the DataFrame
num_records = df.shape[0]
print(f"The number of records in the laptops dataset is {num_records}")

The number of records in the laptops dataset is 2160


### Q3. Laptop brands

In [25]:
# Collect the distinct values of the Brand column and count them
brand_column = df['Brand']
unique_brands = brand_column.unique()
num_unique_brands = unique_brands.shape[0]
print(f"The number of unique laptop brands in the dataset is {num_unique_brands}")

The number of unique laptop brands in the dataset is 27


### Q4. Missing values

In [38]:
# Flag every column that holds at least one missing value, then count the flagged columns
columns_with_nulls = df.isna().any(axis=0)
num_of_columns_with_null_values = columns_with_nulls.sum()
print(f"The number of columns with null values is {num_of_columns_with_null_values}")


The number of columns with null values is 3


### Q5. Maximum final price

In [42]:
# Highest Final Price among the rows whose Brand is Dell
dell_prices = df.loc[df['Brand'] == 'Dell', 'Final Price']
max_dell_price = dell_prices.max()
print(f"The maximum price of the Dell notebooks is {max_dell_price}")

The maximum price of the Dell notebooks is 3936.0


### Q6. Median value of Screen

In [4]:
# 1. Median of the Screen column (computed before any missing values are filled)
screen_sizes = df['Screen']
median_screen_size = screen_sizes.median()
print(f"The median screen size is {median_screen_size}")

The median screen size is 15.6


In [5]:
# 2. Most frequent Screen value: mode() returns its results as a Series,
#    so take the first entry.
screen_modes = df['Screen'].mode()
most_frequent_screen_size = screen_modes.iloc[0]
print(f"The most frequent screen size is {most_frequent_screen_size}")

The most frequent screen size is 15.6


In [6]:
# 3. Filling the missing values in the screen column with the most frequent value from the previous step.
#    The result is assigned back to the column instead of calling fillna(inplace=True) on the
#    df['Screen'] selection: the chained in-place form raises a FutureWarning in recent pandas 2.x
#    releases and does not modify df when Copy-on-Write is enabled.
df['Screen'] = df['Screen'].fillna(most_frequent_screen_size)

In [89]:
# 4. Display the rows whose Screen value is still missing;
#    an empty frame means no nulls remain in the column.
still_missing = df['Screen'].isna()
df[still_missing]

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price


In [92]:
# Recalculate both statistics after filling the missing values: the most frequent
# screen size (as before) and the median, so each can be compared with its value
# from before the fill.
new_most_frequent_screen_size = df['Screen'].mode().values[0]
new_median_screen_size = df['Screen'].median()
print(f"The new most frequent screen size is {new_most_frequent_screen_size}")
print(f"The new median screen size is {new_median_screen_size}")

The new most frequent screen size is 15.6


### Q7. Sum of weights

In [8]:
# 1. Keep only the rows whose Brand is "Innjoo".
is_innjoo = df['Brand'] == 'Innjoo'
innjoo_laptops = df[is_innjoo]

In [9]:
# 2. Narrow the Innjoo rows down to the RAM, Storage and Screen columns.
feature_columns = ['RAM', 'Storage', 'Screen']
innjoo_laptops = innjoo_laptops[feature_columns]

In [10]:
# 3. Extract the selected columns as a NumPy array named X.
X = innjoo_laptops.to_numpy()

In [14]:
# 4. Multiply the transpose of X by X and keep the product as XTX.
X_transposed = X.T
XTX = X_transposed.dot(X)
XTX

array([[2.52000e+02, 8.32000e+03, 5.59800e+02],
       [8.32000e+03, 3.68640e+05, 1.73952e+04],
       [5.59800e+02, 1.73952e+04, 1.28196e+03]])

In [15]:
# 5. Invert XTX with NumPy's linear-algebra routine and display the result.

XTX_inverse = np.linalg.inv(XTX)
XTX_inverse

array([[ 2.78025381e-01, -1.51791334e-03, -1.00809855e-01],
       [-1.51791334e-03,  1.58286725e-05,  4.48052175e-04],
       [-1.00809855e-01,  4.48052175e-04,  3.87214888e-02]])

In [16]:
# 6. Build the array y from the six given values.
target_values = [1100, 1300, 800, 900, 1000, 1100]
y = np.array(target_values)

In [22]:
# 7. Compute w in two left-to-right steps: first the inverse of XTX times the
#    transpose of X, then that product times y.
inverse_times_XT = XTX_inverse @ X.T
w = inverse_times_XT @ y
w

array([45.58076606,  0.42783519, 45.29127938])

In [23]:
# Add up every element of w
np.sum(w)

91.29988062995542