In [13]:
import pandas as pd
import numpy as np

In [3]:
# Display the installed pandas version; as the cell's last expression, its
# value is what the notebook shows as this cell's output.
pd.__version__

'2.1.3'

In [5]:
# Location of the laptops dataset (a CSV file served from GitHub)
LAPTOPS_CSV_URL = 'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv'

# Read the CSV into the DataFrame that the following cells work on
df = pd.read_csv(LAPTOPS_CSV_URL)

# Print the first five rows as a quick check that the load worked
print(df.head(5))

                                              Laptop Status   Brand  \
0  ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...    New    Asus   
1  Alurin Go Start Intel Celeron N4020/8GB/256GB ...    New  Alurin   
2  ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...    New    Asus   
3  MSI Katana GF66 12UC-082XES Intel Core i7-1270...    New     MSI   
4  HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...    New      HP   

        Model            CPU  RAM  Storage Storage type       GPU  Screen  \
0  ExpertBook  Intel Core i5    8      512          SSD       NaN    15.6   
1          Go  Intel Celeron    8      256          SSD       NaN    15.6   
2  ExpertBook  Intel Core i3    8      256          SSD       NaN    15.6   
3      Katana  Intel Core i7   16     1000          SSD  RTX 3050    15.6   
4         15S  Intel Core i5   16      512          SSD       NaN    15.6   

  Touch  Final Price  
0    No      1009.00  
1    No       299.00  
2    No       789.00  
3    No      1199.

In [6]:
# len() of a DataFrame is its number of rows, i.e. the number of records
record_count = len(df)

# Report the count
print(f"Number of records: {record_count}")

Number of records: 2160


In [7]:
# Collect the distinct values of the Brand column, leaving out missing entries
brand_names = df['Brand'].dropna().unique()

# The number of distinct brands is the length of that collection
unique_brands = len(brand_names)

# Report the count
print(f"Number of unique laptop brands: {unique_brands}")

Number of unique laptop brands: 27


In [8]:
# Per-column tally of missing entries (one count per column of df)
missing_columns = df.isna().sum()

# A column "has missing values" when its tally is above zero; summing the
# resulting booleans counts how many such columns there are
columns_with_missing_values = missing_columns.gt(0).sum()

# Report the count
print(f"Number of columns with missing values: {columns_with_missing_values}")

Number of columns with missing values: 3


In [10]:
# Boolean mask that is True for the rows whose Brand is exactly 'Dell'
is_dell = df['Brand'] == 'Dell'

# Highest 'Final Price' among the rows selected by the mask
max_dell_price = df.loc[is_dell, 'Final Price'].max()

# Report the value
print(f"The maximum final price of Dell notebooks: {max_dell_price}")

The maximum final price of Dell notebooks: 3936.0


In [11]:
# Purpose: check whether filling the missing Screen values with the column's
# most frequent value shifts the column's median.

# 1. Median of the Screen column before any filling
original_median_screen = df['Screen'].median()

# 2. Most frequent (mode) value of the Screen column.
#    mode() returns a Series because several values can tie; take the first.
most_frequent_screen = df['Screen'].mode()[0]

# 3. Fill missing values in the Screen column with the most frequent value.
#    The filled Series is assigned back to the column instead of calling
#    fillna(..., inplace=True) on df['Screen']: that in-place call operates on
#    a chained selection, which emits a FutureWarning on newer pandas 2.x and
#    leaves df unchanged once Copy-on-Write is in effect (pandas 3.0).
df['Screen'] = df['Screen'].fillna(most_frequent_screen)

# 4. Median of the Screen column after filling
new_median_screen = df['Screen'].median()

# True when the two medians differ
has_changed = original_median_screen != new_median_screen

# Output the results
print(f"Original median of Screen: {original_median_screen}")
print(f"Most frequent value of Screen: {most_frequent_screen}")
print(f"New median of Screen after filling missing values: {new_median_screen}")
print(f"Has the median changed? {'Yes' if has_changed else 'No'}")

Original median of Screen: 15.6
Most frequent value of Screen: 15.6
New median of Screen after filling missing values: 15.6
Has the median changed? No


In [14]:
# 1. Keep only the rows of brand "Innjoo", then only the three feature columns
feature_columns = ['RAM', 'Storage', 'Screen']
innjoo_rows = df[df['Brand'] == 'Innjoo']
innjoo_laptops = innjoo_rows[feature_columns]

# 2. Feature matrix X: the selected frame as a NumPy array
X = innjoo_laptops.to_numpy()

# 3-4. Gram matrix X^T X and its inverse
XTX = X.T @ X
XTX_inv = np.linalg.inv(XTX)

# 5. Target vector with six entries; the product in step 6 is only defined
#    when X has six rows as well
y = np.array([1100, 1300, 800, 900, 1000, 1100])

# 6. Weights from the normal equation, w = (X^T X)^-1 X^T y
#    (the parentheses spell out the left-to-right evaluation order)
w = (XTX_inv @ X.T) @ y

# 7. Add up the components of w
sum_of_w = w.sum()

# Report the result
print(f"Sum of all elements of w: {sum_of_w}")

Sum of all elements of w: 91.29988062995588
