In [1]:
import pandas as pd
# Location of the raw laptops CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
LAPTOPS_CSV_PATH = 'C:\\Users\\VChernik\\Downloads\\laptops.csv'

# Read the file as Windows-1251 text and use the CSV's second column
# (index_col=1) as the row index.
laptops = pd.read_csv(
    LAPTOPS_CSV_PATH,
    encoding="Windows-1251",
    index_col=1,
)

In [2]:
# Header normalization
def clean_data(strn):
    """Normalize one raw column header into a snake_case identifier.

    Steps, in order: trim surrounding whitespace, shorten the exact phrase
    "Operating System" to "os", turn spaces into underscores, remove
    parentheses, and lowercase the result.

    Args:
        strn: the raw header text.

    Returns:
        The normalized header string.
    """
    # The phrase match is case-sensitive and happens before lowercasing.
    cleaned = strn.strip().replace("Operating System", "os")
    for old, new in ((" ", "_"), ("(", ""), (")", "")):
        cleaned = cleaned.replace(old, new)
    return cleaned.lower()

# Replace every column header with its normalized form, keeping column order.
laptops.columns = list(map(clean_data, laptops.columns))

In [3]:
# Clean the screen size and RAM columns.
# Screen size: remove the inch mark (") and store the value as float, then
# rename the column so the unit is part of its name.
screen_size_numeric = laptops["screen_size"].str.replace('"', '').astype(float)
laptops["screen_size"] = screen_size_numeric
laptops.rename(columns={"screen_size": "screen_size_inches"}, inplace=True)

# RAM: remove the "GB" suffix and store the value as int, then rename likewise.
ram_numeric = laptops["ram"].str.replace("GB", "").astype(int)
laptops["ram"] = ram_numeric
laptops.rename(columns={"ram": "ram_gb"}, inplace=True)

In [4]:
# Clean the weight and price columns.
# Weight: remove the unit suffix. "kgs" is removed before "kg" so that no
# stray "s" is left behind, then the text is converted to float.
weight_text = laptops["weight"].str.replace("kgs", "").str.replace("kg", "")
laptops["weight"] = weight_text.astype(float)
laptops.rename(columns={"weight": "weight_kg"}, inplace=True)

# Summary statistics of the cleaned weight column.
weight_describe = laptops["weight_kg"].describe()

# Price: swap the comma for a dot so the text can be converted to float.
price_text = laptops["price_euros"].str.replace(",", ".")
laptops["price_euros"] = price_text.astype(float)

In [5]:
# Clean the CPU and GPU columns: the manufacturer is taken to be the first
# whitespace-delimited token of each description.
manufacturer_columns = {
    "gpu": "gpu_manufacturer",
    "cpu": "cpu_manufacturer",
}

for source_column, target_column in manufacturer_columns.items():
    # Split once on whitespace and keep only the leading token.
    leading_token = laptops[source_column].str.split(n=1, expand=True).iloc[:, 0]
    laptops[target_column] = leading_token

In [6]:
# CPU clock speed: remove the "GHz" unit, then take the last
# whitespace-delimited token of the description and convert it to float.
cpu_without_unit = laptops["cpu"].str.replace("GHz", "")
cpu_tail_split = cpu_without_unit.str.rsplit(n=1, expand=True)
laptops["cpu_speed_ghz"] = cpu_tail_split.iloc[:, 1].astype(float)

In [7]:
# Screen resolution: the last whitespace-delimited token of the "screen" text.
# rsplit(n=1) puts everything before the last token in column 0 and the last
# token in column 1.
screen_res = laptops["screen"].str.rsplit(n=1, expand=True)

# When the description is a single token (for example a bare resolution),
# nothing is split off: the token stays in column 0 and column 1 is null.
# Copy it across so those rows do not end up with a null resolution.
# (The previous mask tested column 0 for nulls, which only ever copied nulls.)
single_token = screen_res[1].isnull()
# Assign by position (.values) so no index alignment is involved; the frame's
# index comes from a CSV column and is not guaranteed to be unique.
screen_res.loc[single_token, 1] = screen_res.loc[single_token, 0].values

# Keep the first token of column 1. After the step above that column should
# already hold a single token, so this is only a safeguard.
laptops["screen_resolution"] = (screen_res[1]
                                    .str.split(n=1, expand=True)
                                    .iloc[:, 0]
                                    )

In [8]:
# Canonical operating-system names: every spelling maps to itself except
# 'Mac OS', which is folded into 'macOS'.
mapping_dict = dict([
    ('Android', 'Android'),
    ('Chrome OS', 'Chrome OS'),
    ('Linux', 'Linux'),
    ('Mac OS', 'macOS'),
    ('No OS', 'No OS'),
    ('Windows', 'Windows'),
    ('macOS', 'macOS'),
])

# Values that are not keys of mapping_dict become null after .map().
os_canonical = laptops["os"].map(mapping_dict)
laptops["os"] = os_canonical

In [9]:
# Two null-free views of the data; `laptops` itself is left unchanged.
# Drop every row that contains at least one null value.
laptops_no_null_rows = laptops.dropna(axis=0)
# Drop every column that contains at least one null value.
laptops_no_null_cols = laptops.dropna(axis="columns")

In [10]:
# Count, per operating system, the rows whose os_version is missing.
missing_version_before = laptops["os_version"].isnull()
value_counts_before = laptops.loc[missing_version_before, "os"].value_counts()

# Set a fixed os_version for every row of these two operating systems.
version_by_os = (
    ("macOS", "X"),
    ("No OS", "Version Unknown"),
)
for os_name, version_label in version_by_os:
    laptops.loc[laptops["os"] == os_name, "os_version"] = version_label

# Same count again, to show what is still missing after the fill.
missing_version_after = laptops["os_version"].isnull()
value_counts_after = laptops.loc[missing_version_after, "os"].value_counts()

## Challenge: Extracting Storage Information
- Clean the `storage` column, creating four new columns:
  - `storage_1_capacity_gb`, with float dtype.
  - `storage_1_type`.
  - `storage_2_capacity_gb`, with float dtype. If there is only one drive, this column should be null.
  - `storage_2_type`. If there is only one drive, this column should be null.

If needed, don't forget to strip the columns of any extra whitespace.
- Drop the original storage column and any temporary columns you made while completing the exercise.

In [11]:
# Split "storage" into capacity and type columns for up to two drives.
# Drives are separated by "+"; each drive is expected to read
# "<number><GB|TB> <type>".
laptop_storages = laptops['storage'].str.split('+', expand=True)
if 1 not in laptop_storages.columns:
    # No row lists a second drive: add an all-null column so the storage_2_*
    # columns are still created (as nulls) instead of raising IndexError.
    laptop_storages[1] = None

# Capacity (integer or decimal), unit, and the remaining text as drive type.
drive_pattern = r'^(\d+(?:\.\d+)?)\s*(GB|TB)\s*(.*)$'
# Capacities are converted numerically. Rewriting the text "TB" as "000GB"
# would turn a decimal value such as "1.0TB" into "1.0000GB", i.e. 1 GB.
gb_per_unit = {'GB': 1.0, 'TB': 1000.0}

for drive_number in (1, 2):
    drive_text = laptop_storages[drive_number - 1].str.strip()
    drive_parts = drive_text.str.extract(drive_pattern)

    # Fail loudly on an unexpected format instead of silently producing nulls.
    unparsed = drive_text.notna() & drive_parts[0].isna()
    if unparsed.any():
        examples = drive_text[unparsed].unique()[:5].tolist()
        raise ValueError(
            f"Unrecognized storage description for drive {drive_number}: {examples}"
        )

    unit_factor = drive_parts[1].map(gb_per_unit).astype(float)
    laptops[f'storage_{drive_number}_capacity_gb'] = (
        drive_parts[0].astype(float) * unit_factor
    )
    # Strip the type so " SSD " and "SSD" do not become different categories.
    laptops[f'storage_{drive_number}_type'] = drive_parts[2].str.strip()

# The raw column is no longer needed once the four new columns exist.
laptops = laptops.drop('storage', axis=1)

In [None]:
# Are laptops made by Apple more expensive than those by other manufacturers?
# What is the best value laptop with a screen size of 15" or more?
# Which laptop has the most storage space?