In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# a) Load the housing CSV, discard the 'ocean_proximity' column, and print
#    summary statistics for the remaining columns.
data = pd.read_csv("/content/housing.csv").drop(columns=['ocean_proximity'])
print("Description of the data:", data.describe(), sep="\n")

# b) Report the dtype of every column and the overall (rows, columns) shape.
print("\nData types of each column:", data.dtypes, sep="\n")
print("\nShape of the data:", data.shape, sep="\n")

# c) Count missing entries per column, then replace each missing entry with
#    the mean of its own column.
null_values = data.isna().sum()
print("\nNull values in the data:", null_values, sep="\n")

# NOTE(review): the column means are taken over the whole frame, i.e. before
# the train/test split below, so rows that end up in the test set contribute
# to the fill values. Confirm this is acceptable for the exercise.
data = data.fillna(data.mean())

# d) 'median_house_value' is the prediction target; every other column is a
#    feature.
target = data['median_house_value']
features = data.drop(columns=['median_house_value'])
print(features, target, sep="\n")

# e) Hold out 20% of the rows for testing; the fixed random_state makes the
#    split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# f) Min-max scaling: the scaler is fitted on the training features only and
#    the fitted scaler is then applied to both the train and the test features.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("\nThe Normalized Data:\n")
print("Train Scaled\n", X_train_scaled, sep="\n")
print("\nTest Scaled\n", X_test_scaled, sep="\n")

Description of the data:
          longitude      latitude  housing_median_age   total_rooms  \
count  20640.000000  20640.000000        20640.000000  20640.000000   
mean    -119.569704     35.631861           28.639486   2635.763081   
std        2.003532      2.135952           12.585558   2181.615252   
min     -124.350000     32.540000            1.000000      2.000000   
25%     -121.800000     33.930000           18.000000   1447.750000   
50%     -118.490000     34.260000           29.000000   2127.000000   
75%     -118.010000     37.710000           37.000000   3148.000000   
max     -114.310000     41.950000           52.000000  39320.000000   

       total_bedrooms    population    households  median_income  \
count    20433.000000  20640.000000  20640.000000   20640.000000   
mean       537.870553   1425.476744    499.539680       3.870671   
std        421.385070   1132.462122    382.329753       1.899822   
min          1.000000      3.000000      1.000000       0.49990