## 1. Importing all necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

## 2. Loading dataset

In [None]:
def load_dataset(
    url="https://storage.googleapis.com/qwasar-public/track-ds/boston.csv",
):
    """Load a CSV dataset into a pandas DataFrame.

    Args:
        url: Source handed straight to ``pd.read_csv`` (a URL, a local
            path, or a file-like object). Defaults to the Boston CSV used
            throughout this notebook, so calling ``load_dataset()`` with
            no argument behaves as before.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(url)

In [None]:
# Load the dataset once; the cells below all work on this DataFrame.
boston_dataframe = load_dataset()

In [None]:
# Print the DataFrame exactly as loaded, before any cleaning.
print(boston_dataframe)

## 3. Printing summary of the dataset

In [None]:
def print_summarize_dataset(dataset):
    """Print a three-part overview of *dataset*.

    The parts are, in order: its shape, its first ten rows, and the
    output of ``describe()``. Each part is preceded by a heading line.
    Nothing is returned.
    """
    # Each value is wrapped in a callable so it is computed only after its
    # heading has been printed.
    report = (
        ("Dataset dimension:", lambda: dataset.shape),
        ("First 10 rows of dataset:", lambda: dataset.head(10)),
        ("Statistical summary:", lambda: dataset.describe()),
    )
    for heading, compute in report:
        print(heading)
        print(compute())

In [None]:
# Print the shape, first ten rows, and describe() statistics of the loaded data.
print_summarize_dataset(boston_dataframe)

## 4. Cleaning and Pre-processing

In [None]:
def clean_dataset(boston_dataframe):
    """Remove every row that contains a missing value.

    The DataFrame passed in is modified in place; the function returns
    ``None``, so callers keep using the same object.
    """
    # axis=0 / how="any" are the dropna defaults, spelled out for clarity.
    boston_dataframe.dropna(axis=0, how="any", inplace=True)

In [None]:
# Rows with missing values are dropped from boston_dataframe in place,
# so no reassignment is needed here.
clean_dataset(boston_dataframe)

## 5. Data Analysis

In [None]:
# Plotting each attribute in a histogram
def print_histograms(boston_dataframe):
    """Plot the DataFrame's histograms on a 4x4 grid and display the figure.

    Each histogram uses 50 bins and the whole figure is 14x10 inches.
    Nothing is returned.
    """
    grid_shape = (4, 4)
    figure_size = (14, 10)
    boston_dataframe.hist(bins=50, layout=grid_shape, figsize=figure_size)
    plt.show()

In [None]:
# Draw the histogram grid for the cleaned dataset.
print_histograms(boston_dataframe)

In [None]:
# Function that computes the correlation matrix of the dataset
def compute_correlations_matrix(boston_dataframe):
    """Return the pairwise correlation matrix of the DataFrame's columns.

    The result is the DataFrame produced by ``DataFrame.corr``; indexing
    it by a column name gives that column's correlation with every other
    column.
    """
    # "pearson" is DataFrame.corr's default method, named here explicitly.
    return boston_dataframe.corr(method="pearson")

In [None]:
# Compute the correlation matrix once, then print the MDEV column:
# the correlation of every column with MDEV.
correlations = compute_correlations_matrix(boston_dataframe)
print(correlations['MDEV'])

In [None]:
# Plotting scatter matrix
def print_scatter_matrix(boston_dataframe):
    """Plot the pairwise scatter matrix of the DataFrame and display it.

    The figure is 14x10 inches. Nothing is returned.
    """
    pd.plotting.scatter_matrix(boston_dataframe, figsize=(14, 10))
    # Show the figure explicitly, as print_histograms does, so the plot
    # also appears when this runs outside an inline notebook backend.
    plt.show()

In [None]:
# Draw the pairwise scatter-plot matrix for the dataset.
print_scatter_matrix(boston_dataframe)

In [None]:
# Plotting MDEV as a function of RM
# (plot.scatter reads only the x and y columns, so no column pre-selection is needed)
boston_dataframe.plot.scatter(x='RM', y='MDEV')

In [None]:
# Plotting MDEV as a function of LSTAT, AGE, and CRIM
features = ['LSTAT', 'AGE', 'CRIM']
# One row with one panel per feature; the figure size is set once here
# instead of being re-applied on every plot call.
fig, axes = plt.subplots(1, len(features), figsize=(14, 3))
# Pair each feature with its own axes rather than indexing both by position.
for feature, ax in zip(features, axes):
    boston_dataframe.plot.scatter(x=feature, y='MDEV', s=5, ax=ax)

In [None]:
# Printing the correlation coefficient of every column with LSTAT
# (the LSTAT column of the correlation matrix computed earlier)
print(correlations['LSTAT'])

## 6. Prediction