# In [None]:
# Import packages
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.impute import KNNImputer
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import LabelEncoder, StandardScaler, KBinsDiscretizer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.tree import ExtraTreeClassifier
from statsmodels.api import OLS

# Load and inspect data
# Read the workbook into a DataFrame, then report its (rows, columns) shape
# followed by the number of missing values in each column.
df = pd.read_excel('data.xlsx')
print(df.shape, df.isna().sum(), sep='\n')

# Data cleaning
# Rows without a 'thalassemia' value are removed.  'num_blocked' is
# deliberately NOT in the subset: dropping its missing rows first would leave
# the imputer below with nothing to fill.
df.dropna(subset=['thalassemia'], inplace=True)

# Fill missing 'num_blocked' values from the nearest neighbours.  KNNImputer
# measures distance over the other columns it is given; handed 'num_blocked'
# alone it has no defined distances and falls back to the column mean, so the
# remaining numeric columns are supplied as well.
#   * 'heart_disease' (the label used in the analysis section) is left out so
#     the label does not influence a feature value.
#   * Columns that are entirely NaN are removed up front, because KNNImputer
#     drops them from its output and would shift the column positions.
knn_inputs = (
    df.drop(columns=['heart_disease'], errors='ignore')
    .select_dtypes(include='number')
    .dropna(axis=1, how='all')
)
imputer = KNNImputer()
imputed = imputer.fit_transform(knn_inputs)
# Only 'num_blocked' is written back; the other columns keep their raw values.
df['num_blocked'] = imputed[:, knn_inputs.columns.get_loc('num_blocked')]

# Encoding
# Replace each distinct 'gender' value with an integer code.
label_encoder = LabelEncoder()
df = df.assign(gender=label_encoder.fit_transform(df['gender']))

# Frame-wide substitution: every 'Yes' becomes 1, then every 'No' becomes -1.
df = df.replace(['Yes'], 1).replace(['No'], -1)

# Detect outliers
# LocalOutlierFactor.fit_predict labels each row 1 (inlier) or -1 (outlier),
# judged here on the 'depression' column alone.
lof = LocalOutlierFactor()
is_inlier = lof.fit_predict(df[['depression']]) != -1

# The labels are applied as a mask instead of being stored in a
# 'depression_out' column: a stored flag would stay in the frame and be picked
# up as a model feature by the analysis section.
# .copy() yields an independent frame, so the column assignments in the
# scaling section do not trigger pandas' SettingWithCopyWarning.
df = df[is_inlier].copy()

# Feature scaling
# Standardise 'depression' with statistics learned from that same column.
standard_scaler = StandardScaler()
df['depression'] = standard_scaler.fit(df[['depression']]).transform(df[['depression']])

# Replace 'heart_rate' with the ordinal index of the bin (3 bins) it falls in.
discretizer = KBinsDiscretizer(encode='ordinal', n_bins=3)
df['heart_rate'] = discretizer.fit(df[['heart_rate']]).transform(df[['heart_rate']])

# Analysis
# Fit a single randomised tree on every remaining column and print how much
# each column contributed to its splits.
target = 'heart_disease'

# Features = everything except the label.  'depression_out' (the outlier flag
# written during outlier detection, if present) is bookkeeping rather than a
# measurement, so it is excluded too; errors='ignore' covers the case where
# that column does not exist.
# NOTE(review): 'thalassemia' is never encoded in this script -- if it holds
# strings, model.fit() below will fail; confirm its dtype or encode it first.
X = df.drop(columns=[target]).drop(columns=['depression_out'], errors='ignore')
y = df[target]

# ExtraTreeClassifier picks its splits at random; a fixed seed makes the
# printed importances reproducible from run to run.
model = ExtraTreeClassifier(random_state=0)
model.fit(X, y)
print(dict(zip(X.columns, model.feature_importances_)))

# Regression example
# Ordinary least squares of 'Sales' on 'TV'.
# NOTE(review): this reuses the same `df` (and rebinds X, y, model from the
# analysis above); 'TV' and 'Sales' must be columns of data.xlsx -- confirm
# this section is meant to run on this workbook.
X = df['TV']
y = df['Sales']

# OLS does not add an intercept on its own; add_constant supplies the constant
# column.  Both are reached through the `sm` namespace (statsmodels.api) --
# the original called sm.add_constant without `sm` ever being imported.
model = sm.OLS(y, sm.add_constant(X)).fit()
print(model.summary())