### Function for Correlation Based Selection (threshold)

In [3]:
import pandas as pd

def select_manual(data, threshold=0.2, target_col='log_inst_review'):
    """Select features by absolute correlation with the target column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the candidate features and the target column.
    threshold : float, default 0.2
        A feature is kept only when its absolute correlation with the
        target is strictly greater than this value.
    target_col : str, default 'log_inst_review'
        Name of the target column inside ``data``.

    Returns
    -------
    pandas.Index
        Names of the selected features, ordered from the strongest to the
        weakest absolute correlation. The target itself is never included.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of the correlation matrix.
    """
    # Absolute correlation of every column with the target, strongest first.
    correlation_target = (
        data.corr()[target_col]
        .abs()
        .sort_values(ascending=False)
    )

    # NaN correlations (e.g. a constant column) compare False and are dropped.
    selected_features = correlation_target[correlation_target > threshold].index

    # The target normally passes the filter through its self-correlation of 1,
    # but not when that correlation is NaN or the threshold is >= 1, so a
    # missing label must not raise here.
    return selected_features.drop(target_col, errors='ignore')

### Variance Threshold Selection

In [4]:
from sklearn.feature_selection import VarianceThreshold

def select_variance(data, threshold, target_col="log_inst_review"):
    """Keep the feature columns retained by a variance-threshold filter.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the candidate features and the target column.
    threshold : float
        Passed straight to ``VarianceThreshold(threshold=...)``.
    target_col : str, default "log_inst_review"
        Name of the target column; it is removed before the filter is fitted
        so it is never part of the result.

    Returns
    -------
    pandas.DataFrame
        The columns of ``data`` (without the target) for which
        ``VarianceThreshold.get_support()`` is True, with all rows kept.

    Notes
    -----
    NOTE(review): per the scikit-learn docs the fit is expected to raise
    ``ValueError`` when no feature meets the threshold - confirm for the
    installed version.
    """
    # The target is not a candidate feature, so leave it out of the fit.
    x = data.drop(target_col, axis=1)

    vt = VarianceThreshold(threshold=threshold)
    vt.fit(x)

    # get_support() is a boolean mask aligned with the columns of x.
    selected_features = x.iloc[:, vt.get_support()]

    return selected_features

### Select K-Best method

In [5]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression 
from sklearn.model_selection import train_test_split

def select_best(data, num, target_col="log_inst_review"):
    """Select the ``num`` best features according to ``f_regression`` scores.

    The scores are computed on the training part of a fixed 80/20 split
    (``test_size=0.2``, ``random_state=42``); the returned frame still
    contains every row of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the candidate features and the target column.
    num : int or "all"
        Forwarded to ``SelectKBest`` as ``k``.
    target_col : str, default "log_inst_review"
        Name of the target column inside ``data``.

    Returns
    -------
    pandas.DataFrame
        The columns of ``data`` (without the target) flagged by
        ``SelectKBest.get_support()``.
    """
    features = data.drop(target_col, axis=1)
    # Take the target from the frame itself rather than from a notebook
    # global, so the function also works on a freshly restarted kernel.
    target = data[target_col]

    # Only the training part is used to score the features.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    kbest = SelectKBest(score_func=f_regression, k=num)
    # Only the fitted support mask is needed, not the transformed array.
    kbest.fit(X_train, y_train)

    selected_features = features.iloc[:, kbest.get_support()]

    return selected_features

### Create the make_poly function

In [6]:
from sklearn.preprocessing import PolynomialFeatures

def make_poly(data, degree=2, interaction_only=True):
    """Expand ``data`` with polynomial / interaction terms.

    Parameters
    ----------
    data : pandas.DataFrame
        Input features; its column names are used to label the output.
    degree : int, default 2
        Forwarded to ``PolynomialFeatures(degree=...)``.
    interaction_only : bool, default True
        Forwarded to ``PolynomialFeatures``. With the default, the expansion
        contains the original features and products of distinct features,
        but no powers of a single feature (see the scikit-learn docs).

    Returns
    -------
    pandas.DataFrame
        The expanded features, without a bias column, labelled with
        ``get_feature_names_out`` and carrying the same index as ``data``.
    """
    pf = PolynomialFeatures(
        degree=degree, include_bias=False, interaction_only=interaction_only
    )
    expanded = pf.fit_transform(data)

    # Reuse the input index: a fresh RangeIndex would no longer line up with
    # the target or other frames once ``data`` has been split or filtered.
    poly = pd.DataFrame(
        expanded,
        columns=pf.get_feature_names_out(data.columns),
        index=data.index,
    )

    return poly