## Import libraries

In [32]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

### Reading data

In [33]:
# Load the raw customer-value CSV (path is relative to the notebook's working directory).
dataframe = pd.read_csv(
    filepath_or_buffer='../data/WA_Fn-UseC_-Marketing-Customer-Value-Analysis.csv',
)

#### Split dependent & independent variables

In [46]:
# Target: the column the model is trained to predict.
y = dataframe['Customer Lifetime Value']

# Features: every remaining column except the target itself and the
# 'Customer' and 'Effective To Date' columns, which are excluded from modelling.
x = dataframe.drop(
    ['Customer Lifetime Value', 'Customer', 'Effective To Date'],
    axis=1,
)


### Handling categorical data

In [35]:
# Names of the text-typed columns in `x`; these are the ones that get one-hot encoded.
categorical_features = x.select_dtypes(include=['object','string']).columns.tolist()

# One-hot encode the categorical columns (dropping each feature's first level)
# and pass every other column through unchanged.
#
# sparse_threshold=0 forces a dense result. With the default threshold the
# transformer may return a SciPy sparse matrix when the one-hot block dominates,
# and the StandardScaler() applied later centres the data, which scikit-learn
# does not support for sparse input.
ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(drop='first'), categorical_features),
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

# NOTE: `x` is rebound from the feature DataFrame to the encoded output, so this
# cell cannot be re-run on its own; re-run the cell that builds `x` first.
x = ct.fit_transform(x)



### Train/test split and feature scaling


In [36]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [37]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both partitions so the test set does not influence it.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Training model

In [38]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model on the scaled training features.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

# Predictions for the held-out rows, used for evaluation in the next cell.
y_pred = regressor.predict(X_test)

In [39]:
from sklearn.metrics import r2_score

# Report the coefficient of determination of the predictions on the test split.
print(
    "R2 score: ",
    r2_score(y_true=y_test, y_pred=y_pred),
)

R2 score:  0.12308841396851022


In [47]:
import joblib

# Bundle the fitted model together with the fitted preprocessing objects so
# they are written to, and can be loaded back from, a single file.
artifacts = dict(
    model=regressor,
    encoder=ct,
    scaler=sc,
)

# NOTE(review): the file name is spelled 'LinearRegresion' (single "s"); it is
# kept as-is here. Confirm what loads this file before renaming it.
joblib.dump(artifacts, 'LinearRegresion.joblib')

['LinearRegresion.joblib']