## Get relevant packages

In [1]:
from pathlib import Path
from typing import List, Tuple
import pandas as pd
import sys

# Current working directory of the running kernel.
_root = Path.cwd()

# When the working directory has no "prometeus" entry, put the parent directory
# first on the import path — presumably so the `prometeus` imports that follow
# resolve when the notebook is run from a sub-folder of the project (confirm layout).
if not _root.joinpath("prometeus").exists():
    sys.path.insert(0, "../")
    
from prometeus.pca.analyzer import PCAnalyzer
from prometeus.fa.analyzer import FAnalyzer
from prometeus.pca.plot import PCAPlot
from prometeus.fa.plot import FAPlot
from sklearn.preprocessing import StandardScaler

## Prepare DataFrame

In [2]:
def preprocess_data_cluster(df: pd.DataFrame, clusters: List[int] = None) -> Tuple[pd.DataFrame, pd.Series]:
    """Split a clustered table into a feature frame and its cluster labels.

    Rows containing any missing value are discarded, the remaining rows are
    optionally restricted to the requested clusters, and the columns
    ``SERVICE``, ``MONTH_SQN``, ``CUST_SQN`` and ``Cluster`` (plus
    ``Unnamed: 0`` when present) are removed from the returned features.

    The input frame is never modified; all work happens on new frames.

    Args:
        df: Table that contains at least the columns ``SERVICE``,
            ``MONTH_SQN``, ``CUST_SQN`` and ``Cluster``.
        clusters: Cluster ids to keep. ``None`` (the default) keeps all rows.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is the filtered frame
        without the dropped columns; ``labels`` is the ``Cluster`` column of
        the same rows cast to ``int`` and then to ``str``.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    # Non-inplace dropna returns a new frame, so the caller's data stays intact
    # and the later column drop never operates on a view of the input.
    cleaned = df.dropna(how='any')
    if clusters is not None:
        cleaned = cleaned[cleaned['Cluster'].isin(clusters)]

    # Labels are captured before the 'Cluster' column is removed.
    labels = cleaned['Cluster'].astype(int).astype(str)

    drop_cols = ['SERVICE', 'MONTH_SQN', 'CUST_SQN', 'Cluster']
    # 'Unnamed: 0' is optional, so it is only dropped when it actually exists.
    if 'Unnamed: 0' in cleaned.columns:
        drop_cols.insert(0, 'Unnamed: 0')

    return cleaned.drop(columns=drop_cols), labels


In [3]:
# Columns that get standardised before the analysis. Each name appears once:
# a repeated name would be selected, fitted and assigned a second time without
# changing the scaled values.
normalize_col = [
    'SH_CUST_RATING', 'CUST_LVL_MOB_ARPU_6M', 'TENURE_MOBILE', 'CUST_LVL_MOB_CNT',
    'CONTRACT_DURATION', 'SUBS_TENURE', 'LIFETIME_CONTRACTS_CNT',
    'AUSAGE_SOCIALNET', 'AUSAGE_VIDEO', 'AUSAGE_COMMUNICATIONS', 'AUSAGE_NETFLIX',
    'AUSAGE_ECOMMERCE', 'AUSAGE_GAMES', 'AUSAGE_MUSIC', 'AUSAGE_MAIL',
    'AUSAGE_LIFESTYLE', 'AUSAGE_NEWS', 'AUSAGE_TRANSPORTATION',
    'USAGE_SOCIALNET', 'USAGE_STREAMINGVIDEO', 'USAGE_GAMES', 'USAGE_ENTERTAINMENT',
    'USAGE_MUSIC', 'USAGE_TRANSPORTATION', 'USAGE_SHOPPING', 'USAGE_ECOMMERCE',
    'USAGE_SPORTS', 'USAGE_NEWS', 'USAGE_TRAVEL',
]

def normalize(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with the ``normalize_col`` columns standardised.

    The listed columns are passed through a freshly fitted ``StandardScaler``
    (fitted on this very frame); all other columns are carried over unchanged.
    Any missing values left in the result are then replaced with 0.

    The input frame is not modified, so calling this twice on the same frame
    gives the same result.

    Args:
        df: Frame containing every column named in ``normalize_col``.

    Returns:
        A new frame with the same columns as ``df``.

    Raises:
        KeyError: If a column named in ``normalize_col`` is missing from ``df``.
    """
    # Work on a copy: assigning the scaled columns onto `df` itself would
    # silently overwrite the caller's data.
    scaled = df.copy()
    # fit_transform is the one-call form of fit(...) followed by transform(...).
    scaled[normalize_col] = StandardScaler().fit_transform(scaled[normalize_col])
    # Filling happens after scaling so the fill value does not influence the fit.
    return scaled.fillna(0)

## Using Prometeus Library to get plots

### 1. Main

In [None]:
# Read the full clustered data set and split it into features and cluster labels.
df, clusters = preprocess_data_cluster(pd.read_csv('Data/clustered_data_20210414.csv'))

# Standardise the features and hand them, with the labels, to the PCA plotter.
PCAPlot(normalize(df), clusters).generate_graphs(
    by='cum_var',
    n_components=22,
    is_filter=True,
)

### 2. Micro Segment Cluster 0 & 1

In [4]:
# Read the micro-segment file for clusters 0 & 1 and split features from labels.
df1, clusters1 = preprocess_data_cluster(pd.read_csv('Data/micro_segment_cluster0&1.csv'))

# Standardise the features and hand them, with the labels, to the PCA plotter.
PCAPlot(normalize(df1), clusters1).generate_graphs(
    by='cum_var',
    is_filter=True,
)

2D bi-plot Picture saved at same directory!
3D bi-plot Picture saved at same directory!


Tab(children=(Output(layout=Layout(width='100%')), Output(layout=Layout(width='100%')), Output(layout=Layout(w…

### 3. Micro Segment Cluster 2

In [None]:
# Read the micro-segment file for cluster 2 and split features from labels.
df2, clusters2 = preprocess_data_cluster(pd.read_csv('Data/micro_segment_cluster2.csv'))

# Standardise the features and hand them, with the labels, to the PCA plotter.
PCAPlot(normalize(df2), clusters2).generate_graphs(
    by='cum_var',
    is_filter=True,
)

### 4. Micro Segment Cluster 4

In [None]:
# Read the micro-segment file for cluster 4 and split features from labels.
df4, clusters4 = preprocess_data_cluster(pd.read_csv('Data/micro_segment_cluster4.csv'))

# Standardise the features and hand them, with the labels, to the PCA plotter;
# this cell uses the 'scree' mode with a 0.75 threshold instead of 'cum_var'.
PCAPlot(normalize(df4), clusters4).generate_graphs(
    by='scree',
    threshold=0.75,
    is_filter=True,
)