# Imports

In [6]:
!pip install supabase

Collecting supabase
  Downloading supabase-2.10.0-py3-none-any.whl.metadata (10 kB)
Collecting gotrue<3.0.0,>=2.10.0 (from supabase)
  Downloading gotrue-2.10.0-py3-none-any.whl.metadata (6.0 kB)
Collecting postgrest<0.19,>=0.18 (from supabase)
  Downloading postgrest-0.18.0-py3-none-any.whl.metadata (3.4 kB)
Collecting realtime<3.0.0,>=2.0.0 (from supabase)
  Downloading realtime-2.0.6-py3-none-any.whl.metadata (6.7 kB)
Collecting storage3<0.10.0,>=0.9.0 (from supabase)
  Downloading storage3-0.9.0-py3-none-any.whl.metadata (1.8 kB)
Collecting supafunc<0.8.0,>=0.7.0 (from supabase)
  Downloading supafunc-0.7.0-py3-none-any.whl.metadata (1.1 kB)
Collecting deprecation<3.0.0,>=2.1.0 (from postgrest<0.19,>=0.18->supabase)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting websockets<14,>=11 (from realtime<3.0.0,>=2.0.0->supabase)
  Downloading websockets-13.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.m

In [7]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, RandomForestRegressor
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_absolute_error, mean_squared_error, r2_score
from imblearn.combine import SMOTEENN
from scipy.stats import uniform, randint
import plotly.graph_objs as go
import matplotlib as mpl
import matplotlib.patches as mpatches
from plotly import tools
from plotly.subplots import make_subplots
from plotly.offline import iplot
import kagglehub
import shutil
import os
from sklearn.ensemble import VotingClassifier
import warnings
import pandas as pd
from supabase import create_client, Client
import uuid
# Install a global "ignore" filter: from this point on, warnings of every
# category are suppressed instead of being shown in cell output.
warnings.filterwarnings('ignore')

# Downloading Dataset

In [2]:
# Download the Telco customer-churn dataset with kagglehub and relocate the
# downloaded directory under `content_dir`.
path = kagglehub.dataset_download("blastchar/telco-customer-churn")
content_dir = '/content'
dataset_dir = os.path.join(content_dir, os.path.basename(path))

os.makedirs(content_dir, exist_ok=True)

# Keep the cell re-runnable: shutil.move() places the source *inside* a
# destination directory that already exists, so a copy left behind by an
# earlier run is removed before the fresh download is moved into place.
# The abspath guard avoids deleting the download when it already lives at the
# destination.
if os.path.abspath(path) != os.path.abspath(dataset_dir):
    if os.path.isdir(dataset_dir):
        shutil.rmtree(dataset_dir)
    shutil.move(path, dataset_dir)

print("Dataset moved to content directory:", dataset_dir)

Downloading from https://www.kaggle.com/api/v1/datasets/download/blastchar/telco-customer-churn?dataset_version_number=1...


100%|██████████| 172k/172k [00:00<00:00, 25.5MB/s]

Extracting files...
Dataset moved to content directory: /content/1





In [39]:
# Load the churn CSV from the directory the download cell moved under
# `content_dir`. The folder name comes from `path` instead of a hard-coded "1"
# (presumably the dataset version number), so the load keeps working if
# kagglehub returns a differently named folder.
df = pd.read_csv(
    os.path.join(content_dir, os.path.basename(path),
                 "WA_Fn-UseC_-Telco-Customer-Churn.csv")
)

In [40]:
# Cleaning pipeline, applied in order:
#   1. drop rows containing NaN,
#   2. drop rows where any cell is exactly '' or a single space,
#   3. drop exact duplicate rows.
df = (
    df.dropna()
      .loc[lambda frame: ~frame.isin(['', ' ']).any(axis=1)]
      .drop_duplicates()
)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7032 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7032 non-null   object 
 1   gender            7032 non-null   object 
 2   SeniorCitizen     7032 non-null   int64  
 3   Partner           7032 non-null   object 
 4   Dependents        7032 non-null   object 
 5   tenure            7032 non-null   int64  
 6   PhoneService      7032 non-null   object 
 7   MultipleLines     7032 non-null   object 
 8   InternetService   7032 non-null   object 
 9   OnlineSecurity    7032 non-null   object 
 10  OnlineBackup      7032 non-null   object 
 11  DeviceProtection  7032 non-null   object 
 12  TechSupport       7032 non-null   object 
 13  StreamingTV       7032 non-null   object 
 14  StreamingMovies   7032 non-null   object 
 15  Contract          7032 non-null   object 
 16  PaperlessBilling  7032 non-null   object 
 17  

# Converting the DataFrame into Supabase-ingestible CSVs

In [41]:
# Split the cleaned churn frame into four narrower tables (customers, phone
# services, internet services, billing) and write each one to a CSV file in the
# working directory. Every table carries `customer_id`.
#
# Each table is built with rename() + assign(), which return new frames,
# instead of assigning columns into a subset of `df`; the latter is the
# chained-assignment pattern that pandas reports as SettingWithCopyWarning.

# Customers Table
customers_df = (
    df[['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
        'tenure', 'MonthlyCharges', 'TotalCharges']]
    .rename(columns={
        'customerID': 'customer_id',
        'SeniorCitizen': 'is_senior_citizen',
        'Partner': 'has_partner',
        'Dependents': 'has_dependents',
        'MonthlyCharges': 'monthly_charges',
        'TotalCharges': 'total_charges',
    })
    .assign(
        # SeniorCitizen is an integer column; the Yes/No text columns are
        # compared against 'Yes' to obtain booleans.
        is_senior_citizen=lambda table: table['is_senior_citizen'].astype(bool),
        has_partner=lambda table: table['has_partner'] == 'Yes',
        has_dependents=lambda table: table['has_dependents'] == 'Yes',
    )
)

# Phone Services Table (`multiple_lines` is kept as its original text value)
phone_services_df = (
    df[['customerID', 'PhoneService', 'MultipleLines']]
    .rename(columns={
        'customerID': 'customer_id',
        'PhoneService': 'has_phone_service',
        'MultipleLines': 'multiple_lines',
    })
    .assign(has_phone_service=lambda table: table['has_phone_service'] == 'Yes')
)

# Internet Services Table (all service columns are kept as their original values)
internet_services_df = (
    df[['customerID', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
        'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']]
    .rename(columns={
        'customerID': 'customer_id',
        'InternetService': 'internet_service',
        'OnlineSecurity': 'online_security',
        'OnlineBackup': 'online_backup',
        'DeviceProtection': 'device_protection',
        'TechSupport': 'tech_support',
        'StreamingTV': 'streaming_tv',
        'StreamingMovies': 'streaming_movies',
    })
)

# Billing Table
billing_df = (
    df[['customerID', 'Contract', 'PaperlessBilling', 'PaymentMethod']]
    .rename(columns={
        'customerID': 'customer_id',
        'Contract': 'contract_type',
        'PaperlessBilling': 'paperless_billing',
        'PaymentMethod': 'payment_method',
    })
    .assign(paperless_billing=lambda table: table['paperless_billing'] == 'Yes')
)

# Export to CSV (index=False keeps the pandas row index out of the files)
customers_df.to_csv('customers.csv', index=False)
phone_services_df.to_csv('phone_services.csv', index=False)
internet_services_df.to_csv('internet_services.csv', index=False)
billing_df.to_csv('billing.csv', index=False)

print("CSV files exported successfully!")

CSV files exported successfully!
