# Churn Case Part 1

## Reading all CSV files with Pandas

In [None]:
import pandas as pd

In [None]:
# Load the customer data from its CSV file and preview the first rows.
customer = pd.read_csv("./files/customer.csv")
customer.head()

In [None]:
# Load the customer location data from its CSV file and preview the first rows.
cust_loc = pd.read_csv("./files/cust_loc.csv")
cust_loc.head()

In [None]:
# Load the customer services data from its CSV file and preview the first rows.
cust_services = pd.read_csv("./files/cust_services.csv")
cust_services.head()

In [None]:
# Load the customer account data from its CSV file and preview the first rows.
cust_account = pd.read_csv("./files/cust_account.csv")
cust_account.head()

In [None]:
# Load the customer churn data from its CSV file and preview the first rows.
cust_churn = pd.read_csv("./files/cust_churn.csv")
cust_churn.head()

## Create database and tables

In [None]:
import sqlite3

In [None]:
# Open a connection to the SQLite database file churn.db and get a cursor
# for running raw SQL statements against it.
conn = sqlite3.connect("churn.db")
cur = conn.cursor()

In [None]:
# Create the customer table from the customer DataFrame, replacing any
# existing table of that name; the DataFrame index is not written.
customer.to_sql("customer", conn, if_exists="replace", index=False)

In [None]:
# Create the customer location table from the cust_loc DataFrame, replacing
# any existing table of that name; the DataFrame index is not written.
cust_loc.to_sql("cust_loc", conn, if_exists="replace", index=False)

In [None]:
# Create the customer services table from the cust_services DataFrame,
# replacing any existing table of that name; the DataFrame index is not written.
cust_services.to_sql("cust_services", conn, if_exists="replace", index=False)

In [None]:
# Create the customer account table from the cust_account DataFrame,
# replacing any existing table of that name; the DataFrame index is not written.
cust_account.to_sql("cust_account", conn, if_exists="replace", index=False)

In [None]:
# Create the customer churn table from the cust_churn DataFrame, replacing
# any existing table of that name; the DataFrame index is not written.
cust_churn.to_sql("cust_churn", conn, if_exists="replace", index=False)

## Information about tables

In [None]:
def table_info(conn, cursor):
    """Print the column names of every table in the database.

    For each table listed in ``sqlite_master`` the table name is printed,
    followed by one tab-indented line per column and then a blank line.

    Arguments:
        conn: database connection object, passed to ``pd.read_sql_query``
        cursor: cursor object, used to list the tables

    Returns:
        None. All output goes to stdout.
    """

    tables = cursor.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
    for (table_name,) in tables:
        # Quote the identifier (doubling any embedded double quotes) so that
        # table names containing spaces, punctuation or SQL keywords still
        # produce a valid statement.
        quoted_name = '"{}"'.format(table_name.replace('"', '""'))
        # LIMIT 0 returns no rows; only the column names are needed.
        table = pd.read_sql_query("SELECT * FROM {} LIMIT 0".format(quoted_name), conn)
        print(table_name)
        for col in table.columns:
            print("\t" + col)
        print()

In [None]:
# Print every table in the database together with its column names.
table_info(conn, cur)

## Join all tables

In [None]:
# Rename cust_loc's key column to CustomerID so that it matches the column
# name used by the USING (CustomerID) join with customer below.
# NOTE: ALTER TABLE ... RENAME COLUMN needs SQLite 3.25.0 or newer.
# NOTE: Cust_ID only exists until this statement has run once, so re-running
# this cell without first recreating the cust_loc table is expected to fail.
cur.execute("""
ALTER TABLE cust_loc
RENAME COLUMN Cust_ID TO CustomerID""")

In [None]:
# Inner-join customer and cust_loc on CustomerID and fetch only the first
# result row as a quick check of the join.
cur.execute("""
SELECT
*
FROM customer
INNER JOIN cust_loc
USING (CustomerID)
""")
cur.fetchone()

In [None]:
# Apply the same Cust_ID -> CustomerID rename to the pandas DataFrame.
cust_loc = cust_loc.rename(columns={"Cust_ID": "CustomerID"})

In [None]:
# Preview the first 10 rows of cust_loc after the rename.
cust_loc.head(10)

In [None]:
# Place customer and cust_loc side by side, keeping only the index labels
# that are present in both frames.
# NOTE(review): with axis=1, pd.concat aligns rows on the DataFrame index,
# not on CustomerID -- confirm both frames list customers in the same order.
pd.concat([customer, cust_loc], join="inner", axis=1)

In [None]:
# Rename the key column of the remaining DataFrames to CustomerID so that
# every frame uses the same key column name.
cust_services = cust_services.rename(columns={"Cust_ID": "CustomerID"})
cust_account = cust_account.rename(columns={"Account_id": "CustomerID"})
cust_churn = cust_churn.rename(columns={"Id": "CustomerID"})

In [None]:
# Collect the DataFrames to combine; the list order determines the order in
# which each frame's columns appear in the combined result.
dfs_to_join = [customer, cust_loc, cust_services, cust_account, cust_churn]

In [None]:
# Combine the frames on the shared CustomerID key instead of on row position.
# pd.concat(axis=1) aligns rows on the index, and the frames as read from CSV
# only carry a positional index, so a file with a different row order or row
# count would silently attach one customer's data to another customer.
# Indexing every frame by CustomerID first makes the inner join keep exactly
# the customers present in all frames; reset_index() then restores CustomerID
# as a regular (first) column.
# Assumes CustomerID is unique within each frame -- TODO confirm. A frame
# without a CustomerID column raises KeyError here instead of being joined
# by position.
churn_all = pd.concat(
    [frame.set_index("CustomerID") for frame in dfs_to_join],
    join="inner",
    axis=1,
).reset_index()

In [None]:
# Boolean mask over the columns: True marks a column whose name has already
# appeared earlier in the frame.
churn_all.columns.duplicated()

In [None]:
# Keep only the first occurrence of each column name, dropping any column
# whose name is repeated.
churn_all = churn_all.loc[:, ~churn_all.columns.duplicated()]

In [None]:
# Preview the first 10 rows of the combined dataset.
churn_all.head(10)

In [None]:
# Create the churn_all table from the combined DataFrame, replacing any
# existing table of that name; the DataFrame index is not written.
churn_all.to_sql("churn_all", conn, if_exists="replace", index=False)

In [None]:
# Export the combined dataset to churn_all.csv.
# NOTE(review): unlike the to_sql calls, index=False is not passed here, so
# the DataFrame index is written as an extra leading column -- confirm that
# whatever reads this file expects it.
churn_all.to_csv("churn_all.csv")