In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *
import pandas as pd
#col, from_utc_timestamp,upper,lower,initcap,concat_ws,regexp_replace,isnan,when,count,coalesce

In [2]:
# Start (or reuse) a local Spark session running on a single worker thread.
spark = (
    SparkSession.builder
    .master('local[1]')
    .appName('Credit Card Management System')
    .getOrCreate()
)

### DATA WRANGLING

Cleaning file 'cdw_sapp_custmer.json'

In [3]:
# Read the raw customer records from JSON (schema inferred by Spark)
# and preview the first four rows.
df_cust = spark.read.format('json').load('cdw_sapp_custmer.json')
df_cust.show(n=4)

+------+----------------+------------+-------------+-------------------+----------+----------+--------+----------+---------+--------------------+-----------+---------+-----------------+
|APT_NO|  CREDIT_CARD_NO|   CUST_CITY| CUST_COUNTRY|         CUST_EMAIL|CUST_PHONE|CUST_STATE|CUST_ZIP|FIRST_NAME|LAST_NAME|        LAST_UPDATED|MIDDLE_NAME|      SSN|      STREET_NAME|
+------+----------------+------------+-------------+-------------------+----------+----------+--------+----------+---------+--------------------+-----------+---------+-----------------+
|   656|4210653310061055|     Natchez|United States|AHooper@example.com|   1237818|        MS|   39120|      Alec|   Hooper|2018-04-21T12:49:...|         Wm|123456100|Main Street North|
|   829|4210653310102868|Wethersfield|United States|EHolman@example.com|   1238933|        CT|   06109|      Etta|   Holman|2018-04-21T12:49:...|    Brendan|123453023|    Redwood Drive|
|   683|4210653310116272|     Huntley|United States|WDunham@example.co

In [4]:
# Duplicate check: list every fully identical row that occurs more than once,
# then confirm that de-duplicating leaves the row count unchanged
# (True means the customer data has no duplicate rows).
df_cust.groupBy(*df_cust.columns).count().filter('count>1').show()
df_cust.count() == df_cust.distinct().count()

+------+--------------+---------+------------+----------+----------+----------+--------+----------+---------+------------+-----------+---+-----------+-----+
|APT_NO|CREDIT_CARD_NO|CUST_CITY|CUST_COUNTRY|CUST_EMAIL|CUST_PHONE|CUST_STATE|CUST_ZIP|FIRST_NAME|LAST_NAME|LAST_UPDATED|MIDDLE_NAME|SSN|STREET_NAME|count|
+------+--------------+---------+------------+----------+----------+----------+--------+----------+---------+------------+-----------+---+-----------+-----+
+------+--------------+---------+------------+----------+----------+----------+--------+----------+---------+------------+-----------+---+-----------+-----+



True

In [5]:
def check_nulls(df):
    """Count missing values per column of a Spark DataFrame.

    A value counts as missing when it is NULL or, for float/double columns
    only, NaN.

    Args:
        df: Spark DataFrame to inspect.

    Returns:
        A one-row Spark DataFrame with the same column names as ``df``; each
        cell holds the number of missing values found in that column.
    """
    # isnan() is only meaningful for floating-point input. Applying it to
    # other types is either rejected by Spark (e.g. boolean or complex types)
    # or forces an implicit string->double cast, which makes text such as
    # "NaN" count as a missing value.
    nan_capable = ("float", "double")
    navalues = df.select([
        count(
            when(isnan(col(c)) | col(c).isNull(), c) if t in nan_capable
            else when(col(c).isNull(), c)
        ).alias(c)
        for c, t in df.dtypes
    ])
    return navalues

# Show the per-column missing-value counts for the customer data.
check_nulls(df_cust).show()

+------+--------------+---------+------------+----------+----------+----------+--------+----------+---------+------------+-----------+---+-----------+
|APT_NO|CREDIT_CARD_NO|CUST_CITY|CUST_COUNTRY|CUST_EMAIL|CUST_PHONE|CUST_STATE|CUST_ZIP|FIRST_NAME|LAST_NAME|LAST_UPDATED|MIDDLE_NAME|SSN|STREET_NAME|
+------+--------------+---------+------------+----------+----------+----------+--------+----------+---------+------------+-----------+---+-----------+
|     0|             0|        0|           0|         0|         0|         0|       0|         0|        0|           0|          0|  0|          0|
+------+--------------+---------+------------+----------+----------+----------+--------+----------+---------+------------+-----------+---+-----------+



In [6]:
# Normalise the customer columns in a single chained expression.
df_cust = (
    df_cust
    .withColumn('SSN', col('SSN').cast('int'))
    # NOTE(review): the integer cast drops leading zeros from ZIP codes
    # (the output shows '06109' becoming 6109) -- confirm this is intended.
    .withColumn('CUST_ZIP', col('CUST_ZIP').cast('int'))
    .withColumn('LAST_UPDATED', to_timestamp('LAST_UPDATED'))
    # Names: title case for first/last name, lower case for the middle name.
    .withColumn('FIRST_NAME', initcap(col('FIRST_NAME')))
    .withColumn('MIDDLE_NAME', lower('MIDDLE_NAME'))
    .withColumn('LAST_NAME', initcap(col('LAST_NAME')))
    # Apartment number and street name joined with a comma.
    .withColumn('FULL_STREET_ADDRESS', concat_ws(',', col('APT_NO'), col('STREET_NAME')))
    # Right-pad the phone number with '0' to 10 characters, then format it as
    # (XXX)XXX-XXXX. NOTE(review): the padding digits are not real data.
    .withColumn(
        'CUST_PHONE',
        regexp_replace(
            rpad(col('CUST_PHONE'), 10, '0'),
            r'^(\d{3})(\d{3})(\d{4})$',
            '($1)$2-$3',
        ),
    )
)

df_cust.printSchema()

root
 |-- APT_NO: string (nullable = true)
 |-- CREDIT_CARD_NO: string (nullable = true)
 |-- CUST_CITY: string (nullable = true)
 |-- CUST_COUNTRY: string (nullable = true)
 |-- CUST_EMAIL: string (nullable = true)
 |-- CUST_PHONE: string (nullable = true)
 |-- CUST_STATE: string (nullable = true)
 |-- CUST_ZIP: integer (nullable = true)
 |-- FIRST_NAME: string (nullable = true)
 |-- LAST_NAME: string (nullable = true)
 |-- LAST_UPDATED: timestamp (nullable = true)
 |-- MIDDLE_NAME: string (nullable = true)
 |-- SSN: integer (nullable = true)
 |-- STREET_NAME: string (nullable = true)
 |-- FULL_STREET_ADDRESS: string (nullable = false)



In [7]:
# Preview the first two customer rows after the column transformations.
df_cust.show(2)

+------+----------------+------------+-------------+-------------------+-------------+----------+--------+----------+---------+-------------------+-----------+---------+-----------------+--------------------+
|APT_NO|  CREDIT_CARD_NO|   CUST_CITY| CUST_COUNTRY|         CUST_EMAIL|   CUST_PHONE|CUST_STATE|CUST_ZIP|FIRST_NAME|LAST_NAME|       LAST_UPDATED|MIDDLE_NAME|      SSN|      STREET_NAME| FULL_STREET_ADDRESS|
+------+----------------+------------+-------------+-------------------+-------------+----------+--------+----------+---------+-------------------+-----------+---------+-----------------+--------------------+
|   656|4210653310061055|     Natchez|United States|AHooper@example.com|(123)781-8000|        MS|   39120|      Alec|   Hooper|2018-04-21 12:49:02|         wm|123456100|Main Street North|656,Main Street N...|
|   829|4210653310102868|Wethersfield|United States|EHolman@example.com|(123)893-3000|        CT|    6109|      Etta|   Holman|2018-04-21 12:49:02|    brendan|12345

In [8]:
# APT_NO and STREET_NAME are already combined in FULL_STREET_ADDRESS,
# so the two source columns are removed.
df_cust = df_cust.drop('APT_NO', 'STREET_NAME')

In [9]:
# List the customer columns that remain at this point.
df_cust.columns

['CREDIT_CARD_NO',
 'CUST_CITY',
 'CUST_COUNTRY',
 'CUST_EMAIL',
 'CUST_PHONE',
 'CUST_STATE',
 'CUST_ZIP',
 'FIRST_NAME',
 'LAST_NAME',
 'LAST_UPDATED',
 'MIDDLE_NAME',
 'SSN',
 'FULL_STREET_ADDRESS']

In [10]:
# Select the customer columns in their final order.
# CREDIT_CARD_NO is not in this list, so it is dropped by the select.
col_order = [
    'SSN',
    'FIRST_NAME',
    'MIDDLE_NAME',
    'LAST_NAME',
    'FULL_STREET_ADDRESS',
    'CUST_CITY',
    'CUST_STATE',
    'CUST_COUNTRY',
    'CUST_ZIP',
    'CUST_PHONE',
    'CUST_EMAIL',
    'LAST_UPDATED',
]
df_cust = df_cust.select(col_order)

In [11]:
# Display the customer rows in their final column order without truncating cell values.
df_cust.show(truncate=False)

+---------+----------+-----------+---------+---------------------+------------+----------+-------------+--------+-------------+----------------------+-------------------+
|SSN      |FIRST_NAME|MIDDLE_NAME|LAST_NAME|FULL_STREET_ADDRESS  |CUST_CITY   |CUST_STATE|CUST_COUNTRY |CUST_ZIP|CUST_PHONE   |CUST_EMAIL            |LAST_UPDATED       |
+---------+----------+-----------+---------+---------------------+------------+----------+-------------+--------+-------------+----------------------+-------------------+
|123456100|Alec      |wm         |Hooper   |656,Main Street North|Natchez     |MS        |United States|39120   |(123)781-8000|AHooper@example.com   |2018-04-21 12:49:02|
|123453023|Etta      |brendan    |Holman   |829,Redwood Drive    |Wethersfield|CT        |United States|6109    |(123)893-3000|EHolman@example.com   |2018-04-21 12:49:02|
|123454487|Wilber    |ezequiel   |Dunham   |683,12th Street East |Huntley     |IL        |United States|60142   |(124)301-8000|WDunham@example.co

In [12]:
#pip install mysql-connector-python

In [13]:
from mysql.connector import Error
#Write to MySQL Table
def write_to_db(df,table_name,mode="errorifexists"):
    """Write a Spark DataFrame to a table of the ``creditcard_capstone`` MySQL database over JDBC.

    Args:
        df: Spark DataFrame to persist.
        table_name: Name of the destination table.
        mode: Spark save mode ("errorifexists", "append", "overwrite" or
            "ignore"). The default, "errorifexists", matches the previous
            behaviour: the write is refused when the table already exists.

    Returns:
        None. Success or failure is reported on stdout; exceptions raised by
        the write are caught and printed rather than propagated.
    """
    import os  # local import keeps this cell independent of other cells

    # Credentials can be supplied through the environment; the fallbacks are
    # the notebook's previous local-development values.
    db_user = os.environ.get("MYSQL_USER", "root")
    db_password = os.environ.get("MYSQL_PASSWORD", "admin")
    try:
        (
            df.write.format("jdbc")
            .mode(mode)
            .option("driver", "com.mysql.cj.jdbc.Driver")
            .option("url", "jdbc:mysql://localhost:3306/creditcard_capstone")
            .option("dbtable", table_name)
            .option("user", db_user)
            .option("password", db_password)
            .save()
        )
        print(f'{table_name} table is created')
    except Exception as e:
        # Not every failure is a connection problem (e.g. "table already
        # exists"), so the message describes the write as a whole.
        print(f'Error while writing {table_name} to MySQL:', e)


In [14]:
# Write the cleaned customer data to the MySQL table CDW_SAPP_CUSTOMER.
write_to_db(df_cust,'CDW_SAPP_CUSTOMER')

Error while connecting to MySQL Table or view 'CDW_SAPP_CUSTOMER' already exists. SaveMode: ErrorIfExists.


Cleaning file 'cdw_sapp_branch.json'

In [15]:
# Read the raw branch records from JSON and report missing values per column.
df_branch = spark.read.format('json').load('cdw_sapp_branch.json')
branch_null_counts = check_nulls(df_branch)
branch_null_counts.show()

+-----------+-----------+-----------+------------+------------+-------------+----------+------------+
|BRANCH_CITY|BRANCH_CODE|BRANCH_NAME|BRANCH_PHONE|BRANCH_STATE|BRANCH_STREET|BRANCH_ZIP|LAST_UPDATED|
+-----------+-----------+-----------+------------+------------+-------------+----------+------------+
|          0|          0|          0|           0|           0|            0|         0|           0|
+-----------+-----------+-----------+------------+------------+-------------+----------+------------+



In [16]:
# Duplicate check: list every fully identical branch row that occurs more than
# once, then confirm that de-duplicating leaves the row count unchanged
# (True means there are no duplicate rows).
df_branch.groupBy(*df_branch.columns).count().filter('count>1').show()
df_branch.count() == df_branch.distinct().count()

+-----------+-----------+-----------+------------+------------+-------------+----------+------------+-----+
|BRANCH_CITY|BRANCH_CODE|BRANCH_NAME|BRANCH_PHONE|BRANCH_STATE|BRANCH_STREET|BRANCH_ZIP|LAST_UPDATED|count|
+-----------+-----------+-----------+------------+------------+-------------+----------+------------+-----+
+-----------+-----------+-----------+------------+------------+-------------+----------+------------+-----+



True

In [17]:
# Preview the first three raw branch rows.
df_branch.show(3)

+-----------------+-----------+------------+------------+------------+-----------------+----------+--------------------+
|      BRANCH_CITY|BRANCH_CODE| BRANCH_NAME|BRANCH_PHONE|BRANCH_STATE|    BRANCH_STREET|BRANCH_ZIP|        LAST_UPDATED|
+-----------------+-----------+------------+------------+------------+-----------------+----------+--------------------+
|        Lakeville|          1|Example Bank|  1234565276|          MN|     Bridle Court|     55044|2018-04-18T16:51:...|
|          Huntley|          2|Example Bank|  1234618993|          IL|Washington Street|     60142|2018-04-18T16:51:...|
|SouthRichmondHill|          3|Example Bank|  1234985926|          NY|    Warren Street|     11419|2018-04-18T16:51:...|
+-----------------+-----------+------------+------------+------------+-----------------+----------+--------------------+
only showing top 3 rows



In [18]:
# Inspect the column types Spark inferred for the branch data.
df_branch.printSchema()

root
 |-- BRANCH_CITY: string (nullable = true)
 |-- BRANCH_CODE: long (nullable = true)
 |-- BRANCH_NAME: string (nullable = true)
 |-- BRANCH_PHONE: string (nullable = true)
 |-- BRANCH_STATE: string (nullable = true)
 |-- BRANCH_STREET: string (nullable = true)
 |-- BRANCH_ZIP: long (nullable = true)
 |-- LAST_UPDATED: string (nullable = true)



In [19]:
# Cast the numeric branch columns to 32-bit integers.
cols=['BRANCH_CODE','BRANCH_ZIP']
for col_name in cols:
    df_branch=df_branch.withColumn(col_name,col(col_name).cast('int'))
# Replace a missing ZIP with a five-digit placeholder. The previous value,
# 999999, had six digits, while every ZIP in the data has five.
# NOTE(review): confirm 99999 against the target mapping specification.
df_branch=df_branch.withColumn('BRANCH_ZIP',coalesce(col('BRANCH_ZIP'),lit(99999)))
# Format the phone number as (XXX)XXX-XXXX. The pattern is a raw string because
# '\d' inside a normal string literal is an invalid escape sequence that
# Python warns about.
df_branch=df_branch.withColumn('BRANCH_PHONE',regexp_replace(col('BRANCH_PHONE'),r'^(\d{3})(\d{3})(\d{4})$','($1)$2-$3'))
# Parse the LAST_UPDATED string into a timestamp.
df_branch=df_branch.withColumn('LAST_UPDATED',to_timestamp('LAST_UPDATED'))

In [20]:
# Preview the first three branch rows after the transformations.
df_branch.show(3)

+-----------------+-----------+------------+-------------+------------+-----------------+----------+-------------------+
|      BRANCH_CITY|BRANCH_CODE| BRANCH_NAME| BRANCH_PHONE|BRANCH_STATE|    BRANCH_STREET|BRANCH_ZIP|       LAST_UPDATED|
+-----------------+-----------+------------+-------------+------------+-----------------+----------+-------------------+
|        Lakeville|          1|Example Bank|(123)456-5276|          MN|     Bridle Court|     55044|2018-04-18 16:51:47|
|          Huntley|          2|Example Bank|(123)461-8993|          IL|Washington Street|     60142|2018-04-18 16:51:47|
|SouthRichmondHill|          3|Example Bank|(123)498-5926|          NY|    Warren Street|     11419|2018-04-18 16:51:47|
+-----------------+-----------+------------+-------------+------------+-----------------+----------+-------------------+
only showing top 3 rows



In [21]:
# Select the branch columns in their final order and preview three rows.
col_order = [
    'BRANCH_CODE',
    'BRANCH_NAME',
    'BRANCH_STREET',
    'BRANCH_CITY',
    'BRANCH_STATE',
    'BRANCH_ZIP',
    'BRANCH_PHONE',
    'LAST_UPDATED',
]
df_branch = df_branch.select(col_order)
df_branch.show(n=3)

+-----------+------------+-----------------+-----------------+------------+----------+-------------+-------------------+
|BRANCH_CODE| BRANCH_NAME|    BRANCH_STREET|      BRANCH_CITY|BRANCH_STATE|BRANCH_ZIP| BRANCH_PHONE|       LAST_UPDATED|
+-----------+------------+-----------------+-----------------+------------+----------+-------------+-------------------+
|          1|Example Bank|     Bridle Court|        Lakeville|          MN|     55044|(123)456-5276|2018-04-18 16:51:47|
|          2|Example Bank|Washington Street|          Huntley|          IL|     60142|(123)461-8993|2018-04-18 16:51:47|
|          3|Example Bank|    Warren Street|SouthRichmondHill|          NY|     11419|(123)498-5926|2018-04-18 16:51:47|
+-----------+------------+-----------------+-----------------+------------+----------+-------------+-------------------+
only showing top 3 rows



In [22]:
# Write the cleaned branch data to the MySQL table CDW_SAPP_BRANCH.
write_to_db(df_branch,'CDW_SAPP_BRANCH')

Error while connecting to MySQL Table or view 'CDW_SAPP_BRANCH' already exists. SaveMode: ErrorIfExists.


Cleaning file 'cdw_sapp_credit.json'

In [23]:
# Read the raw credit-card transactions from JSON and report missing values per column.
df_credit = spark.read.format('json').load('cdw_sapp_credit.json')
credit_null_counts = check_nulls(df_credit)
credit_null_counts.show()

+-----------+--------------+--------+---+-----+--------------+----------------+-----------------+----+
|BRANCH_CODE|CREDIT_CARD_NO|CUST_SSN|DAY|MONTH|TRANSACTION_ID|TRANSACTION_TYPE|TRANSACTION_VALUE|YEAR|
+-----------+--------------+--------+---+-----+--------------+----------------+-----------------+----+
|          0|             0|       0|  0|    0|             0|               0|                0|   0|
+-----------+--------------+--------+---+-----+--------------+----------------+-----------------+----+



In [24]:
# Preview the first three raw transaction rows.
df_credit.show(3)

+-----------+----------------+---------+---+-----+--------------+----------------+-----------------+----+
|BRANCH_CODE|  CREDIT_CARD_NO| CUST_SSN|DAY|MONTH|TRANSACTION_ID|TRANSACTION_TYPE|TRANSACTION_VALUE|YEAR|
+-----------+----------------+---------+---+-----+--------------+----------------+-----------------+----+
|        114|4210653349028689|123459988| 14|    2|             1|       Education|             78.9|2018|
|         35|4210653349028689|123459988| 20|    3|             2|   Entertainment|            14.24|2018|
|        160|4210653349028689|123459988|  8|    7|             3|         Grocery|             56.7|2018|
+-----------+----------------+---------+---+-----+--------------+----------------+-----------------+----+
only showing top 3 rows



In [25]:
# Duplicate check: print whether de-duplicating leaves the row count unchanged
# (True means no duplicates), then list any fully identical rows that occur
# more than once.
print(df_credit.count() == df_credit.distinct().count())
df_credit.groupBy(*df_credit.columns).count().filter('count>1').show()

True
+-----------+--------------+--------+---+-----+--------------+----------------+-----------------+----+-----+
|BRANCH_CODE|CREDIT_CARD_NO|CUST_SSN|DAY|MONTH|TRANSACTION_ID|TRANSACTION_TYPE|TRANSACTION_VALUE|YEAR|count|
+-----------+--------------+--------+---+-----+--------------+----------------+-----------------+----+-----+
+-----------+--------------+--------+---+-----+--------------+----------------+-----------------+----+-----+



In [26]:
# Inspect the column types Spark inferred for the transaction data.
df_credit.printSchema()

root
 |-- BRANCH_CODE: long (nullable = true)
 |-- CREDIT_CARD_NO: string (nullable = true)
 |-- CUST_SSN: long (nullable = true)
 |-- DAY: long (nullable = true)
 |-- MONTH: long (nullable = true)
 |-- TRANSACTION_ID: long (nullable = true)
 |-- TRANSACTION_TYPE: string (nullable = true)
 |-- TRANSACTION_VALUE: double (nullable = true)
 |-- YEAR: long (nullable = true)



In [27]:
# Build TIMEID as a YYYYMMDD string from the year, month and day columns;
# month and day are left-padded with '0' to two characters.
timeid_expr = concat(
    df_credit["year"].cast("string"),
    lpad(df_credit["month"].cast("string"), 2, "0"),
    lpad(df_credit["day"].cast("string"), 2, "0"),
)
df_credit = df_credit.withColumn("TIMEID", timeid_expr)

# Store the identifier columns and TIMEID as 32-bit integers.
for int_col in ('CUST_SSN', 'BRANCH_CODE', 'TRANSACTION_ID', 'TIMEID'):
    df_credit = df_credit.withColumn(int_col, col(int_col).cast('int'))

# Rename the card-number column and drop the date parts that are now folded
# into TIMEID. (The original also kept a disabled alternative that built
# TIMEID with expr("make_date(year, month, day)").)
df_credit = (
    df_credit
    .withColumnRenamed("credit_card_no", "CUST_CC_NO")
    .drop('DAY', 'MONTH', 'YEAR')
)

In [28]:
# Select the transaction columns in their final order and display the result.
col_order = [
    'TRANSACTION_ID',
    'CUST_CC_NO',
    'TRANSACTION_TYPE',
    'TRANSACTION_VALUE',
    'BRANCH_CODE',
    'CUST_SSN',
    'TIMEID',
]
df_credit = df_credit.select(col_order)
df_credit.show()

+--------------+----------------+----------------+-----------------+-----------+---------+--------+
|TRANSACTION_ID|      CUST_CC_NO|TRANSACTION_TYPE|TRANSACTION_VALUE|BRANCH_CODE| CUST_SSN|  TIMEID|
+--------------+----------------+----------------+-----------------+-----------+---------+--------+
|             1|4210653349028689|       Education|             78.9|        114|123459988|20180214|
|             2|4210653349028689|   Entertainment|            14.24|         35|123459988|20180320|
|             3|4210653349028689|         Grocery|             56.7|        160|123459988|20180708|
|             4|4210653349028689|   Entertainment|            59.73|        114|123459988|20180419|
|             5|4210653349028689|             Gas|             3.59|         93|123459988|20181010|
|             6|4210653349028689|       Education|             6.89|        164|123459988|20180528|
|             7|4210653349028689|   Entertainment|            43.39|        119|123459988|20180519|


In [29]:
# Write the cleaned transaction data to the MySQL table CDW_SAPP_CREDIT_CARD.
write_to_db(df_credit,'CDW_SAPP_CREDIT_CARD')

Error while connecting to MySQL Table or view 'CDW_SAPP_CREDIT_CARD' already exists. SaveMode: ErrorIfExists.


Loan application dataset (JSON), fetched from: https://raw.githubusercontent.com/platformps/LoanDataset/main/loan_data.json

In [30]:
import requests
# Download the loan-application dataset and parse the JSON body.
url = 'https://raw.githubusercontent.com/platformps/LoanDataset/main/loan_data.json'
# The timeout keeps the cell from hanging indefinitely on a stalled connection;
# raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
loan_response = requests.get(url, timeout=30)
loan_response.raise_for_status()
resp = loan_response.json()
# Empty frame that fixes the column set and order of the loan table.
df_loan=pd.DataFrame(columns=['Application_ID', 'Gender',
 'Married','Dependents','Education','Self_Employed',
 'Credit_History','Property_Area','Income','Application_Status'])

In [31]:
# Build the loan DataFrame with a single constructor call.
# Growing a frame with pd.concat inside a loop copies every existing row on
# each iteration, and it appended the same rows again whenever the cell was
# re-run. Building it once is linear and idempotent.
loan_columns = list(df_loan.columns)
df_loan = pd.DataFrame(
    # rec[name] (rather than .get) still raises KeyError when a record lacks
    # one of the expected fields.
    [{name: rec[name] for name in loan_columns} for rec in resp],
    columns=loan_columns,
)

In [32]:
# Preview the first five loan applications.
df_loan.head()

Unnamed: 0,Application_ID,Gender,Married,Dependents,Education,Self_Employed,Credit_History,Property_Area,Income,Application_Status
0,LP001002,Male,No,0,Graduate,No,1,Urban,medium,Y
1,LP001003,Male,Yes,1,Graduate,No,1,Rural,medium,N
2,LP001005,Male,Yes,0,Graduate,Yes,1,Urban,low,Y
3,LP001006,Male,Yes,0,Not Graduate,No,1,Urban,low,Y
4,LP001008,Male,No,0,Graduate,No,1,Urban,medium,Y


In [33]:
# Number of rows and columns in the loan data.
df_loan.shape

(511, 10)

In [34]:
# Count the missing values in each loan column.
df_loan.isnull().sum()

Application_ID        0
Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
Credit_History        0
Property_Area         0
Income                0
Application_Status    0
dtype: int64

In [35]:
# Show loan rows that are exact duplicates of an earlier row (an empty result means none).
df_loan[df_loan.duplicated()]

Unnamed: 0,Application_ID,Gender,Married,Dependents,Education,Self_Employed,Credit_History,Property_Area,Income,Application_Status


In [36]:
# Save the loan data to Loan_Data.csv (without the index), reload the frame
# from that file, and preview the first five rows of the reloaded data.
df_loan.to_csv('Loan_Data.csv',index=False)
df_loan=pd.read_csv('Loan_Data.csv')
df_loan.head()

Unnamed: 0,Application_ID,Gender,Married,Dependents,Education,Self_Employed,Credit_History,Property_Area,Income,Application_Status
0,LP001002,Male,No,0,Graduate,No,1,Urban,medium,Y
1,LP001003,Male,Yes,1,Graduate,No,1,Rural,medium,N
2,LP001005,Male,Yes,0,Graduate,Yes,1,Urban,low,Y
3,LP001006,Male,Yes,0,Not Graduate,No,1,Urban,low,Y
4,LP001008,Male,No,0,Graduate,No,1,Urban,medium,Y


In [37]:
# Explicit Spark schema for the loan data: every field is a string except
# Credit_History, which is an integer.
schema = StructType([
    StructField(
        field_name,
        IntegerType() if field_name == 'Credit_History' else StringType(),
    )
    for field_name in (
        'Application_ID',
        'Gender',
        'Married',
        'Dependents',
        'Education',
        'Self_Employed',
        'Credit_History',
        'Property_Area',
        'Income',
        'Application_Status',
    )
])

# Convert the pandas frame to a Spark DataFrame with that schema and write it to MySQL.
spark_df = spark.createDataFrame(df_loan, schema=schema)
write_to_db(spark_df, 'CDW_SAPP_loan_application')

Error while connecting to MySQL Table or view 'CDW_SAPP_loan_application' already exists. SaveMode: ErrorIfExists.


In [38]:
# ALTER TABLE cdw_sapp_customer ADD PRIMARY KEY (SSN);
# ALTER TABLE cdw_sapp_branch ADD PRIMARY KEY (BRANCH_CODE);
# ALTER TABLE cdw_sapp_credit_card ADD PRIMARY KEY (TRANSACTION_ID);
# ALTER TABLE Date_Dim ADD PRIMARY KEY (Date_Id);

# ALTER TABLE `cdw_sapp_credit_card` ADD FOREIGN KEY (CUST_SSN) REFERENCES cdw_sapp_customer(SSN);
# ALTER TABLE `cdw_sapp_credit_card` ADD FOREIGN KEY (BRANCH_CODE) REFERENCES cdw_sapp_branch(BRANCH_CODE);
# ALTER TABLE `cdw_sapp_credit_card` ADD FOREIGN KEY (TIMEID) REFERENCES Date_Dim(Date_Id);

### Creating Date Dimension table

In [39]:
# Read the date dimension from CSV, using the first line as the header and
# letting Spark infer the column types, then print the resulting schema.
date_reader = spark.read.format("csv").options(header='true', inferSchema='true')
df_date = date_reader.load('Date_Dim.csv')

df_date.printSchema()

root
 |-- Date_Id: integer (nullable = true)
 |-- calender_date: timestamp (nullable = true)
 |-- month_no: integer (nullable = true)
 |-- month_name: string (nullable = true)
 |-- day_of_month: integer (nullable = true)
 |-- week_day: string (nullable = true)



In [40]:
# calender_date was inferred as a timestamp; cast it to a date.
calender_as_date = col('calender_date').cast('date')
df_date = df_date.withColumn('calender_date', calender_as_date)
df_date.show()

+--------+-------------+--------+----------+------------+---------+
| Date_Id|calender_date|month_no|month_name|day_of_month| week_day|
+--------+-------------+--------+----------+------------+---------+
|20180101|   2018-01-01|       1|       Jan|           1|   Monday|
|20180102|   2018-01-02|       1|       Jan|           2|  Tuesday|
|20180103|   2018-01-03|       1|       Jan|           3|Wednesday|
|20180104|   2018-01-04|       1|       Jan|           4| Thursday|
|20180105|   2018-01-05|       1|       Jan|           5|   Friday|
|20180106|   2018-01-06|       1|       Jan|           6| Saturday|
|20180107|   2018-01-07|       1|       Jan|           7|   Sunday|
|20180108|   2018-01-08|       1|       Jan|           8|   Monday|
|20180109|   2018-01-09|       1|       Jan|           9|  Tuesday|
|20180110|   2018-01-10|       1|       Jan|          10|Wednesday|
|20180111|   2018-01-11|       1|       Jan|          11| Thursday|
|20180112|   2018-01-12|       1|       Jan|    

In [41]:
# Write the date dimension to the MySQL table Date_Dim.
write_to_db(df_date,'Date_Dim')

Error while connecting to MySQL Table or view 'Date_Dim' already exists. SaveMode: ErrorIfExists.
