In [1]:
import pandas as pd
import sqlite3
from sqlite3 import Error
pd.options.mode.chained_assignment = None # not to get SettingWithCopyWarning

## Set Variables - Ticker, CIK etc

In [2]:
cik = '0000100493'
ticker = 'TSN'

## Function to use

In [3]:
def write_new_data_into_sqltable(db_path=str, df=pd.DataFrame, statement=str, cik=str):
    # create connection
    connection = None
    try:
        connection = sqlite3.connect(db_path)
        print("Connection to SQLite DB successful!")
    except Error as e:
        print(f"The error '{e}' occurred.")
    
    # extend cik to 10 character
    df['cik'] = [str(cik).zfill(10) for cik in df['cik'].values]
    # keep only one CIK in DataFrame
    df = df.loc[df.cik == cik]
    if len(df) == 0:
        print('CIK filter removed every row.')
    # check CIK is in database
    if cik not in pd.read_sql_query("SELECT DISTINCT cik FROM {}".format(statement), connection).values:
        # get newest report date
        new_updated_date = pd.Timestamp(df['end'].values.max()).normalize() + pd.Timedelta(days=1)
        # create direct input tables with strings
        ingest_df = df[pd.read_sql_query("PRAGMA table_info({});".format(statement), connection).name.values].copy()
        for column in ingest_df.columns:
            ingest_df[column].apply(str)
        print('Creation of input database is successful!')
        # write dataframe into table
        ingest_df.to_sql(
            name=statement,
            con=connection,
            if_exists='append',
            index=False)
        print('Data is written into database!')
        # update main table's update column
        cursor = connection.cursor()
        try:
            cursor.execute("UPDATE main SET updated = '{}' WHERE cik = '{}'".format(new_updated_date, cik))
            connection.commit()
            cursor.close()
            print("Main table's updated column freshed successfully! Cursor closed.")
        except Error as e:
            print(f"The error '{e}' occurred")
    else:
        print('Writing into the database is unsuccessful due to existing CIK.')
    # close connection
    connection.close()
    print('Connection closed!')

In [4]:
def add_extra_data_to_sqltable(db_path=str, df=pd.DataFrame, statement=str, cik=str):
    # create connection
    connection = None
    try:
        connection = sqlite3.connect(db_path)
        print("Connection to SQLite DB successful!")
    except Error as e:
        print(f"The error '{e}' occurred.")
    
    # extend cik to 10 characters
    df['cik'] = [str(cik).zfill(10) for cik in df['cik'].values]
    # check CIK is in main datatable
    if cik in pd.read_sql_query("SELECT DISTINCT cik FROM main", connection)['cik'].values:    
        # get last update date relating to company CIK number
        last_update = pd.Timestamp(pd.read_sql_query("SELECT updated FROM main WHERE cik = '{}'".format(cik), connection)['updated'].values[0])
        # filter input df & keep only new data
        filtered_df = df.loc[df.end > last_update]
        filtered_df.reset_index(inplace=True, drop=True)
        if len(filtered_df) != 0 and str(last_update) not in pd.read_sql_query("SELECT end FROM {} WHERE cik = '{}'".format(statement, cik), connection)['end'].values:
            # create direct input tables with strings
            ingest_df = filtered_df[pd.read_sql_query("PRAGMA table_info({});".format(statement), connection).name.values].copy()
            for column in ingest_df.columns:
                ingest_df[column].apply(str)
            print('Creation of input database is successful!')
            # write dataframe into table
            ingest_df.to_sql(
                name=statement,
                con=connection,
                if_exists='append',
                index=False)
            print('Data is written into database!')
            # update main table's update column
            cursor = connection.cursor()
            try:
                new_updated_date = str(pd.to_datetime('today').normalize())
                cursor.execute("UPDATE main SET updated = '{}' WHERE cik = '{}'".format(new_updated_date, cik))
                connection.commit()
                cursor.close()
                print("Main table's updated column freshed successfully! Cursor closed.")
            except Error as e:
                print(f"The error '{e}' occurred")
        else:
            print('There is NO new data!')
    else:
        print('CIK is not in the main data table! Use another function!')
    # close connection
    connection.close()
    print('Connection closed!')

## 0. Create company row in main table & Load X-CHECKED data

In [5]:
data = pd.read_csv('../sec_report_csv/{}_{}_sec_reports.csv'.format(cik, ticker), parse_dates=['end'])
data.head()

Unnamed: 0,end,form,cik,rev,cost_sale,sale_gen_adm,inpairm,op_income,int_income,int_exp,...,other_ass,tot_ass,short_debt,acc_pay,other_curr_liab,tot_curr_liab,tot_long_debt,other_liab,tot_equity,ticker
0,2011-01-01,10-Q,100493,7615000000,6871000000,246000000,0.0,498000000,3000000,66000000,...,466000000,11190000000,424000000,1212000000,520000000,2645000000,2124000000,520000000,5445000000,TSN
1,2011-04-02,10-Q,100493,8000000000,7467000000,230000000,0.0,303000000,3000000,63000000,...,471000000,11226000000,390000000,1126000000,500000000,2479000000,2105000000,500000000,5649000000,TSN
2,2011-07-02,10-Q,100493,8247000000,7716000000,219000000,0.0,312000000,2000000,58000000,...,461000000,11488000000,362000000,1193000000,457000000,2761000000,2094000000,457000000,5752000000,TSN
3,2011-10-01,10-Q,100493,8404000000,7716000000,219000000,0.0,172000000,2000000,58000000,...,427000000,11071000000,70000000,1264000000,476000000,2374000000,2112000000,476000000,5657000000,TSN
4,2011-10-01,10-K,100493,32266000000,30067000000,906000000,0.0,1289000000,11000000,242000000,...,427000000,11071000000,70000000,1264000000,476000000,2374000000,2112000000,476000000,5657000000,TSN


## 1. Write new data into SQL table

In [6]:
for statement in ['income_statement', 'balance_sheet']:
    print('Strrting to fill {} table in SQL database!'.format(statement))
    write_new_data_into_sqltable(
        db_path='../database/statements.db',
        df=data,
        statement=statement,
        cik=cik
    )

Strrting to fill income_statement table in SQL database!
Connection to SQLite DB successful!
Writing into the database is unsuccessful due to existing CIK.
Connection closed!
Strrting to fill balance_sheet table in SQL database!
Connection to SQLite DB successful!
Writing into the database is unsuccessful due to existing CIK.
Connection closed!


## 2. Update Existing company with newer data

In [7]:
for statement in ['income_statement', 'balance_sheet']:
    print('Starting to update {} table in SQL database!'.format(statement))
    add_extra_data_to_sqltable(
        db_path='../database/statements.db',
        df=data,
        statement=statement,
        cik=cik
    )

Starting to update income_statement table in SQL database!
Connection to SQLite DB successful!
Creation of input database is successful!
Data is written into database!
Main table's updated column freshed successfully! Cursor closed.
Connection closed!
Starting to update balance_sheet table in SQL database!
Connection to SQLite DB successful!
There is NO new data!
Connection closed!
