In [49]:
# Set autoreload
%load_ext autoreload
%autoreload 2

import os
import sys
import pandas as pd
import hashlib
sys.path.append('../../../app/src')
from config import config
import sqlite3

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Extract financial data

In [50]:
# Read the raw client holdings export; the *_raw frame is kept as loaded.
financial_advisor_clients_raw = pd.read_csv(
    config.RAW_DATA_PATH / 'financial_advisor_clients.csv'
)

# Later cells add helper columns, so work on a copy rather than the raw frame.
financial_advisor_clients = financial_advisor_clients_raw.copy()

# Preview the first two rows.
financial_advisor_clients.head(2)

Unnamed: 0,Client,Symbol,Name,Sector,Quantity,Buy Price,Current Price,Market Value,Purchase Date,Dividend Yield,P/E Ratio,52-Week High,52-Week Low,Analyst Rating,Target Price,Risk Level
0,Client_1,DIA,SPDR Dow Jones Industrial Average ETF,ETF,135.0,264.89,574.42,77546.7,12/11/21,1.61,19.2,674.71,493.6,Hold,660.28,Medium
1,Client_1,GOOGL,Alphabet Inc.,Communication Services,45.0,93.66,1062.91,47830.95,11/23/21,2.62,23.06,1205.77,866.81,Hold,1077.25,Medium


In [51]:
# Pattern a well-formed client label must follow: "Client_" followed by digits.
client_regex = r'^Client_\d+$'


def validate_client_values(df: pd.DataFrame, column: str, regex: str) -> pd.Series:
    """Flag which values of ``df[column]`` match ``regex``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the column to check.
    column : str
        Name of a string column in ``df``.
    regex : str
        Regular expression applied with ``Series.str.match`` (matching starts
        at the beginning of each value).

    Returns
    -------
    pd.Series
        Boolean Series aligned with ``df``: True where the value matches,
        False where it does not match or is missing.
    """
    # na=False: without it, missing values come back as NaN, which is neither
    # "== True" nor "== False" and therefore slips through both checks.
    return df[column].str.match(regex, na=False)

In [52]:
# Flag rows whose Client label follows the expected "Client_<digits>" pattern.
financial_advisor_clients['Client_validation'] = validate_client_values(
    financial_advisor_clients,
    'Client',
    client_regex,
)

# Client_Id is the MD5 hex digest of the raw Client string. It is computed for
# every row, including rows that fail validation above.
# na_action='ignore' leaves missing Client values as NaN instead of raising
# AttributeError on `.encode()`.
financial_advisor_clients['Client_Id'] = financial_advisor_clients['Client'].map(
    lambda client: hashlib.md5(client.encode()).hexdigest(),
    na_action='ignore',
)

In [53]:
# Display the rows whose Client label failed the pattern check.
financial_advisor_clients.loc[financial_advisor_clients['Client_validation'].eq(False)]

Unnamed: 0,Client,Symbol,Name,Sector,Quantity,Buy Price,Current Price,Market Value,Purchase Date,Dividend Yield,P/E Ratio,52-Week High,52-Week Low,Analyst Rating,Target Price,Risk Level,Client_validation,Client_Id
16,Cliunt_2,IVV,iShares Core S&P 500 ETF,ETF,181.0,307.27,1118.16,202386.96,6/15/21,3.64,19.76,1216.79,743.74,Buy,1112.25,Low,False,c0ec1444ed1113cc54ddae7abc8948ce
26,Clpent_2,VOO,Vanguard S&P 500 ETF,ETF,148.0,86.33,1141.92,169004.16,8/3/21,4.46,23.46,1362.35,1026.14,Sell,1180.02,Low,False,050b31cb4dbc8973b0e699cd563efad0
41,Cliint_3,FB,Meta Platforms Inc.,Communication Services,114.0,81.63,1010.69,115218.66,11/26/21,4.75,28.67,1174.00,840.74,Buy,925.63,Low,False,928f8ceb39f1f275e8ced4ca22f57b92
94,Clieat_7,VOO,Vanguard S&P 500 ETF,ETF,133.0,397.85,225.94,30050.02,1/17/21,1.58,25.73,267.94,188.73,Buy,240.24,,False,31bc8dfa4964ed966adef777ba9b0c77
114,rlient_8,VOO,Vanguard S&P 500 ETF,ETF,58.0,1189.99,745.55,43241.90,10/21/21,0.20,6.66,797.92,530.83,Buy,704.26,Medium,False,e513d5d80b441e5df8cb360225d6e24e
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
788,Cliens_13,FB,Meta Platforms Inc.,Communication Services,16.0,1215.99,572.46,9159.36,2/10/21,0.63,25.09,840.77,395.37,Sell,582.26,High,False,10b9a0633da427b6156a4a6bb29e566c
793,Cjient_23,MSFT,Microsoft Corp.,Technology,71.0,887.03,323.24,22950.04,4/24/21,2.32,22.43,390.47,305.32,Buy,331.19,Low,False,114e7038c47808fa4a031210f37a58d8
815,Clieno_32,GOOGL,Alphabet Inc.,Communication Services,130.0,1031.43,528.15,68659.50,4/21/21,2.29,18.04,736.26,411.87,Hold,537.85,Medium,False,b7eba51adaa40a53268aefbd82475865
818,Cbient_40,TSLA,Tesla Inc.,Consumer Discretionary,75.0,671.74,667.52,50064.00,10/18/21,2.95,29.29,977.61,505.53,Buy,776.80,Medium,False,f1fabfeca2aa08020a5d24764ab03d86


## Extract client allocation

In [54]:
# Read the raw target-allocation export; the *_raw frame is kept as loaded.
client_target_allocations_raw = pd.read_csv(
    config.RAW_DATA_PATH / 'client_target_allocations.csv'
)

# Later cells add validation columns, so work on a copy rather than the raw frame.
client_target_allocations = client_target_allocations_raw.copy()

# Preview the first two rows.
client_target_allocations.head(2)

Unnamed: 0,Client,Target Portfolio,Asset Class,Target Allocation (%)
0,,Balanced,Stocks,50.0
1,Client_1,Balanced,Bonds,30.0


In [55]:
# List the distinct Asset Class values (missing values included) to see what needs validating.
pd.unique(client_target_allocations['Asset Class'])

array(['Stocks', 'Bonds', 'ETFs', 'Cash', nan], dtype=object)

In [56]:
# Accepted categorical values; anything else (including missing) is flagged invalid.
VALID_TARGET_PORTFOLIOS = ['Balanced', 'Growth', 'Aggressive Growth', 'Conservative']
VALID_ASSET_CLASSES = ['Stocks', 'Bonds', 'ETFs', 'Cash']

# Vectorised membership test; missing values evaluate to False.
client_target_allocations['Target Portfolio_validation'] = (
    client_target_allocations['Target Portfolio'].isin(VALID_TARGET_PORTFOLIOS)
)

# This file contains rows with a missing Client. `.eq(True)` turns any
# missing match result into False so the flag column is strictly boolean.
client_target_allocations['Client_validation'] = validate_client_values(
    client_target_allocations,
    'Client',
    client_regex,
).eq(True)

client_target_allocations['Asset Class_validation'] = (
    client_target_allocations['Asset Class'].isin(VALID_ASSET_CLASSES)
)

## Load data

In [57]:
# Keep only the holdings rows whose Client label passed validation.
financial_advisor_clients_valid_data = financial_advisor_clients.loc[
    financial_advisor_clients['Client_validation'].eq(True)
]

# An allocation row is kept only when all three validation flags are True.
allocation_row_is_valid = (
    client_target_allocations['Target Portfolio_validation'].eq(True)
    & client_target_allocations['Client_validation'].eq(True)
    & client_target_allocations['Asset Class_validation'].eq(True)
)
client_target_allocations_valid_data = client_target_allocations.loc[
    allocation_row_is_valid
]

In [58]:
# Write both validated tables to a file-based SQLite database, replacing any
# existing tables of the same name.
con_file = sqlite3.connect(config.ARTIFACTS_PATH / 'sql_database.db')
try:
    # `with con_file` only commits (or rolls back on error); it does not close
    # the connection, hence the explicit close() in `finally`.
    with con_file:
        financial_advisor_clients_valid_data.to_sql(
            'financial_advisor_clients',
            con_file,
            index=False,
            if_exists='replace',
        )

        client_target_allocations_valid_data.to_sql(
            'client_target_allocations',
            con_file,
            index=False,
            if_exists='replace',
        )
finally:
    con_file.close()