In [1]:
import os
import pyodbc
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
cnxn = pyodbc.connect('Driver={SQL Server Native Client 11.0};\
                       Server=lasr-sqldb-prd-im,17001;\
                       Database=IM_S_FAMIS_S;\
                       Trusted_Connection=yes;')

In [3]:
sql_query = """
select
FA.AsOfDate,
FA.ValueDecimal,
V.VehicleID as v_VehicleID,
VC.VehicleClassID as vc_VehicleClassID,
V.AccountNumber as v_AccountNumber,
VC.CUSIP as vc_CUSIP,
VC.QuotronSymbol as vc_QuotronSymbol,
V.VehicleTypeName as v_VehicleTypeName,
V.AbbreviatedName as v_AbbreviatedName,
VC.AbbreviatedName as vc_AbbreviatedName,
FT.FactTypeCode as ft_FactTypeCode,
FT.Name as ft_Name,
FST.Name as fst_Name
from MSS_S_FAMIS_S.dbo.FAMIS_MART_FactExt_CURRENT as FA
  inner join
     MSS_S_FAMIS_S.dbo.FAMIS_MART_FactTypeExt_CURRENT as FT
  on FA.FactTypeID=FT.FactTypeID
  left join
	MSS_S_FAMIS_S.dbo.FAMIS_MART_FactSubTypeExt_Current as FST
  on FA.FactSubTypeID=FST.FactSubTypeID
  inner join
     MSS_S_FAMIS_S.dbo.FAMIS_MART_VehicleExt_CURRENT as V
  on FA.VehicleID = V.VehicleID
  LEFT join 
     MSS_S_FAMIS_S.dbo.FAMIS_MART_VehicleClassExt_CURRENT as VC
  on FA.VehicleClassID = VC.VehicleClassID
where FT.FactTypeCode in ('ALPHA','BETA','CORR_ACWI','CORR_SNP','ARRMOP','ARRNAV','TRMOP','TRNAV','RSGA','RSG','ILAF','RILC','ILGVA','ILOF','RSNFA','RSNF','RNSA','RSA','FR','RSQ','SRAT','SRILAF','SDF','SDILAF')
"""

# Save the data 
data = pd.read_sql(sql_query, cnxn)

# Close the connection with LASR
#THIS IS IMPORTANT ALWAYS DO THIS, ESPECIALLY IF CONNECTING TO PROD
cnxn.close()

In [4]:
print(data.shape)
data.head()

(12977852, 13)


Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleID,vc_VehicleClassID,v_AccountNumber,vc_CUSIP,vc_QuotronSymbol,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,ft_Name,fst_Name
0,2016-10-31,0.1383,25488,33462.0,,72201M586,PCKPX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),5 Years
1,2018-07-31,-0.011431,10031,10542.0,11000043.0,532726 30 4,LTXCX,American Fund,LTEX,LTEX-C,TRMOP,Returns: Cumulative (MOP),Year to Date
2,2017-03-31,0.047801,10037,10644.0,11000064.0,02631C 84 1,BBDTX,American Fund,AFTD25,AFTD25-B,ARRMOP,Returns: Average Annual (MOP or w/ CDSC),Lifetime
3,2014-11-30,0.0091,27876,34143.0,,38145L646,GDIAX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),1 Year
4,2016-11-30,0.1312,25361,12031.0,,641224134,NVAAX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),5 Years


In [5]:
data.v_VehicleTypeName.unique()

array(['Outside Fund', 'American Fund', 'Benchmark', 'AFIS Fund',
       'CIAM Fund', 'PCS Fund', 'Composite'], dtype=object)

In [6]:
data['postdt_new'] = data['AsOfDate'].apply(lambda dt: dt.replace(day=1))
data.head()

Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleID,vc_VehicleClassID,v_AccountNumber,vc_CUSIP,vc_QuotronSymbol,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,ft_Name,fst_Name,postdt_new
0,2016-10-31,0.1383,25488,33462.0,,72201M586,PCKPX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),5 Years,2016-10-01
1,2018-07-31,-0.011431,10031,10542.0,11000043.0,532726 30 4,LTXCX,American Fund,LTEX,LTEX-C,TRMOP,Returns: Cumulative (MOP),Year to Date,2018-07-01
2,2017-03-31,0.047801,10037,10644.0,11000064.0,02631C 84 1,BBDTX,American Fund,AFTD25,AFTD25-B,ARRMOP,Returns: Average Annual (MOP or w/ CDSC),Lifetime,2017-03-01
3,2014-11-30,0.0091,27876,34143.0,,38145L646,GDIAX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),1 Year,2014-11-01
4,2016-11-30,0.1312,25361,12031.0,,641224134,NVAAX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),5 Years,2016-11-01


In [30]:
data['dummy'] = 1
data['v_AccountNumber'].fillna('NA')
print(data.shape)
data.head()

(12977852, 15)


Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleID,vc_VehicleClassID,v_AccountNumber,vc_CUSIP,vc_QuotronSymbol,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,ft_Name,fst_Name,postdt_new,dummy
0,2014-08-31,-0.035,22980,14381.0,,31618H663,FLFTX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),Lifetime,2014-08-01,1
1,2017-12-31,0.030565,10031,10542.0,11000043.0,532726 30 4,LTXCX,American Fund,LTEX,LTEX-C,ARRMOP,Returns: Average Annual (MOP or w/ CDSC),20 Years,2017-12-01,1
2,1980-02-29,-0.0632,10161,,,,,Benchmark,FTSE Credit Idx,,TRNAV,Returns: Cumulative (NAV),1 Month,1980-02-01,1
3,2014-08-31,0.2593,23128,21147.0,,921913208,VGIAX,Outside Fund,,,ARRNAV,Returns: Average Annual (NAV),1 Year,2014-08-01,1
4,2011-09-30,94.73,10085,11163.0,11000070.0,030372 60 1,,AFIS Fund,VIAA,VIAA-1,RSQ,R-Squared,Morningstar R-Squared 10 Year,2011-09-01,1


In [7]:
# df_grouped = data.groupby(['postdt_new', 'v_VehicleTypeName', 'v_AbbreviatedName', 'vc_AbbreviatedName','ft_FactTypeCode','ft_Name','fst_Name'])['dummy'].sum().reset_index()
# print(df_grouped.shape)
# df_grouped.head()

(8907087, 8)


Unnamed: 0,postdt_new,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,ft_Name,fst_Name,dummy
0,1933-12-01,American Fund,ICA,ICA-529A,TRMOP,Returns: Cumulative (MOP),Lifetime,1
1,1933-12-01,American Fund,ICA,ICA-529A,TRNAV,Returns: Cumulative (NAV),Lifetime,1
2,1933-12-01,American Fund,ICA,ICA-529B,TRMOP,Returns: Cumulative (MOP),Lifetime,1
3,1933-12-01,American Fund,ICA,ICA-529B,TRNAV,Returns: Cumulative (NAV),Lifetime,1
4,1933-12-01,American Fund,ICA,ICA-529C,TRMOP,Returns: Cumulative (MOP),Lifetime,1


In [8]:
# df_grouped.rename(columns={'dummy':'FundSC_cnt'}, inplace=True)
# df_grouped.head()

Unnamed: 0,postdt_new,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,ft_Name,fst_Name,FundSC_cnt
0,1933-12-01,American Fund,ICA,ICA-529A,TRMOP,Returns: Cumulative (MOP),Lifetime,1
1,1933-12-01,American Fund,ICA,ICA-529A,TRNAV,Returns: Cumulative (NAV),Lifetime,1
2,1933-12-01,American Fund,ICA,ICA-529B,TRMOP,Returns: Cumulative (MOP),Lifetime,1
3,1933-12-01,American Fund,ICA,ICA-529B,TRNAV,Returns: Cumulative (NAV),Lifetime,1
4,1933-12-01,American Fund,ICA,ICA-529C,TRMOP,Returns: Cumulative (MOP),Lifetime,1


In [38]:
data_slim =data[['postdt_new', 'v_VehicleTypeName', 'v_AbbreviatedName', 'vc_AbbreviatedName','ft_FactTypeCode','ft_Name','fst_Name']]

In [36]:
data_slim.to_csv('1_1_MSS_FAMIS_v2.csv')

In [40]:
#data_slim_grouped =data[['postdt_new', 'v_VehicleTypeName', 'v_AbbreviatedName', 'vc_AbbreviatedName','ft_FactTypeCode','ft_Name','fst_Name','dummy']].groupby(['postdt_new', 'v_VehicleTypeName', 'v_AbbreviatedName', 'ft_FactTypeCode','ft_Name','fst_Name'])['dummy'].sum().reset_index()

In [41]:
data_slim_grouped.dummy.sum()

10595186

In [7]:
test=data[data['vc_AbbreviatedName'] == 'AMCAP-R6']

In [9]:
print(test.v_VehicleID.unique())
print(test.vc_VehicleClassID.unique())

[10001]
[ 10036.]
