In [1]:
import os
import pyodbc
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# Open an ODBC session to the SQL Server instance using integrated
# (Trusted_Connection) authentication.
# The connection string is assembled from adjacent literals: a backslash
# continuation inside one quoted literal would splice each following line's
# leading indentation into the string handed to the driver.
# NOTE(review): Database is IM_S_FAMIS_S here, while the extract query fully
# qualifies its tables with MSS_S_FAMIS_S -- confirm the default database is intended.
cnxn = pyodbc.connect(
    'Driver={SQL Server Native Client 11.0};'
    'Server=lasr-sqldb-prd-im,17001;'
    'Database=IM_S_FAMIS_S;'
    'Trusted_Connection=yes;'
)

In [3]:
# Extract query: one row per fact whose FactTypeCode is one of the four listed
# codes and whose ValueDecimal is populated, decorated with fact-type,
# fact-subtype, vehicle and vehicle-class attributes. The sub-type and
# vehicle-class joins are LEFT joins, so those columns may come back NULL.
sql_query = """
select
FA.AsOfDate,
FA.ValueDecimal,
V.VehicleID as v_VehicleID,
VC.VehicleClassID as vc_VehicleClassID,
--V.AccountNumber as v_AccountNumber,
--VC.CUSIP as vc_CUSIP,
--VC.QuotronSymbol as vc_QuotronSymbol,
V.VehicleTypeName as v_VehicleTypeName,
V.AbbreviatedName as v_AbbreviatedName,
VC.AbbreviatedName as vc_AbbreviatedName,
FT.FactTypeCode as ft_FactTypeCode,
--FT.Name as ft_Name,
FST.Name as fst_Name
from MSS_S_FAMIS_S.dbo.FAMIS_MART_FactExt_CURRENT as FA
  inner join
     MSS_S_FAMIS_S.dbo.FAMIS_MART_FactTypeExt_CURRENT as FT
  on FA.FactTypeID=FT.FactTypeID
  left join
	MSS_S_FAMIS_S.dbo.FAMIS_MART_FactSubTypeExt_Current as FST
  on FA.FactSubTypeID=FST.FactSubTypeID
  inner join
     MSS_S_FAMIS_S.dbo.FAMIS_MART_VehicleExt_CURRENT as V
  on FA.VehicleID = V.VehicleID
  LEFT join 
     MSS_S_FAMIS_S.dbo.FAMIS_MART_VehicleClassExt_CURRENT as VC
  on FA.VehicleClassID = VC.VehicleClassID
where FT.FactTypeCode in ('ARRMOP','ARRNAV','TRMOP','TRNAV')
and FA.ValueDecimal is not NULL
"""

# Load the result set into a DataFrame. The connection is closed in a
# `finally` clause so it is released even when the query raises.
# THIS IS IMPORTANT ALWAYS DO THIS, ESPECIALLY IF CONNECTING TO PROD
try:
    df_famis = pd.read_sql(sql_query, cnxn)
finally:
    cnxn.close()

In [4]:
# Report (rows, columns) of the extract, then preview its first five rows.
print((len(df_famis.index), len(df_famis.columns)))
df_famis.head(n=5)

(11632094, 9)


Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleID,vc_VehicleClassID,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,fst_Name
0,2016-09-30,0.0505,23982,28271.0,Outside Fund,,,ARRNAV,5 Years
1,2014-06-30,0.0676,22759,16949.0,Outside Fund,,,ARRNAV,Lifetime
2,2016-10-31,2.278697,10028,10503.0,American Fund,AHIM,AHIM-A,TRMOP,Lifetime
3,2016-10-31,0.0507,24721,21871.0,Outside Fund,,,ARRNAV,Lifetime
4,2016-09-30,0.099,22876,21129.0,Outside Fund,,,ARRNAV,Lifetime


In [5]:
# Load the vehicle-class date lookup and report its size before filtering.
df_1stsold = pd.read_csv('FAMIS_MART_VehicleClassDateExt.csv')
print(df_1stsold.shape)

# Keep only the 'Initial Sold Date' rows, discard the now-constant DateType
# column, and give the date column a descriptive name.
df_1stsold = (
    df_1stsold
    .loc[lambda frame: frame['DateType'] == 'Initial Sold Date']
    .drop(columns=['DateType'])
    .rename(columns={'Date': 'FirstSoldDt'})
)

print(df_1stsold.shape)
df_1stsold.head()

(37679, 4)
(1338, 3)


Unnamed: 0,VehicleClassDateID,VehicleClassID,FirstSoldDt
0,10001,10000,07/31/1952
1,10002,10001,03/15/2000
2,10003,10002,03/15/2001
3,10004,10003,03/15/2001
4,10005,10004,08/05/2008


In [7]:
format_str = '%m/%d/%Y' # The format

# Snap every AsOfDate to the first day of its month. The values are left as
# date-like objects (presumably datetime.date from the driver -- TODO confirm)
# because later cells compare this column against date values.
df_famis['AsOfDate'] = df_famis['AsOfDate'].apply(lambda dt: dt.replace(day=1))

# Parse the first-sold strings in a single vectorized call. Unlike a row-wise
# datetime.strptime, a missing value becomes NaT instead of raising.
# FirstSoldDt itself stays a string column; calling .replace(day=1) on its
# values fails with "replace() takes no keyword arguments" (str.replace).
df_1stsold['FirstSoldDt_dt'] = pd.to_datetime(df_1stsold['FirstSoldDt'], format=format_str)

TypeError: replace() takes no keyword arguments

In [47]:
#join first sold date to extract
# Left join so facts without a matching vehicle class keep their row
# (the first-sold columns are simply empty for them).
df_combo = df_famis.merge(
    df_1stsold,
    how='left',
    left_on='vc_VehicleClassID',
    right_on='VehicleClassID',
)

# A left join against a lookup must not add rows; extra rows would mean some
# VehicleClassID carries more than one first-sold record and facts were duplicated.
assert len(df_combo) == len(df_famis), 'first-sold lookup duplicated fact rows'

print(df_combo.shape)
df_combo.head()

(11632094, 13)


Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleID,vc_VehicleClassID,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,fst_Name,VehicleClassDateID,VehicleClassID,FirstSoldDt,FirstSoldDt_dt
0,2016-09-01,0.0505,23982,28271,Outside Fund,,,ARRNAV,5 Years,,,,NaT
1,2014-06-01,0.0676,22759,16949,Outside Fund,,,ARRNAV,Lifetime,,,,NaT
2,2016-10-01,2.278697,10028,10503,American Fund,AHIM,AHIM-A,TRMOP,Lifetime,10502.0,10503.0,09/26/1994,1994-09-26
3,2016-10-01,0.0507,24721,21871,Outside Fund,,,ARRNAV,Lifetime,,,,NaT
4,2016-09-01,0.099,22876,21129,Outside Fund,,,ARRNAV,Lifetime,,,,NaT


In [48]:
# Remove the identifier columns and the unparsed first-sold string; the parsed
# FirstSoldDt_dt column is kept.
df_combo = df_combo.drop(
    columns=[
        'VehicleClassID',
        'v_VehicleID',
        'vc_VehicleClassID',
        'VehicleClassDateID',
        'FirstSoldDt',
    ]
)
df_combo.head()

Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,fst_Name,FirstSoldDt_dt
0,2016-09-01,0.0505,Outside Fund,,,ARRNAV,5 Years,NaT
1,2014-06-01,0.0676,Outside Fund,,,ARRNAV,Lifetime,NaT
2,2016-10-01,2.278697,American Fund,AHIM,AHIM-A,TRMOP,Lifetime,1994-09-26
3,2016-10-01,0.0507,Outside Fund,,,ARRNAV,Lifetime,NaT
4,2016-09-01,0.099,Outside Fund,,,ARRNAV,Lifetime,NaT


In [58]:
# Flag a fact as hypothetical when its AsOfDate falls strictly before the
# share class's first-sold date.
# AsOfDate is converted to datetime64 for the comparison only (the stored
# column is untouched), which lets pandas compare whole columns at once instead
# of invoking a Python lambda per row. Any comparison against NaT is False, so
# rows without a first-sold date are flagged False.
df_combo['IsHypo'] = pd.to_datetime(df_combo['AsOfDate']) < df_combo['FirstSoldDt_dt']
df_combo.head()

Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,fst_Name,FirstSoldDt_dt,IsHypo
0,2016-09-01,0.0505,Outside Fund,,,ARRNAV,5 Years,NaT,False
1,2014-06-01,0.0676,Outside Fund,,,ARRNAV,Lifetime,NaT,False
2,2016-10-01,2.278697,American Fund,AHIM,AHIM-A,TRMOP,Lifetime,1994-09-26,False
3,2016-10-01,0.0507,Outside Fund,,,ARRNAV,Lifetime,NaT,False
4,2016-09-01,0.099,Outside Fund,,,ARRNAV,Lifetime,NaT,False


In [59]:
# Spot check: AMCAP-R6 facts dated on or before 1 Jan 2010, to eyeball IsHypo
# against that share class's first-sold date.
test = df_combo.loc[
    df_combo['vc_AbbreviatedName'].eq('AMCAP-R6')
    & df_combo['AsOfDate'].le(datetime.strptime('01012010', "%d%m%Y").date())
]
test

Unnamed: 0,AsOfDate,ValueDecimal,v_VehicleTypeName,v_AbbreviatedName,vc_AbbreviatedName,ft_FactTypeCode,fst_Name,FirstSoldDt_dt,IsHypo
6728251,2009-05-01,-0.0614,American Fund,AMCAP,AMCAP-R6,ARRNAV,3 Years,2009-05-01,False
6728265,2009-05-01,0.0228,American Fund,AMCAP,AMCAP-R6,ARRNAV,10 Years,2009-05-01,False
6728279,2009-06-01,-0.0624,American Fund,AMCAP,AMCAP-R6,ARRNAV,3 Years,2009-05-01,False
6728293,2009-06-01,0.0163,American Fund,AMCAP,AMCAP-R6,ARRNAV,10 Years,2009-05-01,False
6728307,2009-07-01,-0.0313,American Fund,AMCAP,AMCAP-R6,ARRNAV,3 Years,2009-05-01,False
6728321,2009-07-01,0.0279,American Fund,AMCAP,AMCAP-R6,ARRNAV,10 Years,2009-05-01,False
6728335,2009-08-01,-0.0274,American Fund,AMCAP,AMCAP-R6,ARRNAV,3 Years,2009-05-01,False
6728349,2009-08-01,0.0330,American Fund,AMCAP,AMCAP-R6,ARRNAV,10 Years,2009-05-01,False
6728363,2009-09-01,-0.0224,American Fund,AMCAP,AMCAP-R6,ARRNAV,3 Years,2009-05-01,False
6728377,2009-09-01,0.0372,American Fund,AMCAP,AMCAP-R6,ARRNAV,10 Years,2009-05-01,False


In [60]:
# Write the combined frame to CSV; the row index is written as the
# leading column (pandas default).
df_combo.to_csv(path_or_buf='FAMIS_Hypo_20181002.csv', index=True)