### Import Packages

In [1]:
import os
import pyodbc
import numpy as np
import pandas as pd
from datetime import datetime

### Import Data

#### `IM_I_RESULTSRELEASED_S.dbo.PortfolioAbsReturnReleased`

In [2]:
# ODBC connection string for the IM_I_RESULTSRELEASED_S database. It uses
# Trusted_Connection=yes, so no user name or password appears in the string.
# The backslash continuations sit inside the string literal, which means the
# leading spaces on the continuation lines are part of the value handed to
# the driver.
conn_str = 'Driver={SQL Server Native Client 11.0};\
                       Server=lasr-sqldb-prd-im,17001;\
                       Database=IM_I_RESULTSRELEASED_S;\
                       Trusted_Connection=yes;'
cnxn = pyodbc.connect(conn_str)

In [3]:
# SQL text that pulls released absolute-return records and attaches
# investment-account label columns to them.
#   * pfm: PortfolioAbsReturnReleased left-joined to periodtype, restricted to
#     CurrencyUID = 1, feetypeuid in (0, -9999999999) and a non-NULL
#     PortfolioGrossCumulativeReturn. (rtnpct is selected inside pfm but is
#     not part of the outer select list.)
#   * XW: maps PORTUID to INVPORTID (exposed as aggrid) from RA_PORTFOLIO.
#   * cg: cgportfolio row whose dweffectivefromdate/dweffectivetodate window
#     contains postdt; supplies InvestmentPortfolioTypeCode.
#   * KMPYAD_* tables: each left join is gated on one two-letter
#     InvestmentPortfolioTypeCode. The first eleven match on portfoliouid; the
#     last five (GR, MG, RM, SM, SR) match on aggrid with STARTDT/ENDDT.
#   * ia: InvestmentAccount joined on the InvestmentAccountUID chosen by the
#     CASE expression, again date-windowed against postdt. The CASE has no
#     ELSE branch, so portfolio type codes that are not listed produce NULL
#     and therefore no ia match.
# Every join is a left join, so rows without a match keep NULLs in the
# corresponding output columns.
sql_query = """
select
pfm.portfoliouid,
XW.aggrid,
pfm.postdt,
pfm.PeriodTypeCode as periodtype,
cg.InvestmentPortfolioTypeCode,
ia.InvestmentAccountUID,
ia.InvestmentAccountTypeCode,
ia.SeparateAccountTypeCode,
ia.FundAccountTypeCode,
ia.FundAccountSubTypeCode,
ia.FundAccountGroupTypeCode
from
(
select
    A.PortfolioUID as portfoliouid, 
	B.PeriodTypeCode,
    A.PortfolioGrossCumulativeReturn as rtnpct,
    A.CalcPeriodEndDate as postdt
from 
    IM_I_RESULTSRELEASED_S.dbo.PortfolioAbsReturnReleased as A
  left join
    IM_I_RESULTSRELEASED_S.dbo.periodtype as B
  on A.DWPeriodTypeID = B.DWPeriodTypeID
where
    A.CurrencyUID = 1 and A.feetypeuid in (0, -9999999999) and A.PortfolioGrossCumulativeReturn is not NULL
) as pfm
left join
	(
	select
	PORTUID as portfoliouid,
	INVPORTID as aggrid
	from IM_I_IDWBRRADM_S.dbo.RA_PORTFOLIO
	where PORTUID is not NULL
	) as XW
on pfm.PORTFOLIOUID = XW.PORTFOLIOUID
-- the labeling stuff here --
  left join 
     IM_I_PORTFOLIOPOS_S.dbo.cgportfolio as cg
  on XW.PortfolioUID=cg.PortfolioUID and pfm.postdt between  cg.dweffectivefromdate and cg.dweffectivetodate

  left join
     IM_U_RESULTS_S.KPMG.KMPYAD_AA as AA
  on cg.portfoliouid =  AA.AAPortfolioUID and pfm.postdt between  AA.dweffectivefromdate and AA.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'AA'
  left join
     IM_U_RESULTS_S.KPMG.KMPYAD_AR as AR
  on cg.portfoliouid =  AR.ARPortfolioUID and pfm.postdt between  AR.dweffectivefromdate and AR.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'AR'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_AT as AT
  on cg.portfoliouid =  AT.ATPortfolioUID and pfm.postdt between  AT.dweffectivefromdate and AT.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'AT'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_RP as RP
  on cg.portfoliouid =  RP.RPPortfolioUID and pfm.postdt between  RP.dweffectivefromdate and RP.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'RP'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_AM as AM
  on cg.portfoliouid =  AM.AMPortfolioUID and pfm.postdt between  AM.dweffectivefromdate and AM.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'AM'
  --the extra non-jasper ones
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_GA as GA
  on cg.portfoliouid =  GA.GAPortfolioUID and pfm.postdt between  GA.dweffectivefromdate and GA.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'GA'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_GM as GM
  on cg.portfoliouid =  GM.GMPortfolioUID and pfm.postdt between  GM.dweffectivefromdate and GM.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'GM'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_GP as GP
  on cg.portfoliouid =  GP.GPPortfolioUID and pfm.postdt between  GP.dweffectivefromdate and GP.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'GP'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_ID as ID
  on cg.portfoliouid =  ID.IDPortfolioUID and pfm.postdt between  ID.dweffectivefromdate and ID.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'ID'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_MR as MR
  on cg.portfoliouid =  MR.MRPortfolioUID and pfm.postdt between  MR.dweffectivefromdate and MR.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'MR'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_SG as SG
  on cg.portfoliouid =  SG.SGPortfolioUID and pfm.postdt between  SG.dweffectivefromdate and SG.dweffectivetodate and cg.InvestmentPortfolioTypeCode = 'SG'
  -----acct_mgr table-----
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_GR as GR
  on XW.aggrid =  GR.RPPCXGRPAGGRID and pfm.postdt between  GR.STARTDT and GR.ENDDT and cg.InvestmentPortfolioTypeCode = 'GR'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_MG as MG
  on XW.aggrid =  MG.MGRAGGRID and pfm.postdt between  MG.STARTDT and MG.ENDDT and cg.InvestmentPortfolioTypeCode = 'MG'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_RM as RM
  on XW.aggrid =  RM.RPPCXMGRAGGRID and pfm.postdt between  RM.STARTDT and RM.ENDDT and cg.InvestmentPortfolioTypeCode = 'RM'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_SM as SM
  on XW.aggrid =  SM.SPRGRPMGRAGGRID and pfm.postdt between  SM.STARTDT and SM.ENDDT and cg.InvestmentPortfolioTypeCode = 'SM'
  left join
       IM_U_RESULTS_S.KPMG.KMPYAD_SR as SR
  on XW.aggrid =  SR.RPPCXSPRGRPAGGRID and pfm.postdt between  SR.STARTDT and SR.ENDDT and cg.InvestmentPortfolioTypeCode = 'SR'
  -- finally join to investment account
  left join
     IM_I_ACCTPOSTRANS_S.dbo.InvestmentAccount as ia
	       ---case it out---
  on ia.InvestmentAccountUID = case when cg.InvestmentPortfolioTypeCode='AA' THEN AA.InvestmentAccountUID
                                    when cg.InvestmentPortfolioTypeCode='AR' THEN AR.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='AT' THEN AT.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='RP' THEN RP.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='AM' THEN AM.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='GA' THEN GA.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='GM' THEN GM.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='GP' THEN GP.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='ID' THEN ID.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='MR' THEN MR.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode='SG' THEN SG.InvestmentAccountUID
									----acct_mgr ones here----
									when cg.InvestmentPortfolioTypeCode  = 'GR' THEN GR.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode  = 'MG' THEN MG.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode  = 'RM' THEN RM.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode  = 'SM' THEN SM.InvestmentAccountUID
									when cg.InvestmentPortfolioTypeCode  = 'SR' THEN SR.InvestmentAccountUID
								end
  and pfm.postdt between ia.dweffectivefromdate and ia.dweffectivetodate
"""

# Run the query and keep the result set as a DataFrame.
# The connection is released in a `finally` clause so that it is closed even
# when the query raises (previously a failed read left the connection open).
try:
    data = pd.read_sql(sql_query, cnxn)
finally:
    # Close the connection with LASR
    cnxn.close()


In [4]:
# Report the (rows, columns) of the loaded frame, then preview its first rows.
print(data.shape)
data.head(n=5)

(376315, 11)


Unnamed: 0,portfoliouid,aggrid,postdt,periodtype,InvestmentPortfolioTypeCode,InvestmentAccountUID,InvestmentAccountTypeCode,SeparateAccountTypeCode,FundAccountTypeCode,FundAccountSubTypeCode,FundAccountGroupTypeCode
0,13207,10308470.0,2017-09-30,1MO,FL,,,,,,
1,274745,14333066.0,2017-11-30,YTD,CP,,,,,,
2,15723,12308472.0,2011-09-30,3MO,FL,,,,,,
3,209806,171315839.0,2017-10-31,LFT,CP,,,,,,
4,193380,157328121.0,2009-11-30,LFT,CP,,,,,,


### Data Preprocessing

In [7]:
def _month_start(dt):
    """Return `dt` with its day-of-month replaced by 1."""
    return dt.replace(day=1)


# Snap every posting date to the first day of its month and retire the
# original column. The frame is modified in place, so this cell cannot be
# re-run without reloading `data` first.
data['postdt_new'] = data['postdt'].apply(_month_start)
del data['postdt']
data.head()

Unnamed: 0,portfoliouid,aggrid,periodtype,InvestmentPortfolioTypeCode,InvestmentAccountUID,InvestmentAccountTypeCode,SeparateAccountTypeCode,FundAccountTypeCode,FundAccountSubTypeCode,FundAccountGroupTypeCode,postdt_new
0,13207,10308470.0,1MO,FL,,,,,,,2017-09-01
1,274745,14333066.0,YTD,CP,,,,,,,2017-11-01
2,15723,12308472.0,3MO,FL,,,,,,,2011-09-01
3,209806,171315839.0,LFT,CP,,,,,,,2017-10-01
4,193380,157328121.0,LFT,CP,,,,,,,2009-11-01


In [8]:
# Check that (portfoliouid, postdt_new, periodtype) identifies a row uniquely.
# The assertion makes the check fail loudly; the printed per-column counts are
# kept for reference but no longer have to be compared with the row count by eye.
key_cols = ['portfoliouid', 'postdt_new', 'periodtype']
n_dupes = int(data.duplicated(subset=key_cols, keep='first').sum())
assert n_dupes == 0, '{} duplicate rows on {}'.format(n_dupes, key_cols)
print(data.drop_duplicates(subset=key_cols, keep='first', inplace=False).count())

portfoliouid                   376315
aggrid                         374313
periodtype                     376315
InvestmentPortfolioTypeCode    374313
InvestmentAccountUID              669
InvestmentAccountTypeCode         669
SeparateAccountTypeCode             0
FundAccountTypeCode               138
FundAccountSubTypeCode              0
FundAccountGroupTypeCode            0
postdt_new                     376315
dtype: int64


In [9]:
# Switch to the short column names used by the summary cells, then remove the
# aggrid and InvestmentAccountUID columns. Both steps modify `data` in place.
short_names = {
    'postdt_new': 'postdt',
    'periodtype': 'rtntyp',
    'InvestmentPortfolioTypeCode': 'Portype',
    'InvestmentAccountTypeCode': 'lab1',
    'SeparateAccountTypeCode': 'lab2',
    'FundAccountTypeCode': 'lab3',
    'FundAccountSubTypeCode': 'lab4',
    'FundAccountGroupTypeCode': 'lab5',
}
data.rename(columns=short_names, inplace=True)
data.drop(labels=['aggrid', 'InvestmentAccountUID'], axis='columns', inplace=True)
print(data.shape)
data.head()

(376315, 9)


Unnamed: 0,portfoliouid,rtntyp,Portype,lab1,lab2,lab3,lab4,lab5,postdt
0,13207,1MO,FL,,,,,,2017-09-01
1,274745,YTD,CP,,,,,,2017-11-01
2,15723,3MO,FL,,,,,,2011-09-01
3,209806,LFT,CP,,,,,,2017-10-01
4,193380,LFT,CP,,,,,,2009-11-01


In [10]:
# Replace every missing value with the literal string 'N/A' (in a new frame;
# `data` itself is left untouched). The label columns are used as group-by
# keys in the summary cell, and pandas leaves rows with NaN keys out of a
# group-by by default.
df_final_filled = data.fillna(value='N/A')

In [11]:
# Preview the filled frame.
df_final_filled.head(n=5)

Unnamed: 0,portfoliouid,rtntyp,Portype,lab1,lab2,lab3,lab4,lab5,postdt
0,13207,1MO,FL,,,,,,2017-09-01
1,274745,YTD,CP,,,,,,2017-11-01
2,15723,3MO,FL,,,,,,2011-09-01
3,209806,LFT,CP,,,,,,2017-10-01
4,193380,LFT,CP,,,,,,2009-11-01


In [12]:
# Count portfoliouid values for each combination of posting month, return
# type, portfolio type and the five label columns.
group_keys = ['postdt', 'rtntyp', 'Portype', 'lab1', 'lab2', 'lab3', 'lab4', 'lab5']
df_summary_out = (
    df_final_filled
    .groupby(group_keys)['portfoliouid']
    .count()
    .rename('portuid_cnt')   # name the count column before flattening the index
    .reset_index()
)
print(df_summary_out.shape)
df_summary_out.head()

(18033, 9)


Unnamed: 0,postdt,rtntyp,Portype,lab1,lab2,lab3,lab4,lab5,portuid_cnt
0,1934-01-01,1MO,CP,,,,,,2
1,1934-01-01,LFT,CP,,,,,,2
2,1934-01-01,QTD,CP,,,,,,2
3,1934-01-01,YTD,CP,,,,,,2
4,1934-02-01,1MO,CP,,,,,,2


In [13]:
# Write the summary to a CSV in the current working directory. `index` is left
# at its default, so the frame's row index is written as the first (unnamed)
# column.
summary_csv_name = 'AbsReturnReleased_w_Labels_Summarized_20181008.csv'
df_summary_out.to_csv(summary_csv_name)

In [15]:
# Sanity check: the grouped counts should add back up to the number of rows
# that went into the summary.
df_summary_out['portuid_cnt'].sum()

376315