### Import Packages

In [1]:
import os
import pyodbc
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline

### Import Data

#### `IM_I_RESULTSRELEASED_S.dbo.PortfolioAbsReturnReleased`

In [6]:
# ODBC connection string, built from separate attributes so that no
# source-code indentation whitespace ends up inside the string itself.
# Trusted_Connection=yes: no user name or password is passed in the string.
conn_str = ';'.join([
    'Driver={SQL Server Native Client 11.0}',
    'Server=lasr-sqldb-prd-im,17001',
    'Database=IM_I_RESULTSRELEASED_S',
    'Trusted_Connection=yes',
]) + ';'

# Open the connection used by the query cell below.
cnxn = pyodbc.connect(conn_str)

In [7]:
# Query: portfolio id, period type code, gross cumulative return and
# calc-period end date from PortfolioAbsReturnReleased, left-joined to
# periodtype on DWPeriodTypeID. Rows are restricted to CurrencyUID = 1,
# feetypeuid in (0, -9999999999) and non-null gross cumulative returns.
# NOTE(review): the meaning of CurrencyUID = 1 and of the two feetypeuid
# values is not visible in this notebook - confirm against the data dictionary.
sql_query = """
select
    A.PortfolioUID as PID, 
	B.PeriodTypeCode as periodtype,
    A.PortfolioGrossCumulativeReturn as rtnpct,
    A.CalcPeriodEndDate as POSTDT
from 
    IM_I_RESULTSRELEASED_S.dbo.PortfolioAbsReturnReleased as A
  left join
    IM_I_RESULTSRELEASED_S.dbo.periodtype as B
  on A.DWPeriodTypeID = B.DWPeriodTypeID
where
    A.CurrencyUID = 1 and A.feetypeuid in (0, -9999999999) and A.PortfolioGrossCumulativeReturn is not NULL    
"""

# Save the data
# The close happens in `finally` so the connection is released even when the
# query raises (bad SQL, timeout, dropped connection).
try:
    data = pd.read_sql(sql_query, cnxn)
finally:
    # Close the connection with LASR
    cnxn.close()


In [8]:
# Report the (rows, columns) size of the extract, then preview the first rows.
row_count, col_count = data.shape
print((row_count, col_count))
data.head(n=5)

(375666, 4)


Unnamed: 0,PID,periodtype,rtnpct,POSTDT
0,301224,3MO,0.028709,2017-07-31
1,301220,6MO,0.023262,2017-06-30
2,301226,3MO,0.021212,2017-06-30
3,301223,QTD,0.0113,2017-07-31
4,298872,YTD,0.158234,2017-10-31


### Data Preprocessing

In [10]:
def _first_of_month(dt):
    """Return `dt` with its day-of-month replaced by 1."""
    return dt.replace(day=1)


# Derive a month-start key from POSTDT (e.g. 2017-07-31 -> 2017-07-01).
# The helper is applied element by element, so every POSTDT value must
# provide a `.replace(day=...)` method.
data['postdt_new'] = data['POSTDT'].apply(_first_of_month)
data.head()

Unnamed: 0,PID,periodtype,rtnpct,POSTDT,postdt_new
0,301224,3MO,0.028709,2017-07-31,2017-07-01
1,301220,6MO,0.023262,2017-06-30,2017-06-01
2,301226,3MO,0.021212,2017-06-30,2017-06-01
3,301223,QTD,0.0113,2017-07-31,2017-07-01
4,298872,YTD,0.158234,2017-10-31,2017-10-01


In [13]:
# Display the current (rows, columns) dimensions of `data`.
data.shape

(375666, 5)

In [30]:
#check no duplicate records
data.drop_duplicates(subset=['PID', 'periodtype', 'postdt_new'], keep='first', inplace=False).count()

PID           375666
periodtype    375666
rtnpct        375666
POSTDT        375666
postdt_new    375666
dtype: int64

In [33]:
# Number of non-null PID values for every (postdt_new, periodtype) pair,
# returned as a flat frame with the count stored in `count_PID`.
group_keys = ['postdt_new', 'periodtype']
new_df = data.groupby(group_keys)['PID'].count().reset_index(name='count_PID')
print(new_df.shape)
new_df.head()

(14014, 3)


Unnamed: 0,postdt_new,periodtype,count_PID
0,1934-01-01,1MO,2
1,1934-01-01,LFT,2
2,1934-01-01,QTD,2
3,1934-01-01,YTD,2
4,1934-02-01,1MO,2


In [34]:
# Write the summary to CSV in the working directory; the row index is
# written as the first column (index=True is the pandas default).
summary_csv = 'summary_PortfolioAbsReturnReleased.csv'
new_df.to_csv(summary_csv, index=True)