In [None]:

# Author Yansheng Zhu
# Time last edited: 21:00 AUG 24, 2020
# Last edited by: Yansheng Zhu



# Description: uses Monte Carlo simulation to estimate the 'distribution' of a long-short strategy's performance in a given month for a given portfolio.
#              The function returns the strategy performance at every integer percentile (e.g. a monthly return of 2% at the 99th percentile
#              means that 99% of the random investments performed worse than 2%).
#
# Input: Portfolio Universe dictionary, month in str form (e.g. '2015-01-01'), long in int form, short in int form,
#        check_month (bool), used to check whether the randomly selected stocks have data in the given month. If the given month is included
#                            in the portfolio date range, then check_month should be False
#        sample_size, the number of random trials used
# Return: percentiles, a column of int percentile from 1% to 99%
#         percentile_return, the corresponding return at each int-percentile

import random
def month_sample_return(portfolio,month,long,short,check_month=False,sample_size=5000):
    """Simulate random long-short portfolios for one month and return the return percentiles.

    Each trial draws ``long`` stocks to hold long and ``short`` stocks to hold
    short, sums the signed open-to-close returns ``(Close - Open) / Open`` of
    the row whose ``Date`` equals ``month`` and divides by ``long + short``.

    Args:
        portfolio: dict mapping a stock key to a DataFrame that has 'Date',
            'Open' and 'Close' columns.
        month: value looked up in each frame's 'Date' column (e.g. '2015-01-01').
        long: number of stocks drawn for the long leg.
        short: number of stocks drawn for the short leg.
        check_month: when True, only stocks whose 'Date' column contains
            ``month`` are drawn and the two legs never share a stock. When
            False, both legs are drawn independently from the whole universe
            (so they may overlap) and a drawn stock without ``month`` raises
            ``KeyError``.
        sample_size: number of random trials.

    Returns:
        (percentiles, percentile_return): the integers 1..99 and the simulated
        return at each of those percentiles (via ``np.percentile``).

    Raises:
        ValueError: if ``check_month`` is True and fewer than ``long + short``
            stocks have data for ``month`` (the previous implementation looped
            forever in that case), or if a leg is larger than its sampling pool.
    """
    # random.sample() needs a sequence; dict key views are rejected on Python 3.11+.
    universe = list(portfolio)

    if check_month:
        # Hoisted out of the trial loop: eligibility does not change between trials.
        eligible = [stk for stk in universe if month in list(portfolio[stk]['Date'])]
        if len(eligible) < long + short:
            raise ValueError(
                'only %d stocks have data for %s but %d are required'
                % (len(eligible), month, long + short)
            )
    else:
        eligible = universe

    # Each stock's return for the month is fixed, so compute it at most once.
    # The cache is filled lazily so that, with check_month=False, a stock that
    # lacks the month only fails if it is actually drawn.
    return_cache = {}

    def stock_return(stk):
        if stk not in return_cache:
            row = portfolio[stk].set_index('Date').loc[month]
            return_cache[stk] = (row['Close'] - row['Open']) / row['Open']
        return return_cache[stk]

    returns = []
    for _ in range(sample_size):
        long_stocks = random.sample(eligible, long)

        if check_month:
            # Drawing the short leg from the eligible stocks not already held
            # long yields the same uniform selection as rejection resampling.
            held_long = set(long_stocks)
            short_pool = [stk for stk in eligible if stk not in held_long]
        else:
            short_pool = eligible
        short_stocks = random.sample(short_pool, short)

        total = 0
        for stk in long_stocks:
            total += stock_return(stk)
        for stk in short_stocks:
            total -= stock_return(stk)

        returns.append(total / (long + short))

    percentiles = list(range(1, 100))
    percentile_return = [np.percentile(returns, p) for p in percentiles]
    return percentiles, percentile_return
    
 

In [None]:
# Description: this function gets the 'distribution' of a long-short strategy's performance throughout the given date range for a given portfolio
#              by calling the month_sample_return() function for each month
# Input: Portfolio Universe dictionary, start_month in str form (e.g. '2015-01-01'), end_month in str form (e.g. '2019-01-01'),
#        long in int form, short in int form,
#        check_month (bool), used to check whether the randomly selected stocks have data in the given month. If the given month is included
#                            in the portfolio date range, then check_month should be False
#        sample_size, the number of random trials used
#        path, for storing the result data
# Output: a dataframe stored in the given path, with columns representing each month and rows representing the percentile


def universe_sample_return(portfolio,start_month,end_month,long,short,check_month=False,sample_size=5000,path='data/p_value/'):
    """Build and save a table of simulated long-short return percentiles per month.

    For every month start between ``start_month`` and ``end_month`` the
    function prints the month label, calls ``month_sample_return`` and stores
    the returned percentile returns as that month's column.

    Args:
        portfolio: stock universe dictionary passed through to
            ``month_sample_return``.
        start_month: first month of the range, e.g. '2015-01-01'.
        end_month: last month of the range, e.g. '2019-01-01'.
        long: number of long positions per trial.
        short: number of short positions per trial.
        check_month: forwarded to ``month_sample_return``.
        sample_size: number of random trials per month.
        path: prefix prepended to the output file name.

    Returns:
        The DataFrame that was written to
        ``path + 'long_<long>_short_<short>.csv'``: a leading 'percentile'
        column ('1%' .. '99%') followed by one column per month.
    """
    month_labels = pd.date_range(start_month, end_month, freq='MS').strftime('%Y-%m-%d')
    table = pd.DataFrame(columns=month_labels)

    for month_label in table.columns:
        print(month_label)
        _, monthly_returns = month_sample_return(
            portfolio, month_label, long, short, check_month, sample_size
        )
        table[month_label] = monthly_returns

    # Create the label column as the first column, then fill it with the labels.
    table.insert(0, 'percentile', 0)
    table['percentile'] = [f'{rank}%' for rank in range(1, 100)]

    out_file = path + f'long_{long}_short_{short}.csv'
    table.to_csv(out_file)
    return table