In [None]:
def build_machine(portfolio, features, model, date, hist_depth, train_depth, use_scaling=True, target='Close'):
    """Fit ``model`` for ``date``, predict per ticker and flag residual outliers.

    The training frame comes from ``build_train_df`` and the prediction frame
    (plus its ticker list) from ``build_test_df``. ``model`` only needs
    ``fit(X, y)`` and ``predict(X)``.

    Args:
        portfolio: Mapping keyed by ticker; forwarded to the ``build_*``
            helpers and used (by key position) to name outliers.
        features: Forwarded unchanged to the ``build_*`` helpers.
        model: Estimator exposing ``fit`` and ``predict``.
        date: Date forwarded to the ``build_*`` helpers.
        hist_depth: Forwarded to ``build_train_df`` / ``build_test_df``.
        train_depth: Forwarded to ``build_train_df``.
        use_scaling: When true, the returned predictions are converted to a
            relative change versus the previous close,
            ``(prediction - previous_close) / previous_close``.
        target: Target column name forwarded to the ``build_*`` helpers.

    Returns:
        A tuple ``(returns_dict, train_extrem, test_extrem)``:
        ``returns_dict`` maps each ticker from ``build_test_df`` to its
        prediction; ``train_extrem`` lists portfolio keys whose average
        training residual is above the 98th percentile; ``test_extrem`` lists
        portfolio keys whose test return residual is above the 95th
        percentile.
    """
    train_df = build_train_df(portfolio, features, date, hist_depth, train_depth, target=target)

    # Every column except 'Target' is used as a feature.
    X = train_df.loc[:, train_df.columns != 'Target'].values
    y = train_df['Target'].values

    model.fit(X, y)

    test_df, tickers = build_test_df(portfolio, features, date, hist_depth, target=target)
    X_test = test_df.values

    previous_close = build_previous_close(portfolio, date)['Close 1'].values
    actual_close = build_actual_close(portfolio, date)

    # In-sample and out-of-sample predictions.
    predicted_train = model.predict(X)
    predicted_returns = model.predict(X_test)

    # Relative absolute error of the model on each training row.
    train_return_residual = abs(predicted_train - y) / y

    # Average training residual per stock.
    # NOTE(review): this assumes the training rows are grouped per stock in
    # blocks of 3 consecutive rows, in the same stock order as the test rows.
    # The 3 presumably mirrors the train_depth used so far -- confirm against
    # build_train_df before tying it to train_depth.
    rows_per_stock = 3
    ave_residual = np.array([
        sum(train_return_residual[rows_per_stock * i + k] for k in range(rows_per_stock)) / rows_per_stock
        for i in range(len(X_test))
    ])

    # Absolute gap between predicted and realised return, both measured
    # against the previous close. Computed before any scaling below, so the
    # raw predictions are compared with actual_close directly.
    test_residual_returns = abs(
        (predicted_returns - previous_close) / previous_close
        - (actual_close - previous_close) / previous_close
    )

    if use_scaling:
        predicted_returns = (predicted_returns - previous_close) / previous_close

    returns_dict = dict(zip(tickers, list(predicted_returns)))

    # Outlier tickers are looked up by position among the portfolio keys.
    # NOTE(review): this assumes the residual arrays follow portfolio key
    # order; `tickers` from build_test_df may be the safer source -- confirm.
    names = list(portfolio.keys())
    test_extrem = _tickers_above_percentile(names, test_residual_returns, 95)
    train_extrem = _tickers_above_percentile(names, ave_residual, 98)

    return returns_dict, train_extrem, test_extrem


def _tickers_above_percentile(names, values, percentile):
    """Return ``names[i]`` for every ``values[i]`` strictly above the percentile."""
    # No values means no outliers; np.percentile is not called on empty input.
    if len(values) == 0:
        return []
    # Compute the cut-off once instead of once per element.
    threshold = np.percentile(values, percentile)
    return [names[i] for i in range(len(values)) if values[i] > threshold]

In [None]:
def backtest(portfolio, features, model, hist_depth, train_depth, start_date, end_date, 
             allocation_builder=fixed_long_short, params=None, blacklist=None, 
             target='Close'):
    """Run a month-by-month long/short backtest and collect the results.

    For every month start between ``start_date`` and ``end_date`` the function
    cleans the working portfolio, fits and predicts through ``build_machine``,
    builds an allocation with ``allocation_builder`` and averages the returns
    of the long positions together with the sign-flipped returns of the short
    positions. After the loop the run is passed to ``build_documentation``.

    Args:
        portfolio: Mapping keyed by ticker. It is deep-copied before any
            modification, so the caller's object is left untouched.
        features: Forwarded to ``build_machine`` and ``build_documentation``.
        model: Estimator forwarded to ``build_machine``.
        hist_depth: Forwarded to ``build_machine``.
        train_depth: Forwarded to ``build_machine``.
        start_date: First date of the range handed to ``pd.date_range``.
        end_date: Last date of the range handed to ``pd.date_range``.
        allocation_builder: Callable taking the predicted-returns dict plus
            ``**params`` and returning a mapping with ``'long'`` and
            ``'short'`` ticker sequences.
        params: Extra keyword arguments for ``allocation_builder``;
            ``None`` means no extra arguments.
        blacklist: Tickers to drop from the working portfolio; ``None`` means
            nothing is dropped.
        target: Target column name forwarded to ``build_machine``.

    Returns:
        A tuple ``(overall_returns, specific_returns, train_extrems,
        test_extrems)`` with one entry per month in each list:
        the average return, a ``{'long': {...}, 'short': {...}}`` dict of
        per-ticker returns (short returns already sign-flipped), and the
        training / test outlier ticker lists reported by ``build_machine``.
    """
    # None sentinels avoid sharing one mutable default object across calls.
    params = {} if params is None else params
    blacklist = set() if blacklist is None else blacklist

    use_scaling = True

    # Month-start dates as 'YYYY-MM-DD' strings.
    months = list(pd.date_range(start_date, end_date, freq='MS').strftime('%Y-%m-%d'))

    # Copy before removing anything so the caller's portfolio is never mutated.
    portfolio = deepcopy(portfolio)
    for ticker in blacklist:
        portfolio.pop(ticker, None)

    overall_returns = []
    specific_returns = []

    train_extrems = []
    test_extrems = []

    for month in months:
        start_time = time()

        # Return value is not used, so this presumably edits the copy in place.
        clean_portfolio(portfolio, month)

        returns_dict, train_extrem, test_extrem = build_machine(
            portfolio, features, model, month, hist_depth, train_depth,
            target=target, use_scaling=use_scaling,
        )
        train_extrems.append(train_extrem)
        test_extrems.append(test_extrem)

        allocation = allocation_builder(returns_dict, **params)

        long_returns = build_returns(portfolio, allocation['long'], month)
        short_returns = build_returns(portfolio, allocation['short'], month)
        # A short position gains when the price falls, hence the sign flip.
        short_returns = [short_return * -1 for short_return in short_returns]
        total_returns = long_returns + short_returns
        average_returns = sum(total_returns) / len(total_returns)

        specific_returns.append({
            'long': dict(zip(allocation['long'], long_returns)),
            'short': dict(zip(allocation['short'], short_returns)),
        })
        overall_returns.append(average_returns)

        # Progress line: month, average return, seconds spent on this month.
        print(month, round(average_returns, 5), round(time() - start_time, 2))

    build_documentation(
        portfolio, 
        features, 
        model, 
        hist_depth, 
        train_depth, 
        start_date, 
        end_date, 
        allocation_builder=allocation_builder, 
        params=params, 
        blacklist=blacklist, 
        target=target, 
        returns=overall_returns, 
        specific_ret=specific_returns
    )

    return overall_returns, specific_returns, train_extrems, test_extrems