In [1]:
import pandas as pd
import numpy as np

import re

import matplotlib
import matplotlib.pyplot as plt

from datetime import datetime
from difflib import get_close_matches

from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

from sklearn.naive_bayes import MultinomialNB

from sklearn.model_selection import GridSearchCV


from sklearn.metrics import accuracy_score

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn import svm

from sklearn.metrics import f1_score

from xgboost import XGBClassifier, XGBRegressor, plot_importance

from sklearn.utils import shuffle

In [2]:
class DecisionMaking:
    """
    A class that takes pre-trained models and product the next predicted values
    
    list_to_add_unit_hour - list of rolling averages to add in the unit of hour, e.g.
                                list_to_add_unit_hour = [1, 2, 4] , will produce RAs of
                                1H, 2H, 4H (window calculated by dividing by the index_unit)

    index_unit - unit of index (whatever was used in the grouper), e.g. '15min' (takes minutes)


    """
    
    def __init__(self, columns_to_norm, base, list_to_add_unit_hour, index_unit, model_avg, model_high, model_low):
        
        self.columns_to_norm = columns_to_norm
        self.base = base
        self.list_to_add_unit_hour = list_to_add_unit_hour
        self.index_unit = index_unit
        
        self.model_avg  = model_avg
        self.model_high = model_high
        self.model_low  = model_low
        
        assert base not in columns_to_norm, "base should not be in the list of columns, as it will be dividing by 0"
    
    def get_raw_data(self, df):
        """
        raw 1 minute df
        """
        assert df.index.dtype == '<M8[ns]', "Index should be time"
        self.raw_1min_df = df.sort_index(ascending=True).copy(deep=True)
        
        return None
    
    def window_groupby(self, freq, agg_grouping, return_results=False):
        
        """
        return_results to see the table returned, otherwise, save at: self.df_windowed
        """
        
        df=self.raw_1min_df.copy(deep=True)
        grouper_key = self.raw_1min_df.index.name
        assert grouper_key != None, "no grouper key"
        
        df_temp = df.reset_index().groupby(pd.Grouper(key=grouper_key, freq=freq)).agg(agg_grouping)
        if 'open_time' in agg_grouping.keys():
            df_temp.columns = [open_time_refine(col) for col in df_temp.columns]

        df_temp['avg'] = df_temp['quote_asset_volume']/df_temp['volume']
        df_temp['avg'] = df_temp['avg'].fillna(method='ffill')  ## when volume is 0
        
        if return_results:
            return df_temp
            
        else:
            self.df_windowed = df_temp.copy(deep=True)
            return None


    
    def add_rolling_averges(self, return_results=False):
        """
        INPUT:
        return_results - boolean, defult False
        
        OUTPUT:
        (if return_results = True), dataframe with added rolling averages
        (else, save at self.df_wind_ra)
        """

        df_temp = self.df_windowed.copy(deep=True)

        rows = float(self.index_unit.replace('min',''))/60

        for ra in self.list_to_add_unit_hour:

            df_temp['ra_{x}H'.format(x=ra)] = df_temp['avg'].rolling(window=int(ra/rows)).mean()

        if return_results:
            print(".dropna(axis=0) not applied here")
            return df_temp
            
        else:
            self.df_wind_ra = df_temp.dropna(axis=0).copy(deep=True)
            return None


    
    def normalising_data(self, return_results=False):
        """
        INPUT:
        return_results - boolean, defult False

        OUTPUT:
        (if return_results = True), normalised dataframe
        (else, save at self.df_normalised_input)
        """
        
        df_temp = self.df_wind_ra.copy(deep=True)

        for col in self.columns_to_norm:
            df_temp[col] = 100 * ((df_temp[col].astype(np.float)/df_temp[self.base]) - 1)

        if return_results:
            return df_temp
            
        else:
            self.df_normalised_input = df_temp.copy(deep=True)
            return None
        
    def _return_predicted_vales(self, pred, base_value):
        final_value = (1.0 + pred/100) * base_value
        return final_value
    
    def get_predictions(self):
        """
        INPUTS:
        ml_low = machine learning model for predicting low
        ml_avg = machine learning model for predicting average
        ml_high = machine learning model for predicting high
        
        Return
        predicted: average, low, and high from the models
        """
        y_pred_avg_decision = self.model_avg.predict(self.df_normalised_input[self.columns_to_norm].values)
        y_pred_low_decision = self.model_low.predict(self.df_normalised_input[self.columns_to_norm].values)
        y_pred_high_decision = self.model_high.predict(self.df_normalised_input[self.columns_to_norm].values)

        y_pred_unscaled_avg = self._return_predicted_vales(y_pred_avg_decision[0], self.df_normalised_input[self.base].values[0])
        y_pred_unscaled_low = self._return_predicted_vales(y_pred_low_decision[0], self.df_normalised_input[self.base].values[0])
        y_pred_unscaled_high = self._return_predicted_vales(y_pred_high_decision[0], self.df_normalised_input[self.base].values[0])
            
        return {'pred_avg':y_pred_unscaled_avg,
                'pred_low':y_pred_unscaled_low,
                'pred_high':y_pred_unscaled_high}

In [3]:
class MrMarket:
    def __init__(self, usd, coin, last_event, next_event, taker_fee):
        
        self.usd_wallet = usd
        self.coin_wallet = coin
        self._LastEvent_ = last_event
        self._NextEvent_ = next_event
        self.buy_position = None
        self.sell_position = None
        self.current_low = None
        self.current_high = None
        
        self.taker_fee = taker_fee
        
        self.history_log = [{"time_of_event" : datetime.now(),  ## if back testing, now() shoud be the max time 
                              "event_name"   : self._LastEvent_,
                              "usd_wallet"   : self.usd_wallet, 
                              "coin_wallet"  : self.coin_wallet,
                              "next_action"  : self._NextEvent_,
                              "pred_low"     : self.buy_position,
                              "pred_high"    : self.sell_position,
                              "true_low"     : self.current_low,
                              "true_high"    : self.current_high
                            }]
        
        print("initialisation")
        display(pd.DataFrame(self.history_log))
        
        return None 
    
    
    
    def _event_log(self, event):
        
        self.history_log.append({"time_of_event" : self.time,  ## if back testing, now() shoud be the max time 
                                  "event_name"   :  event['event_name'],
                                  "usd_wallet"    : self.usd_wallet, 
                                  "coin_wallet"   : self.coin_wallet,
                                  "next_action"   : self._NextEvent_,
                                  "pred_low"     : self.buy_position,
                                  "pred_high"    : self.sell_position,
                                  "true_low"     : self.current_low,
                                  "true_high"    : self.current_high,
                            "position_opened_at" : self.position_opened_at
                                        })

        return "Appened to history_log"
    
    def _CheckOrder_(self):
        return self._WaitingToFillOrder_, self._NextEvent_
    
    def data(self, row):
        self.current_low = row['low']
        self.current_high = row['high']
        self.current_open = row['open']
        self.current_close = row['close']
        self.time = row.name ## of that minute
        
    
    def open_buy_position(self, buy_price_set, sell_price_set, data_at_time):
        self.buy_position = buy_price_set
        self.sell_position = sell_price_set
        
        self._LastEvent_ = "open_buy_position"
        self._NextEvent_ = "close_buy_position"
        
        self.position_opened_at = {'open_buy': data_at_time.name, 'open_sell': None}
            
        self.history_log.append({"time_of_event" : data_at_time.name,  ## if back testing, now() shoud be the max time 
                                  "event_name"    : self._LastEvent_,
                                  "usd_wallet"    : self.usd_wallet, 
                                  "coin_wallet"   : self.coin_wallet,
                                  "next_action"   : self._NextEvent_,
                                  "pred_low"     : buy_price_set,
                                  "pred_high"    : sell_price_set,
                                  "true_low"     : data_at_time['low'],
                                  "true_high"    : data_at_time['high'],
                                  "position_opened_at" : self.position_opened_at
                })
        

        self._WaitingToFillOrder_ = True
    
        return data_at_time.name
    
    def check_status(self, input_event):
        assert self._NextEvent_ == input_event, "self event: {_NextEvent_} and input: {input_event} should be the same"
        
        if self._NextEvent_ == 'close_buy_position':
            print("try to close buy position!!", self.time)
            if self.current_low < self.buy_position:
                
                "Buy executed, sell position opened"
                self.coin_wallet = self.coin_wallet + float(self.usd_wallet / self.buy_position) * (100 - self.taker_fee)/100
                self.usd_wallet = self.usd_wallet - self.usd_wallet
                self._NextEvent_ = "close_sell_position"
                
                self.position_opened_at = {'open_buy': None, 'open_sell': self.time}
                
                self._event_log({"event_name": "close_buy_position, (auto: opened sell position)"})
                
                return "Executed Buy"
            
            else:
                
                if (self.time - self.position_opened_at['open_buy']).total_seconds() >= 30*60:
                    assert pd.notnull(self.position_opened_at['open_buy']), "should have a timestampe"
                    
                    "Cancel trade, price expired"
                    
                    verdict = "Buy order expired, Reset"
                    

                    self.position_opened_at = None
                    self._WaitingToFillOrder_ = False
                    
                    self._NextEvent_ = "open_buy_position"
                    
                    self._event_log({"event_name": "expired buy order"})
                    
                    print('heere>>>>>>>' ,verdict, self._NextEvent_, self.time)
                    
                    return verdict
                
                else:
                    verdict = "Buy condition not met"
                    self._event_log({"event_name": verdict})

                    return verdict
            
            
        elif self._NextEvent_ == 'close_sell_position':
            if self.current_high > self.sell_position:
                
                "Sell executed, waiting to open the next buy order"
                self.usd_wallet = self.usd_wallet + self.sell_position * self.coin_wallet * (100 - self.taker_fee)/100
                self.coin_wallet = self.coin_wallet - self.coin_wallet
                self._NextEvent_ = "open_buy_position"

                
                self._event_log({"event_name": "close_sell_position"})
                
                self._WaitingToFillOrder_ = False
                
                return "Executed Sell"
            
            else:
                
                if (self.time - self.position_opened_at['open_sell']).total_seconds() >= 30*60:
                    assert pd.notnull(self.position_opened_at['open_sell']), "should have a timestampe"
                    print("sell order taking too lond?",  self.time)
#                     !! ?? sell at a loss ??

#                 else:
                verdict = "Sell condition not met"
                self._event_log({"event_name": verdict})

                return verdict

### Data

In [6]:
def window_groupby(df, grouper_key, freq, agg_grouping):
    
    df_temp = df.reset_index().groupby(pd.Grouper(key=grouper_key, freq=freq)).agg(agg_grouping)
    
    if 'open_time' in agg_grouping.keys():
        df_temp.columns = [open_time_refine(col) for col in df_temp.columns]
    
    df_temp['avg'] = df_temp['quote_asset_volume']/df_temp['volume']
    
    return df_temp

In [7]:
def add_rolling_averges(df, list_to_add_unit_hour, index_unit):
    """
    INPUT:
    df - dataframe (assume the datafram is already sorted)
    list_to_add_unit_hour - list of rolling averages to add in the unit of hour, e.g.
                            list_to_add_unit_hour = [1, 2, 4] , will produce R.As of
                            1H, 2H, 4H (window calculated by dividing by the index_unit)
                            
    index_unit - unit of index (whatever was used in the grouper), e.g. '15min' (takes minutes)
    
    OUTPUT:
    dataframe with added rolling averages
    """
    df_temp = df
    
    rows = float(index_unit.replace('min',''))/60
    
    for ra in list_to_add_unit_hour:
        
        df_temp['ra_{x}H'.format(x=ra)] = df_temp['avg'].rolling(window=int(ra/rows)).mean()
    
    return df_temp

In [8]:
def run_decision(decision_class, df_past_4hours):
    decision_class.get_raw_data(df_past_4hours)
    decision_class.window_groupby(freq='15min', 
                      agg_grouping={'open': 'first',
                                     'high': 'max',
                                     'low': 'min',
                                     'close': 'last',
                                     'volume': 'sum',
                                     'quote_asset_volume': 'sum',
                                     'open_time': ['min', 'max', 'count']}
                     )
    decision_class.add_rolling_averges()
    decision_class.normalising_data()
    
#     display(decision_class.df_normalised_input[['open_time_max']])
    return (decision_class.get_predictions() )

In [9]:
df_ETHUSD_1min = pd.read_parquet('../ETH-USDT.parquet')

In [10]:
df_ETHUSD_1min = df_ETHUSD_1min[df_ETHUSD_1min['volume'] != 0]

In [11]:
df_ETHUSD_1min.loc['2017-09-12 06:10:00']

Unnamed: 0_level_0,open,high,low,close,volume,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-09-12 06:10:00,309.779999,309.779999,309.779999,309.779999,0.40098,124.215584,4,0.0,0.0


In [12]:
# df_ETHUSD_1min['volume'] = df_ETHUSD_1min['volume'] + 1

In [13]:
# df_ETHUSD_1min['volume'] = np.clip(df_ETHUSD_1min['volume'], 1, df_ETHUSD_1min['volume'].max()) ??

In [14]:
df_ETHUSD_1min = df_ETHUSD_1min.sort_index(ascending=True)

In [15]:
# df_ETHUSD_1min['avg'] = df_ETHUSD_1min['quote_asset_volume']/df_ETHUSD_1min['volume']

low and high are different for different rolling averages, therefore, there should be 2 segment-paris (or 4 segments for):

> bull low v ra <br/> bull high v ra <br/> bear low v ra <br/>  bear high v ra
    
    
e.g. in a bull market, low ticker vs rolling average of X (this X needs to be found)
(expect abs(low-ra | bull) < abs(low-ra | bear)  :: absolute value of low minus ra give bull => abs(low-ra | bull)

In [16]:
def open_time_refine(txt):
    if txt[0] != 'open_time':
        return txt[0]
    
    else:
        return txt[0] + '_' +txt[1]

In [17]:
agg_grouping = {'open':'first',
                'high':'max',
                'low':'min',
                'close':'last',
                'volume':'sum',
                'quote_asset_volume':'sum',
                'open_time':['min','max','count']
               }


In [18]:
df_15min = window_groupby(df=df_ETHUSD_1min,
                       grouper_key='open_time',
                       freq='15min',
                       agg_grouping=agg_grouping
                         )

In [19]:
df_prepare = add_rolling_averges(df_15min, [1,2,4], '15min').dropna(axis=0).copy(deep=True)

In [20]:
df_prepare['y_avg'] = df_prepare['avg'].shift(-1)

In [21]:
df_prepare['y_low'] = 100 * ((df_prepare['low'].shift(-1) - df_prepare['y_avg'])/df_prepare['y_avg'])
df_prepare['y_high'] = 100 * ((df_prepare['high'].shift(-1) - df_prepare['y_avg'])/df_prepare['y_avg'])
df_prepare['y_avg'] = 100 * ((df_prepare['avg'].shift(-1) - df_prepare['avg'])/df_prepare['avg'])

2 approaches to try here:
> 1. engineer some features and use typical ML mode
> 2. remove most of the features (ra etc) and input into a neural network

predict low and high:
> if high reached first, cancel the position (since high --> low situation) <br/>
> want low to arrive first (since low --> high situation)

(**thinking about this**) <br/>
for each approach, need to train 2 models:
> model_1_low: first training, predict the low <br/>
> model_2_higih: second training, with the low_pred value from the model before, predict the high

### Traditional Approach

first to train a model to predict the lows and highs in the next 15minutes

In [22]:
def normalising_data(df, columns, base):
    """
    INPUT:
    df - dataframe to normalise
    columns - list of columns to be normalised (column/base)
    base - name of the normalised base (denominator)
    
    
    OUTPUT:
    df - normalised dataframe
    """
    assert base not in columns, "base should not be in the list of columns, as it will be dividing by 0"
    
    df_temp = df.copy(deep=True)
    
    for col in columns:
        df_temp[col] = 100 * ((df_temp[col].astype(np.float)/df_temp[base]) - 1)
    
    return df_temp

In [23]:
base = 'ra_1H'
columns_to_norm = [d for d in ['open','high','low','close','avg','ra_1H','ra_2H','ra_4H'] if d != base]
y_columns = ['y_avg','y_low','y_high']

In [24]:
df_normalised = normalising_data(df_prepare, columns=columns_to_norm, base=base)

In [25]:
## preparing dataframe for training
df_input = df_normalised[columns_to_norm + y_columns].copy(deep=True)

In [26]:
df_input = df_input[:-1]

In [27]:
df_input_set_aside = df_input[-17000:].copy(deep=True)

In [28]:
df_input_traintest = df_input[:-17000].copy(deep=True)

In [29]:
df_input_traintest = shuffle(df_input_traintest)

In [30]:
X_train, X_test, y_train, y_test = train_test_split(df_input_traintest[columns_to_norm].values, 
                                                    df_input_traintest[['y_avg','y_low','y_high']].values ,
                                                    train_size=0.70,
                                                    random_state=42)

In [31]:
y_train

array([[ 0.6013337 , -0.63194096,  0.40751332],
       [-0.23544566, -0.17652541,  0.26660928],
       [-0.27557915, -0.26616523,  0.4213655 ],
       ...,
       [ 0.3280987 , -0.24822368,  0.37084153],
       [ 0.13441385, -0.1489057 ,  0.1603165 ],
       [ 0.33140427, -0.2693424 ,  0.4294711 ]], dtype=float32)

In [32]:
y_train_avg = y_train[:,0]
y_train_low = y_train[:,1]
y_train_high = y_train[:,2]

y_test_avg = y_test[:,0]
y_test_low = y_test[:,1]
y_test_high = y_test[:,2]

In [33]:
pipeline_xgb_avg_rel = Pipeline([
                            ('multi_clf', XGBRegressor(n_estimators=100, learning_rate=0.1, gamma=0, subsample=0.75,
                                       colsample_bytree=1, max_depth=7) )
                            ])


pipeline_xgb_low = Pipeline([
                            ('multi_clf', XGBRegressor(n_estimators=100, learning_rate=0.1, gamma=0, subsample=0.75,
                                       colsample_bytree=1, max_depth=7) )
                            ])

pipeline_xgb_high = Pipeline([
                            ('multi_clf', XGBRegressor(n_estimators=100, learning_rate=0.1, gamma=0, subsample=0.75,
                                       colsample_bytree=1, max_depth=7) )
                            ])


> try different learning rates (0.08 is the best so far)

In [34]:
pipeline_xgb_low.fit(X_train, y_train_low)

  "because it will generate extra copies and increase " +


Pipeline(steps=[('multi_clf',
                 XGBRegressor(base_score=0.5, booster='gbtree',
                              colsample_bylevel=1, colsample_bynode=1,
                              colsample_bytree=1, gamma=0, gpu_id=-1,
                              importance_type='gain',
                              interaction_constraints='', learning_rate=0.1,
                              max_delta_step=0, max_depth=7, min_child_weight=1,
                              missing=nan, monotone_constraints='()',
                              n_estimators=100, n_jobs=8, num_parallel_tree=1,
                              random_state=0, reg_alpha=0, reg_lambda=1,
                              scale_pos_weight=1, subsample=0.75,
                              tree_method='exact', validate_parameters=1,
                              verbosity=None))])

In [35]:
pipeline_xgb_avg_rel.fit(X_train, y_train_avg)
pipeline_xgb_high.fit(X_train, y_train_high)

Pipeline(steps=[('multi_clf',
                 XGBRegressor(base_score=0.5, booster='gbtree',
                              colsample_bylevel=1, colsample_bynode=1,
                              colsample_bytree=1, gamma=0, gpu_id=-1,
                              importance_type='gain',
                              interaction_constraints='', learning_rate=0.1,
                              max_delta_step=0, max_depth=7, min_child_weight=1,
                              missing=nan, monotone_constraints='()',
                              n_estimators=100, n_jobs=8, num_parallel_tree=1,
                              random_state=0, reg_alpha=0, reg_lambda=1,
                              scale_pos_weight=1, subsample=0.75,
                              tree_method='exact', validate_parameters=1,
                              verbosity=None))])

In [36]:
X_train

array([[-0.29479837,  0.41735258, -0.33288374, ...,  0.15096424,
        -0.31277909, -0.8256981 ],
       [ 0.21139104,  0.24090149, -0.00697913, ...,  0.12179704,
        -0.03419998,  0.07081958],
       [-0.09362069,  0.0281841 , -0.29522087, ..., -0.14982546,
         0.21539663,  0.73282985],
       ...,
       [-0.25065601,  0.56662405, -0.31051511, ...,  0.15687993,
        -0.13565203,  0.7544979 ],
       [-0.19601246,  0.51789303, -0.19601246, ...,  0.06569769,
        -0.39540185, -0.55977844],
       [-0.49914211,  0.33003339, -1.77425939, ..., -1.11705796,
        -0.51543579,  0.0441378 ]])

In [37]:
pipeline_xgb_avg_rel._final_estimator.feature_importances_

array([0.07482548, 0.11577956, 0.13486499, 0.31539315, 0.17495988,
       0.07335466, 0.11082234], dtype=float32)

In [38]:
pipeline_xgb_low._final_estimator.feature_importances_

array([0.16244796, 0.22256657, 0.33253646, 0.12634191, 0.0378809 ,
       0.04472347, 0.07350275], dtype=float32)

In [39]:
pipeline_xgb_high._final_estimator.feature_importances_

array([0.04001858, 0.2308267 , 0.43596196, 0.09280872, 0.05701578,
       0.06087828, 0.08249004], dtype=float32)

In [40]:
data_4H = pd.DataFrame([], columns = ['open','high','low','close','volume','quote_asset_volume'])
data_4H.index.name = 'open_time'

In [41]:
decide = DecisionMaking(columns_to_norm = columns_to_norm,
                        base = base,
                        list_to_add_unit_hour = [1,2,4],
                        index_unit = '15min',
                        model_avg = pipeline_xgb_avg_rel,
                        model_high = pipeline_xgb_high,
                        model_low = pipeline_xgb_low)

In [42]:
min_df = df_ETHUSD_1min.iloc[0].name

In [43]:
max_df = df_ETHUSD_1min.iloc[-1].name

In [44]:
df_ETHUSD_1min_reindex = df_ETHUSD_1min.reindex(pd.date_range(start=min_df, end=max_df, freq='min')).copy(deep=True)

In [45]:
df_ETHUSD_1min_reindex = df_ETHUSD_1min_reindex.fillna(method='ffill')

In [46]:
df_ETHUSD_1min_reindex.index.name = 'open_time'

In [47]:
df_base = df_ETHUSD_1min_reindex[df_ETHUSD_1min_reindex.index >= '2020-12-01'][:239]  ## + 12 to make sure it starts on the multiples of quarter minutes
df_continue = df_ETHUSD_1min_reindex[df_ETHUSD_1min_reindex.index >= '2020-12-01'][239:]

In [48]:
### Action

In [49]:

history_log = [{'time': datetime.now(),
                'last_event': 'start outside log',
                'next_event': 'begin trading'}]
n = 0
to_skip_trade = False

## fee of 0.4%
downward_adjustment = 1  ## unit percent
dif_high_low = 3 ## unit percent
order_high_from_low = 1.5 ## unit percent, at least 0.4

# eth_bot1 = TradingBot(100, 0)
# next_event = eth_bot1.get_log()[-1]['next_action'] 

prediction_log = []

number_of_window = 0

_Waiting_ = False  ## waiting for orders
_LastEvent_ = 'initiation'
_NextEvent_ = 'open_buy_position'  # open_buy_position | close_buy_position | close_sell_position
                                   # open_sell_position <- should be done in MrMarket
                                    

MrMarket_ETH = MrMarket(usd=100,
                        coin=0, 
                        last_event=_LastEvent_,
                        next_event=_NextEvent_,
                        taker_fee=0.4)
wait_for_one = False
        
for k, row in df_continue.iterrows():
    """4H of data"""
    row_append =pd.Series( {'open' :row['open'],
                          'high' :row['high'],
                          'low'  :row['low'],
                          'close':row['close'],
                          'volume':row['volume'],
                          'quote_asset_volume':row['quote_asset_volume']
                       }, 
                     name = row.name
                     )

    df_base = df_base.append(row_append, ignore_index=False)
    
    ## if at 15min point, and ....
    if (n%15 == 0) and \
       (_NextEvent_ == 'open_buy_position') and \
       (_Waiting_ == False) :
#         print('ML training ---------------------', row.name)
        for_log = run_decision(decide, df_base) ## apend to log
        
#         print("condition:  ", 100 * (for_log['pred_high']/for_log['pred_low'] -1) > dif_high_low, 100 * (for_log['pred_high']/for_log['pred_low'] -1))
        
        ## check if the margin is okay
        if 100 * (for_log['pred_high']/for_log['pred_low'] -1) > dif_high_low:
            number_of_window += 1
            
            predicted_values = run_decision(decide, df_base)
            low_price_position = predicted_values['pred_low'] * ((100-downward_adjustment)/100)
            high_price_position = low_price_position * (100 + order_high_from_low)/100


            MrMarket_ETH.open_buy_position(low_price_position, high_price_position, row) # should then have a buy position opened at low price, then a sell position once that is triggered
            _LastEvent_ = _NextEvent_
            _Waiting_, _NextEvent_ = MrMarket_ETH._CheckOrder_() ## True, close_buy_position
        
            wait_for_one = True
            
            assert _Waiting_ in [True, False], "_Waiting_ must be boolean"
            assert _NextEvent_ in ['close_buy_position','close_sell_position']
            
            continue

        else:
            history_log.append({'time': row.name,
                                'last_event': "margin not met",
                                'next_event': _NextEvent_,
                                'detail'    : for_log})
      
    if wait_for_one:
        wait_for_one = False

    else:
        if _Waiting_ == True:
            ## All the below can be put in MrMarket

            MrMarket_ETH.data(row)  ## provide data for MrMarket
            if _NextEvent_ == 'close_buy_position': ## close sell position should be done with MrMarket
                _Verdict_ = MrMarket_ETH.check_status(_NextEvent_) ## check if true_low <= pred_low

                assert _Verdict_ in ['Buy condition not met', 'Executed Buy', 'Buy order expired, Reset']

                if _Verdict_ == 'Executed Buy':
                    _LastEvent_ = _NextEvent_
                    _Waiting_, _NextEvent_ = MrMarket_ETH._CheckOrder_()  ## should have already executed buy order, and open a sell order
                    assert _Waiting_ == True, "Should be still waiting for the execution"
                    assert _NextEvent_ == 'close_sell_position', "Next event should be close sell position"

                    history_log.append({'time': row.name,
                                        'last_event': _LastEvent_,
                                        'next_event': _Verdict_})

                elif _Verdict_ == 'Buy condition not met':
                    history_log.append({'time': row.name,
                                        'last_event': _LastEvent_,
                                        'next_event': _Verdict_})

                elif _Verdict_ == 'Buy order expired, Reset':
                    history_log.append({'time': row.name,
                                        'last_event': _LastEvent_,
                                        'next_event': _Verdict_})
                    _Waiting_ = False
                    _NextEvent_ = 'open_buy_position'

                else:
                    print("!! _Verdict_ is wrong!! Shouldn't show: {} ".format(_Verdict_))
                    break 
                    #! shouldn't have anything here

            else:
                _Verdict_ = MrMarket_ETH.check_status(_NextEvent_)
                assert _Verdict_ in ['Sell condition not met', 'Executed Sell']

                if _Verdict_ == 'Executed Sell':
                    _LastEvent_ = _NextEvent_
                    _Waiting_, _NextEvent_ = MrMarket_ETH._CheckOrder_()  ## should have already executed sell order, and should be just waiting now
                    assert _Waiting_ == False, "Should not be waiting for anything, ready to accept a new buy order"
                    assert _NextEvent_ == 'open_buy_position', "Next event should be open buy position"

                    history_log.append({'time': row.name,
                                        'last_event': _LastEvent_,
                                        'next_event': _Verdict_})

                else: ## sell condition not met
                    history_log.append({'time': row.name,
                                        'last_event': _LastEvent_,
                                        'next_event': _Verdict_})


    #             elif: ## expired order


    #             else:
    #                 #! shouldn't have anything here

        else:
            history_log.append({'time': row.name,
                                'last_event': "some condition not met",
                                'next_event': _NextEvent_,
                                'detail'    : for_log})
    
    n += 1 
    df_base = df_base[1:]
    
    if n% (60*10) == 0:
        print(row.name)

initialisation


Unnamed: 0,time_of_event,event_name,usd_wallet,coin_wallet,next_action,pred_low,pred_high,true_low,true_high
0,2021-02-06 22:28:20.510985,initiation,100,0,open_buy_position,,,,


try to close buy position!! 2020-12-01 11:31:00
try to close buy position!! 2020-12-01 11:32:00
try to close buy position!! 2020-12-01 11:33:00
try to close buy position!! 2020-12-01 11:34:00
try to close buy position!! 2020-12-01 11:35:00
try to close buy position!! 2020-12-01 12:32:00
try to close buy position!! 2020-12-01 12:48:00
sell order taking too lond? 2020-12-01 13:18:00
sell order taking too lond? 2020-12-01 13:19:00
sell order taking too lond? 2020-12-01 13:20:00
sell order taking too lond? 2020-12-01 13:21:00
sell order taking too lond? 2020-12-01 13:22:00
sell order taking too lond? 2020-12-01 13:23:00
sell order taking too lond? 2020-12-01 13:24:00
sell order taking too lond? 2020-12-01 13:25:00
sell order taking too lond? 2020-12-01 13:26:00
sell order taking too lond? 2020-12-01 13:27:00
sell order taking too lond? 2020-12-01 13:28:00
sell order taking too lond? 2020-12-01 13:29:00
sell order taking too lond? 2020-12-01 13:30:00
sell order taking too lond? 2020-12-01 1

In [50]:
trading_log = pd.DataFrame(MrMarket_ETH.history_log)

In [51]:
trading_log

Unnamed: 0,time_of_event,event_name,usd_wallet,coin_wallet,next_action,pred_low,pred_high,true_low,true_high,position_opened_at
0,2021-02-06 22:28:20.510985,initiation,100.000000,0.0,open_buy_position,,,,,
1,2020-12-01 11:29:00.000000,open_buy_position,100.000000,0.0,close_buy_position,607.278732,616.387913,612.549988,615.919983,"{'open_buy': 2020-12-01 11:29:00, 'open_sell':..."
2,2020-12-01 11:31:00.000000,Buy condition not met,100.000000,0.0,close_buy_position,607.278732,616.387913,614.000000,617.609985,"{'open_buy': 2020-12-01 11:29:00, 'open_sell':..."
3,2020-12-01 11:32:00.000000,Buy condition not met,100.000000,0.0,close_buy_position,607.278732,616.387913,613.000000,615.299988,"{'open_buy': 2020-12-01 11:29:00, 'open_sell':..."
4,2020-12-01 11:33:00.000000,Buy condition not met,100.000000,0.0,close_buy_position,607.278732,616.387913,614.000000,616.799988,"{'open_buy': 2020-12-01 11:29:00, 'open_sell':..."
...,...,...,...,...,...,...,...,...,...,...
166,2020-12-27 17:00:00.000000,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,693.989990,696.619995,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
167,2020-12-27 17:01:00.000000,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,693.289978,695.950012,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
168,2020-12-27 17:02:00.000000,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,692.200012,694.919983,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
169,2020-12-27 17:03:00.000000,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,691.909973,693.880005,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."


In [52]:
_Waiting_

False

In [53]:
_NextEvent_

'open_buy_position'

In [54]:
outside_log = pd.DataFrame(history_log)

In [55]:
outside_log['last_event'].unique()

array(['start outside log', 'margin not met', 'some condition not met',
       'open_buy_position', 'close_buy_position', 'close_sell_position'],
      dtype=object)

In [56]:
outside_log['detail'][1]

{'pred_avg': 605.6286606646917,
 'pred_low': 604.6603675244602,
 'pred_high': 609.5173300483716}

In [57]:
outside_log['detail'][100]

{'pred_avg': 605.9243583522174,
 'pred_low': 602.9375390004077,
 'pred_high': 606.2747683090927}

In [58]:
outside_log[outside_log['time'] >= '2020-12-01 13:38:00'].head(20)

Unnamed: 0,time,last_event,next_event,detail
0,2021-02-06 22:28:20.510789,start outside log,begin trading,
607,2020-12-01 13:38:00.000000,close_buy_position,Sell condition not met,
608,2020-12-01 13:39:00.000000,close_buy_position,Sell condition not met,
609,2020-12-01 13:40:00.000000,close_buy_position,Sell condition not met,
610,2020-12-01 13:41:00.000000,close_buy_position,Sell condition not met,
611,2020-12-01 13:42:00.000000,close_buy_position,Sell condition not met,
612,2020-12-01 13:43:00.000000,close_buy_position,Sell condition not met,
613,2020-12-01 13:44:00.000000,close_sell_position,Executed Sell,
614,2020-12-01 13:45:00.000000,some condition not met,open_buy_position,"{'pred_avg': 604.8725064857432, 'pred_low': 59..."
615,2020-12-01 13:46:00.000000,some condition not met,open_buy_position,"{'pred_avg': 604.8725064857432, 'pred_low': 59..."


In [59]:
trading_log.tail(20)

Unnamed: 0,time_of_event,event_name,usd_wallet,coin_wallet,next_action,pred_low,pred_high,true_low,true_high,position_opened_at
151,2020-12-27 16:45:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,693.960022,697.659973,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
152,2020-12-27 16:46:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,692.0,695.0,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
153,2020-12-27 16:47:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,693.02002,695.070007,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
154,2020-12-27 16:48:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,691.059998,693.820007,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
155,2020-12-27 16:49:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,692.700012,695.559998,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
156,2020-12-27 16:50:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,693.059998,695.48999,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
157,2020-12-27 16:51:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,688.700012,693.309998,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
158,2020-12-27 16:52:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,685.460022,691.650024,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
159,2020-12-27 16:53:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,687.799988,690.679993,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."
160,2020-12-27 16:54:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,690.590027,695.169983,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':..."


In [60]:
100 * (for_log['pred_high']/for_log['pred_low'] -1)

0.7426339872678378

In [61]:
dif_high_low

3

In [62]:
100 * (for_log['pred_high']/for_log['pred_low'] -1) > dif_high_low

False

In [63]:
pd.DataFrame(history_log)['detail'][1]

{'pred_avg': 605.6286606646917,
 'pred_low': 604.6603675244602,
 'pred_high': 609.5173300483716}

In [64]:
pd.DataFrame(history_log)['detail'][100]

{'pred_avg': 605.9243583522174,
 'pred_low': 602.9375390004077,
 'pred_high': 606.2747683090927}

In [65]:
trading_log['position_opened_at_decode'] = trading_log['position_opened_at'].apply(lambda x: None if pd.isnull(x) else (x['open_buy'], x['open_sell']))

In [66]:
a = trading_log.head(25).loc[8]

In [67]:
a['time_of_event']

Timestamp('2020-12-01 11:37:00')

In [68]:
a['position_opened_at_decode'][1]

Timestamp('2020-12-01 11:35:00', freq='T')

In [69]:
(a['position_opened_at_decode'][1] - a['position_opened_at_decode'][1]).total_seconds()

0.0

In [70]:
a = trading_log.head(25).loc[12]

In [71]:
a['time_of_event']

Timestamp('2020-12-01 11:41:00')

In [72]:
a['position_opened_at_decode'][1]

Timestamp('2020-12-01 11:35:00', freq='T')

In [73]:
(a['time_of_event'] - a['position_opened_at_decode'][1]).total_seconds()

360.0

In [75]:
trading_log.tail()

Unnamed: 0,time_of_event,event_name,usd_wallet,coin_wallet,next_action,pred_low,pred_high,true_low,true_high,position_opened_at,position_opened_at_decode
166,2020-12-27 17:00:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,693.98999,696.619995,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':...","(2020-12-27 16:34:00, None)"
167,2020-12-27 17:01:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,693.289978,695.950012,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':...","(2020-12-27 16:34:00, None)"
168,2020-12-27 17:02:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,692.200012,694.919983,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':...","(2020-12-27 16:34:00, None)"
169,2020-12-27 17:03:00,Buy condition not met,102.083172,0.0,close_buy_position,678.382488,688.558225,691.909973,693.880005,"{'open_buy': 2020-12-27 16:34:00, 'open_sell':...","(2020-12-27 16:34:00, None)"
170,2020-12-27 17:04:00,expired buy order,102.083172,0.0,open_buy_position,678.382488,688.558225,690.859985,694.150024,,
