## Simulation

In [1]:
from tadawol.history import get_top_tickers

# Ticker universe used by the simulation below.
# NOTE(review): the meaning of the (0, 300) arguments is defined in
# tadawol.history — presumably a rank range; confirm there.
top_tickers = get_top_tickers(0, 300)

In [2]:
from tadawol.strategies import recovery
from tadawol.strategies import macd

# Strategy instance whose entries are simulated in the next cell.
strategy = macd.MACD()

In [3]:
# Run the strategy over the selected tickers; the result is kept in `data`
# and is merged with ticker and earnings information by the cells below.
data = strategy.simulate(top_tickers)
# Prints the full shape tuple of the result, not only the number of rows.
print("Entries number = ", data.shape)

Simulating strategy for 269 tickers


*****
Unnamed: 0      0
Date            0
Open            0
High            0
Low             0
Close           0
Adj Close       0
Volume          0
Ticker          0
Close_ema_23    0
Close_ema_10    0
ggg             0
dtype: int64
Unnamed: 0               int64
Date            datetime64[ns]
Open                   float64
High                   float64
Low                    float64
Close                  float64
Adj Close              float64
Volume                 float64
Ticker                  object
Close_ema_23           float64
Close_ema_10           float64
ggg                    float64
dtype: object
   Unnamed: 0       Date       Open       High        Low      Close  \
0       28660 2020-07-24  95.980003  96.220001  93.870003  94.809998   
1           0 2020-07-27  94.959999  97.540001  94.900002  97.269997   
2           1 2020-07-28  96.980003  97.250000  95.199997  95.300003   
3           2 2020-07-29  95.809998  97.660004  95.519997  97.099998   
4           3 2020-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
  val = np.array(val, copy=False)
  return array(a, dtype, copy=False, order=order)


ValueError: Must have equal len keys and value when setting with an iterable

## Add general company data

In [None]:
# JOIN RESULTS WITH TICKERS DATA
import pandas as pd
from math import log


from tadawol import history

# Load the per-ticker reference data and attach it to every simulated entry.
# The inner join drops entries whose ticker is missing from the tickers list.
tickers_data = pd.read_csv(history.TICKERS_LIST_PATH)

# When both frames have a "Volume" column pandas suffixes the right-hand
# (simulation) one as "Volume_y"; give it its plain name back.
data = pd.merge(tickers_data, data, on="Ticker", how="inner").rename(
    columns={"Volume_y": "Volume"}
)


## Get earnings

In [None]:
from tadawol.earnings import get_earnings_data_on_all_dates

# Earnings information for the simulated entries (presumably one row per
# Ticker/Date pair — see tadawol.earnings), attached to the entries below.
earnings_by_date = get_earnings_data_on_all_dates(data)
# pd.merge defaults to an inner join, so entries without a matching
# (Ticker, Date) earnings row are dropped.
data_with_earnings = pd.merge(data, earnings_by_date, on=["Ticker", "Date"])

# Row count after the join relative to the row count before it.
print("SHAPES RATIO = ", round(data_with_earnings.shape[0]/data.shape[0], 2))


## Helper functions

In [None]:
from math import log

def get_volume_category(volume):
    """Bucket a traded volume into a coarse category using its natural logarithm.

    Args:
        volume: Traded volume as a number.

    Returns:
        "Low Volume" when ln(volume) <= 14, "Medium Volume" when
        ln(volume) <= 16, otherwise "High Volume". Zero or negative volumes
        have no logarithm and are reported as "Low Volume". A NaN volume
        fails both comparisons and therefore falls through to "High Volume".
    """
    # math.log raises ValueError for values <= 0, which would abort the whole
    # Series.map call on a single zero-volume row.
    if volume <= 0:
        return "Low Volume"

    log_volume = log(volume)
    if log_volume <= 14:
        return "Low Volume"
    if log_volume <= 16:
        return "Medium Volume"
    return "High Volume"

# Add the volume bucket of every entry as a new column (set in place on
# data_with_earnings).
data_with_earnings["Volume_category"] = data_with_earnings.Volume.map(get_volume_category)


In [None]:
def print_results_data(df):
    """Print the mean of each earnings-related column of ``df``.

    Args:
        df: DataFrame with the columns ``days_since_last_result``,
            ``days_to_next_result`` and ``earnings_surprise``.
    """
    labelled_columns = (
        ("days_since_last_result : ", "days_since_last_result"),
        ("days_to_next_result : ", "days_to_next_result"),
        ("average earnings surprise: ", "earnings_surprise"),
    )
    for label, column_name in labelled_columns:
        print(label, df[column_name].mean())
    
def print_results_by_column(df):
    """Compare column means between clearly losing and clearly winning entries.

    Entries with ``win_percent`` below -3 count as bad, entries above 3 as
    good; for each inspected column the mean of both groups is printed,
    rounded to two decimals.

    Args:
        df: DataFrame with ``win_percent`` and the inspected columns.
    """
    win_percent = df["win_percent"]
    groups = (
        ("Bad result : ", df[win_percent < -3]),
        ("Good result : ", df[win_percent > 3]),
    )
    inspected = (
        "days_since_last_result",
        "days_to_next_result",
        "earnings_surprise",
        "exit_date",
        "week_previous_entries",
    )

    for column in inspected:
        print(f"***** {column} *****")
        for label, subset in groups:
            print(label, round(subset[column].mean(), 2))


In [None]:
import pandas as pd
import statistics 
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from prettytable import PrettyTable

def _performance_by(df, column, index):
    """Plot entry counts and mean win percent for each distinct value of ``column``.

    Two matplotlib figures are drawn: figure ``index`` shows the number of
    entries per group together with a horizontal line at the average count,
    and figure ``index + 1`` shows the mean ``win_percent`` per group together
    with a zero line. Group values are used as x tick labels.

    Args:
        df: DataFrame with a ``win_percent`` column and the column to group by.
        column: Name of the column whose values define the groups.
        index: matplotlib figure number used for the first figure.
    """
    labels = []
    occurrences = []
    win_percents = []
    # Group on the bare column name: grouping on a one-element list makes
    # recent pandas versions yield 1-tuples as keys, which would then show up
    # as tick labels.
    for column_value, column_data in df.groupby(column):
        labels.append(column_value)
        occurrences.append(column_data.shape[0])
        win_percents.append(column_data["win_percent"].mean())

    if not labels:
        # Nothing to plot, and statistics.mean raises on an empty list.
        return

    x = list(range(1, len(labels) + 1))

    f = plt.figure(index)
    plt.plot(x, occurrences)
    plt.plot(x, len(x) * [statistics.mean(occurrences)])
    plt.xticks(x, labels, rotation='vertical')
    f.show()

    g = plt.figure(index + 1)
    plt.plot(x, win_percents)
    plt.plot(x, len(x) * [0])
    plt.xticks(x, labels, rotation='vertical')
    g.show()


def simulate_trades(df, total_amount: int, min_invest_per_trade: float, max_trades_by_day, trade_fee: float = 2):
    """Replay the entries of ``df`` day by day with a limited cash budget.

    The simulation starts at the earliest ``Date`` and advances one calendar
    day at a time until the current time. Each day it first cashes in the
    trades exiting that day, then takes up to ``max_trades_by_day`` of that
    day's entries (highest ``earnings_surprise`` first), splitting the
    available cash equally between them. A day is skipped when the share per
    trade would fall below ``min_invest_per_trade * total_amount``; the share
    is capped at twice that minimum.

    Args:
        df: Entries with the columns ``Date``, ``Ticker``, ``earnings_surprise``,
            ``win_percent``, ``exit_date`` (added as a number of days to the
            entry date), ``Close`` and ``exit_price``. Not modified.
        total_amount: Starting cash.
        min_invest_per_trade: Minimum investment per trade as a fraction of
            ``total_amount``.
        max_trades_by_day: Maximum number of entries taken on a single day.
        trade_fee: Fee accumulated for every realized trade (defaults to 2).

    Returns:
        Tuple ``(win, trade_fees, realized_trades_df)``: the gain over
        ``total_amount`` (money still invested at the end is counted at its
        exit value, fees are not subtracted), the accumulated fees, and a
        DataFrame of the realized trades with the columns ``Ticker``,
        ``Date``, ``Exit date`` (the row's raw ``exit_date`` value),
        ``Enter price``, ``Exit price`` and ``win_percent``.
    """
    transaction_min = min_invest_per_trade * total_amount
    transaction_max = 2 * transaction_min
    current_amount = total_amount

    # One lookup table instead of filtering the whole frame on every calendar day.
    trades_by_date = {day: day_df for day, day_df in df.groupby("Date", sort=False)}
    date = df["Date"].min()

    returned_money_by_date = dict()
    trade_fees = 0
    realized_trades = []
    while date < datetime.now():
        # Cash in the money of the trades that exit today.
        current_amount += returned_money_by_date.pop(date, 0)

        day_trades = trades_by_date.get(date)
        day_trades_number = 0 if day_trades is None else min(day_trades.shape[0], max_trades_by_day)
        if day_trades_number == 0 or current_amount / day_trades_number < transaction_min:
            date += timedelta(days=1)
            continue

        money_by_trade = min(current_amount / day_trades_number, transaction_max)

        # Best earnings surprise first. sort_values returns a new frame here:
        # sorting the per-day slice in place is what raised pandas'
        # SettingWithCopyWarning.
        selected_trades = day_trades.sort_values(by="earnings_surprise", ascending=False).head(day_trades_number)

        for _, trade_row in selected_trades.iterrows():
            current_amount -= money_by_trade
            trade_fees += trade_fee

            # Book the invested money plus its gain/loss on the exit day.
            money_to_be_returned = (1 + trade_row["win_percent"] / 100.0) * money_by_trade
            exit_date = date + timedelta(days=trade_row["exit_date"])
            returned_money_by_date[exit_date] = returned_money_by_date.get(exit_date, 0) + money_to_be_returned

            realized_trades.append(
                [
                    trade_row["Ticker"],
                    date,
                    trade_row["exit_date"],
                    trade_row["Close"],
                    trade_row["exit_price"],
                    trade_row["win_percent"],
                ]
            )

        date += timedelta(days=1)

    # Money whose exit day was never reached by the loop is still invested.
    rest_money = sum(returned_money_by_date.values())

    win = rest_money + current_amount - total_amount
    realized_trades_df = pd.DataFrame(
        data=realized_trades,
        columns=["Ticker", "Date", "Exit date", "Enter price", "Exit price", "win_percent"])
    return win, trade_fees, realized_trades_df

def plot_trades(trades_df, plot_index):
    """Plot the mean ``win_percent`` of the trades entered in each calendar month.

    Draws matplotlib figure number ``plot_index`` with one point per month
    (labelled with the first day of that month) and a zero reference line.

    Args:
        trades_df: DataFrame with a ``Date`` column whose values expose
            ``year`` and ``month``, and a ``win_percent`` column. Not modified.
        plot_index: matplotlib figure number to draw into.
    """
    # Bucket by first day of the month without adding a helper column to the
    # caller's frame.
    month_starts = trades_df["Date"].map(lambda x: datetime(x.year, x.month, 1))

    months = []
    win_percents = []
    # Grouping by a single key (not a one-element list) keeps the group keys
    # scalar, so they render as plain tick labels.
    for month, month_trades in trades_df.groupby(month_starts):
        months.append(month)
        win_percents.append(month_trades["win_percent"].mean())

    f = plt.figure(plot_index)
    x = range(1, len(months) + 1)
    plt.plot(x, win_percents)
    plt.plot(x, len(win_percents) * [0])
    plt.xticks(x, months, rotation='vertical')
    f.show()
    
def analyse_results(df: pd.DataFrame, print_figures: bool = True, total_amount=20000, min_invest_per_trade=0.02, max_trades_by_day=4) -> None:
    """Print a KPI table for the simulated entries and draw diagnostic figures.

    The KPI table covers the number of entries, entries per (approximate)
    business day, share of non-losing entries, mean win percent and the
    outcome of ``simulate_trades`` with the given budget. A histogram of
    ``win_percent`` and the monthly performance of the realized trades are
    always drawn; with ``print_figures`` the per-month, per-country,
    per-sector, per-``week_previous_entries`` and per-volume-category figures
    are drawn as well.

    Args:
        df: Entries to analyse; a deep copy is used, the input is not modified.
        print_figures: Whether to draw the per-category figures.
        total_amount: Starting cash for the trade simulation.
        min_invest_per_trade: Minimum investment per trade as a fraction of
            ``total_amount``; must be in (0, 0.1].
        max_trades_by_day: Maximum number of entries taken per day.

    Returns:
        None. Everything is reported through prints and figures.
    """
    assert 0 < min_invest_per_trade <= 0.1

    entries_number = df.shape[0]
    if entries_number == 0:
        # Every ratio below divides by the number of entries.
        print("No entries to analyse")
        return None

    df = df.copy(deep=True)
    # Cast before any statistic is computed, and with a plain column
    # assignment: `.loc[:, col] = ...` may write into the existing column and
    # keep its old dtype, which breaks the histogram on object columns.
    df["win_percent"] = df["win_percent"].astype(float)

    min_date = df["Date"].min()
    max_date = df["Date"].max()
    # Calendar span scaled by 5/7 to approximate the number of business days.
    days_number = (5.0 / 7.0) * (max_date - min_date).total_seconds() / (60 * 60 * 24)
    # All entries on the same day give a zero span; count that as a single day.
    if days_number > 0:
        cases_per_day = round(entries_number / days_number, 2)
    else:
        cases_per_day = float(entries_number)

    win, trade_fees, realized_trades_df = simulate_trades(df, total_amount, min_invest_per_trade, max_trades_by_day)

    x = PrettyTable()
    x.field_names = ["KPI", "value"]

    x.add_row(["Enter cases number", entries_number])
    x.add_row(["Cases number per day", cases_per_day])
    x.add_row(["Average winning cases", round(100 * df[df["win_percent"] >= 0].shape[0] / entries_number, 1)])
    x.add_row(["Average win", round(df["win_percent"].mean(), 2)])
    x.add_row([f"Win for {round(total_amount/1000)} K€", round(win)])
    x.add_row(["Trade fees", trade_fees])
    x.add_row(["Trade number", realized_trades_df.shape[0]])
    x.add_row(["Total margin", round(100 * (win - trade_fees)/total_amount, 1)])
    print(x)

    df[["win_percent"]].hist(bins=50)

    plot_trades(realized_trades_df, 2)

    if not print_figures:
        return None

    df["month"] = df['Date'].map(lambda x: f"{x.month}/{x.year}")

    # Each call draws two figures, so figure numbers advance by two (3, 5, 7, ...).
    labels = ["month", "Country", "Sector", "week_previous_entries", "Volume_category"]
    for position, label in enumerate(labels):
        _performance_by(df, label, 3 + 2 * position)

    return None

def cut_results_surprises(df):
    """Keep only the entries that sit clear of an earnings release.

    An entry is kept when ``days_to_next_result`` is at least 15 and
    ``days_since_last_result`` is strictly positive.

    Args:
        df: Entries to filter; not modified.

    Returns:
        A new DataFrame with the kept rows and their original index labels.
    """
    far_from_next_result = df["days_to_next_result"] >= 15
    after_last_result = df["days_since_last_result"] > 0
    return df[far_from_next_result & after_last_result].copy(deep=True)

def print_cases(df: pd.DataFrame, bad=True, start=0, end=10):
    """Return a window of the entries ranked by ``win_percent``.

    Despite its name the function prints nothing; it returns the rows so the
    notebook can display them.

    Args:
        df: Entries to rank; not modified.
        bad: When truthy the worst entries come first (ascending
            ``win_percent``, then ``Date``); otherwise the best come first.
        start: First rank to return (0-based position after sorting).
        end: Last rank to return; the bound is inclusive because the slice
            is label-based on the freshly reset index.

    Returns:
        DataFrame restricted to the descriptive columns, ranks ``start``..``end``.
    """
    shown_columns = [
        "Ticker", "Date", "win_percent", "company_short_name", "Country",
        "Sector", "days_to_next_result", "days_since_last_result",
        "exit_date", "Close", "Open", "exit_price",
    ]
    ranked = (
        df.sort_values(by=["win_percent", "Date"], ascending=bool(bad))
        .reset_index(drop=True)
    )
    return ranked[shown_columns].loc[start:end]
    

In [None]:
# analyse_results reports through prints and figures and always returns None,
# so its result is not bound to a name.
analyse_results(data_with_earnings, print_figures=True, max_trades_by_day=2, min_invest_per_trade=0.04)

### Try machine learning

In [None]:
from sklearn.linear_model import LogisticRegression

def transform_data(df):
    """Build the feature frame fed to the classifiers.

    Adds ratio features between the moving averages and between open and
    close, normalises ``evolution_diff_long_medium`` by the 10- and 20-period
    averages, and returns these together with the earnings-related columns.

    Args:
        df: Entries with the columns ``Close_ema_20``, ``Close_ema_10``,
            ``Close_ema_5``, ``Open``, ``Close``,
            ``evolution_diff_long_medium``, ``days_since_last_result``,
            ``days_to_next_result``, ``earnings_surprise`` and
            ``week_previous_entries``. Not modified.

    Returns:
        A new DataFrame holding only the feature columns, in a fixed order.
    """
    df = df.copy(deep=True)
    df["20/10"] = df["Close_ema_20"] / df["Close_ema_10"]
    df["10/5"] = df["Close_ema_10"] / df["Close_ema_5"]
    df["Open/Close"] = df["Open"] / df["Close"]

    # Both features used to be divided by Close_ema_20, which made them exact
    # duplicates; the "_10" feature is now normalised by the 10-period average.
    # NOTE(review): confirm this matches the intended definition of evolution_diff_10.
    df["evolution_diff_10"] = df["evolution_diff_long_medium"] / df["Close_ema_10"]
    df["evolution_diff_20"] = df["evolution_diff_long_medium"] / df["Close_ema_20"]

    columns = [
        "20/10", "10/5", "Open/Close",
        "days_since_last_result", "days_to_next_result", "earnings_surprise",
        "week_previous_entries", "evolution_diff_10", "evolution_diff_20"]

    return df[columns]

# Feature matrix for every simulated entry.
X = transform_data(data_with_earnings)
# Binary target: 0 for a negative win_percent, 1 otherwise (a NaN win_percent
# also maps to 1 because the `<` comparison is False).
y = data_with_earnings["win_percent"].map(lambda x: 0 if x<0 else 1)
# Fitted on the whole data set; `clf` is reused further down to score today's trades.
clf = LogisticRegression(random_state=0).fit(X, y)



In [None]:
from sklearn.model_selection import cross_validate
from sklearn import ensemble, neighbors, svm, tree
# Candidate classifiers for a model comparison.
# NOTE(review): this list is not used anywhere in the visible notebook — only
# `clf` is evaluated below.
algos = [
    LogisticRegression(),
    ensemble.AdaBoostClassifier(), 
    ensemble.BaggingClassifier(), 
    ensemble.ExtraTreesClassifier(),
    ensemble.GradientBoostingClassifier(),
    ensemble.RandomForestClassifier(n_estimators = 20),
    neighbors.KNeighborsClassifier(n_neighbors = 3),
    svm.SVC(probability=True),
    svm.LinearSVC(max_iter=30000),
    tree.DecisionTreeClassifier(),
    tree.ExtraTreeClassifier()
]

def get_classification_results(alg, X, y, cv):
    """Cross-validate ``alg`` and summarise its mean train and test scores.

    Args:
        alg: Estimator passed to ``cross_validate``.
        X: Feature matrix.
        y: Target values.
        cv: Cross-validation setting forwarded to ``cross_validate``.

    Returns:
        Dict with the estimator's class name (``alg_name``) and the mean
        ``train_score`` and ``test_score``, each rounded to two decimals.
    """
    scores = cross_validate(alg, X, y, cv=cv, return_train_score=True)

    summary = {'alg_name': alg.__class__.__name__}
    for score_key in ('train_score', 'test_score'):
        summary[score_key] = round(scores[score_key].mean(), 2)
    return summary

# Cross-validate the logistic regression on the full feature set with cv=10;
# the returned summary dict is displayed as the cell output.
get_classification_results(clf, X, y, 10)

### Clean data

In [None]:
import numpy as np
from datetime import datetime

# Two date filters applied in sequence:
#   1. avoid the coronavirus period by keeping only entries dated before 2020-01-01;
#   2. avoid the small crisis of late 2018 by dropping entries dated from
#      2018-12-01 up to and including 2018-12-31.
cleaned_data = (
    data_with_earnings
    .loc[lambda entries: entries["Date"] < datetime(2020, 1, 1)]
    .loc[lambda entries: (entries["Date"] < datetime(2018, 12, 1)) | (entries["Date"] > datetime(2018, 12, 31))]
)

## Simulate top tickers

In [None]:
# Restrict the cleaned entries to a subset of the top tickers and analyse them.
# NOTE(review): this comment used to say "100 top tickers", but the call passes
# (10, 70); confirm in tadawol.history what these arguments select.
# This rebinds `top_tickers`, replacing the list used for the simulation above.
top_tickers = get_top_tickers(10, 70)
top_companies_trades = cleaned_data[cleaned_data["Ticker"].isin(top_tickers)]
analyse_results(top_companies_trades, True, total_amount=20000, max_trades_by_day=2, min_invest_per_trade=0.04)


In [None]:
# Worst entries ranked 10 to 30 among the top-company trades analysed above.
# `df` is only defined further down (LABORATORY cell), so referencing it here
# fails on a fresh top-to-bottom run.
print_cases(top_companies_trades, True, 10, 30)

In [None]:
# Re-run the analysis without a hand-picked list of tickers, KPI table only
# (print_figures=False).
bad_tickers = ["TSLA", "SHOP", "QCOM", "ORCL", "INTC"]
trades = top_companies_trades[~top_companies_trades["Ticker"].isin(bad_tickers)]
analyse_results(trades, False, total_amount=20000, max_trades_by_day=2, min_invest_per_trade=0.04)

In [None]:
# Inspect the trades of a single ticker; fill in the symbol before running
# (the empty string matches no row unless a ticker is literally "").
ticker = ""
ticker_trades = top_companies_trades[top_companies_trades["Ticker"] == ticker]
# NOTE(review): this expression is not the last line of the cell, so the
# notebook does not display it; wrap it in display(...) or move it to the end
# if the table is wanted.
ticker_trades[["Ticker", "Date", "win_percent", "exit_date", "exit_reason"]]
# Compares bad and good entries over all top-company trades, not only `ticker`.
print_results_by_column(top_companies_trades)

## LABORATORY

In [None]:
# Try losing days: keep entries with a negative `ema_evolution` that are more
# than 15 days away from the next result, then print the KPI table only.

# The trailing comment on the next line is a disabled Close < Open filter.
df = cleaned_data.copy(deep=True)#[cleaned_data["Close"] < 1 * cleaned_data["Open"]]
df = df[df["ema_evolution"] < 0]
df = df[df["days_to_next_result"] > 15]
analyse_results(df, False, total_amount=20000, max_trades_by_day=4, min_invest_per_trade=0.05)

## END LABORATORY

## Today's results

In [None]:
from tadawol.strategies import recovery
from tadawol.history import get_top_tickers
from tadawol.earnings import get_earnings_data_on_all_dates

# Switch to the Recovery strategy (this rebinds `strategy`, which held the
# MACD strategy used for the simulation at the top of the notebook).
strategy = recovery.Recovery()

# NOTE(review): get_top_tickers is called with a single argument here and with
# two arguments elsewhere; confirm the signature in tadawol.history.
tickers = get_top_tickers(100)
today_trades, today_exits = strategy.get_today_trades_and_exits(tickers)


# Attach earnings information by (Ticker, Date); pd.merge defaults to an inner
# join, so trades without a matching earnings row are dropped.
earnings_by_date = get_earnings_data_on_all_dates(today_trades)
today_trades = pd.merge(today_trades, earnings_by_date, on=["Ticker", "Date"])


In [None]:
# Score today's trades with the classifier fitted in the machine-learning section.
# NOTE(review): `clf` was fitted on entries simulated with macd.MACD(), while
# these trades come from recovery.Recovery(); confirm the model applies here.
today_trades.reset_index(drop=True, inplace=True)
x_today = transform_data(today_trades)
predictions = clf.predict_proba(x_today)
# Keep the second probability column of each prediction (class 1, i.e. a
# non-negative win, when both classes were seen during fitting), rounded to
# two decimals.
predictions = [round(p[1], 2) for p in predictions]
today_trades["score"] = predictions


In [None]:
# Rank today's candidate trades by model score (ties broken by earnings
# surprise), best first, and show the score the ranking is based on.
today_trades.sort_values(by=["score", "earnings_surprise"], ascending=False)[["Date", "Ticker", "score", "earnings_surprise", "days_since_last_result", "days_to_next_result", "week_previous_entries"]]