In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import yfinance as yf
import datetime


# 0. Functions

In [None]:

def daily_price(ticker, end, days_earlier=3, columns=['Close']):
    '''
    Return the mean price of ticker over the window preceding each end date.

    For every timestamp in `end`, the Yahoo Finance history between
    `timestamp - days_earlier` days and `timestamp` is requested, and the mean
    of all values found in `columns` becomes the result for that timestamp.
    The end date itself is excluded!
    Use a window of at least 3 days because of weekends and holidays.

    Parameters
    ----------
    ticker : symbol handed to yf.Ticker
    end : iterable of date-like objects that support subtracting a timedelta
    days_earlier : length of the look-back window in days
    columns : history columns to average (only read, never modified)

    Returns
    -------
    numpy.ndarray of floats with one entry per element of `end`;
    NaN where the window contained no rows.
    '''
    # One Ticker object serves every window; building it per date is wasted work.
    history_source = yf.Ticker(ticker)
    window = datetime.timedelta(days=days_earlier)

    result_series = []
    for timestamp in end:
        data = history_source.history(start=timestamp - window, end=timestamp)
        if data.empty:
            # No quotes in the window: report NaN explicitly instead of
            # averaging an empty array (which only yields NaN via a warning).
            result_series.append(np.nan)
            continue
        result_series.append(data[columns].values.mean())

    # dtype is pinned so that an empty `end` still gives a float array
    return np.asarray(result_series, dtype=float)

In [None]:
def _normalise_number_text(elem):
    '''Turn one raw cell string into a digit-only string (helper of replace_format_input).'''
    if ',' in elem:
        groups = elem.split(',')
        # a last group shorter than 3 characters lost its trailing zeros: pad it back
        groups[-1] = groups[-1].ljust(3, '0')
        return "".join(groups)
    if elem == '-':
        # a lone dash stands for "no value"
        return '0'
    return elem


def replace_format_input(input=pd.DataFrame):
    '''
    Clean the text columns of a statement table and cast them to integers.

    A column is treated as text when its first value is a str. For such columns:
    - a lone '-' becomes 0
    - missing 0-s are added to the end of the last ','-separated group
    - the ',' separators are dropped
    - the datatype is set to integer

    The frame is modified in place and also returned. Columns whose first
    value is not a string are left untouched.
    '''
    # nothing to inspect in an empty frame (and .iloc[0] would fail)
    if len(input.index) == 0:
        return input

    for column in input.columns:
        values = input[column]
        # positional access: the frame's index labels need not start at 0
        if isinstance(values.iloc[0], str):
            cleaned = [_normalise_number_text(elem) for elem in values]
            # reuse the frame's own index so the assignment cannot misalign rows
            input[column] = pd.Series(cleaned, index=input.index).astype(int)
    return input

In [None]:
def convert_national_currency(input_data=pd.DataFrame, currency=pd.DataFrame):
    '''
    Return a copy of input_data with its value columns multiplied by the rate.

    Every column except the share count, dividend, rate and date columns is
    cast to float and multiplied by currency['usd_nat_currency'].
    input_data itself is not modified.
    '''
    untouched = ('shares', 'national_div', 'usd_div', 'usd_nat_currency', 'real_date', 'date')

    output_data = input_data.copy()
    to_convert = [name for name in output_data.columns if name not in untouched]
    for name in to_convert:
        as_float = output_data[name].astype(float)
        output_data[name] = as_float * currency['usd_nat_currency']

    return output_data

In [None]:
def calculate_real_date(input):
    '''
    Add a 'real_date' column: a rough estimate of when each report is available.

    Statement dates falling in December get 42 days added, all other
    dates get 21 days. The frame is modified in place and returned.
    '''
    december_lag = datetime.timedelta(days=42)
    regular_lag = datetime.timedelta(days=21)

    input['real_date'] = [
        stamp + (december_lag if stamp.month == 12 else regular_lag)
        for stamp in input['date']
    ]
    return input

In [None]:
def filter_raw_data(input=pd.DataFrame):
    '''
    Return a copy of input reduced to the columns used by the analysis.

    The columns come back in the fixed order listed below; any other column
    of input is dropped.
    '''
    wanted = [
        # dates and share count
        'date', 'real_date', 'shares',
        # result figures
        'revenue', 'cogs', 'gross_profit', 'net_profit',
        # assets
        'cash', 'acc_rec', 'inventory', 'curr_assets',
        'goodwill', 'intangible_assets', 'total_assets',
        # liabilities
        'acc_pay', 'short_term_debt', 'current_debt', 'curr_liab',
        'long_term_debt', 'total_liab',
        # cash flow
        'cash_from_operating_activities', 'capex',
    ]
    return input[wanted].copy()

In [None]:
def calculate_input_value_ratios(input=pd.DataFrame):
    '''
    Add the per-share inputs of the value ratios to the frame.

    New columns: 'eps', 'bv_per_share', 'fcf' and 'fcf_per_share'.
    Quarterly flow figures (net profit, operating cash flow, capex) are
    multiplied by 4 here. The frame is modified in place and returned.
    '''
    share_count = input['shares']

    # quarterly net profit times 4
    input['eps'] = (input['net_profit'] * 4) / share_count

    # book value = total assets minus total liabilities
    book_value = input['total_assets'] - input['total_liab']
    input['bv_per_share'] = book_value / share_count

    # both cash-flow terms are multiplied by 4 before subtracting
    operating_x4 = 4 * input['cash_from_operating_activities']
    capex_x4 = input['capex'] * 4
    input['fcf'] = operating_x4 - capex_x4
    input['fcf_per_share'] = input['fcf'] / share_count

    return input

In [None]:
def ratios_input_filter(input=pd.DataFrame):
    '''
    Return a copy of input holding only the columns the price ratios need.

    Column order of the result: date, real_date, revenue, eps, bv_per_share,
    shares, fcf, fcf_per_share.
    '''
    ratio_inputs = [
        'date', 'real_date',
        'revenue', 'eps', 'bv_per_share', 'shares',
        'fcf', 'fcf_per_share',
    ]
    return input[ratio_inputs].copy()

In [None]:
def evaluate_performance(input=pd.DataFrame, output=pd.DataFrame):
    '''
    Append liquidity, leverage, efficiency and profitability ratios to output.

    Every ratio is computed from columns of `input` and stored as a new
    column of `output`, which is modified in place and returned.
    Revenue, cogs and net profit are multiplied by 4 in the turnover and
    return ratios; cogs is additionally sign-flipped (factor -4).
    '''
    # building blocks shared by several ratios
    equity = input['total_assets'] - input['total_liab']
    revenue_x4 = 4 * input['revenue']
    cogs_x4_flipped = -4 * input['cogs']
    net_profit_x4 = 4 * input['net_profit']

    # short term debt
    output['current_ratio'] = input['curr_assets'] / input['curr_liab']
    output['quick_ratio'] = (input['curr_assets'] - input['inventory']) / input['curr_liab']
    output['cash_ratio'] = input['cash'] / input['curr_liab']

    # long term debt
    output['debt_to_equity'] = input['total_liab'] / equity
    output['equity_ratio'] = equity / input['total_assets']
    output['debt_ratio'] = input['total_liab'] / input['total_assets']

    # management, judged by efficiency ratios
    output['acc_rec_ratio'] = revenue_x4 / input['acc_rec']
    output['acc_pay_ratio'] = cogs_x4_flipped / input['acc_pay']
    output['cash_turnover'] = revenue_x4 / input['cash']
    output['inventory_turnover'] = cogs_x4_flipped / input['inventory']

    # economic moat
    output['gross_profit_margin'] = input['gross_profit'] / input['revenue']
    output['net_profit_margin'] = input['net_profit'] / input['revenue']
    output['roa'] = net_profit_x4 / input['total_assets']
    output['roe'] = net_profit_x4 / equity

    return output

In [None]:
def price_ratios(input=pd.DataFrame):
    '''
    Add the P/E, P/B, P/S and P/FCF columns to the frame.

    The original metrics were developed for annual data while this notebook
    works with quarterly data: 'eps' and 'fcf' are expected to arrive already
    multiplied by 4, revenue is multiplied by 4 here, and book value per
    share is used as is. The frame is modified in place and returned.
    '''
    price = input['share_price']
    market_cap = price * input['shares']

    input['pe_ratio'] = price / input['eps']
    input['pb_ratio'] = price / input['bv_per_share']
    input['ps_ratio'] = market_cap / (input['revenue'] * 4)
    input['pfcf_ratio'] = market_cap / input['fcf']

    return input

# 1. Load raw input
## Hardcoded block

In [None]:
# When True, the optional time-filter cell keeps only roughly the last X years of statements
evaluate_last_X_years = False
X=10
# Yahoo Finance symbol of the exchange rate pulled for the statement dates
currency_pair = 'CADUSD=X'
# Currency of the statement figures; the conversion step only runs when this is 'USD'
numbers_in_currency = 'CAD'
# Yahoo Finance symbol of the analysed share; also part of the input CSV file name
share_name = 'FTS.TO'

# 1.1 Calculate Share numbers from total distributed dividend (CFS) and dividend amount
## Pull historical USD national currency rates to dividend & calculate historical dividend in USD

In [None]:
# ticker_share = yf.Ticker(share_name)
# dividend = ticker_share.history(period='max', interval="1wk")
# dividend = dividend[dividend['Dividends'] > 0]
# dividend = dividend.rename(columns={'Dividends': 'local_amount'})

# dividend['real_date'] = dividend.index 
# dividend['real_date'] = dividend['real_date'].dt.tz_localize(None)

# dividend = dividend[['real_date', 'local_amount']]

# dividend['usd_rate'] = daily_price(ticker=currency_pair, end=dividend['real_date'], days_earlier=3)
# dividend['currency'] = [currency_pair.split(sep='=')[0] for x in range(len(dividend))]
# dividend['usd_dividend'] = dividend['local_amount'] / dividend['usd_rate']

# dividend.head()

## Extend your input statement table with historical dividend data and calculate share prices!

# 1.2. Raw input statement data

In [None]:
# The statement file is read from ../csv_files/<share_name>_data.csv (';'-separated, 'date' parsed as datetime)
route = f"../csv_files/{share_name}_data.csv"
data = pd.read_csv(route, sep=';', parse_dates=['date'])
# replace '-' with 0, pad missing trailing 0-s, drop ',' separators, cast text columns to int
data = replace_format_input(data)

## (OPTIONAL) Time Filtering - Last X years of data

In [None]:
if evaluate_last_X_years:
    # look back X * 366 days plus 93 extra days from today
    cutoff = datetime.datetime.today() - datetime.timedelta(days=X*366+93)
    data = data.loc[data['date'] > cutoff].reset_index(drop=True)

data.head()

## Calculate the date when the quarterly reports could be available

In [None]:
# Adds the 'real_date' column (statement date + 21 days, or + 42 days for December statements)
data = calculate_real_date(data)

# 2. Filter out unnecessary columns

In [None]:
# Keep only the statement columns used later; filter_raw_data returns an independent copy
data = filter_raw_data(data)
data.tail()

## Pull historical average USD national currency rates and add to the dataframe

In [None]:
# Mean rate of currency_pair over the 90 days before each statement date, one value per row
data['usd_nat_currency'] = daily_price(
    ticker=currency_pair,
    end=data['date'],
    days_earlier=90
    )

# drop rows where no rate was available (the value is NaN)
data = data[data['usd_nat_currency'].notna()]

data.tail()

## Convert USD values related to IS, BS, CFS to national currency

In [None]:
# convert columns into national currency if necessary
# NOTE(review): the frame's own 'usd_nat_currency' column is used as the multiplier -
# confirm that currency_pair is quoted as national currency per 1 USD whenever this branch runs
if numbers_in_currency == 'USD':
    data_nat_curr = convert_national_currency(input_data=data, currency=data)
else:
    # figures are already in national currency: work on an independent copy
    data_nat_curr = data.copy()
data_nat_curr.tail()

# 3. Calculate input to Value Ratios (eps, bv per share, fcf)
## filter out unwanted columns

In [None]:
# Adds eps, bv_per_share, fcf and fcf_per_share; data_nat_curr is modified in place and is the same object as the result
filtered_nat_curr = calculate_input_value_ratios(data_nat_curr)

## Calculate input values to value ratios

In [None]:
# Independent copy holding only the dates and the inputs of the price ratios
ratios_nat_curr = ratios_input_filter(filtered_nat_curr)

In [None]:
# Appends the liquidity, leverage, efficiency and profitability ratio columns to ratios_nat_curr
ratios_nat_curr = evaluate_performance(input=filtered_nat_curr, output=ratios_nat_curr)

# 4. Pull historical share prices and merge with statement data
## Pull historical share prices

In [None]:

# Weekly price history of the share from the earliest statement date until now
obj = yf.Ticker(share_name)
share_price = obj.history(interval="1wk", start=data.date.min(), end=pd.Timestamp.now())
# copy the index into a 'real_date' column: pd.merge_asof matches on a column of that name
share_price['real_date'] = share_price.index
# remove localization (timezone) so the column can be merged with the statement dates
share_price['real_date'] = share_price['real_date'].dt.tz_localize(None)
# expose the close as 'share_price' and keep only the date and the share price
share_price['share_price'] = share_price['Close']
share_price = share_price[['real_date', 'share_price']]

share_price.tail()

## Merge the statement dataset with share prices

In [None]:
# Attach to every weekly price the latest statement whose real_date is not after the price date.
# merge_asof requires both frames to be sorted on the key, and the order of the
# statement CSV is not guaranteed, so both sides are sorted explicitly.
merged_nat_curr = pd.merge_asof(
    left=share_price.sort_values('real_date'),
    right=ratios_nat_curr.sort_values('real_date'),
    on='real_date',
    direction='backward',
)
# drop rows with np.nan (e.g. prices older than the first available statement) and reset index
merged_nat_curr = merged_nat_curr.dropna()
merged_nat_curr = merged_nat_curr.reset_index(drop=True)
merged_nat_curr.head()

# Calculate price ratios (P/E, P/B, P/S, P/FCF)

In [None]:
# Adds pe_ratio, pb_ratio, ps_ratio and pfcf_ratio as the last four columns (frame is modified in place)
merged_nat_curr = price_ratios(merged_nat_curr)
merged_nat_curr.head()

# Calculate Dividend yield and currency-share price correlation

In [None]:
# obj: the currency pair, obj2: the share (obj2 is used in the next cell)
obj = yf.Ticker(currency_pair)
obj2 = yf.Ticker(share_name)
# Daily currency history from the first merged row until now
usd_nat_curr = obj.history(interval="1d", start=merged_nat_curr['real_date'].min(), end=pd.Timestamp.now())
# timezone-free 'date' column used as the merge key later on
usd_nat_curr['date'] = usd_nat_curr.index
usd_nat_curr['date'] = usd_nat_curr['date'].dt.tz_localize(None)
usd_nat_curr['currency_rate'] = usd_nat_curr['Close']

In [None]:
# Daily share history over the same period as the currency history
share_all_price = obj2.history(interval="1d", start=merged_nat_curr['real_date'].min(), end=pd.Timestamp.now())
# timezone-free 'date' column used as the merge key later on
share_all_price['date'] = share_all_price.index
share_all_price['date'] = share_all_price['date'].dt.tz_localize(None)
share_all_price['share_price'] = share_all_price['Close']
# dividend of the row as a percentage of that row's close; 0 wherever 'Dividends' is 0
share_all_price['dividend_yield'] = share_all_price['Dividends'] * 100 / share_all_price['Close']

In [None]:
# Dividend yield per daily row over time
fig, ax1 = plt.subplots(figsize=(15, 6))

ax1.plot(share_all_price.index, share_all_price['dividend_yield'], color='k', label=share_name)

ax1.set_xlabel('Date')
ax1.set_ylabel('Dividend Yield (%)', color='k')

plt.legend()
plt.show()

In [None]:
# Pair each currency row with a share row on 'date' (merge_asof with its default direction)
share_price_currency_rate_correlation = pd.merge_asof(left=usd_nat_curr, right=share_all_price, on='date')
share_price_currency_rate_correlation.index = share_price_currency_rate_correlation['date']
# only the two series of interest take part in the correlation
share_price_currency_rate_correlation = share_price_currency_rate_correlation[['currency_rate', 'share_price']]

# correlation of both columns with the share price
share_price_currency_rate_correlation.corr()['share_price']

In [None]:
# Currency rate (left axis) against the daily share price (right axis)
fig, ax1 = plt.subplots(figsize=(15, 6))

ax2 = ax1.twinx()
rate_line, = ax1.plot(usd_nat_curr.index, usd_nat_curr['Close'], color='k', label='USD / national currency')
price_line, = ax2.plot(share_all_price.index, share_all_price['Close'], color='b', label=share_name)

ax1.set_xlabel('Date')
ax1.set_ylabel('Currency rate (1 USD to X national currency)', color='k')
ax2.set_ylabel('Share price (national)', color='b')

# plt.legend() only sees the current (twin) axis, so the currency line was missing
# from the legend; both line handles are passed explicitly instead
ax2.legend(handles=[rate_line, price_line])
plt.show()

# Share price and value ratios

In [None]:
# One figure per column from the fourth column onwards: the column (left axis)
# against the daily share price (right axis)
for column in merged_nat_curr.columns[3:]:
    fig, ax1 = plt.subplots(figsize=(15, 6))

    ax2 = ax1.twinx()
    # the label is the column name (previously the whole DataFrame was passed as label)
    ratio_line, = ax1.plot(merged_nat_curr['real_date'], merged_nat_curr[column], color='k', label=column)
    price_line, = ax2.plot(share_all_price.index, share_all_price['Close'], color='b', label=share_name)

    ax1.set_xlabel('Date')
    ax1.set_ylabel(column, color='k')
    ax2.set_ylabel('Share price (national currency)', color='b')

    # plt.legend() only covers the twin axis; list both series explicitly
    ax2.legend(handles=[ratio_line, price_line])
    plt.show()

In [None]:
# (quantile, line colour, legend label) of the dashed reference lines
quantile_markers = [
    (0.1, 'green', 'P10'),
    (0.3, 'green', 'P30'),
    (0.5, 'blue', 'Median'),
    (0.7, 'orange', 'P70'),
    (0.9, 'red', 'P90'),
]

# Histogram of each of the last 8 columns with the latest value and the quantiles marked
for column in merged_nat_curr.columns[-8:]:
    try:
        plt.hist(merged_nat_curr[column].values, bins=30, edgecolor='black', color='gray')
        plt.axvline(merged_nat_curr[column].iloc[-1], color='k', linestyle='dotted', linewidth=2, label='Current Value')
        for level, colour, name in quantile_markers:
            plt.axvline(merged_nat_curr[column].quantile(level), color=colour, linestyle='dashed', linewidth=1, label=name)

        plt.xlabel(column.capitalize())
        plt.ylabel('Frequency')
        plt.title(column.capitalize())

        plt.legend()

        plt.show()
    except Exception as error:
        # Best effort: skip the column, but say why, and discard the half-drawn
        # figure so it cannot leak into the next column's plot.
        plt.close()
        print(f"{column} diagram is missing due to error: {error!r}")

## Calculate correlation between value parameters

In [None]:
# Only the numeric ratios take part in the correlation. The former 'real_date' column
# is a date, not a ratio: depending on the pandas version it was either dropped
# silently or made corr() fail.
ratio_columns = ['gross_profit_margin', 'net_profit_margin', 'roa', 'roe', 'pe_ratio', 'pb_ratio', 'ps_ratio', 'pfcf_ratio']
corr_matrix = pd.DataFrame(merged_nat_curr, columns=ratio_columns)
# find independent columns
corr_matrix.corr()

In [None]:
# Relative band (+/- 10 %) around the latest value used to find comparable rows below
tolerance = 0.1

In [None]:
def _dates_within_tolerance(frame, column, tolerance):
    '''
    Return the 'date' values of the rows whose `column` lies within
    +/- tolerance (relative) of the last value of that column.

    The two bounds are ordered before comparing, so a negative latest value
    gives a valid band as well (the plain lower/upper formula inverts there
    and matches nothing).
    '''
    current = frame[column].iloc[-1]
    lower, upper = sorted((current * (1 - tolerance), current * (1 + tolerance)))
    in_band = (frame[column] >= lower) & (frame[column] <= upper)
    return frame[in_band]['date'].values


# NOTE(review): the matches are reported by statement 'date', not by the weekly
# 'real_date' of the matching row - confirm this is intended.
npm = _dates_within_tolerance(merged_nat_curr, 'net_profit_margin', tolerance)
roe = _dates_within_tolerance(merged_nat_curr, 'roe', tolerance)
per = _dates_within_tolerance(merged_nat_curr, 'pe_ratio', tolerance)
pbr = _dates_within_tolerance(merged_nat_curr, 'pb_ratio', tolerance)
psr = _dates_within_tolerance(merged_nat_curr, 'ps_ratio', tolerance)
pfcfr = _dates_within_tolerance(merged_nat_curr, 'pfcf_ratio', tolerance)

In [None]:
def list_intersect(input1, input2):
    '''
    Return a list of the elements of input1 that are also contained in input2.

    The order of input1 is kept, and so are its duplicates.
    '''
    result = []
    for candidate in input1:
        if candidate in input2:
            result.append(candidate)
    return result

In [None]:
# Narrow the matches step by step: keep only the dates present in every ratio's
# match list (net profit margin, ROE, P/E, P/B, P/S, P/FCF)
test3 =  list_intersect(npm, roe)
test4 =  list_intersect(test3, per)
test5 =  list_intersect(test4, pbr)
test6 =  list_intersect(test5, psr)
test7 =  list_intersect(test6, pfcfr)
# test7 keeps the duplicates of npm, so this counts entries, not distinct dates
print(len(test7), 'timestamps have been found!')

In [None]:
# test7 holds one entry per matching row, so the same date can occur many times;
# every date only needs to be drawn once
matching_dates = sorted(set(test7))

# One figure per column of the last 8 columns: the column (left axis) against the
# daily share price (right axis), with the matching dates and the latest value marked
for column in merged_nat_curr.columns[-8:]:
    fig, ax1 = plt.subplots(figsize=(15, 6))

    ax2 = ax1.twinx()
    # the label is the column name (previously the whole DataFrame was passed as label)
    ratio_line, = ax1.plot(merged_nat_curr['real_date'], merged_nat_curr[column], color='k', label=column)
    price_line, = ax2.plot(share_all_price.index, share_all_price['Close'], color='b', label=share_name)

    ax1.set_xlabel('Date')
    ax1.set_ylabel(column, color='k')
    ax2.set_ylabel('Share price (national currency)', color='b')

    for timestamp in matching_dates:
        ax2.axvline(timestamp, color='red', linestyle='dashed', linewidth=1)

    # latest value as a horizontal reference; it spans the plotted 'real_date' range
    # so that it lines up with the curve on the x axis
    ax1.hlines(
        y=merged_nat_curr[column].iloc[-1],
        xmin=merged_nat_curr['real_date'].min(),
        xmax=merged_nat_curr['real_date'].max(),
        linewidth=1, linestyles='dashed',
        color='green',
        )

    # plt.legend() only covers the twin axis; list both series explicitly
    ax2.legend(handles=[ratio_line, price_line])
    plt.show()