In [6]:
from typing import List, Tuple
import pandas as pd
import datetime
import requests
import os
import re
from pandas import read_csv

def get_last_price_from_bankier(stock_name: str, from_year: str) -> float:
    date_from = datetime.datetime.fromisoformat(f'{from_year}-12-01T00:00:00+00:00').strftime('%s')
    date_to = datetime.datetime.fromisoformat(f'{from_year}-12-31T23:59:59+00:00').strftime('%s')
    response = requests.get(f'https://www.bankier.pl/new-charts/get-data?date_from={date_from}000&date_to={date_to}000&symbol={stock_name}&intraday=false&type=area')
    return float(re.sub("[^0-9,]", "", response.json()['profileData']['valueAverage']).replace(',', '.'))

In [8]:
def get_data(stock_df: pd.DataFrame, start_row: int, stop_row= None , drop_indexes=[]) -> pd.DataFrame:
    if not stop_row:
        stop_row = start_row + 1

    rows = stock_df.iloc[start_row:stop_row, 2:]
    return rows.reset_index(drop=True).drop(drop_indexes).fillna(0)

In [9]:
def get_row(stock_df: pd.DataFrame) -> pd.DataFrame:
    assets = get_data(stock_df, 31,58,[12])
    print(assets)
    eq= get_data(stock_df, 61,92,[8,19])
    print(eq)
    return pd.concat([assets,eq], ignore_index=True).divide(get_data(stock_df,29).iloc[0], axis='columns')

In [10]:
def get_tere(stock_df: pd.DataFrame, name) -> pd.DataFrame:
    data = get_row(stock_df)
    current = data.drop(data.columns[0], axis='columns')
    current.columns = range(current.shape[1])
    previous = data.drop(data.columns[-1], axis='columns')
    previous.columns = range(previous.shape[1])
    merged = pd.concat([previous,current], ignore_index=True)
    years = get_data(stock_df,28)
    stocks = get_data(stock_df,18) * 1000
    prices = []
    for index, columnData in years.items():
        data = columnData[0]
        if isinstance(data, str):
            year = data[0:4]
            try:
                prices.append(get_last_price_from_bankier(name, year) * stocks[index][0])
            except Exception as error:
                print(f'Error occured in: {name}, year: {year}: {error}')
                prices.append(float("Nan"))
                continue
        else:
            prices.append(float("Nan"))
    price_change =(pd.DataFrame(prices[1:]).T - prices[:-1])/prices[1:]
    return merged.append(price_change).T.dropna()

In [193]:
all_data = []
for category in range(2, 9):
    xlsxs = list(filter(lambda x: not x.startswith('~') and not x.startswith('.'), os.listdir(f'./spolki/{category}')))
    xlsxs.sort()

    for xlsx in xlsxs:
        print(f'spolki/{category}/{xlsx}')
        stock_df = pd.read_excel(f'spolki/{category}/{xlsx}', 'YS', header=None)
        stock_name = xlsx.split('.xlsx')[0]
        all_data.append(get_tere(stock_df, stock_name))
pd.concat(all_data, ignore_index=True).to_csv('modeled_data.csv', sep=';', index=False)

spolki/2/BEDZIN.xlsx
Error occured in: BEDZIN, year: 1997: expected string or bytes-like object
spolki/2/ENEA.xlsx
Error occured in: ENEA, year: 2002: expected string or bytes-like object
Error occured in: ENEA, year: 2003: expected string or bytes-like object
Error occured in: ENEA, year: 2004: expected string or bytes-like object
Error occured in: ENEA, year: 2005: expected string or bytes-like object
Error occured in: ENEA, year: 2007: expected string or bytes-like object
spolki/2/ENERGA.xlsx
Error occured in: ENERGA, year: 2011: expected string or bytes-like object
Error occured in: ENERGA, year: 2012: expected string or bytes-like object
spolki/2/KOGENERA.xlsx
Error occured in: KOGENERA, year: 1997: expected string or bytes-like object
Error occured in: KOGENERA, year: 1998: expected string or bytes-like object
Error occured in: KOGENERA, year: 1999: expected string or bytes-like object
spolki/2/LOTOS.xlsx
Error occured in: LOTOS, year: 2001: expected string or bytes-like object
E

In [11]:
name = 'PETROLINV'
data = pd.read_excel(f'spolki/2/{name}.xlsx', 'YS', header=None)
print(get_row(data))

                                                   2    3    4    5    6   \
0                       Property, plant and equipment  0.0  0.0  0.0  0.0   
1   Exploration for and evaluation of mineral reso...  0.0  0.0  0.0  0.0   
2                                   Intangible assets  0.0  0.0  0.0  0.0   
3                                            Goodwill  0.0  0.0  0.0  0.0   
4                                 Investment property  0.0  0.0  0.0  0.0   
5                            Investment in affiliates  0.0  0.0  0.0  0.0   
6                        Non-current financial assets  0.0  0.0  0.0  0.0   
7                   Non-current loans and receivables  0.0  0.0  0.0  0.0   
8                                 Deferred income tax  0.0  0.0  0.0  0.0   
9           Non-current deferred charges and accruals  0.0  0.0  0.0  0.0   
10                 Non-current derivative instruments  0.0  0.0  0.0  0.0   
11                           Other non-current assets  0.0  0.0  0.0  0.0   

TypeError: unsupported operand type(s) for /: 'str' and 'str'