# Processing data for modelling

In [1]:
import pandas as pd
from tqdm import tqdm

from functions import print_shape, df_security_code

from feature_engineering import (fill_and_drop_na_values,
 adjust_price, 
 price_new_features, 
 encode_flag,
 fill_finances_knn,
 new_features_financial
 )

MISSINGVALUES = True

## import data

In [2]:
def data_selection(selection = True):
    if selection:
        prices = df_security_code(pd.read_csv('data/train_files/stock_prices.csv', parse_dates=[1]))
        financial = df_security_code(pd.read_csv('data/train_files/financials.csv',parse_dates=['Date']))
    else: 
        prices = pd.read_csv('data/train_files/stock_prices.csv', parse_dates=[1])
        financial = pd.read_csv('data/train_files/financials.csv',parse_dates=['Date'])
    return prices, financial

In [3]:
# helper function
def save_and_load(df):
    df_name = name =[x for x in globals() if globals()[x] is df][0]
    name = 'data/curr_' + df_name + '.csv'
    df.to_csv(name)
    del df
    return pd.read_csv(name, parse_dates=['Date'], index_col=[0] )

In [4]:
prices, financial = data_selection(False)

In [5]:
prices.head(2)

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.00073
1,20170104_1332,2017-01-04,1332,568.0,576.0,563.0,571.0,2798500,1.0,,False,0.012324


In [6]:
financial.head(2)

Unnamed: 0,DisclosureNumber,DateCode,Date,SecuritiesCode,DisclosedDate,DisclosedTime,DisclosedUnixTime,TypeOfDocument,CurrentPeriodEndDate,TypeOfCurrentPeriod,...,ForecastEarningsPerShare,ApplyingOfSpecificAccountingOfTheQuarterlyFinancialStatements,MaterialChangesInSubsidiaries,ChangesBasedOnRevisionsOfAccountingStandard,ChangesOtherThanOnesBasedOnRevisionsOfAccountingStandard,ChangesInAccountingEstimates,RetrospectiveRestatement,NumberOfIssuedAndOutstandingSharesAtTheEndOfFiscalYearIncludingTreasuryStock,NumberOfTreasuryStockAtTheEndOfFiscalYear,AverageNumberOfShares
0,20161210000000.0,20170104_2753,2017-01-04,2753.0,2017-01-04,07:30:00,1483483000.0,3QFinancialStatements_Consolidated_JP,2016-12-31,3Q,...,319.76,,False,True,False,False,False,6848800.0,－,6848800.0
1,20170100000000.0,20170104_3353,2017-01-04,3353.0,2017-01-04,15:00:00,1483510000.0,3QFinancialStatements_Consolidated_JP,2016-11-30,3Q,...,485.36,,False,True,False,False,False,2035000.0,118917,1916083.0


## - Stock price

In [7]:
# test with feature_engineering.py
#prices = pd.read_csv('data/train_files/stock_prices.csv', parse_dates=[1])
print_shape(prices, MISSINGVALUES)
#prices.head(2)

 Shape:
 ----------------------------------------
 Observations:   2.33M
 Features:       12
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['RowId', 'Date', 'SecuritiesCode', 'Open', 'High', 'Low', 'Close',
       'Volume', 'AdjustmentFactor', 'ExpectedDividend', 'SupervisionFlag',
       'Target'],
      dtype='object')
----------------------------------------


Unnamed: 0,N_missing,Percentage
ExpectedDividend,2313666.0,99.19
Open,7608.0,0.33
High,7608.0,0.33
Low,7608.0,0.33
Close,7608.0,0.33
Target,238.0,0.01
RowId,0.0,0.0
Date,0.0,0.0
SecuritiesCode,0.0,0.0
Volume,0.0,0.0


In [8]:

#from feature_engineering import fill_and_drop_na_values

# fill na
fill_prices = fill_and_drop_na_values(prices, drop=False)

# save and load

fill_prices = save_and_load(fill_prices)

#fill_prices.to_csv('data/curr_fill_prices.csv')
print_shape(fill_prices, MISSINGVALUES)
fill_prices.head(2)



100%|██████████| 2000/2000 [03:28<00:00,  9.61it/s]


 Shape:
 ----------------------------------------
 Observations:   2.33M
 Features:       12
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['RowId', 'Date', 'SecuritiesCode', 'Open', 'High', 'Low', 'Close',
       'Volume', 'AdjustmentFactor', 'ExpectedDividend', 'SupervisionFlag',
       'Target'],
      dtype='object')
----------------------------------------


Unnamed: 0,N_missing,Percentage
ExpectedDividend,127359.0,5.46
Open,264.0,0.01
High,264.0,0.01
Low,264.0,0.01
Close,264.0,0.01
Target,238.0,0.01
RowId,0.0,0.0
Date,0.0,0.0
SecuritiesCode,0.0,0.0
Volume,0.0,0.0


Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.00073
1865,20170105_1301,2017-01-05,1301,2743.0,2747.0,2735.0,2738.0,17900,1.0,,False,0.00292


In [9]:
#fill_prices = pd.read_csv('data/curr_fill_prices.csv', parse_dates=['Date'], index_col=[0])
#print_shape(fill_prices)

In [10]:



ad_price = adjust_price(fill_prices)
ad_price = save_and_load(ad_price)
print_shape(ad_price, MISSINGVALUES)
ad_price.head(2)
#del ad_price

100%|██████████| 2000/2000 [03:55<00:00,  8.48it/s]


 Shape:
 ----------------------------------------
 Observations:   2.33M
 Features:       13
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['RowId', 'Date', 'SecuritiesCode', 'AdjustmentFactor',
       'ExpectedDividend', 'SupervisionFlag', 'Target', 'ad_Open', 'ad_High',
       'ad_Low', 'ad_Close', 'ad_Volume', 'ad_Target'],
      dtype='object')
----------------------------------------


Unnamed: 0,N_missing,Percentage
ExpectedDividend,127359.0,5.46
ad_Target,4238.0,0.18
Target,238.0,0.01
ad_Open,264.0,0.01
ad_High,264.0,0.01
ad_Low,264.0,0.01
ad_Close,264.0,0.01
RowId,0.0,0.0
Date,0.0,0.0
SecuritiesCode,0.0,0.0


Unnamed: 0,RowId,Date,SecuritiesCode,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,ad_Open,ad_High,ad_Low,ad_Close,ad_Volume,ad_Target
1483,20170104_8194,2017-01-04,8194,1.0,,False,-0.002963,3325.0,3390.0,3300.0,3380.0,36100.0,-0.002963
3348,20170105_8194,2017-01-05,8194,1.0,,False,-0.020802,3395.0,3420.0,3360.0,3375.0,19400.0,-0.020802


In [11]:
#ad_price = pd.read_csv('data/curr_ad_price.csv', parse_dates=['Date'], index_col=[0])
#print_shape(ad_price)

In [12]:
#from feature_engineering import price_new_features, encode_flag

ad_price_feat = price_new_features(ad_price)

ad_price_feat['SupervisionFlag'] = encode_flag(ad_price_feat)
#ad_price_feat.to_csv('data/curr_ad_price_feat.csv')

ad_price_feat = save_and_load(ad_price_feat)

print_shape(ad_price_feat, MISSINGVALUES)


100%|██████████| 2000/2000 [1:37:54<00:00,  2.94s/it]  


 Shape:
 ----------------------------------------
 Observations:   2.33M
 Features:       36
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['RowId', 'Date', 'SecuritiesCode', 'AdjustmentFactor',
       'ExpectedDividend', 'SupervisionFlag', 'Target', 'ad_Open', 'ad_High',
       'ad_Low', 'ad_Close', 'ad_Volume', 'ad_Target', 'ad_Close_lag1',
       'ad_Close_sma10', 'ad_Open_lag1', 'ad_Open_sma10', 'ad_High_lag1',
       'ad_High_sma10', 'ad_Low_lag1', 'ad_Low_sma10', 'ad_Volume_lag1',
       'ad_Volume_sma10', 'RSI', 'Return', 'logreturn', 'Log_Return', 'MACD',
       'MACD_h', 'MACD_s', 'Year', 'week', 'Volatility_week', 'macd', 'macd_h',
       'macd_s'],
      dtype='object')
----------------------------------------


Unnamed: 0,N_missing,Percentage
ExpectedDividend,127359.0,5.46
RSI,28264.0,1.21
ad_High_sma10,18264.0,0.78
ad_Open_sma10,18264.0,0.78
ad_Close_sma10,18264.0,0.78
ad_Low_sma10,18264.0,0.78
ad_Volume_sma10,18000.0,0.77
ad_Target,4238.0,0.18
Return,2264.0,0.1
logreturn,2264.0,0.1


In [13]:
#ad_price_feat =  pd.read_csv('data/curr_ad_price_feat.csv', parse_dates=['Date'], index_col=[0])
#print_shape(ad_price_feat)

## - Financials

In [14]:
#from feature_engineering import fill_finances

#financial = financial = pd.read_csv('data/train_files/financials.csv',parse_dates=['Date'])
#financial.head(2)

In [15]:
print_shape(financial)

 Shape:
 ----------------------------------------
 Observations:   93K
 Features:       45
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['DisclosureNumber', 'DateCode', 'Date', 'SecuritiesCode',
       'DisclosedDate', 'DisclosedTime', 'DisclosedUnixTime', 'TypeOfDocument',
       'CurrentPeriodEndDate', 'TypeOfCurrentPeriod',
       'CurrentFiscalYearStartDate', 'CurrentFiscalYearEndDate', 'NetSales',
       'OperatingProfit', 'OrdinaryProfit', 'Profit', 'EarningsPerShare',
       'TotalAssets', 'Equity', 'EquityToAssetRatio', 'BookValuePerShare',
       'ResultDividendPerShare1stQuarter', 'ResultDividendPerShare2ndQuarter',
       'ResultDividendPerShare3rdQuarter',
       'ResultDividendPerShareFiscalYearEnd', 'ResultDividendPerShareAnnual',
       'ForecastDividendPerShare1stQuarter',
       'ForecastDividendPerShare2ndQuarter',
       'ForecastDividendPerShare3rdQuarter',
       'ForecastDividendPerShareFiscalYearEnd',
       'ForecastDividendPer

Unnamed: 0,N_missing,Percentage
ApplyingOfSpecificAccountingOfTheQuarterlyFinancialStatements,85707.0,92.2
ForecastDividendPerShare1stQuarter,73715.0,79.3
ResultDividendPerShareAnnual,73541.0,79.11
ResultDividendPerShareFiscalYearEnd,73540.0,79.11
BookValuePerShare,57183.0,61.52
ResultDividendPerShare3rdQuarter,55279.0,59.47
ForecastDividendPerShare2ndQuarter,50337.0,54.15
ResultDividendPerShare2ndQuarter,37016.0,39.82
ForecastDividendPerShare3rdQuarter,32149.0,34.59
MaterialChangesInSubsidiaries,28452.0,30.61


In [16]:
filled_financial = fill_finances_knn(financial, prices)

100%|██████████| 2000/2000 [00:05<00:00, 392.33it/s]


In [17]:
filled_financial.head()

Unnamed: 0,Date,Day,Month,Year,SecuritiesCode,Profit,NetSales
0,2017-02-10,10.0,2.0,2017.0,1301.0,2449000000.0,179975000000.0
1,2017-05-11,11.0,5.0,2017.0,1301.0,2422000000.0,236561000000.0
2,2017-08-04,4.0,8.0,2017.0,1301.0,754000000.0,56844000000.0
3,2017-11-06,6.0,11.0,2017.0,1301.0,1633000000.0,120458000000.0
4,2018-02-09,9.0,2.0,2018.0,1301.0,2784000000.0,198323000000.0


In [18]:
from feature_engineering import fill_finances_knn



filled_financial = fill_finances_knn(financial, prices)
#filled_financial.to_csv('data/curr_filled_finances.csv')

filled_financial = save_and_load(filled_financial)

print_shape(filled_financial, MISSINGVALUES)
filled_financial.head(2)
#del filled_finances

100%|██████████| 2000/2000 [00:05<00:00, 399.79it/s]


 Shape:
 ----------------------------------------
 Observations:   39.9K
 Features:       7
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['Date', 'Day', 'Month', 'Year', 'SecuritiesCode', 'Profit', 'NetSales'], dtype='object')
----------------------------------------


Unnamed: 0,N_missing,Percentage
Date,0.0,0.0
Day,0.0,0.0
Month,0.0,0.0
Year,0.0,0.0
SecuritiesCode,0.0,0.0
Profit,0.0,0.0
NetSales,0.0,0.0


Unnamed: 0,Date,Day,Month,Year,SecuritiesCode,Profit,NetSales
0,2017-02-10,10.0,2.0,2017.0,1301.0,2449000000.0,179975000000.0
1,2017-05-11,11.0,5.0,2017.0,1301.0,2422000000.0,236561000000.0


In [19]:
#filled_finances = pd.read_csv('data/curr_filled_finances.csv', parse_dates=['Date'], index_col=[0])
#print_shape(filled_finances)

In [20]:
from feature_engineering import new_features_financial

filled_financial_features = new_features_financial(filled_financial)

#filled_financial_features.to_csv('data/curr_filled_financial_features.csv')
filled_financial_features = save_and_load(filled_financial_features)
print_shape(filled_financial_features)
filled_financial_features.head()



100%|██████████| 2000/2000 [01:17<00:00, 25.80it/s]


 Shape:
 ----------------------------------------
 Observations:   39.9K
 Features:       18
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['Date', 'Day', 'Month', 'Year', 'SecuritiesCode', 'Profit', 'NetSales',
       'margin', 'profit_ttm', 'rev_ttm', 'win_quarter_growth',
       'rev_quarter_growth', 'win_yoy_growth', 'rev_yoy_growth',
       'win_ttm_growth', 'rev_ttm_growth', 'margin_growth', 'RowId'],
      dtype='object')
----------------------------------------


Unnamed: 0,N_missing,Percentage
rev_ttm_growth,8000.0,20.04
win_ttm_growth,8000.0,20.04
rev_yoy_growth,8000.0,20.04
win_yoy_growth,8000.0,20.04
rev_ttm,6000.0,15.03
profit_ttm,6000.0,15.03
win_quarter_growth,2000.0,5.01
margin_growth,2000.0,5.01
rev_quarter_growth,2000.0,5.01
Date,0.0,0.0


Unnamed: 0,Date,Day,Month,Year,SecuritiesCode,Profit,NetSales,margin,profit_ttm,rev_ttm,win_quarter_growth,rev_quarter_growth,win_yoy_growth,rev_yoy_growth,win_ttm_growth,rev_ttm_growth,margin_growth,RowId
0,2017-02-10,10.0,2.0,2017.0,1301,2449000000.0,179975000000.0,1.360745,,,,,,,,,,20170210_1301
1,2017-05-11,11.0,5.0,2017.0,1301,2422000000.0,236561000000.0,1.023837,,,-1.102491,31.441033,,,,,-24.75903,20170511_1301
2,2017-08-04,4.0,8.0,2017.0,1301,754000000.0,56844000000.0,1.326437,,,-68.868704,-75.97068,,,,,29.555461,20170804_1301
3,2017-11-06,6.0,11.0,2017.0,1301,1633000000.0,120458000000.0,1.355659,7258000000.0,593838000000.0,116.578249,111.909788,,,,,2.203042,20171106_1301
4,2018-02-09,9.0,2.0,2018.0,1301,2784000000.0,198323000000.0,1.403771,7593000000.0,612186000000.0,70.483772,64.640788,13.679053,10.194749,4.615597,3.089732,3.548929,20180209_1301


In [21]:
#filled_financial_features.SecuritiesCode = filled_financial_features.SecuritiesCode.astype(int)

In [22]:
filled_financial_features.head(2)

Unnamed: 0,Date,Day,Month,Year,SecuritiesCode,Profit,NetSales,margin,profit_ttm,rev_ttm,win_quarter_growth,rev_quarter_growth,win_yoy_growth,rev_yoy_growth,win_ttm_growth,rev_ttm_growth,margin_growth,RowId
0,2017-02-10,10.0,2.0,2017.0,1301,2449000000.0,179975000000.0,1.360745,,,,,,,,,,20170210_1301
1,2017-05-11,11.0,5.0,2017.0,1301,2422000000.0,236561000000.0,1.023837,,,-1.102491,31.441033,,,,,-24.75903,20170511_1301


In [23]:
ad_price_feat.head(2)

Unnamed: 0,RowId,Date,SecuritiesCode,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,ad_Open,ad_High,ad_Low,...,Log_Return,MACD,MACD_h,MACD_s,Year,week,Volatility_week,macd,macd_h,macd_s
1483,20170104_8194,2017-01-04,8194,1.0,,0,-0.002963,3325.0,3390.0,3300.0,...,,,,,2017.0,1.0,0.235112,,,
3348,20170105_8194,2017-01-05,8194,1.0,,0,-0.020802,3395.0,3420.0,3360.0,...,-0.00148,,,,2017.0,1.0,0.235112,,,


## - financials + price

In [24]:
# create key on financial : RowId
#filled_financial_features['RowId'] = filled_financial_features.Date.dt.strftime('%Y%m%d').astype(str) + '_' + filled_financial_features.SecuritiesCode.astype(str)

In [25]:
#filled_financial_features.sort_values('Date').head(20)

In [26]:
#filled_financial_features.RowId.info()

In [27]:
# concat financials and price
price_financial = pd.merge(ad_price_feat, filled_financial_features, how='left', on='RowId', suffixes=[None, '_f_'])
#price_financial = pd.concat([ad_price, filled_financial_features], keys='RowID')

price_financial = save_and_load(price_financial)
print_shape(price_financial)
price_financial.head()

 Shape:
 ----------------------------------------
 Observations:   2.33M
 Features:       53
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['RowId', 'Date', 'SecuritiesCode', 'AdjustmentFactor',
       'ExpectedDividend', 'SupervisionFlag', 'Target', 'ad_Open', 'ad_High',
       'ad_Low', 'ad_Close', 'ad_Volume', 'ad_Target', 'ad_Close_lag1',
       'ad_Close_sma10', 'ad_Open_lag1', 'ad_Open_sma10', 'ad_High_lag1',
       'ad_High_sma10', 'ad_Low_lag1', 'ad_Low_sma10', 'ad_Volume_lag1',
       'ad_Volume_sma10', 'RSI', 'Return', 'logreturn', 'Log_Return', 'MACD',
       'MACD_h', 'MACD_s', 'Year', 'week', 'Volatility_week', 'macd', 'macd_h',
       'macd_s', 'Date_f_', 'Day', 'Month', 'Year_f_', 'SecuritiesCode_f_',
       'Profit', 'NetSales', 'margin', 'profit_ttm', 'rev_ttm',
       'win_quarter_growth', 'rev_quarter_growth', 'win_yoy_growth',
       'rev_yoy_growth', 'win_ttm_growth', 'rev_ttm_growth', 'margin_growth'],
      dtype='object')
------

Unnamed: 0,N_missing,Percentage
rev_ttm_growth,2300945.0,98.63
win_ttm_growth,2300945.0,98.63
rev_yoy_growth,2300945.0,98.63
win_yoy_growth,2300945.0,98.63
rev_ttm,2298949.0,98.55
profit_ttm,2298949.0,98.55
margin_growth,2294957.0,98.38
rev_quarter_growth,2294957.0,98.38
win_quarter_growth,2294957.0,98.38
Profit,2292963.0,98.29


Unnamed: 0,RowId,Date,SecuritiesCode,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,ad_Open,ad_High,ad_Low,...,margin,profit_ttm,rev_ttm,win_quarter_growth,rev_quarter_growth,win_yoy_growth,rev_yoy_growth,win_ttm_growth,rev_ttm_growth,margin_growth
0,20170104_8194,2017-01-04,8194,1.0,,0,-0.002963,3325.0,3390.0,3300.0,...,,,,,,,,,,
1,20170105_8194,2017-01-05,8194,1.0,,0,-0.020802,3395.0,3420.0,3360.0,...,,,,,,,,,,
2,20170106_8194,2017-01-06,8194,1.0,,0,-0.00607,3365.0,3385.0,3335.0,...,,,,,,,,,,
3,20170110_8194,2017-01-10,8194,1.0,,0,-0.022901,3350.0,3360.0,3290.0,...,,,,,,,,,,
4,20170111_8194,2017-01-11,8194,1.0,,0,0.054688,3330.0,3330.0,3260.0,...,,,,,,,,,,


In [28]:
#price_financial_fill = ffill_pro_code(price_financial)
price_financial_fill = fill_and_drop_na_values(price_financial, drop=False)

price_financial_fill = save_and_load(price_financial_fill)

print_shape(price_financial_fill)
price_financial_fill.head(2)


100%|██████████| 2000/2000 [59:50<00:00,  1.80s/it]


 Shape:
 ----------------------------------------
 Observations:   2.33M
 Features:       53
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['RowId', 'Date', 'SecuritiesCode', 'AdjustmentFactor',
       'ExpectedDividend', 'SupervisionFlag', 'Target', 'ad_Open', 'ad_High',
       'ad_Low', 'ad_Close', 'ad_Volume', 'ad_Target', 'ad_Close_lag1',
       'ad_Close_sma10', 'ad_Open_lag1', 'ad_Open_sma10', 'ad_High_lag1',
       'ad_High_sma10', 'ad_Low_lag1', 'ad_Low_sma10', 'ad_Volume_lag1',
       'ad_Volume_sma10', 'RSI', 'Return', 'logreturn', 'Log_Return', 'MACD',
       'MACD_h', 'MACD_s', 'Year', 'week', 'Volatility_week', 'macd', 'macd_h',
       'macd_s', 'Date_f_', 'Day', 'Month', 'Year_f_', 'SecuritiesCode_f_',
       'Profit', 'NetSales', 'margin', 'profit_ttm', 'rev_ttm',
       'win_quarter_growth', 'rev_quarter_growth', 'win_yoy_growth',
       'rev_yoy_growth', 'win_ttm_growth', 'rev_ttm_growth', 'margin_growth'],
      dtype='object')
------

Unnamed: 0,N_missing,Percentage
rev_ttm_growth,524651.0,22.49
win_ttm_growth,524651.0,22.49
rev_yoy_growth,524651.0,22.49
win_yoy_growth,524651.0,22.49
rev_ttm,406417.0,17.42
profit_ttm,406417.0,17.42
margin_growth,168836.0,7.24
rev_quarter_growth,168836.0,7.24
win_quarter_growth,168836.0,7.24
ExpectedDividend,127369.0,5.46


Unnamed: 0,RowId,Date,SecuritiesCode,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,ad_Open,ad_High,ad_Low,...,margin,profit_ttm,rev_ttm,win_quarter_growth,rev_quarter_growth,win_yoy_growth,rev_yoy_growth,win_ttm_growth,rev_ttm_growth,margin_growth
0,20170104_8194,2017-01-04,8194,1.0,,0,-0.002963,3325.0,3390.0,3300.0,...,,,,,,,,,,
1,20170105_8194,2017-01-05,8194,1.0,,0,-0.020802,3395.0,3420.0,3360.0,...,,,,,,,,,,


In [29]:
# save dataframe to csv
#price_financial_fill.to_csv('data/curr_filled_financial_features.csv')

In [30]:
print_shape(price_financial_fill)

 Shape:
 ----------------------------------------
 Observations:   2.33M
 Features:       53
 Feature Date:    datetime64[ns]
----------------------------------------
Index(['RowId', 'Date', 'SecuritiesCode', 'AdjustmentFactor',
       'ExpectedDividend', 'SupervisionFlag', 'Target', 'ad_Open', 'ad_High',
       'ad_Low', 'ad_Close', 'ad_Volume', 'ad_Target', 'ad_Close_lag1',
       'ad_Close_sma10', 'ad_Open_lag1', 'ad_Open_sma10', 'ad_High_lag1',
       'ad_High_sma10', 'ad_Low_lag1', 'ad_Low_sma10', 'ad_Volume_lag1',
       'ad_Volume_sma10', 'RSI', 'Return', 'logreturn', 'Log_Return', 'MACD',
       'MACD_h', 'MACD_s', 'Year', 'week', 'Volatility_week', 'macd', 'macd_h',
       'macd_s', 'Date_f_', 'Day', 'Month', 'Year_f_', 'SecuritiesCode_f_',
       'Profit', 'NetSales', 'margin', 'profit_ttm', 'rev_ttm',
       'win_quarter_growth', 'rev_quarter_growth', 'win_yoy_growth',
       'rev_yoy_growth', 'win_ttm_growth', 'rev_ttm_growth', 'margin_growth'],
      dtype='object')
------

Unnamed: 0,N_missing,Percentage
rev_ttm_growth,524651.0,22.49
win_ttm_growth,524651.0,22.49
rev_yoy_growth,524651.0,22.49
win_yoy_growth,524651.0,22.49
rev_ttm,406417.0,17.42
profit_ttm,406417.0,17.42
margin_growth,168836.0,7.24
rev_quarter_growth,168836.0,7.24
win_quarter_growth,168836.0,7.24
ExpectedDividend,127369.0,5.46


final
