In [1]:
import torch
has_mps = torch.backends.mps.is_built()
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")


Using device: cuda


In [2]:
import numpy


In [17]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import random_split, DataLoader
from tqdm import tqdm
import os



In [4]:
from dataclasses import dataclass, field

@dataclass
class PreProcessed:
    train_index_start:int
    val_index_start:int
    test_index_start:int
    train_index_end:int
    val_index_end:int
    test_index_end:int
    train_set_length:int
    val_set_length:int
    test_set_length:int
    file:str


In [5]:
a = [1,2,3,4]
max(a)

4

In [6]:
df = pd.read_csv("/home/vavasthi/non-work/java-cli/outputs/BEL.csv", sep=',', index_col=False)
df[0:5][['High','Low']]

Unnamed: 0,High,Low
0,2.17,2.07
1,2.18,2.11
2,2.13,2.05
3,2.1,2.04
4,2.09,2.05


In [7]:
df1 = df[0:5][['High','Low']].rename(columns={'High':'High_{}'.format(1),'Low':'Low_{}'.format(1)}).reset_index(drop=True)
df2 = df[7:12][['High','Low']].rename(columns={'High':'High_7','Low':'Low_7'}).reset_index(drop=True)
df1, df2

(   High_1  Low_1
 0    2.17   2.07
 1    2.18   2.11
 2    2.13   2.05
 3    2.10   2.04
 4    2.09   2.05,
    High_7  Low_7
 0    2.06   2.00
 1    2.04   1.94
 2    2.00   1.96
 3    1.98   1.90
 4    2.01   1.93)

In [8]:
df1.join(df2)

Unnamed: 0,High_1,Low_1,High_7,Low_7
0,2.17,2.07,2.06,2.0
1,2.18,2.11,2.04,1.94
2,2.13,2.05,2.0,1.96
3,2.1,2.04,1.98,1.9
4,2.09,2.05,2.01,1.93


In [9]:
df[5:6]


Unnamed: 0,StockCode,Year,Month,Day,Open,Close,High,Low,AdjustedClose,Volume,...,cpiFuel,cpiVegetables,cpiGeneral,iipBasicGoods,iipCapitalGoods,iipConsumerDurables,iipElectricity,iipIntermediateGoods,iipGeneral,iipOtherManufacturing
5,500049,2002,6,8,2.09,2.05,2.09,2.04,1.36,10296792.0,...,0.0,0.0,0.0,159.100006,167.5,229.699997,166.199997,188.0,171.800003,166.300003


In [10]:
def convert_to_sequences(memory, days_prediction, data_sequence):
    x = []
    y = []
    for i in range(len(data_sequence) - memory - max(days_prediction)):
        window = data_sequence[i:i+memory]
        prediction = []
        for j in range(len(days_prediction)):
            after_days = days_prediction[j]
            prediction.append(data_sequence[i+memory + after_days - 1,[8,9]])
        after_window = np.hstack(prediction)
        x.append(window)
        y.append(after_window)
    return np.asarray(x), np.asarray(y)
    

In [13]:
print(df.columns)


Index(['StockCode', 'Year', 'Month', 'Day', 'Open', 'Close', 'High', 'Low',
       'AdjustedClose', 'Volume', 'Bonus', 'Dividend', 'EPS', 'Equity', 'PBT',
       'PAT', 'Tax', 'PromoterShares', 'NonPromoterShares', 'cpiOverall',
       'cpiHousing', 'cpiFuel', 'cpiVegetables', 'cpiGeneral', 'iipBasicGoods',
       'iipCapitalGoods', 'iipConsumerDurables', 'iipElectricity',
       'iipIntermediateGoods', 'iipGeneral', 'iipOtherManufacturing'],
      dtype='object')


In [27]:
def load_data(directory, cache_directory, memory, train_perc, val_perc, device, forecast_days = [1, 7, 15]):
    rv = []
    train_input_dataset = []
    train_output_dataset = []
    val_input_dataset = []
    val_output_dataset = []
    test_input_dataset = []
    test_output_dataset = []
    count = 1
    train_data_size = 0
    val_data_size = 0
    test_data_size = 0
    train_index_start = 0
    val_index_start = 0
    test_index_start = 0
    for f in tqdm(os.listdir(directory)):
        file = os.path.join(directory, f)
        df = pd.read_csv(file, sep=',', index_col=False)
        input,output = convert_to_sequences(memory, forecast_days, df.to_numpy())
        train_size = int(len(input) * train_perc);
        val_size = int(len(input) * val_perc);
        test_size = int(len(input) - train_size - val_size)
        train_input_dataset_single, val_input_dataset_single, test_input_dataset_single = random_split(input, [train_size, val_size, test_size])
        train_output_dataset_single, val_output_dataset_single, test_output_dataset_single = random_split(output, [train_size, val_size, test_size])
        print(len(train_input_dataset), len(train_input_dataset_single), f)
        train_input_dataset = np.vstack([train_input_dataset, train_input_dataset_single]) if (len(train_input_dataset) != 0) else train_input_dataset_single 
        train_output_dataset = np.vstack([train_output_dataset, train_output_dataset_single]) if (len(train_output_dataset) != 0) else train_output_dataset_single 
        val_input_dataset = np.vstack([val_input_dataset, val_input_dataset_single]) if (len(val_input_dataset) != 0) else val_input_dataset_single 
        val_output_dataset = np.vstack([val_output_dataset, val_output_dataset_single]) if (len(val_output_dataset) != 0) else val_output_dataset_single 
        test_input_dataset = np.vstack([test_input_dataset, test_input_dataset_single]) if (len(test_input_dataset) != 0) else test_input_dataset_single 
        test_output_dataset = np.vstack([test_output_dataset, test_output_dataset_single]) if (len(test_output_dataset) != 0) else test_output_dataset_single
        train_data_size = train_data_size + len(train_input_dataset)
        val_data_size = val_data_size + len(val_input_dataset)
        test_data_size = test_data_size + len(test_input_dataset)
        if (train_data_size > 20000):
            print("Writing file of size ", train_data_size)
            outfile = os.path.join(cache_directory, 'preprocessed-{}.npz'.format(count))
            np.savez_compressed(outfile, train_input=np.asarray(train_input_dataset), train_output=np.asarray(train_output_dataset), val_input=np.asarray(val_input_dataset), val_output=np.asarray(val_output_dataset), test_input=np.asarray(test_input_dataset), test_output=np.asarray(test_output_dataset)) 
            rv.append(PreProcessed(train_index_start, val_index_start, test_index_start, train_index_start + train_data_size - 1, val_index_start + val_data_size - 1, test_index_start + test_data_size - 1, train_data_size, val_data_size, test_data_size, outfile))
            train_index_start = train_index_start + train_data_size
            val_index_start = val_index_start + val_data_size
            test_index_start = test_index_start + test_data_size
            train_data_size = 0
            val_data_size = 0
            test_data_size = 0
            count = count + 1
            train_input_dataset = []
            train_output_dataset = []
            val_input_dataset = []
            val_output_dataset = []
            test_input_dataset = []
            test_output_dataset = []
    return rv

In [30]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(device)

data = load_data('/home/vavasthi/non-work/java-cli/outputs', '/data/datasets/cache', 50, 0.70, 0.15, device)
print(data)

cuda


  3%|████▏                                                                                                                                                       | 2/75 [00:00<00:06, 10.75it/s]

0 5182 EICHERMOT.csv
5182 3081 CENTRALBK.csv
8263 3810 MARUTI.csv
Writing file of size  25518


  7%|██████████▍                                                                                                                                                 | 5/75 [00:03<00:46,  1.49it/s]

0 3166 PFC.csv
3166 5183 FEDERALBNK.csv
8349 61 TCS.csv
8410 61 TECHM.csv
Writing file of size  28396


 12%|██████████████████▋                                                                                                                                         | 9/75 [00:05<00:33,  1.96it/s]

0 3763 UCOBANK.csv
3763 4575 NATIONALUM.csv
8338 5182 RELIANCE.csv
Writing file of size  25621


 16%|████████████████████████▊                                                                                                                                  | 12/75 [00:08<00:44,  1.41it/s]

0 6096 TATAMOTORS.csv
6096 3995 BEL.csv
10091 3908 CANBK.csv
Writing file of size  30186


 20%|███████████████████████████████                                                                                                                            | 15/75 [00:11<00:47,  1.26it/s]

0 3995 APOLLOHOSP.csv
3995 5184 HINDALCO.csv
9179 539 LICI.csv
Writing file of size  22892


 24%|█████████████████████████████████████▏                                                                                                                     | 18/75 [00:14<00:42,  1.33it/s]

0 4650 AXISBANK.csv
4650 5183 CIPLA.csv
9833 3995 GRASIM.csv
Writing file of size  28311


 28%|███████████████████████████████████████████▍                                                                                                               | 21/75 [00:18<00:45,  1.18it/s]

0 3670 MAHABANK.csv
3670 5184 WIPRO.csv
8854 3033 ADANIPORTS.csv
Writing file of size  24411


 33%|███████████████████████████████████████████████████▋                                                                                                       | 25/75 [00:21<00:31,  1.58it/s]

0 5182 TATACONSUM.csv
5182 1327 GICRE.csv
6509 1665 IDFCFIRSTB.csv
8174 5184 SAIL.csv
Writing file of size  33223


 39%|███████████████████████████████████████████████████████████▉                                                                                               | 29/75 [00:24<00:30,  1.52it/s]

0 1318 NIACL.csv
1318 1253 BANDHANBNK.csv
2571 3995 BANKBARODA.csv
6566 3679 PETRONET.csv
Writing file of size  20700


 43%|██████████████████████████████████████████████████████████████████▏                                                                                        | 32/75 [00:27<00:31,  1.37it/s]

0 3059 POWERGRID.csv
3059 3996 PNB.csv
7055 840 MAXHEALTH.csv
7895 3995 ICICIBANK.csv
Writing file of size  29899


 48%|██████████████████████████████████████████████████████████████████████████▍                                                                                | 36/75 [00:30<00:27,  1.40it/s]

0 3995 BAJFINANCE.csv
3995 3996 INDUSINDBK.csv
7991 5129 IOC.csv
Writing file of size  25106


 52%|████████████████████████████████████████████████████████████████████████████████▌                                                                          | 39/75 [00:34<00:28,  1.29it/s]

0 3567 NTPC.csv
3567 2527 COALINDIA.csv
6094 61 BPCL.csv
6155 1315 HDFCLIFE.csv
Writing file of size  23286


 57%|████████████████████████████████████████████████████████████████████████████████████████▊                                                                  | 43/75 [00:36<00:19,  1.67it/s]

0 5184 SUNPHARMA.csv
5184 3164 INDIANB.csv
8348 61 TATASTEEL.csv
Writing file of size  21941


 61%|███████████████████████████████████████████████████████████████████████████████████████████████                                                            | 46/75 [00:38<00:17,  1.67it/s]

0 4179 KOTAKBANK.csv
4179 5182 SBIN.csv
9361 316 JIOFIN.csv
Writing file of size  23217


 65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                     | 49/75 [00:40<00:16,  1.60it/s]

0 5184 TITAN.csv
5184 3997 SHRIRAMFIN.csv
9181 4953 GAIL.csv
Writing file of size  28499


 69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                               | 52/75 [00:44<00:17,  1.31it/s]

0 5184 HDFCBANK.csv
5184 3976 NESTLEIND.csv
9160 3995 ASIANPAINT.csv
Writing file of size  27499


 75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                       | 56/75 [00:47<00:12,  1.51it/s]

0 1663 INDIGO.csv
1663 61 HINDUNILVR.csv
1724 3954 UNIONBANK.csv
5678 5182 M&M.csv


 76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                     | 57/75 [00:47<00:10,  1.78it/s]

10860 3841 JSWSTEEL.csv
Writing file of size  34626


 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                               | 60/75 [00:51<00:11,  1.26it/s]

0 5184 INFY.csv
5184 5182 ONGC.csv
10366 3973 BAJAJFINSV.csv
Writing file of size  29889


 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                        | 63/75 [00:54<00:10,  1.13it/s]

0 3995 TRENT.csv
3995 3974 ULTRACEMCO.csv
7969 1337 SBILIFE.csv
Writing file of size  21270


 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                  | 66/75 [00:57<00:07,  1.25it/s]

0 5181 HINDPETRO.csv
5181 3995 LICHSGFIN.csv
9176 3995 BHARTIARTL.csv
Writing file of size  27528


 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌            | 69/75 [01:01<00:05,  1.19it/s]

0 61 RECLTD.csv
61 5182 DRREDDY.csv
5243 3996 ADANIENT.csv
9239 3997 LT.csv
Writing file of size  27779


 97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊    | 73/75 [01:04<00:01,  1.27it/s]

0 5182 ITC.csv
5182 3997 BAJAJ-AUTO.csv
9179 3977 HCLTECH.csv
Writing file of size  27517


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [01:07<00:00,  1.11it/s]

0 3995 BANKINDIA.csv
[PreProcessed(train_index_start=0, val_index_start=0, test_index_start=0, train_index_end=25517, val_index_end=5465, test_index_end=5472, train_set_length=25518, val_set_length=5466, test_set_length=5473, file='/data/datasets/cache/preprocessed-1.npz'), PreProcessed(train_index_start=25518, val_index_start=5466, test_index_start=5473, train_index_end=53913, val_index_end=11546, test_index_end=11570, train_set_length=28396, val_set_length=6081, test_set_length=6098, file='/data/datasets/cache/preprocessed-2.npz'), PreProcessed(train_index_start=53914, val_index_start=11547, test_index_start=11571, train_index_end=79534, val_index_end=17034, test_index_end=17064, train_set_length=25621, val_set_length=5488, test_set_length=5494, file='/data/datasets/cache/preprocessed-3.npz'), PreProcessed(train_index_start=79535, val_index_start=17035, test_index_start=17065, train_index_end=109720, val_index_end=23501, test_index_end=23537, train_set_length=30186, val_set_length=64




In [200]:
arr = np.load(os.path.join('/data/datasets/cache', 'preprocessed_data.npz'))['train_input']
print(arr.shape)

(303620, 50, 31)


In [166]:
a = 4
i = 0 if a else 45
print(i)

0


In [149]:
import os
directory = "/data/datasets/qrjson"
for f in os.listdir(directory):
    file = os.path.join(directory, f)

/data/datasets/qrjson/POWERGRID.json
/data/datasets/qrjson/ADANIENT.json
/data/datasets/qrjson/PETRONET.json
/data/datasets/qrjson/LICI.json
/data/datasets/qrjson/BEL.json
/data/datasets/qrjson/NIACL.json
/data/datasets/qrjson/MAHABANK.json
/data/datasets/qrjson/HINDPETRO.json
/data/datasets/qrjson/ASIANPAINT.json
/data/datasets/qrjson/EICHERMOT.json
/data/datasets/qrjson/LT.json
/data/datasets/qrjson/ONGC.json
/data/datasets/qrjson/HDFCLIFE.json
/data/datasets/qrjson/UNIONBANK.json
/data/datasets/qrjson/BANKBARODA.json
/data/datasets/qrjson/TATAMOTORS.json
/data/datasets/qrjson/TATACONSUM.json
/data/datasets/qrjson/DRREDDY.json
/data/datasets/qrjson/BPCL.json
/data/datasets/qrjson/SBILIFE.json
/data/datasets/qrjson/HCLTECH.json
/data/datasets/qrjson/BAJAJ-AUTO.json
/data/datasets/qrjson/JIOFIN.json
/data/datasets/qrjson/WIPRO.json
/data/datasets/qrjson/TATASTEEL.json
/data/datasets/qrjson/ITC.json
/data/datasets/qrjson/BANDHANBNK.json
/data/datasets/qrjson/INDUSINDBK.json
/data/datase