## Exploring the Dataset 

In [2]:
PLOTS = False
trade_plots = False 

import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
x_train_path = "..\Data\X_train.csv"
y_train_path = "..\Data\y_train.csv"

In [5]:
x_train = pd.read_csv(x_train_path)
y_train = pd.read_csv(y_train_path)
y_train.head()

Unnamed: 0,obs_id,eqt_code_cat
0,0,10
1,1,15
2,2,0
3,3,13
4,4,0


Data = 504 days × 24 stocks × 20 observations/day × 100 events/observation  

Here is a description of each column in the dataset. <br>

| Column | Description |
| ------ | ------------ |
| **Obs_id** | which observation are we taking into account <br>-> for that observation we will keep track of the next 100 operations in the book orders |
| **Venue_id** | for a given stock, exchanges can happen across many venues :  this id tracks which venue we consider <br> ==> it could be of importance (some stocks are typically traded across many venues ?) |
| **order_id** | for a given observation sequence, each operation is related to an order. An order can be added, updated, deleted. <br>The order_id allows to track the lifecycle of individual orders within a sequence.   |
| **action** |  A (adding an order to the book) , D (Deleting an order from the book), U = updating an action from the book |
| **side** | B (bids, values to buy the action) , A (Ask, values to sell the action) 
| **Price** | - price : price of the order that was affected. *This best_bid_price , at the time of the first event, is substracted from all price reated columns (price, bid, ask  ) |
| **bid , ask** |- bid , ask == best bid (highest bid) /best ask (lowest ask)   |
| **bid_size, ask_size** |  volume of orders at the best bid, respectively ask, price  , on the *aggregated book* <br> => this too could be a valuable information, perhaps some stocks are encoutering more volume than others.  |
|**flux** | the change in volume at a specific price level in the order book due to a particular event |
|**Trade**|A boolean true or false to indicate whether a deletion or update event was due to a trade or due to a cancellation. <br> Most Deletions and updates actually dont occur from Trades

### Example: For a given Observation

| `order_id` | `action` | `price` | `side` | **Description**                                          |
|------------|----------|---------|-------|----------------------------------------------------------|
| 0          | A        | 100.5   | B     | A new order (ID 0) is added at 100.5 on the bid side.    |
| 1          | A        | 101.0   | A     | A new order (ID 1) is added at 101.0 on the ask side.    |
| 0          | U        | 100.5   | B     | The order with ID 0 is updated (e.g., quantity changed). |
| 1          | D        | 101.0   | A     | The order with ID 1 is deleted (removed from the book).  |


# Explore trade info intuition 

In [None]:
if trade_plots:
        
    # Filter actions that are either 'D' or 'U'
    du_actions = df[(df['action'].isin(['D', 'U']))]

    # Count actions where 'trade' is True among 'D' or 'U'
    du_trades = du_actions[du_actions['trade'] == True]

    # Calculate the percentage
    percentage = (len(du_trades) / len(du_actions)) * 100

    # Display the result
    print(f"Percentage of 'D' or 'U' actions coming from trades: {percentage:.2f}%")

    # Merge the main DataFrame (df) with y_train using obs_id
    df = df.merge(y_train, on='obs_id', how='left')  # Assuming y_train has columns ['obs_id', 'stock']

    # Define a function to calculate the percentage for each observation
    def calculate_percentage(sub_df):
        is_du = sub_df['action'].isin(['D', 'U'])
        is_du_trade = is_du & (sub_df['trade'] == True)
        return (is_du_trade.sum() / is_du.sum()) * 100 if is_du.sum() > 0 else 0

    # Group by obs_id and calculate percentage
    df_obs = df.groupby('obs_id').apply(calculate_percentage).reset_index(name='percentage')

    df_obs = df_obs.merge(y_train, on='obs_id', how='left')

    # Group by stock and calculate statistics
    stock_stats = df_obs.groupby('eqt_code_cat')['percentage'].agg(['mean', 'std', 'min', 'max'])

    plt.figure(figsize=(12, 8))
    sns.boxplot(x='eqt_code_cat', y='percentage', data=df_obs)
    plt.title('Distribution of Percentages by Stock')
    plt.xlabel('Stock')
    plt.ylabel('Percentage of D/U Actions from Trades')
    plt.xticks(rotation=90)
    plt.show()


        
    # Cap percentages at 6%
    df_obs_capped = df_obs[df_obs['percentage'] <= 6]

    # Create subplots: One histogram per stock
    stocks = df_obs_capped['eqt_code_cat'].unique()  # Get unique stocks
    num_stocks = len(stocks)

    # Define the number of rows and columns for subplots
    ncols = 4  # Number of columns
    nrows = (num_stocks + ncols - 1) // ncols  # Calculate rows based on number of stocks

    # Create the figure
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, nrows * 3))
    axes = axes.flatten()  # Flatten axes for easier indexing

    # Plot each stock
    for i, stock in enumerate(stocks):
        # Filter the data for the current stock
        stock_data = df_obs_capped[df_obs_capped['eqt_code_cat'] == stock]
        
        # Plot the histogram for the stock
        sns.histplot(
            stock_data,
            x='percentage',
            bins=30,
            ax=axes[i],
            element='step',
            stat='percent'  # Show percentages instead of counts
        )
        axes[i].set_title(f'Stock {stock}', fontsize=12)
        axes[i].set_xlim(0, 6)  # Cap the percentage at 6
        axes[i].set_ylim(0, 100)  # Cap the y-axis at 100%
        axes[i].set_xlabel('Percentage of D/U Actions', fontsize=10)
        axes[i].set_ylabel('Percentage (%)', fontsize=10)

    # Remove unused subplots
    for j in range(i + 1, len(axes)):
        fig.delaxes(axes[j])

    plt.tight_layout()
    plt.show()



        



There seems to be three modes : 0% of trades , 2% of trades, 4% of trades 

# Some ideas after the initial exploration

For a given observation, what can help determine the stock ?    
we could use visualisation (for a given stock : average volatility observed , average number of increase of orders, average number of decrease of oders etc simple metrics as such)  
  
To go more in depth : we must use embeddings of our data, think of interesting traits, use correlations, try and reduce the dimensionality.  
--> ideas seem endless we could train an embedding matrix to predict the venue idk 

Combien d'actions d'affilée ? volatilité du prix sur les 100 actions ? prix max et min enregistrés ? % de trade ? 

# Visualisations supplémentaires

volatilité : affichons la distribution de prix des variations de prix pour chacune des 24 actions

In [69]:
x_train = pd.read_csv(x_train_path)
y_train = pd.read_csv(y_train_path)
y_train.head()

Unnamed: 0,obs_id,eqt_code_cat
0,0,10
1,1,15
2,2,0
3,3,13
4,4,0


In [None]:
def distrib_variations(data, stock ):
    # for a given sequence, we keep : lowest , highest (price)

    #une sequence est définie 

# Reproducing the Benchmark

The benchmark for the challenge is the following architecture :  

Preprocess:  
converting each event into a 30-dimensionnal vector.  
group each 100-event-observations into a single "observation" vector, size 100x30
  
Architecture:  
bidirectionnal GRU network, with 64 hidden units.  Producing a single 128 dimensional vector per "observation vector" .  
Many to one architecture :converts the "observation vector" (of 100 individual events) into a single embedding of size 124.  
Then two dense layers 124 -> 64 with SeLU activation, 64 -> 24 with softmax activation  
  
Training :  
Cross entropy Loss  
batch size : 1000 "obervation vectors"  (dim : 1000x100x30)  
optimizer : Base ADAM with lr = 10e-3

In [6]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras import backend as K
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Dense, Activation, Embedding, Dropout, Input, LSTM, Reshape, Lambda, RepeatVector

## Pre processing of the data 


Exact pre process structure isnt described, so I will do what sounds relevant. 

Here is a description of each column in the dataset. <br>

| Column | Description |
| ------ | ------------ |
| **Obs_id** | which observation are we taking into account <br>-> for that observation we will keep track of the next 100 operations in the book orders |
| **Venue_id** | for a given stock, exchanges can happen across many venues :  this id tracks which venue we consider <br> ==> it could be of importance (some stocks are typically traded across many venues ?) |
| **order_id** | for a given observation sequence, each operation is related to an order. An order can be added, updated, deleted. <br>The order_id allows to track the lifecycle of individual orders within a sequence.   |
| **action** |  A (adding an order to the book) , D (Deleting an order from the book), U = updating an action from the book |
| **side** | B (bids, values to buy the action) , A (Ask, values to sell the action) 
| **Price** | - price : price of the order that was affected. *This best_bid_price , at the time of the first event, is substracted from all price reated columns (price, bid, ask  ) |
| **bid , ask** |- bid , ask == best bid (highest bid) /best ask (lowest ask)   |
| **bid_size, ask_size** |  volume of orders at the best bid, respectively ask, price  , on the *aggregated book* <br> => this too could be a valuable information, perhaps some stocks are encoutering more volume than others.  |
|**flux** | the change in volume at a specific price level in the order book due to a particular event |
|**Trade**|A boolean true or false to indicate whether a deletion or update event was due to a trade or due to a cancellation. <br> Most Deletions and updates actually dont occur from Trades

In [None]:
#venue => one hot encode it 
#action => one hot encode it 
#side : => one hot encode it 
#price,bid,ask,bid_size,ask_size,flux : no transfo
#trade : one hot encode it 

#Justifications ? => none, just exploring 


## Preprocess data

In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import h5py  # For saving large arrays in memory-efficient HDF5 format
from tqdm import tqdm 

In [9]:
def encode_df(df_to_encode):
    categorical_columns = ['venue','action','side','trade']
    df_pandas_encoded = pd.get_dummies(df_to_encode,columns=categorical_columns,drop_first=False,dtype=int)

    return df_pandas_encoded

In [10]:
def transform_df(df):
    #we want to drop obs_id, order_id 
    df = df.drop(['order_id','obs_id'],axis=1) #dropping obs id because they did so in the benchmark
    return df

In [60]:
def correct_df(
    df, 
    column_names=[
        'price', 'bid', 'ask', 'bid_size', 'ask_size', 'flux', 
        'venue_0', 'venue_1', 'venue_2', 'venue_3', 'venue_4', 'venue_5', 
        'action_A', 'action_D', 'action_U', 
        'side_A', 'side_B', 
        'trade_False', 'trade_True'
    ]
):
    """
    Ensures the DataFrame has columns in a specified order, adding missing columns with zeros.
    
    Parameters:
        df (pd.DataFrame): The DataFrame to correct.
        column_names (list): List of column names in the desired order (default provided).
    
    Returns:
        pd.DataFrame: The corrected DataFrame.
    """
    # Add missing columns with zeros
    for column in column_names:
        if column not in df.columns:
            df[column] = 0
    
    # Reorder columns to match the specified order
    df = df[column_names]
    
    return df


In [11]:
# df_test = df.head(int(10e3))
# df_test = encode_df(df_test)
# df_test = transform_df(df_test)
# df_test
# df_test

In [12]:
def create_lstm_data(data, k):
    '''
    input:
        data - the pandas object of (n_observations x 100 , p) shape, where n is the number of rows,
               p is the number of predictors
        k    - the length of the sequences, namely, the number of previous rows 
               (including current) we want to use to predict the target.
    output:
        X_data - the predictors numpy matrix of (n-k, k, p) shape
    '''


    # initialize zero matrix of (n-k, k, p) shape to store the n-k number
    # of sequences of k-length and zero array of (n-k, 1) to store targets
    X_data = np.zeros((data.shape[0]//k, k, data.shape[1]))
    
    # run loop to slice k-number of previous rows as 1 sequence to predict
    # 1 target and save them to X_data matrix and y_data list
    for i in range(data.shape[0]//k):
        cur_sequence = data.iloc[k*i: k*(i+1), :]
                
        X_data[i,:,:] = cur_sequence
    
    return X_data

In [13]:
y_train = np.asarray(pd.read_csv(y_train_path).drop('obs_id',axis=1))

In [14]:
#installing a library to handle out-of-memeory packages https://stackoverflow.com/questions/30376581/save-numpy-array-in-append-mode/64403144#64403144

In [15]:
# pip install npy-append-array

In [16]:
from npy_append_array import NpyAppendArray

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

def process_data_chunked(x_train_path, y_train_path, output_prefix, chunk_size=10_000, seq_len=100):
    """
    Process data chunk by chunk and save the results incrementally.
    
    Args:
    - x_train_path: Path to the X_train CSV file.
    - y_train_path: Path to the y_train CSV file.
    - output_prefix: Prefix for the output files.
    - chunk_size: Number of rows to process in each chunk.
    - seq_len: Length of each sequence for LSTM.
    """
    # Read y_train (the target file) entirely as it's small and doesn't need chunking
    y_train_full = pd.read_csv(y_train_path).drop('obs_id', axis=1)

    # Use tqdm to show progress
    total_rows = sum(1 for _ in open(x_train_path)) - 1  # Get total rows excluding header
    num_chunks = (total_rows + chunk_size - 1) // chunk_size  # Calculate total chunks

    X_train_npy_name = f"..\Data\{output_prefix}_X_train.npy"
    y_train_npy_name = f"..\Data\{output_prefix}_y_train.npy"


    # Process the X_train file in chunks
    with NpyAppendArray(X_train_npy_name, delete_if_exists=True) as npaa:
        for i, chunk in enumerate(tqdm(pd.read_csv(x_train_path, chunksize=chunk_size), desc="Processing Chunks", total=num_chunks)):
            # Apply the transformation functions
            chunk = correct_df(transform_df(encode_df(chunk)))

            # Create LSTM-compatible data for this chunk
            X_data = create_lstm_data(chunk, seq_len)

            npaa.append(X_data)

            # Clear memory for the current chunk
            del X_data, chunk


    np.save(y_train_npy_name,y_train_full)
    print(f"Processing completed. X_train and y_train saved as {output_prefix}_X_train.npy and {output_prefix}_y_train.npy.")


In [30]:
date = '04-12'
process_data_chunked(x_train_path, y_train_path, date, chunk_size=10_000, seq_len=100)


Processing Chunks: 100%|██████████| 1608/1608 [01:02<00:00, 25.88it/s]

Processing completed. X_train and y_train saved as 04-12_X_train.npy and 04-12_y_train.npy.





In [44]:
x_path_npy = fr'..\Data\{date}_X_train.npy'
y_path_npy = fr'..\Data\{date}_y_train.npy'
# Load the .npy file
data = np.load(x_path_npy)

# Display the data (e.g., shape, a sample of the contents)
print("Data Shape:", data.shape)

Data Shape: (160800, 100, 19)


In [47]:
data[0].shape

(100, 19)

array([3.e-01, 0.e+00, 1.e-02, 1.e+02, 1.e+00, 1.e+02, 0.e+00, 0.e+00,
       0.e+00, 0.e+00, 1.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 1.e+00,
       0.e+00, 1.e+00, 0.e+00])

In [64]:
x_train.iloc[:100,:]['venue'].unique()

array([4, 1, 5, 2, 0, 3], dtype=int64)

In [61]:
x_0_0 = correct_df(transform_df(encode_df(x_train.iloc[:10,:])))
x_0_0.head()

Unnamed: 0,price,bid,ask,bid_size,ask_size,flux,venue_0,venue_1,venue_2,venue_3,venue_4,venue_5,action_A,action_D,action_U,side_A,side_B,trade_False,trade_True
0,0.3,0.0,0.01,100,1,100,0,0,0,0,1,0,1,0,0,1,0,1,0
1,-0.17,0.0,0.01,100,1,100,0,0,0,0,1,0,1,0,0,0,1,1,0
2,0.28,0.0,0.01,100,1,-100,0,0,0,0,1,0,0,1,0,1,0,1,0
3,0.3,0.0,0.01,100,1,100,0,0,0,0,1,0,1,0,0,1,0,1,0
4,0.37,0.0,0.01,100,1,-100,0,0,0,0,1,0,0,1,0,1,0,1,0


In [63]:
print(data[0][0])

[3.e-01 0.e+00 1.e-02 1.e+02 1.e+00 1.e+02 0.e+00 0.e+00 0.e+00 0.e+00
 1.e+00 0.e+00 1.e+00 0.e+00 0.e+00 1.e+00 0.e+00 1.e+00 0.e+00]


In [20]:
# y_path = r"D:\Desktop\Coding-Projects\Prediction-Challenge\Notebooks\27-11_y_train.npy"
# # Load the .npy file
# y = np.load(y_path)

# # Display the data (e.g., shape, a sample of the contents)
# print("Data Shape:", y.shape)


## Data generator

In [None]:
## for each sequence, We want to generate some value that seem interesting regarding the full sequence, such features could be generated by the LSTM but the search space is so big that we implement them by hannd

def genrate_additional_features(sequence: np.array)->np.array :
    '''
    given a sequence of 100x19 features "sequence" 
    returns a np array "features" with relevant features '''

In [None]:
import numpy as np
import keras

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs,x_path_npy,y_path_npy, batch_size=10050, dim=(100,19),
                 n_classes=24, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()
        self.x_path = x_path_npy
        self.y_path = y_path_npy

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim)
        # Initialization
        X = np.empty((self.batch_size, *self.dim))
        x_additional = np.empty((self.batch_size))
        y = np.empty((self.batch_size))

        # Generate data
        X_full = np.load(self.x_path, mmap_mode="r")
        y_full = np.load(self.y_path,mmap_mode="r")

        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            X[i,] = X_full[ID]

            # Store class
            y[i] = y_full[ID].astype(int)[0]
        
        del X_full
        del y_full


        

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

In [37]:
# Parameters
params = {'dim': (100,19),
          'batch_size': 10050,
          'n_classes': 24,
          'shuffle': True}

# Datasets
full_ids = np.arange(160800)

# Shuffle the IDs to ensure randomness
np.random.shuffle(full_ids)

# Compute the split index for 80/20
split_index = int(len(full_ids) * 13/16) #13/16 test, 3/16 val

# Split the IDs
train_ids = full_ids[:split_index]
val_ids = full_ids[split_index:]

x_path_npy = fr'..\Data\{date}_X_train.npy'
y_path_npy = fr'..\Data\{date}_y_train.npy'

# Generators
training_generator = DataGenerator(train_ids,x_path_npy,y_path_npy, **params)
val_generator = DataGenerator(val_ids,x_path_npy,y_path_npy, **params)


## Model

Now we have the data in a satisfactory format.  
each row of our X_train is made of 100 event, each of these events is represented in a 18 dimension space.  
and for each row of our train set, we have a single target value : in y  
Let's now create a similar architecture as the benchmark  

In [38]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Bidirectional,LSTM

In [39]:
# Input for fixed-length (length = 100) sequences of event observation (dimension = 19)
inputs = keras.Input(shape=(100,19))

# Add 2 bidirectional LSTMs
x = Bidirectional(LSTM(64))(inputs)
x = Dense(64)(x)

# Add a classifier
outputs = Dense(24, activation="softmax")(x)
model_2 = keras.Model(inputs, outputs)
model_2.summary()

model_2.compile(optimizer=keras.optimizers.Adam(learning_rate=3e-3), loss="categorical_crossentropy", metrics=["accuracy"])


Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 100, 19)]         0         
                                                                 
 bidirectional_2 (Bidirectio  (None, 128)              43008     
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dense_5 (Dense)             (None, 24)                1560      
                                                                 
Total params: 52,824
Trainable params: 52,824
Non-trainable params: 0
_________________________________________________________________


### Callbacks

In [40]:
# Import necessary libraries for callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard, CSVLogger

# Define callbacks to enhance and monitor the training process
callbacks = [
    # 1. ModelCheckpoint:
    # Saves the model to a file ('best_model.h5') whenever the validation loss ('val_loss') improves.
    # Ensures that only the best version of the model (with the lowest validation loss) is saved.
    ModelCheckpoint(
        filepath='best_model.h5',   # Filepath to save the model
        monitor='val_loss',        # Metric to monitor
        save_best_only=True,       # Save only the best model
        mode='min'                 # Minimize the 'val_loss'
    ),
    
    # 2. EarlyStopping:
    # Stops training if the validation loss does not improve for 'patience' epochs (5 in this case).
    # Prevents overfitting and saves time by stopping early when progress stalls.
    EarlyStopping(
        monitor='val_loss',        # Metric to monitor
        patience=5,                # Number of epochs to wait without improvement
        mode='min',                # Minimize the 'val_loss'
        restore_best_weights=True  # Restore the model weights from the best epoch
    ),
    
    # 3. ReduceLROnPlateau:
    # Reduces the learning rate when the validation loss plateaus (does not improve for 3 epochs here).
    # Helps the model converge better by lowering the learning rate when progress slows down.
    ReduceLROnPlateau(
        monitor='val_loss',        # Metric to monitor
        factor=0.2,                # Factor by which to reduce the learning rate
        patience=3,                # Number of epochs to wait before reducing the learning rate
        min_lr=1e-6                # Minimum learning rate to avoid reducing it too much
    ),
    
    # 4. TensorBoard:
    # Logs training metrics, such as loss and accuracy, for visualization using TensorBoard.
    # Also logs histograms and the computational graph of the model.
    TensorBoard(
        log_dir='./logs',          # Directory to save TensorBoard logs
        histogram_freq=1,          # Log histograms of weights after every epoch
        write_graph=True,          # Save the computation graph
        write_images=True          # Save visualizations of weights and biases
    ),
    
    # 5. CSVLogger:
    # Logs training and validation metrics to a CSV file ('training_log.csv').
    # Useful for tracking metrics over time and for external analysis.
    CSVLogger(
        filename='training_log.csv',  # Path to save the log file
        append=True                  # Append to existing file if it exists
    )
]

In [41]:
# Train model on dataset
model_2.fit(
    x=training_generator,
    validation_data=val_generator,
    epochs=2,  # Specify the number of epochs as needed
    callbacks = callbacks
)   

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x21b817478b0>

## Prediction with our trained model 

In [42]:
from random import  randint
sequence = [randint(0,20000) for j in range(100)]

#loading a sequence 

for i in sequence:
        
    A = np.load(x_path_npy,mmap_mode='r')[i] #this is a 100x19 sequence
    A = np.expand_dims(A, axis=0)  # Adding batch dimension
    A.shape
    label = np.load(y_path_npy,mmap_mode='r')[i]

    predicted = model_2.predict(A)

    top_3_indices = predicted[0].argsort()[-3:][::-1]

    # Get the top 3 values
    top_3_values = predicted[0][top_3_indices]

    print("Top 3 predicted labels :", top_3_indices)
    print("with following Top 3 values:", top_3_values)

    #print(f"Label predicted", predicted[0].argmax())
    print(f"True Label",label[0])

Top 3 predicted labels : [16  0 22]
with following Top 3 values: [0.1162839  0.11452413 0.1037069 ]
True Label 16
Top 3 predicted labels : [13 19  7]
with following Top 3 values: [0.12164187 0.11521064 0.09507314]
True Label 5
Top 3 predicted labels : [19 13  6]
with following Top 3 values: [0.2166376  0.18456523 0.10359345]
True Label 2
Top 3 predicted labels : [16  0  4]
with following Top 3 values: [0.09923778 0.08710536 0.08573456]
True Label 4
Top 3 predicted labels : [11 12  3]
with following Top 3 values: [0.08474627 0.07011871 0.0683706 ]
True Label 23
Top 3 predicted labels : [12 14  2]
with following Top 3 values: [0.10421102 0.0946273  0.0644206 ]
True Label 22
Top 3 predicted labels : [11 14  7]
with following Top 3 values: [0.1034211  0.07743471 0.06563167]
True Label 12
Top 3 predicted labels : [ 0  9 17]
with following Top 3 values: [0.15105054 0.11714887 0.10947838]
True Label 22
Top 3 predicted labels : [ 8 12  5]
with following Top 3 values: [0.08079714 0.06683693 0.0

In [43]:
# classes = np.load(y_path_npy)
# pd.DataFrame(classes).describe()
# #description of labels classes 