In [1]:
import numpy as np
import tensorflow as tf
from keras._tf_keras.keras import Sequential
from keras._tf_keras.keras.layers import Dense
from keras._tf_keras.keras.losses import SparseCategoricalCrossentropy
from keras._tf_keras.keras.optimizers import Adam
from sklearn.datasets import make_blobs

# Example dataset #

In [2]:
# Four 2-D blob centres; a centre's position in this list is the class label
# (0-3) given to the samples drawn around it.
centers = [[-5, 2], [-2, -2], [1, 2], [5, -2]]

# random_state is fixed so the same 2000 points are generated on every run.
X_train, y_train = make_blobs(
    n_samples=2000,
    centers=centers,
    cluster_std=1.0,
    random_state=30,
)

In [3]:
# Feature matrix: one row per sample, two coordinates per row
# (NumPy abbreviates the repr of large arrays with "...").
X_train

array([[ 1.55508243,  0.84801682],
       [-5.33749882,  1.03397255],
       [-4.09353183,  0.67843096],
       ...,
       [-0.84437575, -1.94991543],
       [ 5.0377068 , -2.92221685],
       [ 0.38198674,  1.49735733]])

In [4]:
# Integer class labels, one per row of X_train (values 0-3, matching the
# index of the blob centre each sample was generated from).
y_train

array([2, 0, 0, ..., 1, 3, 2])

# Softmax implementation #

In [5]:
# Seed the Python, NumPy and TensorFlow RNGs *before* any layer is constructed
# so that weight initialisation and batch shuffling repeat on every
# "Restart Kernel -> Run All". (Some GPU kernels may still be non-deterministic.)
tf.keras.utils.set_random_seed(1234)

# Two hidden ReLU layers followed by a 4-unit linear layer (one unit per class).
# The output layer deliberately emits raw logits rather than probabilities:
# softmax is folded into the loss via from_logits=True, which is numerically
# more stable than a softmax output layer.
model = Sequential([
    Dense(25, activation='relu'),
    Dense(15, activation='relu'),
    Dense(4, activation='linear'),
])

In [6]:
# The loss applies softmax internally (from_logits=True) because the model's
# last Dense layer is linear. "Sparse" means the targets are integer class
# indices rather than one-hot vectors.
model.compile(
    loss=SparseCategoricalCrossentropy(from_logits=True),
    optimizer=Adam(learning_rate=0.001),
)

# Train on the full training set for 10 passes; the per-epoch loss is logged below.
model.fit(X_train, y_train, epochs=10)

Epoch 1/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 1.5839
Epoch 2/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.6643
Epoch 3/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.3105
Epoch 4/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1738
Epoch 5/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1134
Epoch 6/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0917
Epoch 7/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0757
Epoch 8/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0638
Epoch 9/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0591
Epoch 10/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0507


<keras.src.callbacks.history.History at 0x7f01758ecf40>

In [7]:
# Forward pass over the training set. Because the final layer is linear, each
# row of `p` holds four raw logits (one per class), not probabilities.
p = model.predict(X_train)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [8]:
# First two rows of raw logits: unbounded values that do not sum to 1.
p[:2]

array([[-2.8385828 , -2.1660378 ,  3.319425  , -0.44706932],
       [ 3.948637  ,  0.01468207, -3.1017675 , -5.255323  ]],
      dtype=float32)

In [9]:
# Turn each row of logits into a probability distribution over the four
# classes (softmax along the last axis), then convert the tensor to NumPy.
sm = tf.nn.softmax(p, axis=-1).numpy()

In [10]:
# The same two samples after softmax: non-negative entries that sum to 1 per row.
sm[:2]

array([[2.0560266e-03, 4.0282006e-03, 9.7144330e-01, 2.2472411e-02],
       [9.7987944e-01, 1.9172454e-02, 8.4961241e-04, 9.8615106e-05]],
      dtype=float32)

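Selecting the most likely category for each sample (softmax preserves ordering, so the argmax of the raw logits `p` is the same as the argmax of the probabilities `sm`):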

In [11]:
# For the first five samples, print the raw output row next to the index of
# its largest entry, i.e. the predicted class.
for sample_idx in range(5):
    logits = p[sample_idx]
    print(f"{logits}, category: {np.argmax(logits)}")

[-2.8385828  -2.1660378   3.319425   -0.44706932], category: 2
[ 3.948637    0.01468207 -3.1017675  -5.255323  ], category: 0
[ 2.7882378   0.21119024 -2.6665256  -4.193566  ], category: 0
[-2.2164762  2.1280901 -3.75662   -2.1793213], category: 1
[-0.87954813 -3.0817313   5.108371   -7.002337  ], category: 2
