In [1]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
print(tf.__version__)

import warnings
warnings.filterwarnings('ignore')

2025-03-17 18:48:33.505950: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-17 18:48:33.519472: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-17 18:48:34.094292: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-17 18:48:36.443230: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.16.1


In [4]:
# Load the data from the raw csv file saved in data directory
import pandas as pd
DATA_DIR_PATH = "../data/"
FILE_NAME = "synthetic_pharmacy_claims.csv"
sp_claim= pd.read_csv(
    f"{DATA_DIR_PATH}/{FILE_NAME}"
    ) 

In [3]:
import numpy as np

numeric_features = ['Days_Supply']
categorical_features = ['Brand_Drug_Indicator']
target = ['Total_Invoice_Amount']

# Separate features (X) and target variable (y)
dataframe = sp_claim[numeric_features + categorical_features + target].rename(columns={'Total_Invoice_Amount':'target'})


train, val, test = np.split(dataframe.sample(frac=1), [int(0.8*len(dataframe)), int(0.9*len(dataframe))])

print(len(train), 'training examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

800 training examples
100 validation examples
100 test examples


In [4]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  df = dataframe.copy()
  labels = df.pop('target')
  df = {key: value.to_numpy()[:,tf.newaxis] for key, value in dataframe.items()}
  ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  ds = ds.prefetch(batch_size)
  return ds

In [5]:
batch_size = 5
train_ds = df_to_dataset(train, batch_size=batch_size)

In [6]:
[(train_features, label_batch)] = train_ds.take(1)
print('Every feature:', list(train_features.keys()))
print('A batch of ages:', train_features['Days_Supply'])
print('A batch of targets:', label_batch )

Every feature: ['Days_Supply', 'Brand_Drug_Indicator', 'target']
A batch of ages: tf.Tensor(
[[67]
 [42]
 [48]
 [ 8]
 [54]], shape=(5, 1), dtype=int64)
A batch of targets: tf.Tensor([ 693.74 1120.33  731.06  683.41  791.9 ], shape=(5,), dtype=float64)


2024-11-03 21:50:00.863040: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [7]:
def get_normalization_layer(name, dataset):
  # Create a Normalization layer for the feature.
  normalizer = layers.Normalization(axis=None)

  # Prepare a Dataset that only yields the feature.
  feature_ds = dataset.map(lambda x, y: x[name])

  # Learn the statistics of the data.
  normalizer.adapt(feature_ds)

  return normalizer

photo_count_col = train_features['Days_Supply']
layer = get_normalization_layer('Days_Supply', train_ds)
layer(photo_count_col)

2024-11-03 21:50:01.057320: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[ 0.9065497 ],
       [-0.04711085],
       [ 0.18176767],
       [-1.3440892 ],
       [ 0.4106462 ]], dtype=float32)>

In [8]:
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
  # Create a layer that turns strings into integer indices.
  if dtype == 'string':
    index = layers.StringLookup(max_tokens=max_tokens)
  # Otherwise, create a layer that turns integer values into integer indices.
  else:
    index = layers.IntegerLookup(max_tokens=max_tokens)

  # Prepare a `tf.data.Dataset` that only yields the feature.
  feature_ds = dataset.map(lambda x, y: x[name])

  # Learn the set of possible values and assign them a fixed integer index.
  index.adapt(feature_ds)

  # Encode the integer indices.
  encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())

  # Apply multi-hot encoding to the indices. The lambda function captures the
  # layer, so you can use them, or include them in the Keras Functional model later.
  return lambda feature: encoder(index(feature))

test_type_col = train_features['Brand_Drug_Indicator']
test_type_layer = get_category_encoding_layer(name='Brand_Drug_Indicator',
                                              dataset=train_ds,
                                              dtype='string')
test_type_layer(test_type_col)

2024-11-03 21:50:01.181345: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.]], dtype=float32)>

In [9]:
batch_size = 256
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [10]:
all_inputs = {}
encoded_features = []

# Numerical features.
for header in numeric_features:
  numeric_col = tf.keras.Input(shape=(1,), name=header)
  normalization_layer = get_normalization_layer(header, train_ds)
  encoded_numeric_col = normalization_layer(numeric_col)
  all_inputs[header] = numeric_col
  encoded_features.append(encoded_numeric_col)

for header in categorical_features:
  categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='string')
  encoding_layer = get_category_encoding_layer(name=header,
                                               dataset=train_ds,
                                               dtype='string',
                                               max_tokens=5)
  encoded_categorical_col = encoding_layer(categorical_col)
  all_inputs[header] = categorical_col
  encoded_features.append(encoded_categorical_col)

all_features = tf.keras.layers.concatenate(encoded_features)

2024-11-03 21:50:01.265245: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-11-03 21:50:01.283900: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [11]:
x = tf.keras.layers.concatenate(encoded_features)
output = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(inputs=all_inputs, outputs=output)

In [12]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

In [43]:
result = model.evaluate(test_ds, return_dict=True)
print(result)

print(model.layers[-1].get_weights())
print()
print(model.get_weights())
model.summary()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 972.8156
{'loss': 972.8156127929688}
[array([[-0.38312727],
       [-0.03720462],
       [ 0.7853937 ],
       [-0.09103703]], dtype=float32), array([0.], dtype=float32)]

[43.234997, 687.21484, 0, array([[-0.38312727],
       [-0.03720462],
       [ 0.7853937 ],
       [-0.09103703]], dtype=float32), array([0.], dtype=float32)]


In [18]:
#model.predict(test_ds)
dir(model)

['__annotations__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_trackable_child',
 '_add_variable_with_custom_getter',
 '_adjust_input_rank',
 '_allow_non_tensor_positional_args',
 '_api_export_path',
 '_api_export_symbol_id',
 '_assert_compile_called',
 '_assert_input_compatibility',
 '_auto_config',
 '_build_by_run_for_kwargs',
 '_build_by_run_for_single_pos_arg',
 '_build_shapes_dict',
 '_call_has_mask_arg',
 '_call_has_training_arg',
 '_call_signature',
 '_called',
 '_check_quantize_args',
 '_check_super_called',
 '_checkpoint_dependencies',
 '_clear_losses',
 '_compile_config',
 '_compile_loss',
 '_compile_metrics',
 '_compi

In [38]:
all_features

<KerasTensor shape=(None, 4), dtype=float32, sparse=False, name=keras_tensor_3>

In [39]:
model.trainable_variables

[<KerasVariable shape=(4, 1), dtype=float32, path=dense/kernel>,
 <KerasVariable shape=(1,), dtype=float32, path=dense/bias>]