In [109]:
import tensorflow as tf
import random

def read_and_split_csv(filename, split_ratio=0.8, target_index=-1):
    # Load the CSV file
    with open(filename, "r") as f:
        lines = f.readlines()

    # Split the lines into header and data
    header = lines[0]
    data = lines[1:]

    # Shuffle the data
    random.shuffle(data)

    # Split the data into training and validation sets
    split_idx = int(len(data) * split_ratio)
    train_data = data[:split_idx]
    val_data = data[split_idx:]

    # Parse the data
    train_features, train_labels = parse_data(train_data, target_index)
    val_features, val_labels = parse_data(val_data, target_index)

    # Create TensorFlow datasets
    train_ds = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
    val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels))

    # Get the number of rows and features
    num_rows = len(lines) - 1
    num_features = len(header.split(","))

    return train_ds, val_ds, num_rows, num_features

def parse_data(data, target_idx):
    # Parse each row of data into features and label
    features = []
    labels = []
    for row in data:
        row = row.strip().split(",")
        features.append([float(x) for i, x in enumerate(row) if i != target_idx])
        labels.append(float(row[target_idx]))
    # Convert the features and labels to tensors
    features = tf.stack(features)
    labels = tf.stack(labels)
    return features, labels



In [110]:
fname = "data/diabetes.csv"
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2

In [111]:
# Load the data using the read_and_split_csv function
train_ds, val_ds, totalRows, totalFeatures = read_and_split_csv(fname, split_ratio=.8)
print("Total rows: ", totalRows)
print("Total features: ", totalFeatures)

Total rows:  768
Total features:  9


In [112]:
# Map the datasets to tuples of (x, y) pairs
train_ds = train_ds.map(lambda *args: (args[:-1], args[-1]))
val_ds = val_ds.map(lambda *args: (args[:-1], args[-1]))
for i in train_ds:
    print(i[0][0].numpy())

[  0.    134.     58.     20.    291.     26.4     0.352  21.      0.   ]
[4.00e+00 1.31e+02 6.80e+01 2.10e+01 1.66e+02 3.31e+01 1.60e-01 2.80e+01
 0.00e+00]
[  0.    100.     88.     60.    110.     46.8     0.962  31.      0.   ]
[  6.   102.    82.     0.     0.    30.8    0.18  36.     1.  ]
[  2.    112.     66.     22.      0.     25.      0.307  24.      0.   ]
[  2.    102.     86.     36.    120.     45.5     0.127  23.      1.   ]
[ 10.    122.     78.     31.      0.     27.6     0.512  45.      0.   ]
[ 8.    85.    55.    20.     0.    24.4    0.136 42.     0.   ]
[  0.    165.     76.     43.    255.     47.9     0.259  26.      0.   ]
[  3.    111.     62.      0.      0.     22.6     0.142  21.      0.   ]
[  1.    193.     50.     16.    375.     25.9     0.655  24.      0.   ]
[  3.    130.     64.      0.      0.     23.1     0.314  22.      0.   ]
[ 1.   90.   62.   12.   43.   27.2   0.58 24.    0.  ]
[ 1.    90.    68.     8.     0.    24.5    1.138 36.     0.   ]

In [113]:
import tensorflow as tf

# Define the model architecture
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(totalFeatures,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model with binary cross-entropy loss and accuracy metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model for 10 epochs

print(train_ds)
model.fit(train_ds, epochs=10, validation_data=val_ds, verbose=1)


<MapDataset element_spec=((TensorSpec(shape=(9,), dtype=tf.float32, name=None),), TensorSpec(shape=(), dtype=tf.float32, name=None))>
Epoch 1/10


ValueError: in user code:

    File "/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/keras/engine/training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/akeems/opt/anaconda3/envs/ml3950/lib/python3.9/site-packages/keras/engine/input_spec.py", line 250, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_14' (type Sequential).
    
    Input 0 of layer "dense_42" is incompatible with the layer: expected min_ndim=2, found ndim=1. Full shape received: (9,)
    
    Call arguments received by layer 'sequential_14' (type Sequential):
      • inputs=('tf.Tensor(shape=(9,), dtype=float32)',)
      • training=True
      • mask=None
