In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,Input
from tensorflow.keras.utils import to_categorical


In [2]:
pumpkin = 'data/Pumpkin_Seeds_Dataset.xlsx'
data = pd.read_excel(pumpkin)

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2500 entries, 0 to 2499
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Area               2500 non-null   int64  
 1   Perimeter          2500 non-null   float64
 2   Major_Axis_Length  2500 non-null   float64
 3   Minor_Axis_Length  2500 non-null   float64
 4   Convex_Area        2500 non-null   int64  
 5   Equiv_Diameter     2500 non-null   float64
 6   Eccentricity       2500 non-null   float64
 7   Solidity           2500 non-null   float64
 8   Extent             2500 non-null   float64
 9   Roundness          2500 non-null   float64
 10  Aspect_Ration      2500 non-null   float64
 11  Compactness        2500 non-null   float64
 12  Class              2500 non-null   object 
dtypes: float64(10), int64(2), object(1)
memory usage: 254.0+ KB


In [4]:
X = data.drop(columns=['Class'])
y = data['Class']

In [5]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_categorical, test_size=0.25, random_state=0)


In [8]:
model = Sequential([
    Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(y_categorical.shape[1], activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [10]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, verbose=1)


Epoch 1/100
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7733 - loss: 0.4882 - val_accuracy: 0.8400 - val_loss: 0.3641
Epoch 2/100
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8962 - loss: 0.2843 - val_accuracy: 0.8512 - val_loss: 0.3639
Epoch 3/100
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8838 - loss: 0.2824 - val_accuracy: 0.8496 - val_loss: 0.3537
Epoch 4/100
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8862 - loss: 0.2926 - val_accuracy: 0.8576 - val_loss: 0.3556
Epoch 5/100
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8891 - loss: 0.2825 - val_accuracy: 0.8464 - val_loss: 0.3526
Epoch 6/100
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8922 - loss: 0.2908 - val_accuracy: 0.8448 - val_loss: 0.3449
Epoch 7/100
[1m59/59[0m [32m━━━

In [11]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8825 - loss: 0.3060 
Test Accuracy: 86.88%


In [12]:
print("Class Labels:", label_encoder.classes_)

Class Labels: ['Çerçevelik' 'Ürgüp Sivrisi']


In [13]:
for i,name in enumerate(label_encoder.classes_):
    print(name, "=", i)

Çerçevelik = 0
Ürgüp Sivrisi = 1


In [14]:
new_data = np.array([
    [0,0,0,0,0,0,0,0,0,0,0,0],  # Example data point 1
    [60000, 900.0, 350.0, 250.0, 62000, 280.0, 0.80, 0.99, 0.75, 0.88, 1.6, 0.82]   # Example data point 2
])

In [15]:
new_data_scaled = scaler.transform(new_data)



In [16]:
predictions = model.predict(new_data_scaled)
predicted_classes = label_encoder.inverse_transform(np.argmax(predictions, axis=1))

print("Predictions for new data:", predicted_classes)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Predictions for new data: ['Çerçevelik' 'Çerçevelik']
