In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [3]:
# Load the avocado price table from the relative input directory into `data`.
data=pd.read_csv('../input/avocado-prices/avocado.csv')

In [4]:
# Bare expression: the notebook renders the freshly loaded frame as this cell's output.
data

In [5]:
# Discard the first column of the loaded frame.
# NOTE(review): presumably this is the CSV's saved row index -- confirm.
# The column is chosen by position, so re-running this cell removes one more column.
data = data.drop(columns=data.columns[0])

In [6]:
# Print pandas' structural summary of the frame (columns, dtypes, non-null counts).
data.info()

In [7]:
# Horizontal box plot for every numeric column, to eyeball spread and outliers.
# Columns are selected by dtype up front, which avoids positional lookups on the
# label-indexed `data.dtypes` Series and numbers the panels contiguously.
numeric_columns = data.select_dtypes(include='number').columns

n_cols = 5
n_rows = max(1, -(-len(numeric_columns) // n_cols))  # ceiling division

fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 10), squeeze=False)
flat_axes = axes.ravel()

for ax, column in zip(flat_axes, numeric_columns):
    ax.boxplot(data[column], vert=False)
    ax.set_title(column)

# Hide the grid slots that have no column to show.
for ax in flat_axes[len(numeric_columns):]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()

In [8]:
# Count missing values per column.
data.isna().sum()

In [9]:
def get_uniques(df, columns):
    """Return a dict mapping each name in ``columns`` to the list of distinct
    values found in ``df[name]``, in order of first appearance."""
    uniques_by_column = {}
    for name in columns:
        distinct_values = df[name].unique()
        uniques_by_column[name] = list(distinct_values)
    return uniques_by_column

In [10]:
# Columns to inspect for their distinct values.
categorical_columns = [
    'region',
    'Date',
    'type',
]

# Cell output: dict of column name -> list of distinct values.
get_uniques(df=data, columns=categorical_columns)

In [11]:
# Column roles. `target_column` is read by the later label-encoding and X/y cells;
# the two feature lists are not referenced by any other cell visible here.
target_column = 'type'
nominal_feature = ['region']
ordinal_feature = ['Date']

In [12]:
# Distinct 'Date' values in ascending order; a value's position in this list
# becomes its ordinal code in the encoding cell.
data_ordering = list(data['Date'].unique())
data_ordering.sort()

In [15]:
def ordinal_encode(df, column, ordering):
    """Replace the values of ``column`` with their zero-based position in ``ordering``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is copied; the caller's frame is not modified.
    column : label
        Name of the column to encode.
    ordering : sequence
        Category values listed from lowest to highest rank. If a value occurs
        more than once, its first position is used.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which ``column`` holds the integer ranks.

    Raises
    ------
    ValueError
        If ``column`` contains a value that does not appear in ``ordering``.
    """
    df = df.copy()

    # Build value -> rank once, so each row costs one dict lookup instead of a
    # linear scan of ``ordering``.
    try:
        rank_of = {}
        for rank, value in enumerate(ordering):
            rank_of.setdefault(value, rank)  # keep the first occurrence
    except TypeError:
        # Unhashable category values cannot be dict keys; use the linear scan.
        df[column] = df[column].apply(lambda x: ordering.index(x))
        return df

    encoded = df[column].map(rank_of)

    # ``map`` yields NaN for values missing from the dict; report them as an error.
    unmatched = encoded.isna().to_numpy()
    if unmatched.any():
        unknown = df[column][unmatched].unique().tolist()
        raise ValueError(
            f"values {unknown[:5]!r} in column {column!r} are not present in ordering"
        )

    df[column] = encoded
    return df

def onehot_encode(df, column):
    """Return a new frame in which ``column`` is replaced by one indicator
    column per distinct value (named after the value, appended on the right).
    The input frame is left untouched."""
    indicator_columns = pd.get_dummies(df[column])
    widened = pd.concat([df, indicator_columns], axis=1)
    return widened.drop(columns=column)

In [16]:
# Turn each 'Date' value into its position within the sorted list of dates.
# Re-running this cell fails: the column then holds positions, which are not in `data_ordering`.
data = ordinal_encode(df=data, column='Date', ordering=data_ordering)

In [17]:
# Expand 'region' into one indicator column per distinct region value.
data = onehot_encode(df=data, column='region')

In [18]:
# Bare expression: render the frame after the 'Date' and 'region' encodings above.
data

In [19]:
# Replace the target's class labels with integer ids. The fitted encoder is kept
# in `label_encoder` so the ids can be mapped back to the original labels.
label_encoder = LabelEncoder()
label_encoder.fit(data[target_column])
data[target_column] = label_encoder.transform(data[target_column])

In [20]:
# Feature matrix: every column except the target. Label vector: the target column.
X = data.drop(columns=target_column)
y = data[target_column]

In [21]:
# Standardize every feature column; `X` becomes the scaler's transformed output.
# NOTE(review): the scaler is fitted on all rows before the train/test split in
# the next cell, so test-set statistics influence the scaling.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [22]:
# Keep 70% of the rows for training and hold out the rest for the final evaluation.
# The seed is fixed so the split is the same on every Restart & Run All; without it
# each run draws a different split.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=42,
)

In [28]:
# Binary classifier: two 64-unit ReLU hidden layers feeding a single sigmoid unit.
# The input width is read from the training matrix rather than hard-coded, and is
# passed as a shape tuple, which `tf.keras.Input` expects.
inputs = tf.keras.Input(shape=(x_train.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

# A sigmoid output trained on 0/1 labels is a Bernoulli probability, so binary
# cross-entropy is the matching loss (the previous 'mse' gives weak gradients when
# the sigmoid saturates).
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

batch_size = 64
epochs = 38

# 20% of the training rows are held back for validation. ReduceLROnPlateau is used
# with its default settings.
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()],
    verbose=0
)

In [26]:
# Training vs. validation loss per epoch.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Build the x-axis from what was actually recorded, not from the global `epochs`,
# so the plot stays correct if `epochs` was changed without re-fitting or if
# training stopped early.
epochs_range = range(1, len(train_loss) + 1)

fig, ax = plt.subplots(figsize=(14, 10))

ax.plot(epochs_range, train_loss, label='Training Loss')
ax.plot(epochs_range, val_loss, label='Validation Loss')

ax.set_title("Training and validation Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel('Loss')
ax.legend()

plt.show()

In [27]:
# 1-based epoch number at which the validation loss was lowest
# (argmin is 0-based, hence the +1).
np.argmin(val_loss)+1

In [29]:
# Score the trained model on the held-out test split; reports the loss and the
# metric configured in `model.compile` (accuracy).
model.evaluate(x_test,y_test)