In [88]:
!pip install tensorflow



In [90]:
# pandas is imported in this cell as well so it runs on a fresh kernel; the
# notebook's main import cell only appears further down.
import pandas as pd

# Read the historic items. The file carries its old row index as an extra
# first column, which pandas shows as 'Unnamed: 0'.
df_history = pd.read_csv('historic.csv')
df_history.head()

Unnamed: 0,item_no,category,main_promotion,color,stars,success_indicator
0,739157,Tunic,Catalog,Green,3.1,flop
1,591846,Hoodie,Category_Highlight,Red,1.5,flop
2,337574,Sweatshirt,Catalog,Red,4.4,top
3,401933,Polo-Shirt,Category_Highlight,Blue,3.1,flop
4,812151,Hoodie,Category_Highlight,Green,4.1,top


In [92]:
# Cap the `stars` column at MAX_STARS. Series.clip is the vectorised
# equivalent of applying min(x, MAX_STARS) to every element (NaN stays NaN).
# NOTE(review): the model classes below re-read historic.csv themselves, so
# this cap only affects `df_history`, not the data the models are trained on.
MAX_STARS = 5
df_history['stars'] = df_history['stars'].clip(upper=MAX_STARS)
df_history['stars'].unique()

array([3.1, 1.5, 4.4, 4.1, 3.9, 1.4, 1.8, 3.2, 5. , 2.5, 3.4, 2.7, 1.7,
       2.8, 4.8, 4. , 4.5, 1.3, 2.6, 3.6, 4.9, 2.2, 3.7, 2.3, 3.8, 4.6,
       3.5, 2.9, 1.6, 3. , 4.2, 1.9, 4.3, 4.7, 2. , 1. , 3.3, 2.4, 1.2,
       2.1, 0.7, 0.9, 0.8, 0.6, 1.1, 0.3])

In [105]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.ensemble import RandomForestClassifier

In [127]:



class NeuralNetworkModel:
    """Dense Keras classifier that predicts ``success_indicator``.

    Workflow: ``load`` -> ``preprocess`` -> ``train`` -> ``test``, then
    ``predict`` on new rows. ``preprocess`` keeps the fitted label encoder,
    scaler and one-hot column layout on the instance so that ``predict``
    applies exactly the training-time transformation instead of re-fitting
    on the data being scored.
    """

    TARGET_COL = 'success_indicator'
    CATEGORICAL_COLS = ['stars', 'category', 'main_promotion', 'color']
    # Leftover row index that pandas names 'Unnamed: 0' when reading
    # historic.csv. It is dropped before training; prediction_input.csv does
    # not contain it.
    # NOTE(review): `item_no` looks like an identifier but is still used as a
    # numeric feature, as before - confirm whether it should be dropped too.
    NON_FEATURE_COLS = ['Unnamed: 0']

    def __init__(self):
        self.model = Sequential()
        self._trained = False

    def load(self, filepath):
        """Read ``filepath`` as CSV, store it on ``self.df`` and return it."""
        self.df = pd.read_csv(filepath)
        return self.df

    def preprocess(self):
        """Encode, split and scale ``self.df`` for training.

        Stores ``label_encoder``, ``feature_columns`` and ``scaler`` on the
        instance (needed by ``predict``) together with the 80/20 split in
        ``X_train``, ``X_test``, ``y_train`` and ``y_test``.

        Raises:
            KeyError: if ``self.df`` lacks the target or a categorical column.
        """
        y = self.df[self.TARGET_COL]
        X = (
            self.df
            .drop(columns=[self.TARGET_COL])
            .drop(columns=self.NON_FEATURE_COLS, errors='ignore')
        )

        self.label_encoder = LabelEncoder()
        y_encoded = self.label_encoder.fit_transform(y)

        X_encoded = pd.get_dummies(X, columns=self.CATEGORICAL_COLS)
        # Remember the exact column layout the network is trained on.
        self.feature_columns = list(X_encoded.columns)

        X_train, X_test, y_train, y_test = train_test_split(
            X_encoded, y_encoded, test_size=0.2, random_state=42
        )

        # Fit the scaler on the training split only; reuse it everywhere else.
        self.scaler = StandardScaler()
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        self.X_train, self.X_test, self.y_train, self.y_test = X_train_scaled, X_test_scaled, y_train, y_test

    def train(self):
        """Build and fit the network on the preprocessed training split."""
        n_classes = len(np.unique(self.y_train))

        # Start from an empty model so that calling train() again does not
        # stack a second set of layers on top of the first.
        self.model = Sequential()
        self.model.add(Dense(64, input_dim=self.X_train.shape[1], activation='relu'))
        self.model.add(Dense(32, activation='relu'))
        self.model.add(Dense(n_classes, activation='softmax'))

        self.model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        self.model.fit(self.X_train, self.y_train, epochs=10, batch_size=32, validation_data=(self.X_test, self.y_test))
        self._trained = True

    def test(self):
        """Print accuracy, weighted precision/recall/F1 and the confusion matrix on the test split."""
        y_pred = np.argmax(self.model.predict(self.X_test), axis=1)

        accuracy = accuracy_score(self.y_test, y_pred)
        precision = precision_score(self.y_test, y_pred, average='weighted')
        recall = recall_score(self.y_test, y_pred, average='weighted')
        f1 = f1_score(self.y_test, y_pred, average='weighted')
        conf_matrix = confusion_matrix(self.y_test, y_pred)

        print("ANN Metrics ")

        print(f'Accuracy: {accuracy}')
        print(f'Precision: {precision}')
        print(f'Recall: {recall}')
        print(f'F1 Score: {f1}')
        print('Confusion Matrix:\n', conf_matrix)

    def predict(self, new_data, decode=False):
        """Predict the class of every row in ``new_data``.

        Args:
            new_data: DataFrame with the raw feature columns (same schema as
                the training file, with or without the target column).
            decode: when True, return the original labels instead of the
                integer class indices.

        Returns:
            Array of class indices, or of labels when ``decode`` is True.

        Raises:
            RuntimeError: if ``preprocess`` and ``train`` have not been run.
        """
        if not getattr(self, '_trained', False):
            raise RuntimeError(
                'NeuralNetworkModel.predict() called before preprocess() and train()'
            )

        encoded = pd.get_dummies(new_data, columns=self.CATEGORICAL_COLS)
        # Align to the training layout: categories unseen in training are
        # dropped, categories missing from new_data become all-zero columns.
        aligned = encoded.reindex(columns=self.feature_columns, fill_value=0)
        # Reuse the scaler fitted on the training split (never re-fit here).
        scaled = self.scaler.transform(aligned)

        class_idx = np.argmax(self.model.predict(scaled), axis=1)
        if decode:
            return self.label_encoder.inverse_transform(class_idx)
        return class_idx


class RandomForestModel:
    """Random-forest classifier that predicts ``success_indicator``.

    Workflow: ``load`` -> ``preprocess`` -> ``train`` -> ``test``, then
    ``predict`` on new rows. ``preprocess`` keeps the fitted label encoder
    and the one-hot column layout on the instance so that ``predict`` can
    present new data in exactly the layout the forest was trained on.
    """

    TARGET_COL = 'success_indicator'
    CATEGORICAL_COLS = ['stars', 'category', 'main_promotion', 'color']
    # Leftover row index that pandas names 'Unnamed: 0' when reading
    # historic.csv. It is dropped before training; prediction_input.csv does
    # not contain it.
    # NOTE(review): `item_no` looks like an identifier but is still used as a
    # numeric feature, as before - confirm whether it should be dropped too.
    NON_FEATURE_COLS = ['Unnamed: 0']

    def __init__(self):
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self._trained = False

    def load(self, filepath):
        """Read ``filepath`` as CSV, store it on ``self.df`` and return it."""
        self.df = pd.read_csv(filepath)
        return self.df

    def preprocess(self):
        """Encode and split ``self.df`` for training.

        Stores ``label_encoder`` and ``feature_columns`` on the instance
        (needed by ``predict``) together with the 80/20 split in ``X_train``,
        ``X_test``, ``y_train`` and ``y_test``.

        Raises:
            KeyError: if ``self.df`` lacks the target or a categorical column.
        """
        y = self.df[self.TARGET_COL]
        X = (
            self.df
            .drop(columns=[self.TARGET_COL])
            .drop(columns=self.NON_FEATURE_COLS, errors='ignore')
        )

        self.label_encoder = LabelEncoder()
        y_encoded = self.label_encoder.fit_transform(y)

        X_encoded = pd.get_dummies(X, columns=self.CATEGORICAL_COLS)
        # Remember the exact column layout the forest is trained on.
        self.feature_columns = list(X_encoded.columns)

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X_encoded, y_encoded, test_size=0.2, random_state=42
        )

    def train(self):
        """Fit the forest on the preprocessed training split."""
        self.model.fit(self.X_train, self.y_train)
        self._trained = True

    def test(self):
        """Print accuracy, weighted precision/recall/F1 and the confusion matrix on the test split."""
        y_pred = self.model.predict(self.X_test)

        accuracy = accuracy_score(self.y_test, y_pred)
        precision = precision_score(self.y_test, y_pred, average='weighted')
        recall = recall_score(self.y_test, y_pred, average='weighted')
        f1 = f1_score(self.y_test, y_pred, average='weighted')
        conf_matrix = confusion_matrix(self.y_test, y_pred)

        print("Random Forest Metrics ")

        print(f'Accuracy: {accuracy}')
        print(f'Precision: {precision}')
        print(f'Recall: {recall}')
        print(f'F1 Score: {f1}')
        print('Confusion Matrix:\n', conf_matrix)

    def predict(self, new_data, decode=False):
        """Predict the class of every row in ``new_data``.

        Args:
            new_data: DataFrame with the raw feature columns (same schema as
                the training file, with or without the target column).
            decode: when True, return the original labels instead of the
                integer class indices.

        Returns:
            Array of class indices, or of labels when ``decode`` is True.

        Raises:
            RuntimeError: if ``preprocess`` and ``train`` have not been run.
        """
        if not getattr(self, '_trained', False):
            raise RuntimeError(
                'RandomForestModel.predict() called before preprocess() and train()'
            )

        encoded = pd.get_dummies(new_data, columns=self.CATEGORICAL_COLS)
        # Align to the training layout: categories unseen in training are
        # dropped, categories missing from new_data become all-zero columns.
        aligned = encoded.reindex(columns=self.feature_columns, fill_value=0)

        class_idx = self.model.predict(aligned)
        if decode:
            return self.label_encoder.inverse_transform(class_idx)
        return class_idx


# Seed NumPy and TensorFlow before training so the network's results are as
# repeatable as the backend allows, matching the random_state=42 already used
# for the train/test split and the random forest.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Example for Neural Network: load -> preprocess -> train -> print test metrics
nn_model = NeuralNetworkModel()
nn_model.load('historic.csv')
nn_model.preprocess()
nn_model.train()
nn_model.test()

# Example for Random Forest: same pipeline on the same file
rf_model = RandomForestModel()
rf_model.load('historic.csv')
rf_model.preprocess()
rf_model.train()
rf_model.test()


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
ANN Metrics 
Accuracy: 0.8275
Precision: 0.8258941604480045
Recall: 0.8275
F1 Score: 0.8236335548172758
Confusion Matrix:
 [[387 184]
 [ 92 937]]
Random Forest Metrics 
Accuracy: 0.7975
Precision: 0.7945222361415597
Recall: 0.7975
F1 Score: 0.7949611247301598
Confusion Matrix:
 [[381 190]
 [134 895]]


 We found that the ANN achieves higher test accuracy than the Random Forest classifier (0.8275 vs. 0.7975 in the run shown above).



### Predictions on real data from prediction_input.csv


In [131]:
# Score the unseen items with the models that were trained on historic.csv
# above (`nn_model`, `rf_model`). A freshly constructed model has learned
# nothing, so it must not be used for prediction.
data = pd.read_csv('prediction_input.csv')

predictions_nn = nn_model.predict(data)
predictions_rf = rf_model.predict(data)

# Attach each model's predicted class index to a copy of the input rows.
result_nn = data.assign(predicted_success_indicator_nn=predictions_nn)
result_rf = data.assign(predicted_success_indicator_rf=predictions_rf)

# Save the results to new CSV files
result_nn.to_csv('predicted_results_nn.csv', index=False)
result_rf.to_csv('predicted_results_rf.csv', index=False)



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   item_no         2000 non-null   int64  
 1   category        2000 non-null   object 
 2   main_promotion  2000 non-null   object 
 3   color           2000 non-null   object 
 4   stars           2000 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 78.2+ KB
None
