In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Function to load the data from CSV
def load_data(file_path):
    """Load the survey dataset from a CSV file.

    Args:
        file_path: Path (or any object accepted by ``pandas.read_csv``) of
            the CSV file to read.

    Returns:
        A ``pandas.DataFrame`` with the contents of the file.
    """
    # Read the path the caller asked for instead of a hard-coded file name.
    return pd.read_csv(file_path)

# Data Preprocessing function
def preprocess_data(df):
    """Encode, split and scale the dataset for model training.

    The categorical feature columns are label-encoded, the data is split
    80/20 into train and test sets (``random_state=42``), and a
    ``StandardScaler`` fitted on the training split is applied to both splits.

    Args:
        df: DataFrame containing the columns ``age``, ``gender``,
            ``location``, ``income_level``, ``pizza_type_preference``,
            ``is_vegetarian`` and the target column ``pizza_like``.
            It is not modified.

    Returns:
        A 9-tuple ``(X_train_scaled, X_test_scaled, y_train, y_test,
        label_encoder_gender, label_encoder_location, label_encoder_income,
        label_encoder_pizza_type, scaler)``.
    """
    feature_columns = ['age', 'gender', 'location', 'income_level', 'pizza_type_preference', 'is_vegetarian']
    categorical_columns = ['gender', 'location', 'income_level', 'pizza_type_preference']

    # Work on an explicit copy: assigning into a column slice of `df` raises
    # pandas' SettingWithCopyWarning and may or may not write through to `df`.
    X = df[feature_columns].copy()
    y = df['pizza_like']

    # One fitted encoder per categorical column, kept so that the same
    # mapping can be reused on new inputs at prediction time.
    encoders = {}
    for column in categorical_columns:
        encoder = LabelEncoder()
        X[column] = encoder.fit_transform(X[column])
        encoders[column] = encoder

    # Split the dataset into training and testing sets (80-20 split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only, then apply it to both splits.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return (
        X_train_scaled,
        X_test_scaled,
        y_train,
        y_test,
        encoders['gender'],
        encoders['location'],
        encoders['income_level'],
        encoders['pizza_type_preference'],
        scaler,
    )

# Function to train the RandomForest model
def train_model(X_train_scaled, y_train):
    """Fit a random forest classifier on the scaled training data.

    Args:
        X_train_scaled: Scaled training feature matrix.
        y_train: Training target values.

    Returns:
        The fitted ``RandomForestClassifier`` (100 trees, ``random_state=42``).
    """
    forest = RandomForestClassifier(random_state=42, n_estimators=100)
    # `fit` returns the estimator itself, so the fitted model is returned.
    return forest.fit(X_train_scaled, y_train)

# Function to predict pizza like based on user input
def predict_pizza_like(model, scaler, label_encoders, age, gender, location, income_level, pizza_type_preference, is_vegetarian):
    """Predict whether a single respondent likes pizza.

    Args:
        model: Object exposing ``predict``; called with the scaled row.
        scaler: Object exposing ``transform``; called with the encoded
            one-row DataFrame.
        label_encoders: Mapping with the keys ``'gender'``, ``'location'``,
            ``'income_level'`` and ``'pizza_type_preference'``, each holding
            an object whose ``transform`` encodes that column.
        age, gender, location, income_level, pizza_type_preference,
        is_vegetarian: Raw feature values for the respondent.

    Returns:
        ``"Likes Pizza"`` when the first predicted label equals ``1``,
        otherwise ``"Does Not Like Pizza"``.
    """
    encoded_columns = ('gender', 'location', 'income_level', 'pizza_type_preference')

    # Single-row frame; columns are created in the order listed here.
    row = pd.DataFrame({
        'age': [age],
        'gender': [gender],
        'location': [location],
        'income_level': [income_level],
        'pizza_type_preference': [pizza_type_preference],
        'is_vegetarian': [is_vegetarian],
    })

    # Replace each categorical value with the code from its encoder.
    for column in encoded_columns:
        row[column] = label_encoders[column].transform(row[column])

    scaled_row = scaler.transform(row)
    predicted_label = model.predict(scaled_row)[0]

    if predicted_label == 1:
        return "Likes Pizza"
    return "Does Not Like Pizza"

# Main function to run the prediction system
def run_prediction_system():
    """Train the model, report its accuracy and run one interactive prediction.

    Loads the dataset, preprocesses it, trains the classifier, prints the
    accuracy on the held-out test split, then prompts on stdin for one
    respondent's details and prints the predicted outcome.

    Raises:
        ValueError: If the age or vegetarian answer is not an integer
            (raised by ``int``).
    """
    # CSV file holding the survey data; change this to point at another dataset.
    # It names the file the pipeline actually reads, so the declared path and
    # the loaded data cannot drift apart.
    file_path = 'pizza.csv'
    df = load_data(file_path)

    # Preprocess the data
    (
        X_train_scaled,
        X_test_scaled,
        y_train,
        y_test,
        label_encoder_gender,
        label_encoder_location,
        label_encoder_income,
        label_encoder_pizza_type,
        scaler,
    ) = preprocess_data(df)

    # Train the model
    model = train_model(X_train_scaled, y_train)

    # Evaluate the model on the held-out split
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy:.4f}")

    # Get user input for prediction. Categorical answers are stripped so that
    # stray leading/trailing whitespace is not treated as part of the label.
    print("\nPlease provide the following information to predict if you like pizza.")
    age = int(input("Age: "))
    gender = input("Gender (Male/Female): ").strip()
    location = input("Location (Urban/Suburban/Rural): ").strip()
    income_level = input("Income Level (Low/Medium/High): ").strip()
    pizza_type_preference = input("Pizza Type Preference (Vegetarian/Non-Vegetarian): ").strip()
    is_vegetarian = int(input("Are you vegetarian? (1 = Yes, 0 = No): "))

    # Make prediction based on user input
    label_encoders = {
        'gender': label_encoder_gender,
        'location': label_encoder_location,
        'income_level': label_encoder_income,
        'pizza_type_preference': label_encoder_pizza_type,
    }
    prediction = predict_pizza_like(
        model, scaler, label_encoders,
        age, gender, location, income_level, pizza_type_preference, is_vegetarian,
    )

    # Print the prediction result
    print(f"Prediction: {prediction}")

# Run the prediction system only when this file is executed as the main
# module, so that importing it does not start training or prompt for input.
if __name__ == "__main__":
    run_prediction_system()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['gender'] = label_encoder_gender.fit_transform(X['gender'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['location'] = label_encoder_location.fit_transform(X['location'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['income_level'] = label_encoder_income.fit_transform(X['income_level'])
A

Model Accuracy: 0.0000

Please provide the following information to predict if you like pizza.
Age: 25
