# 🧠 Hypothesis Validation in Tourism Studies using Machine Learning & MLP

This notebook compares multiple models (Naive Bayes, Logistic Regression, SVM, and Multilayer Perceptron) to validate research hypotheses related to tourist satisfaction, destination image, and loyalty, using behavioral data collected through a survey.

**Author:** Wajeeha Sajid  
**Electrical and Computer Engineering**


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder

# Load the dataset
file_path = '/content/Data.xlsx'
df = pd.read_excel(file_path)

# Clean the dataset
# Drop the 'S/No 序号' column when it exists; errors='ignore' makes this a
# no-op if the sheet does not contain it.
df_cleaned = df.drop(columns=['S/No 序号'], errors='ignore')
numeric_columns = df_cleaned.select_dtypes(include='number').columns
categorical_columns = df_cleaned.select_dtypes(exclude='number').columns
# Missing numeric answers are replaced by the median of their own column.
df_cleaned[numeric_columns] = df_cleaned[numeric_columns].fillna(df_cleaned[numeric_columns].median())
for column in categorical_columns:
    # Missing non-numeric answers are replaced by the column's most frequent
    # value. mode() returns an empty Series when a column has no non-missing
    # values at all, so indexing it unconditionally would raise; such a column
    # is left untouched instead.
    column_modes = df_cleaned[column].mode()
    if not column_modes.empty:
        df_cleaned[column] = df_cleaned[column].fillna(column_modes.iloc[0])

# Apply label encoding
# Every object-dtype column is converted to integer codes; the fitted encoder
# for each such column is kept in `label_encoders`, keyed by column name.
label_encoders = {}
for column in df_cleaned.columns:
    if df_cleaned[column].dtype != 'object':
        # Non-object columns are left exactly as they are.
        continue
    le = LabelEncoder()
    # Values are cast to str first so the encoder sees one uniform type.
    column_as_text = df_cleaned[column].astype(str)
    df_cleaned[column] = le.fit_transform(column_as_text)
    label_encoders[column] = le

# Survey column headers used by the hypotheses below. Each header is spelled
# out once (verbatim, including the bilingual text) and reused by name.
INFO_SOURCES_Q = '1.What sources of information influence your perception of a travel destination?哪些信息来源影响您对旅游目的地的看法?'
IMAGE_IMPORTANCE_Q = '2.How important is the destination image in influencing your decision to visit a particular destination?目的地图像在影响您访问特定目的地的决定方面有多重要?'
PERCEIVED_VALUE_Q = '2.How satisfied are you with the perceived value you received in terms of the quality and range of amenities and services provided by the destination?您对目的地提供的设施和服务的质量和范围的感知价值有多满意?'
DINING_SATISFACTION_Q = '8.How satisfied were you with the value you received from the dining and food options at the destination?您对目的地餐饮选择的价值是否满意?'
RECOMMEND_LIKELIHOOD_Q = '7.How well the destinations did perceived value influence your likelihood of recommending it to others?目的地的感知价值在多大程度上影响了您向他人推荐该目的地的可能性?'

# Hypothesis 1: A positive destination Image enhances tourist satisfaction
X1 = df_cleaned[[INFO_SOURCES_Q, IMAGE_IMPORTANCE_Q]]
Y1 = df_cleaned[DINING_SATISFACTION_Q]

# Hypothesis 2: Perceived value positively influences tourist satisfaction
X2 = df_cleaned[[PERCEIVED_VALUE_Q]]
Y2 = df_cleaned[DINING_SATISFACTION_Q]

# Hypothesis 3: Tourist satisfaction mediates the link between perceived value and destination image
X3 = df_cleaned[[PERCEIVED_VALUE_Q, INFO_SOURCES_Q]]
Y3 = df_cleaned[DINING_SATISFACTION_Q]

# Hypothesis 4: Destination image positively influences loyalty to Hainan
X4 = df_cleaned[[INFO_SOURCES_Q]]
Y4 = df_cleaned[RECOMMEND_LIKELIHOOD_Q]

# Hypothesis 5: Perceived value has a positive impact on destination loyalty in Hainan
X5 = df_cleaned[[PERCEIVED_VALUE_Q]]
Y5 = df_cleaned[RECOMMEND_LIKELIHOOD_Q]

# Hypothesis 6: Tourist satisfaction has a positive impact on loyalty to Hainan
X6 = df_cleaned[[DINING_SATISFACTION_Q]]
Y6 = df_cleaned[RECOMMEND_LIKELIHOOD_Q]

# Function to train, test and evaluate models
def evaluate_models(X, Y):
    """Train and score four classifiers on one feature/target pair.

    The data is split 80/20 with a fixed ``random_state`` of 42. Gaussian
    Naive Bayes, logistic regression, an SVM and an MLP with two hidden
    layers of 10 units are each fitted on the training part and evaluated on
    the held-out part. For every model the accuracy and the per-class
    classification report are printed; nothing is returned.

    Args:
        X: Feature table accepted by ``train_test_split`` and by the
            estimators' ``fit``/``predict`` (callers in this file pass
            DataFrame column selections).
        Y: Target labels aligned row-for-row with ``X``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    models = {
        "Naive Bayes": GaussianNB(),
        "Logistic Regression": LogisticRegression(),
        "SVM": SVC(),
        "MLP": MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=1000, random_state=42)
    }

    for model_name, model in models.items():
        model.fit(X_train, Y_train)
        Y_pred = model.predict(X_test)
        print(f"\n{model_name} Accuracy:", accuracy_score(Y_test, Y_pred))
        print(f"\n{model_name} Report:")
        # A class the model never predicts has undefined precision/F1.
        # zero_division=0 reports those cells as 0.00 (the same value shown
        # by default) without emitting an UndefinedMetricWarning per report.
        print(classification_report(Y_test, Y_pred, zero_division=0))

# Evaluate models for all hypotheses
# The feature/target pairs are listed in hypothesis order so the printed
# header number matches the hypothesis being evaluated.
hypothesis_datasets = [
    (X1, Y1),
    (X2, Y2),
    (X3, Y3),
    (X4, Y4),
    (X5, Y5),
    (X6, Y6),
]
for hypothesis_number, (features, target) in enumerate(hypothesis_datasets, start=1):
    print(f"Hypothesis {hypothesis_number} Evaluation:")
    evaluate_models(features, target)

Hypothesis 1 Evaluation:

Naive Bayes Accuracy: 0.47619047619047616

Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.41      0.37      0.39        38
           3       0.51      0.71      0.59        51
           4       0.00      0.00      0.00        10

    accuracy                           0.48       105
   macro avg       0.18      0.21      0.20       105
weighted avg       0.40      0.48      0.43       105


Logistic Regression Accuracy: 0.4380952380952381

Logistic Regression Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.38      0.50      0.43        38
           3       0.49      0.53      0.51        51
           4       0.00      0.00      0.00        10

    accuracy     

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



MLP Accuracy: 0.45714285714285713

MLP Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.38      0.32      0.34        38
           3       0.49      0.71      0.58        51
           4       0.00      0.00      0.00        10

    accuracy                           0.46       105
   macro avg       0.17      0.20      0.18       105
weighted avg       0.38      0.46      0.41       105

Hypothesis 2 Evaluation:

Naive Bayes Accuracy: 0.638095238095238

Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.65      0.74      0.69        38
           3       0.66      0.76      0.71        51
           4       0.00      0.00      0.00        10

    accuracy                           0.64       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



MLP Accuracy: 0.638095238095238

MLP Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.65      0.74      0.69        38
           3       0.66      0.76      0.71        51
           4       0.00      0.00      0.00        10

    accuracy                           0.64       105
   macro avg       0.26      0.30      0.28       105
weighted avg       0.56      0.64      0.59       105

Hypothesis 3 Evaluation:

Naive Bayes Accuracy: 0.638095238095238

Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.65      0.74      0.69        38
           3       0.66      0.76      0.71        51
           4       0.00      0.00      0.00        10

    accuracy                           0.64       10

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



MLP Accuracy: 0.638095238095238

MLP Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         2
           2       0.65      0.74      0.69        38
           3       0.66      0.76      0.71        51
           4       0.00      0.00      0.00        10

    accuracy                           0.64       105
   macro avg       0.26      0.30      0.28       105
weighted avg       0.56      0.64      0.59       105

Hypothesis 4 Evaluation:

Naive Bayes Accuracy: 0.4666666666666667

Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00        34
           3       0.47      1.00      0.64        49
           4       0.00      0.00      0.00         9

    accuracy                           0.47       1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



MLP Accuracy: 0.4666666666666667

MLP Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00        34
           3       0.47      1.00      0.64        49
           4       0.00      0.00      0.00         9

    accuracy                           0.47       105
   macro avg       0.09      0.20      0.13       105
weighted avg       0.22      0.47      0.30       105

Hypothesis 5 Evaluation:

Naive Bayes Accuracy: 0.47619047619047616

Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.33      0.33      0.33         3
           1       0.33      0.10      0.15        10
           2       0.00      0.00      0.00        34
           3       0.48      0.98      0.65        49
           4       0.00      0.00      0.00         9

    accuracy                           0.48      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



MLP Accuracy: 0.4666666666666667

MLP Report:
              precision    recall  f1-score   support

           0       0.33      0.33      0.33         3
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00        34
           3       0.47      0.98      0.64        49
           4       0.00      0.00      0.00         9

    accuracy                           0.47       105
   macro avg       0.16      0.26      0.19       105
weighted avg       0.23      0.47      0.31       105

Hypothesis 6 Evaluation:

Naive Bayes Accuracy: 0.4666666666666667

Naive Bayes Report:
              precision    recall  f1-score   support

           0       0.25      0.33      0.29         3
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00        34
           3       0.48      0.98      0.64        49
           4       0.00      0.00      0.00         9

    accuracy                           0.47       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



MLP Accuracy: 0.4666666666666667

MLP Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00        34
           3       0.47      1.00      0.64        49
           4       0.00      0.00      0.00         9

    accuracy                           0.47       105
   macro avg       0.09      0.20      0.13       105
weighted avg       0.22      0.47      0.30       105



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
