In [15]:
# Basic Libraries
import pandas as pd
import numpy as np

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, classification_report

# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")


In [16]:
# Read the tourism dataset from the CSV file (path is relative to the notebook)
DATASET_CSV = "Indian_Tourism_ML_Big_Dataset.csv"
df = pd.read_csv(DATASET_CSV)

# Preview the first five rows as a quick sanity check of the columns
df.head()


Unnamed: 0,State,Place_Name,Weather,Crowd_Level,Best_Time_To_Visit,Famous_For,Tourism_Type,Budget_Level
0,Uttar Pradesh,Agra,Pleasant,High,Oct-Mar,Taj Mahal,Historical,Low
1,Uttar Pradesh,Varanasi,Pleasant,High,Oct-Mar,Ganga Aarti,Religious,Low
2,Rajasthan,Jaipur,Pleasant,High,Oct-Mar,Pink City,Historical,Medium
3,Rajasthan,Udaipur,Pleasant,Moderate,Oct-Mar,City of Lakes,Historical,Medium
4,Goa,Baga Beach,Warm,High,Nov-Feb,Nightlife,Beach,High


In [25]:
# Step 1: fit one independent LabelEncoder per column
# ==========================
# Encoders for the input features (fitted in the order the names are listed)
(
    weather_le,
    crowd_le,
    best_time_le,
    famous_le,
    tourism_type_le,
    budget_le,
) = (
    LabelEncoder().fit(df[column])
    for column in (
        'Weather',
        'Crowd_Level',
        'Best_Time_To_Visit',
        'Famous_For',
        'Tourism_Type',
        'Budget_Level',
    )
)

# Encoders for the two prediction targets
state_le, place_le = (
    LabelEncoder().fit(df[column]) for column in ('State', 'Place_Name')
)


In [26]:
# Map every feature column to the encoder that was fitted on it.
# Dict order defines the column order of X.
feature_encoders = {
    'Weather': weather_le,
    'Crowd_Level': crowd_le,
    'Best_Time_To_Visit': best_time_le,
    'Famous_For': famous_le,
    'Tourism_Type': tourism_type_le,
    'Budget_Level': budget_le,
}

# Integer-encoded feature matrix
X = pd.DataFrame({
    column: encoder.transform(df[column])
    for column, encoder in feature_encoders.items()
})

# Encode targets (State first, Place_Name second)
target_encoders = {
    'State': state_le,
    'Place_Name': place_le,
}
y = pd.DataFrame({
    column: encoder.transform(df[column])
    for column, encoder in target_encoders.items()
})

In [27]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [28]:
# Base estimator: a 200-tree random forest with a fixed seed
rf = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
)

# Wrap the forest so that it can be fitted on both target columns at once
multi_target_model = MultiOutputClassifier(rf, n_jobs=-1)

# Fit on the training split; the fitted model is the cell's displayed value
multi_target_model.fit(X_train, y_train)


In [29]:
# accuracy_score, classification_report and np are already imported in the
# notebook's first cell, so they are not re-imported here.

# Predict both targets for the held-out rows. Column 0 of y_pred is the State
# prediction and column 1 the Place_Name prediction (same order as y's columns).
y_pred = multi_target_model.predict(X_test)

# Accuracy for each output
state_acc = accuracy_score(y_test['State'], y_pred[:, 0])
place_acc = accuracy_score(y_test['Place_Name'], y_pred[:, 1])

print("State Prediction Accuracy:", state_acc)
print("Place Prediction Accuracy:", place_acc)

# Detailed classification report for each target.
# (report title, y_test column, encoder that maps integer labels back to names)
report_targets = [
    ("State", "State", state_le),
    ("Place", "Place_Name", place_le),
]
for position, (title, column, encoder) in enumerate(report_targets):
    print(f"{title} Classification Report:\n", classification_report(
        y_test[column],
        y_pred[:, position],
        # List every encoded class so labels and target_names stay aligned even
        # when a class is missing from the test split.
        labels=np.arange(len(encoder.classes_)),
        target_names=encoder.classes_,
        # Classes with no true or predicted samples score 0 explicitly instead
        # of relying on globally suppressed warnings.
        zero_division=0,
    ))


State Prediction Accuracy: 0.22535211267605634
Place Prediction Accuracy: 0.04225352112676056
State Classification Report:
                         precision    recall  f1-score   support

     Andaman & Nicobar       0.50      1.00      0.67         1
   Andaman and Nicobar       1.00      1.00      1.00         1
        Andhra Pradesh       0.00      0.00      0.00         8
     Arunachal Pradesh       0.00      0.00      0.00         5
                 Assam       0.00      0.00      0.00         1
                 Bihar       0.25      1.00      0.40         1
            Chandigarh       0.00      0.00      0.00         1
          Chhattisgarh       0.00      0.00      0.00         5
  Dadra & Nagar Haveli       0.00      0.00      0.00         0
Dadra and Nagar Haveli       0.00      0.00      0.00         2
           Daman & Diu       0.00      0.00      0.00         0
         Daman and Diu       0.00      0.00      0.00         1
                 Delhi       1.00      1.00

In [None]:
def suggest_places(df, weather=None, crowd=None, best_time=None, famous_for=None, tourism_type=None, budget=None):
    """
    Suggest places that match the given preferences, grouped by state.

    Every argument that is not None is an exact-match (``==``) filter on one
    column of ``df``. A strict match on all supplied criteria is tried first.
    If nothing matches, the criteria are relaxed cumulatively: the
    lowest-priority criterion still in force is dropped and the remaining ones
    are re-applied, until at least one row matches or only a single criterion
    is left. Priority follows the parameter order, so ``budget`` is dropped
    first and ``weather`` is never dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'State' and 'Place_Name' plus the column of every
        supplied criterion.
    weather, crowd, best_time, famous_for, tourism_type, budget : optional
        Values compared against the 'Weather', 'Crowd_Level',
        'Best_Time_To_Visit', 'Famous_For', 'Tourism_Type' and 'Budget_Level'
        columns respectively. None means "no preference".

    Returns
    -------
    dict
        States as keys and the list of matching Place_Names as values.
        Empty if nothing matches even the highest-priority criterion.
        With no criteria at all, every row of ``df`` is included.
    """
    requested = {
        'Weather': weather,
        'Crowd_Level': crowd,
        'Best_Time_To_Visit': best_time,
        'Famous_For': famous_for,
        'Tourism_Type': tourism_type,
        'Budget_Level': budget
    }

    # Keep only the criteria the caller actually supplied (order is preserved)
    criteria = {col: val for col, val in requested.items() if val is not None}

    def _rows_matching(columns):
        # Rows of df that satisfy every criterion named in `columns`.
        # Boolean indexing returns new frames, so df itself is never modified.
        rows = df
        for col in columns:
            rows = rows[rows[col] == criteria[col]]
        return rows

    # Start with all criteria (strict match)
    active = list(criteria)
    matches = _rows_matching(active)

    # Relax cumulatively: each pass removes one more criterion from the end,
    # so previously relaxed criteria stay relaxed.
    while matches.empty and len(active) > 1:
        active.pop()
        matches = _rows_matching(active)

    if matches.empty:
        return {}

    # Group by State and list places
    return matches.groupby('State')['Place_Name'].apply(list).to_dict()


In [34]:
# Example Input
# The DataFrame must be passed first: suggest_places() takes `df` as a required
# positional argument and raises TypeError when it is omitted.
# NOTE(review): the preview rows show Best_Time_To_Visit values such as
# 'Oct-Mar'; confirm that 'Winter' and 'Hill Station' actually occur in the data.
output = suggest_places(
    df,
    weather='Cold',
    crowd='Low',
    best_time='Winter',
    famous_for='Hill Station',
    tourism_type='Nature',
    budget='Medium'
)

# Print result
print(output)


{}
