In [18]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from tabulate import tabulate
import re

# Load the dataset
dataset = pd.read_csv('C:/Users/HP/Downloads/archive (2)/Property_with_Feature_Engineering.csv')

# Convert numerical columns to strings
dataset['price'] = dataset['price'].astype(str)
dataset['baths'] = dataset['baths'].astype(str)
dataset['area'] = dataset['area'].astype(str)
dataset['bedrooms'] = dataset['bedrooms'].astype(str)

# Concatenate the relevant features into a single text column
dataset['content'] = dataset['property_type'] + ' ' + dataset['price'].astype(str) + ' ' + dataset['location'] + ' ' + dataset['baths'].astype(str) + ' ' + dataset['area'].astype(str) + ' ' + dataset['purpose'] + ' ' + dataset['bedrooms'].astype(str) + ' ' + dataset['agent'].fillna('')

# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Preprocess and transform the text data
tfidf_matrix = vectorizer.fit_transform(dataset['content'])

# Create a KNN model
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(tfidf_matrix)

# Function to preprocess user input text
def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()

    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)

    return text

# Function to get recommended items for user input
def get_recommendations(user_input, top_n=5):
    # Preprocess user input
    user_input = preprocess_text(user_input)

    # Create TF-IDF vector for user input
    user_input_vector = vectorizer.transform([user_input])

    # Get the indices and distances of similar items
    _, indices = knn_model.kneighbors(user_input_vector, n_neighbors=top_n)

    # Return the recommended item IDs
    recommended_items = [dataset.iloc[i] for i in indices.squeeze()]
    return recommended_items

# Example usage
user_input = input("Enter the details of the property you are looking for: ")
recommendations = get_recommendations(user_input)

# Create a DataFrame with the recommended item details
recommendations_df = pd.DataFrame(recommendations)

# Reorder the columns if they exist in the dataset
columns_order = [
    'property_id','location_id','location', 'city',
    'province_name', 'price','purpose','agent'
]
valid_columns = [col for col in columns_order if col in recommendations_df.columns]
recommendations_df = recommendations_df[valid_columns]

# Display the DataFrame
print(tabulate(recommendations_df, headers='keys', tablefmt='fancy_grid', showindex=False))

Enter the details of the property you are looking for: 9448025
╒═══════════════╤═══════════════╤══════════════════════╤═════════╤═════════════════╤══════════╤═══════════╤═════════════╕
│   property_id │   location_id │ location             │ city    │ province_name   │    price │ purpose   │ agent       │
╞═══════════════╪═══════════════╪══════════════════════╪═════════╪═════════════════╪══════════╪═══════════╪═════════════╡
│       9319011 │           495 │ Scheme 33            │ Karachi │ Sindh           │ 11000000 │ For Sale  │ Qasim Naqvi │
├───────────────┼───────────────┼──────────────────────┼─────────┼─────────────────┼──────────┼───────────┼─────────────┤
│       9318408 │         11233 │ P & T Colony         │ Karachi │ Sindh           │  7000000 │ For Sale  │ nan         │
├───────────────┼───────────────┼──────────────────────┼─────────┼─────────────────┼──────────┼───────────┼─────────────┤
│       9322169 │           233 │ Gulshan-e-Iqbal Town │ Karachi │ Sindh           