In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the destination table from data.csv in the current working directory.
# NOTE(review): the preview of this frame shows an "Unnamed: 0" column, which
# suggests the CSV was written with its index; index_col=0 may be wanted - confirm.
destinations = pd.read_csv("data.csv")

In [3]:
# Preview the first rows to check which columns were loaded.
destinations.head()

Unnamed: 0,City,Rating,Description,Img Link,Best Time
0,Manali,4.5,"With spectacular valleys, breathtaking views,...",https://www.holidify.com/images/bgImages/MANAL...,October to June
1,Leh Ladakh,4.6,Ladakh is a union territory in the Kashmir re...,https://www.holidify.com/images/bgImages/LADAK...,Jun to Sep
2,Coorg,4.2,Located amidst imposing mountains in Karnatak...,https://www.holidify.com/images/bgImages/COORG...,October to March
3,Andaman,4.5,Replete with turquoise blue water beaches and...,https://www.holidify.com/images/bgImages/ANDAM...,October to Jun
4,Lakshadweep,4.0,"Formerly known as Laccadive Islands, Lakshadw...",https://www.holidify.com/images/bgImages/LAKSH...,October to Jun


## Data Cleaning

In [4]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [5]:
# Fetch the NLTK data packages used by the cleaning cells.
# In the run recorded below all three downloads failed with a name-resolution
# error and the cell evaluated to False without raising, so the later cells
# only work when the data is already installed locally.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

[nltk_data] Error loading stopwords: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>
[nltk_data] Error loading wordnet: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>
[nltk_data] Error loading punkt: <urlopen error [Errno 11001]
[nltk_data]     getaddrinfo failed>


False

In [6]:
# preprocess_text tests every token for membership, so keep the English stop
# words in a set (constant-time lookup) rather than the list NLTK returns.
stop_words = set(stopwords.words('english'))
# One shared WordNet lemmatizer instance, reused for every description.
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalise a free-text description into a space-separated string of lemmas.

    Steps, in order: strip URLs, hashtags and common HTML entities; lowercase;
    replace every character that is not a letter or digit with a space;
    tokenize; drop English stop words; lemmatize the remaining tokens.

    Parameters
    ----------
    text : str
        Raw description. Any non-string value (for example the float NaN that
        pandas uses for a missing cell) is treated as empty text.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces, or "" when nothing is left.
    """
    # A missing CSV cell arrives as float NaN, which would make re.sub raise
    # TypeError and abort the whole .apply(); treat it as an empty description.
    if not isinstance(text, str):
        return ""
    # remove URLs and hashtags
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"#\w+", "", text)
    # remove leftover HTML entities (e.g. a truncated "&nbsp...") so that their
    # names do not become vocabulary terms such as "nbsp"; the \b keeps ordinary
    # text like "&amplifier" untouched
    text = re.sub(r"&(?:nbsp|amp|quot|apos|lt|gt)\b;?", " ", text, flags=re.IGNORECASE)
    # lowercase, then turn everything that is not a letter or digit into a space
    text = re.sub(r"[^a-zA-Z0-9]", " ", text.lower())
    # tokenize, drop stop words and lemmatize what remains
    words = [
        lemmatizer.lemmatize(word)
        for word in nltk.word_tokenize(text)
        if word not in stop_words
    ]
    # join the words back into a string
    return " ".join(words)

In [7]:
# Add a cleaned copy of every description as a new column; the raw
# 'Description' column is left as loaded.
destinations['Clean_description'] = destinations['Description'].apply(preprocess_text)

In [8]:
# Lift the column-width limit so long text cells are displayed in full.
# This is a global pandas option and affects every later display in the session.
pd.set_option('display.max_colwidth', None)

In [9]:
# Inspect the cleaned text produced by preprocess_text.
# NOTE(review): this displays the whole column; .head() would keep the output short.
destinations['Clean_description']

0                         spectacular valley breathtaking view snowcapped mountain lush forest oak deodar pine manali magical hill station northern end kullu valley himachal pradesh gifted world mighty himalaya known one popular destination
1                  ladakh union territory kashmir region india formerly falling state jammu kashmir ladakh administered union territory 31st october 2019 extending siachen glacier main great himalaya ladakh land like dominated dramatic land
2                             located amidst imposing mountain karnataka perpetually misty landscape coorg popular coffee producing hill station popular beautiful green hill stream cutting right also stand popular destination culture people
3                      replete turquoise blue water beach bit history andaman nicobar island little slice paradise tucked around 1 400 km away east coast mainland india port blair capital union territory major airport seaport connected rest
4                       formerly kno

## Text Representation

In [10]:
# Fit a TF-IDF vocabulary on the cleaned descriptions and encode each of them.
# Row i of destination_vectors corresponds to row i (by position) of `destinations`;
# recommend_destinations relies on that when it indexes with .iloc.
vectorizer = TfidfVectorizer()
destination_vectors = vectorizer.fit_transform(destinations['Clean_description'])

## Text Similarity

In [11]:
def recommend_destinations(query, n=10):
    """Return up to ``n`` destinations whose descriptions best match ``query``.

    The query is cleaned with ``preprocess_text`` (the same normalisation that
    was applied to the descriptions), encoded with the fitted ``vectorizer``
    and compared to every row of ``destination_vectors`` by cosine similarity.

    Parameters
    ----------
    query : str
        Free-text description of what the user is looking for. Hashtags are
        treated as ordinary keywords ("#nightlife" counts as "nightlife").
    n : int, default 10
        Maximum number of rows to return. Must not be negative.

    Returns
    -------
    pandas.DataFrame
        Rows of ``destinations`` ordered from most to least similar. Rows that
        share no term with the query (similarity 0) are left out, so fewer
        than ``n`` rows, or none at all, may be returned.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    # A negative n would otherwise slice "all but the last |n|" rows.
    if n < 0:
        raise ValueError("n must be non-negative")
    # preprocess_text deletes whole hashtags, but in a query the hashtag is the
    # keyword itself, so drop only the '#' before normalising. Normalising makes
    # plural/inflected query words match the lemmatised vocabulary.
    cleaned_query = preprocess_text(query.replace("#", " "))
    query_vector = vectorizer.transform([cleaned_query])
    similarity_scores = cosine_similarity(query_vector, destination_vectors).flatten()
    # Stable sort on the negated scores: best match first, ties in dataset order.
    ranked_indices = (-similarity_scores).argsort(kind="stable")
    # A score of 0 means no overlap with the query at all; returning such rows
    # would present arbitrary destinations as recommendations.
    ranked_indices = ranked_indices[similarity_scores[ranked_indices] > 0][:n]
    return destinations.iloc[ranked_indices]

## Recommendation

In [12]:
# Example queries - uncomment one of these to try it instead of the active one.
# query = "I love clubs. #nightlife"
# query = "clubs #nightlife"
# query = "I love to visit temples"
# query = "I love beaches. #boating"
query = "temple, pilgrimage, peace" 
# Last expression of the cell, so the returned frame is rendered as the output.
recommend_destinations(query)

Unnamed: 0,City,Rating,Description,Img Link,Best Time,Clean_description
61,Tirupati,4.5,"Situated in the Chittoor district of Andhra Pradesh, Tirupati is known for Lord Venkateshwara Temple, one of the most visited pilgrimage centers in the world. It is also one of the world's richest temples known by different names - Tirumala Temple, Tirupati Temple, and Tirupati Balaji Temple. Tiruma...",https://www.holidify.com/images/bgImages/TIRUPATI.jpg,Throughout the year,situated chittoor district andhra pradesh tirupati known lord venkateshwara temple one visited pilgrimage center world also one world richest temple known different name tirumala temple tirupati temple tirupati balaji temple tiruma
62,Ujjain,4.4,"Ujjain, considered to be one of the holiest cities of India, is an ancient city situated on the eastern bank of the Shipra River in the Malwa region of Madhya Pradesh. Ujjain is one of the four sites for the Kumbh Mela, the largest peace-time gathering on the planet that attracts 100 million people ...",https://www.holidify.com/images/bgImages/UJJAIN.jpg,July to March,ujjain considered one holiest city india ancient city situated eastern bank shipra river malwa region madhya pradesh ujjain one four site kumbh mela largest peace time gathering planet attracts 100 million people
56,Bodh Gaya,4.4,"Bodh Gaya is a Buddhist pilgrimage site in Gaya District of Bihar. Famous for the Mahabodhi Temple, It was here under the Bodhi tree that Gautama Buddha attained enlightenment.",https://www.holidify.com/images/bgImages/BODH-GAYA.jpg,October to June,bodh gaya buddhist pilgrimage site gaya district bihar famous mahabodhi temple bodhi tree gautama buddha attained enlightenment
83,Bhubaneswar,4.1,"Bhubaneswar, the temple city of India, once known for its architecture and grand temples, is presently a thriving centre for commerce and business.",https://www.holidify.com/images/bgImages/BHUBANESWAR.jpg,October to June,bhubaneswar temple city india known architecture grand temple presently thriving centre commerce business
88,Puri,4.3,Puri in Odisha is one of the four must-visit pilgrimage sites for Hindus because of Jagannath Temple that forms the part of Char Dham in India. Puri is a beach city located on the coast of Bay of Bengal which is one of the favourites among people of West Bengal for a short trip.,https://www.holidify.com/images/bgImages/PURI.jpg,October to Apr,puri odisha one four must visit pilgrimage site hindu jagannath temple form part char dham india puri beach city located coast bay bengal one favourite among people west bengal short trip
55,Amarnath,4.6,"Amarnath, located in union territory of Jammu and Kashmir is one of the most important pilgrimage in India for the worshippers of Lord Shiva. The Amarnath cave has an enshrined Shivaling formed naturally with ice, which resembles Lord Shiva.",https://www.holidify.com/images/bgImages/AMARNATH.jpg,October to March,amarnath located union territory jammu kashmir one important pilgrimage india worshipper lord shiva amarnath cave enshrined shivaling formed naturally ice resembles lord shiva
82,Thanjavur,4.4,"Also known as Tanjore or the city of temples, Thanjavur has a lot of cultural value and is known for its famous Tanjore paintings, antiques and handicrafts, textiles and saris , its Carnatic music and musical instruments and of course the temples.",https://www.holidify.com/images/bgImages/THANJEVUR.jpg,July to March,also known tanjore city temple thanjavur lot cultural value known famous tanjore painting antique handicraft textile sari carnatic music musical instrument course temple
69,Vrindavan,4.3,"One of the oldest cities on the banks of Yamuna, Vrindavan is considered to be one of the most important places of pilgrimage for the devotees of Krishna. It is said that Lord Krishna had spent his childhood in Vrindavan. The name of the city has been derived from Vrinda (meaning basil) and van (mea...",https://www.holidify.com/images/bgImages/VRINDAVAN.jpg,October to June,one oldest city bank yamuna vrindavan considered one important place pilgrimage devotee krishna said lord krishna spent childhood vrindavan name city derived vrinda meaning basil van mea
41,Haridwar,4.0,"Haridwar is one of the seven holiest cities in India, situated in Uttarakhand. It is located where the sacred river Ganga enters the Indo-Gangetic plains for the first time. Dotted with temples, ashrams and narrow lanes across the city, Haridwar is a famous Hindu temple town where millions of d...",https://www.holidify.com/images/bgImages/HARIDWAR.jpg,September to March,haridwar one seven holiest city india situated uttarakhand located sacred river ganga enters indo gangetic plain first time dotted temple ashram narrow lane across city haridwar famous hindu temple town million
84,Ajmer,3.8,"Surrounded by Aravali ranges, the city of Ajmer is most famous for the Ajmer Sharif Dargah of saint Muin-ud-din Chishti. This makes Ajmer one of the most significant places of Islamic pilgrimage in the world. Located at a distance of 130 km from Jaipur and 14 kms from Pushkar in Rajasthan,&nbsp...",https://www.holidify.com/images/bgImages/AJMER.jpg,September to March,surrounded aravali range city ajmer famous ajmer sharif dargah saint muin ud din chishti make ajmer one significant place islamic pilgrimage world located distance 130 km jaipur 14 km pushkar rajasthan nbsp


## Saving the model

In [13]:
import pickle

# Pickle serialises a function as a reference to its module and name, not its
# code or the globals it uses. This file therefore holds no fitted state and can
# only be loaded where a function called recommend_destinations is importable
# from the same module. It is kept unchanged for existing consumers.
with open('model.pickle', 'wb') as f:
    pickle.dump(recommend_destinations, f)

# Persist the state the recommender actually needs, so that it can be rebuilt
# in another process: the fitted vectorizer, the TF-IDF matrix and the table
# the matrix rows refer to.
# NOTE: only unpickle these files from a trusted source - pickle.load can
# execute arbitrary code.
with open('model_artifacts.pickle', 'wb') as f:
    pickle.dump(
        {
            'vectorizer': vectorizer,
            'destination_vectors': destination_vectors,
            'destinations': destinations,
        },
        f,
    )