In [1]:
from google.colab import drive

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
import re
import random

df = pd.read_csv("hotel_bandung_english.csv")
df.head()

Unnamed: 0,name,address,description
0,Capital O 253 Topas Galeria Hotel,"Jl. Dr. Djundjunan No. 153, 40173 Bandung, Ind...","A 10-minute drive from Bandung Airport, Topas ..."
1,Sheraton Bandung Hotel & Towers,"Jl. Ir H Juanda 390, 40135 Bandung, Indonesia",Sheraton Hotel & Towers offers 5-star accommod...
2,OYO 794 Ln 9 Bandung Residence,"Jalan Lemahnendeut No 9, Sukajadi, 40164 Bandu...","Conveniently located in Sukajadi, Bandung, OYO..."
3,OYO 226 LJ hotel,"Jl. Malabar No.2, Malabar, Lengkong, Dago, Asi...","Featuring a shared lounge, OYO 226 LJ hotel is..."
4,OYO 230 Maleo Residence,"JI. Dangeur Indah II No. 15, Sukagalih, Sukaja...",Attractively set in the Sukajadi district of B...


In [3]:
df.describe()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105 entries, 0 to 104
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   name         105 non-null    object
 1   address      105 non-null    object
 2   description  105 non-null    object
dtypes: object(3)
memory usage: 2.6+ KB


In [5]:
def print_description(index):
    example = df[df.index == index][['description', 'name', 'address']].values[0]
    if len(example) > 0:
        print(example[0])
        print('Nama:', example[1])
        print('Alamat:', example[2])

In [6]:
print_description(1)

Sheraton Hotel & Towers offers 5-star accommodation in the middle of a green landscape in Bandung. All spacious rooms come with a flat-screen cable TV. The hotel offers an outdoor pool, spa center and restaurant with mountain views. Wi-Fi access is available free in all areas of the hotel. Elegant rooms have modern interiors, light wood furnishings and large windows. Each provides a comfortable seating area, DVD player and private bathroom with shower. You can work out in the gym or enjoy body treatments at the spa. Reception staff are ready to serve your needs for 24 hours. International and Asian dishes are offered at Feast Restaurant, while soft drinks are served at Samsara Lounge. A variety of cocktails and snacks are also available at Poolside Terrace. Sheraton Bandung Hotel & Towers is a 10-minute drive from Juanda Culture Park and Dago area, where various factory outlets are located. Husein Sastranegara Airport is a 30-minute drive away.
Nama: Sheraton Bandung Hotel & Towers
Ala

In [7]:
print_description(50)

Featuring an outdoor pool and a restaurant, House-Sangkuriang is conveniently located just a 5-minute walk from Dago’s factory outlets. It has a 24-hour front desk and provides free Wi-Fi access in all areas. Elegant and warmly lit, the air-conditioned rooms in House-Sangkuriang include hardwood floors. A flat-screen satellite TV, an electric kettle and a free one-time minibar are among the in-room comforts, and a shower, slippers and a hairdryer are included in the private bathrooms. The hotel also serves daily afternoon tea in the lobby and on the pool terrace. Cihampelas Walk Mall is a 10-minute drive from the property, and Husein Sastranegara Airport is a 20-minute drive away. Airport transportation can be arranged upon request. The staff at the front desk can assist with valet parking and luggage storage. Housing a business center, the hotel also provides laundry service for a fee. International dishes are served at Dining Room. Guests can also dine in the comfort of their rooms.


In [9]:
import nltk
nltk.download('stopwords')
clean_spcl = re.compile('[/(){}\[\]\|@,;]')
clean_symbol = re.compile('[^0-9a-z #+_]')
stopworda = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower() # lowercase text
    text = clean_spcl.sub(' ', text)
    text = clean_symbol.sub('', text)
    text = ' '.join(word for word in text.split() if word not in stopworda) # hapus stopword dari kolom deskripsi
    return text

# Buat kolom tambahan untuk data description yang telah dibersihkan
df['desc_clean'] = df['description'].apply(clean_text)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [10]:
# Deskripsi kedua (Setelah preprocessing)
def print_description_clean(index):
    example = df[df.index == index][['desc_clean', 'name', 'address']].values[0]
    if len(example) > 0:
        print(example[0])
        print('Nama:', example[1])
        print('Alamat:', example[2])

In [11]:
print_description_clean(1)

sheraton hotel towers offers 5star accommodation middle green landscape bandungall spacious rooms come flatscreen cable tvthe hotel offers outdoor pool spa center restaurant mountain viewswifi access available free areas hotelelegant rooms modern interiors light wood furnishings large windowseach provides comfortable seating area dvd player private bathroom showeryou work gym enjoy body treatments spareception staff ready serve needs 24 hoursinternational asian dishes offered feast restaurant soft drinks served samsara loungea variety cocktails snacks also available poolside terracesheraton bandung hotel towers 10minute drive juanda culture park dago area various factory outlets locatedhusein sastranegara airport 30minute drive away
Nama: Sheraton Bandung Hotel & Towers
Alamat: Jl. Ir H Juanda 390, 40135 Bandung, Indonesia


In [12]:
print_description_clean(50)

featuring outdoor pool restaurant housesangkuriang conveniently located 5minute walk dagos factory outlets 24hour front desk provides free wifi access areas elegant warmly lit airconditioned rooms housesangkuriang include hardwood floors flatscreen satellite tv electric kettle free onetime minibar among inroom comforts shower slippers hairdryer included private bathrooms hotel also serves daily afternoon tea lobby pool terrace cihampelas walk mall 10minute drive property husein sastranegara airport 20minute drive away airport transportation arranged upon request staff front desk assist valet parking luggage storage housing business center hotel also provides laundry service fee international dishes served dining room guests also dine comfort rooms
Nama: House Sangkuriang
Alamat: Jl. Sangkuriang no.1 Dago, Kecamatan Coblong, 40135 Bandung, Indonesia


In [13]:
df.set_index('name', inplace=True)
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(df['desc_clean'])
cos_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cos_sim

array([[1.        , 0.02250818, 0.01254879, ..., 0.01044102, 0.04017144,
        0.03531754],
       [0.02250818, 1.        , 0.01040992, ..., 0.01269843, 0.02856891,
        0.01847406],
       [0.01254879, 0.01040992, 1.        , ..., 0.12575247, 0.01082423,
        0.02511644],
       ...,
       [0.01044102, 0.01269843, 0.12575247, ..., 1.        , 0.01065003,
        0.02392556],
       [0.04017144, 0.02856891, 0.01082423, ..., 0.01065003, 1.        ,
        0.03826221],
       [0.03531754, 0.01847406, 0.02511644, ..., 0.02392556, 0.03826221,
        1.        ]])

In [14]:
# Set index utama di kolom 'name'
indices = pd.Series(df.index)
indices[:50]

0                Capital O 253 Topas Galeria Hotel
1                  Sheraton Bandung Hotel & Towers
2                   OYO 794 Ln 9 Bandung Residence
3                                 OYO 226 LJ hotel
4                          OYO 230 Maleo Residence
5                        OYO 167 Dago's Hill Hotel
6                   OYO 794 Ln 9 Bandung Residence
7                       OYO 196 Horizone Residence
8     OYO 483 Flagship Tamansari Panoramic Bandung
9               OYO 295 Grha Ciumbuleuit Residence
10                            OYO 193 SM Residence
11              Capital O 874 Hotel Nyland Pasteur
12                            OYO 352 Sabang Hotel
13                                  Hilton Bandung
14             InterContinental Bandung Dago Pakar
15                                Aryaduta Bandung
16               Art Deco Luxury Hotel & Residence
17                            Crowne Plaza Bandung
18          Best Western Premier La Grande Bandung
19                         éL R

In [15]:
def recommendations(name, cos_sim = cos_sim):

    recommended_hotel = []

    # Mengambil nama hotel berdasarkan variabel indicies
    idx = indices[indices == name].index[0]

    # Membuat series berdasarkan skor kesamaan
    score_series = pd.Series(cos_sim[idx]).sort_values(ascending = False)

    # mengambil index dan dibuat 10 baris rekomendasi terbaik
    top_10_indexes = list(score_series.iloc[1:11].index)

    for i in top_10_indexes:
        recommended_hotel.append(list(df.index)[i])

    return recommended_hotel

In [16]:
recommendations('Benua Hotel')

['FOX Lite Hotel Metro Indah Bandung',
 'InterContinental Bandung Dago Pakar',
 'Zest Sukajadi Hotel Bandung',
 'M Premiere Hotel Dago Bandung',
 'Ibis Bandung Pasteur',
 'Serela Cihampelas Hotel',
 'Grand Cordela Hotel Bandung ',
 'Favehotel Hyper Square',
 'HARRIS Hotel & Conventions Ciumbuleuit - Bandung',
 'Hemangini Hotel Bandung']

In [17]:
recommendations("Serela Cihampelas Hotel")

['Vio Cihampelas',
 'Grand Sovia Hotel',
 'Neo Dipatiukur Bandung',
 'Grand Tjokro Bandung',
 'HARRIS Hotel & Conventions Ciumbuleuit - Bandung',
 'InterContinental Bandung Dago Pakar',
 'Ibis Bandung Pasteur',
 'Tebu Hotel Bandung',
 'Aryaduta Bandung',
 'Benua Hotel']