In [224]:
import pandas as pd
import numpy as np
from typing import List, Dict
import tensorflow as tf
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

In [225]:
from sklearn.metrics.pairwise import cosine_similarity

In [226]:
!wget -qN https://widasofiyaa.github.io/wids-portfolio/Dataset/tourism_with_id.xlsx

In [227]:
places = pd.read_excel('tourism_with_id.xlsx')

In [228]:
places = places.sample(n=113).reset_index(drop=True)

In [229]:
places.head(10)

Unnamed: 0,Place_Id,Place_Name,Description,Category,City,Price,Rating,Cordinate,Lat,Long
0,102,Pura Taman Saraswati,Pura Taman Saraswati Ubud adalah Pura Hindu ya...,Budaya,Gianyar,10000,4.4,"{'lat': -8.5059569, 'lng': 115.2614926}",-8.505957,115.261493
1,105,Danau Batur,Danau Batur merupakan danau kawah yang berloka...,Alam,Bangli,0,4.6,"{'lat': -8.2548217, 'lng': 115.332523}",-8.254822,115.332523
2,37,Barong Tanah Kilap,Grup Sari Wisata Budaya merupakan salah satu g...,Budaya,Badung,100000,4.6,"{'lat': -8.7188273, 'lng': 115.1902701}",-8.718827,115.19027
3,24,Manta Point,Manta point adalah salah satu tempat menyelam ...,Hobi,Denpasar,150000,4.5,"{'lat': -8.7939035, 'lng': 115.527015}",-8.793904,115.527015
4,75,Museum Subak,Museum Subak adalah museum yang menyajikan inf...,Budaya,Tabanan,10000,4.2,"{'lat': -8.5453051, 'lng': 115.1380888}",-8.545305,115.138089
5,12,Taman Sukasada,"Taman Sukasada, adalah sebuat taman di banjar ...",Alam,Karangasem,10000,4.6,"{'lat': -8.4647081, 'lng': 115.6301561}",-8.464708,115.630156
6,87,Amed Beach,"Pemandangan pantai di Amed sangat fantastis, t...",Alam,Karangasem,0,4.6,"{'lat': -8.3353526, 'lng': 115.6161036}",-8.335353,115.616104
7,98,Museum Blanco,Museum lukisannya ada di objek wisata Ubud Bal...,Budaya,Gianyar,35000,4.3,"{'lat': -8.5055928, 'lng': 115.2542561}",-8.505593,115.254256
8,96,Pasar Seni Sukawati,Pasar tradisional Sukawati tempat yang sangat ...,Budaya,Gianyar,0,4.3,"{'lat': -8.5964879, 'lng': 115.2651387}",-8.596488,115.265139
9,95,Taman Kertha Gosa,Hal utama yang menjadi daya tarik tempat wisat...,Budaya,Klungkung,15000,4.5,"{'lat': -8.5356552, 'lng': 115.3854711}",-8.535655,115.385471


In [230]:
places['Description'] = places['Description'].str.replace(r'\n', '')

  """Entry point for launching an IPython kernel.


In [233]:
factory = StopWordRemoverFactory()
stopword = factory.create_stop_word_remover()

In [234]:
for i, Description in enumerate (places['Description']):
    stop = stopword.remove(Description)
    print(i,stop + '\n')

0 Pura Taman Saraswati Ubud Pura Hindu di peruntukan tempat memuja Dewi Saraswati. Di pura Taman Saraswati Ubud terdapat ukiran khas Ubud Bali, memiliki kolam bunga teratai, mengelilingi panggung pementasan tari Bali.

1 Danau Batur merupakan danau kawah berlokasi Kabupaten Bangli. Lokasinya berada dalam kaldera gunung berapi aktif, Gunung Batur namanya. Berada ketinggian 1.050 meter atas permukaan laut, cocok buat kamu mencari suasana sejuk jauh keramaian.

2 Grup Sari Wisata Budaya merupakan salah satu grup tari tradisional Bali Denpasar rutin mengadakan Pertunjukan Tari Barong Kecak. Teaternya luas memiliki pengaturan tempat duduk tepat. Pertunjukan Barong hari dimulai pukul 21:30 hari, tari kecak pukul 18:00.

3 Manta point salah satu tempat menyelam paling terkenal Nusa Penida Anda melihat pari Manta hampir sepanjang tahun. Dan panduan baik, Anda melihat lebih Manta: hiu, makro, pari marmer, pari elang, pari menyengat. Kami bahkan melihat Mola Mola sana. Kesempatan penemuan hebat 

In [235]:
MAX_TOKENS = 100

In [236]:
tfidf = tf.keras.layers.experimental.preprocessing.TextVectorization(
  standardize = 'lower_and_strip_punctuation',
  split       = 'whitespace',
  max_tokens  = MAX_TOKENS,
  output_mode ='tf-idf',
  pad_to_max_tokens=False)

In [237]:
tfidf.adapt(places['Description'])
desc = tfidf(places['Description'])

In [239]:
cosine_similarities = cosine_similarity(desc) 

In [240]:
similarities = {}

In [241]:
for i in range(len(cosine_similarities)):
    similar_indices = cosine_similarities[i].argsort()[:-50:-1] 
    similarities[places['Place_Name'].iloc[i]] = [(cosine_similarities[i][x], places['Place_Name'][x], places['Category'][x], places['Rating'][x], places['City'][x], places['Price'][x], places['Lat'][x], places['Long'][x]) for x in similar_indices][1:]


In [242]:
class ContentBasedRecommender:
    def __init__(self, matrix):
        self.matrix_similar = matrix

    def _print_message(self, place, recom_place):
        rec_items = len(recom_place)
        
        print(f'The {rec_items} recommended places for {place} are:')
        for i in range(rec_items):
            print(f"Number {i+1}:")
            print(f"{recom_place[i][1]} Category {recom_place[i][2]} with {round(recom_place[i][0], 3)} similarity score") 
            print("--------------------")
        
    def recommend(self, recommendation):
        place = recommendation['place']
        number_places = recommendation['number_places']
        recom_place = self.matrix_similar[place][:number_places]
        self._print_message(place=place, recom_place=recom_place)

In [243]:
recommedations = ContentBasedRecommender(similarities)

In [250]:
recommendation = {
    "place": places['Place_Name'].iloc[4],
    "number_places": 4 
}

In [251]:
recommedations.recommend(recommendation)

The 4 recommended places for Museum Subak are:
Number 1:
Museum Seni Agung Rai Category Budaya with 0.9909999966621399 similarity score
--------------------
Number 2:
Museum Puri Lukisan Category Budaya with 0.9879999756813049 similarity score
--------------------
Number 3:
Museum Pasifika Category nan with 0.9860000014305115 similarity score
--------------------
Number 4:
Museum Blanco Category Budaya with 0.9769999980926514 similarity score
--------------------


In [246]:
import pickle
saved_model = pickle.dumps(recommedations)
knn_from_pickle = pickle.loads(saved_model)


In [252]:
recommendation2 = {
    "place": places['Place_Name'].iloc[4],
    "number_places": 4 
}

In [253]:
knn_from_pickle.recommend(recommendation2)

The 4 recommended places for Museum Subak are:
Number 1:
Museum Seni Agung Rai Category Budaya with 0.9909999966621399 similarity score
--------------------
Number 2:
Museum Puri Lukisan Category Budaya with 0.9879999756813049 similarity score
--------------------
Number 3:
Museum Pasifika Category nan with 0.9860000014305115 similarity score
--------------------
Number 4:
Museum Blanco Category Budaya with 0.9769999980926514 similarity score
--------------------


In [249]:
with open('model.pkl', 'wb') as files:
    pickle.dump(recommedations, files)