In [41]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import haversine_distances, euclidean_distances
from math import radians

In [42]:
# Merge all dataset sheets
file_path = "Dataset Model.xlsx"
sheets =  pd.read_excel(file_path, sheet_name=None)
dataframes = []

for sheet_name, sheet_df in sheets.items():
  sheet_df['Kota'] = sheet_name
  dataframes.append(sheet_df)

df = pd.concat(dataframes, ignore_index=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Nama Tempat  168 non-null    object 
 1   Longitude    168 non-null    float64
 2   Latitude     168 non-null    float64
 3   Rating       168 non-null    float64
 4   Kota         168 non-null    object 
dtypes: float64(3), object(2)
memory usage: 6.7+ KB


In [43]:
# Normalization of Latitude and Longitude columns
scaler = MinMaxScaler()
pd_normalized = scaler.fit_transform(df[['Latitude', 'Longitude']])
df_normalized = pd.DataFrame(pd_normalized)
df_normalized.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       168 non-null    float64
 1   1       168 non-null    float64
dtypes: float64(2)
memory usage: 2.8 KB


In [44]:
# Define the autoencoder model
input_dim = df_normalized.shape[1]  # Number of features (2: lat, lon)
encoding_dim = 2  # Latent space dimension (can adjust this)

# Input layer
input_layer = Input(shape=(input_dim,))
# Encoding layers
encoded = Dense(128, activation='relu')(input_layer)
encoded = Dense(encoding_dim, activation='relu')(encoded)
# Decoding layers
decoded = Dense(256, activation='relu')(encoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

# Autoencoder model
autoencoder = Model(input_layer, decoded)

# Encoder model (for embeddings)
encoder = Model(input_layer, encoded)

# Compile the autoencoder
autoencoder.compile(optimizer='adam', loss='mse',metrics=['accuracy'])

# Train the autoencoder
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
autoencoder.fit(df_normalized, df_normalized, epochs=100, batch_size=32, verbose=1, callbacks=[early_stopping])

Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9703 - loss: 0.1474
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9647 - loss: 0.1446 
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9707 - loss: 0.1342 
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9762 - loss: 0.1253 
Epoch 5/100


  current = self.get_monitor_value(logs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9842 - loss: 0.1126 
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9723 - loss: 0.0964 
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9744 - loss: 0.0883 
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9722 - loss: 0.0776 
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9580 - loss: 0.0641 
Epoch 10/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9848 - loss: 0.0615 
Epoch 11/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9751 - loss: 0.0560 
Epoch 12/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9651 - loss: 0.0516 
Epoch 13/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x7d7682550ca0>

In [45]:
# Get the embedding (compression representation) of the data
embeddings = encoder.predict(pd_normalized) # hidden representation of latitude and longitude
embedding_df = pd.DataFrame(embeddings, columns=['dim1', 'dim2'])
print(embedding_df)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
         dim1      dim2
0    2.162496  0.128832
1    2.142273  0.122392
2    2.231614  0.115442
3    2.214486  0.109681
4    2.257060  0.099063
..        ...       ...
163  0.747131  0.761187
164  0.780589  0.746636
165  0.733084  0.748017
166  0.807429  0.730525
167  0.798640  0.743228

[168 rows x 2 columns]


In [55]:
# Cluster embeddings
kmeans = KMeans(n_clusters=3, random_state=42)
df['Cluster'] = kmeans.fit_predict(embeddings)

In [56]:
# Function to find the closest location
def find_nearest_locations_with_rating(user_location, df, kmeans, encoder, scaler, n_neighbors, weight_distance, weight_rating):
  """
     Search for nearby locations based on geographical distance and place rating.
     Args:
        user_location (list): User location coordinates [latitude, longitude].
        df (DataFrame): DataFrame dengan kolom ['Nama Tempat', 'Latitude', 'Longitude', 'Cluster', 'Rating'].
        kmeans (KMeans): KMeans model for clustering.
        encoder: Encoder model for location embedding.
        scaler: Normalizer for input data.
        n_neighbors (int): Number of closest locations taken.
        weight_distance (float): Weight for distance (0-1).
        weight_rating (float): Weight for rating (0-1).

    Returns:
        DataFrame: Place recommendation DataFrame with columns ['Nama Tempat', 'Latitude', 'Longitude', 'Jarak_km', 'Rating'].
    """
  # Normalize and encode user location
  user_location_arr = np.array([user_location])
  user_location_normalized = scaler.transform(user_location_arr)
  user_location_embedding = encoder.predict(user_location_normalized)
  # Cluster prediction
  new_cluster = kmeans.predict(user_location_embedding)[0]
  # Coordinate to radian conversion for Haversine calculation
  def prepare_coordinates(lat, lon):
      return np.array([[radians(lat), radians(lon)]])
  user_loc_radians = prepare_coordinates(user_location[0], user_location[1])
  # Filter data based on cluster and nearest cluster
  cluster_radius = 1  # Cluster radius for search (customizable)
  nearby_clusters = np.where(
      euclidean_distances(kmeans.cluster_centers_[new_cluster].reshape(1, -1),
                          kmeans.cluster_centers_) < cluster_radius)[1]

  potential_locations = df[df['Cluster'].isin(nearby_clusters)].copy()
  # If there are no locations in adjacent clusters, use the same cluster
  if len(potential_locations) == 0:
        potential_locations = df[df['Cluster'] == new_cluster].copy()
  # Calculate the Haversine distance for potential locations
  locations_radians = np.radians(
      potential_locations[['Latitude', 'Longitude']].values
  )
  # Calculate the distance in kilometers (earth radius = 6371 km)
  distances = haversine_distances(user_loc_radians, locations_radians)[0] * 6371

  # Add distance to DataFrame and sort
  potential_locations['Jarak_km'] = distances
  # Hitung skor berdasarkan kombinasi jarak dan rating
  potential_locations['Score'] = (
      weight_distance * potential_locations['Jarak_km'] +
      weight_rating * (-potential_locations['Rating'])  # Negative rating to prioritize higher value
  )
  # Sort by score
  nearest_locations = potential_locations.nsmallest(n_neighbors, 'Score')

  # Output format
  result = nearest_locations.copy()
  result['Jarak_km'] = result['Jarak_km'].round(2)
  return result


In [60]:
# User input
Lat_inp = float(input('Masukkan Latitude Lokasi: '))
Log_inp = float(input('Masukkan Longitude Lokasi: '))
user_location = [Lat_inp, Log_inp]

Masukkan Latitude Lokasi: -7.2794
Masukkan Longitude Lokasi: 112.7889


In [61]:
# Find recommended service places
place_recommendation = find_nearest_locations_with_rating(
    user_location=user_location,
    df=df,
    kmeans=kmeans,
    encoder=encoder,
    scaler=scaler,
    n_neighbors=5,
    weight_distance=0.5,  # Distance weight
    weight_rating=0.5     # Rating weight
)

print("Rekomendasi tempat servis yang ditemukan:")
print(place_recommendation[['Nama Tempat', 'Latitude', 'Longitude', 'Jarak_km', 'Rating']])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Rekomendasi tempat servis yang ditemukan:
                                        Nama Tempat  Latitude   Longitude  \
4                              Geeko Komputer - ITS -7.279407  112.788917   
54                 Alpu Service Laptop dan Komputer -7.283019  112.797904   
65                             Rytech Comp Surabaya -7.284759  112.799222   
63                                  Frizta Computer -7.271988  112.793729   
70  Calosa Laptop & Gadget Store Klampis by Skytech -7.280223  112.776789   

    Jarak_km  Rating  
4       0.00     4.9  
54      1.07     4.9  
65      1.29     5.0  
63      0.98     4.6  
70      1.34     4.9  


