In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json

In [18]:
# Read the raw kost listings from disk (UTF-8 encoded JSON).
with open('../data/data_kost.json', mode='r', encoding='utf-8') as json_file:
    kost_data = json.loads(json_file.read())

# Accumulator for the flattened records that are later turned into a DataFrame.
transformed_data = list()

# Function to check for specific facility
def check_facility(facilities, keyword):
    """Report whether any facility name contains ``keyword`` (case-insensitive).

    Args:
        facilities: Iterable of facility names. ``None`` or an empty
            collection is treated as "no facilities". Entries that are not
            strings (e.g. JSON nulls) are skipped instead of raising.
        keyword: Substring to search for inside each facility name.

    Returns:
        The string 'Yes' if at least one facility contains the keyword,
        otherwise the string 'No'.
    """
    # dict.get(key, []) still yields None when the key exists with a JSON
    # null value, so guard against it here rather than at every call site.
    if not facilities:
        return 'No'

    needle = keyword.lower()  # lower-case once instead of once per facility
    for facility in facilities:
        if isinstance(facility, str) and needle in facility.lower():
            return 'Yes'
    return 'No'

# Output column name -> keyword looked up in the kost's facility list.
facility_keywords = {
    'parking_area': 'parkir',
    'kitchen_room': 'dapur',
    'wifi': 'wifi',
    'guest_room': 'tamu',
    'laundry': 'laundry',
    'rooftop': 'rooftop',
}

# Flatten the nested data: one output record per room, each carrying the
# fields of the kost it belongs to.
for kost_index, kost in kost_data.items():
    facilities = kost.get('facilities', [])

    # Kost-level fields shared by every room of this kost.
    common_data = {'kost_index': kost_index}
    for field in ('name', 'address', 'gender', 'furnished', 'price', 'description'):
        common_data[field] = kost.get(field)
    for column, keyword in facility_keywords.items():
        common_data[column] = check_facility(facilities, keyword)

    for room_index, room in enumerate(kost.get('rooms', [])):
        info_text = ', '.join(room.get('room_info', []))
        facilities_text = ', '.join(room.get('room_facilities', []))
        room_data = {
            'room_index': room_index,
            'room_name': room.get('room_name'),
            'room_size': room.get('room_size'),
            'room_info': info_text,
            'room_facilities': facilities_text,
            'room_price': room.get('room_price')
        }
        # Kost fields first, then room fields (room keys win on a clash).
        row = dict(common_data)
        row.update(room_data)
        transformed_data.append(row)

# Build the DataFrame in one go from the collected records.
df = pd.DataFrame(transformed_data)

# Show the resulting DataFrame.
print(df)

    kost_index                                              name  \
0            0                      Kost Ibu Ani Coblong Bandung   
1            1  Kost Griya Sidiq Jl Cisitu Indah Coblong Bandung   
2            2                   Tamsis 32 Gatot Subroto Bandung   
3            2                   Tamsis 32 Gatot Subroto Bandung   
4            2                   Tamsis 32 Gatot Subroto Bandung   
..         ...                                               ...   
142        767                          Kost Exclusive Bergengsi   
143        767                          Kost Exclusive Bergengsi   
144        767                          Kost Exclusive Bergengsi   
145        767                          Kost Exclusive Bergengsi   
146        767                          Kost Exclusive Bergengsi   

                                               address       gender  \
0    Jl. Imperial 2 No.7, Dago Asri, Kecamatan Cobl...   Kost Putri   
1    Komp Griya Sidiq Jl Cisitu Indah Dal

In [19]:
import re

# Re-read the raw kost listings so this cell does not rely on earlier cells.
with open('../data/data_kost.json', mode='r', encoding='utf-8') as source_file:
    raw_text = source_file.read()
    kost_data = json.loads(raw_text)

# Function to check for specific facility
def check_facility(facilities, keyword):
    """Report whether any facility name contains ``keyword`` (case-insensitive).

    Args:
        facilities: Iterable of facility names. ``None`` or an empty
            collection is treated as "no facilities". Entries that are not
            strings (e.g. JSON nulls) are skipped instead of raising.
        keyword: Substring to search for inside each facility name.

    Returns:
        The string 'Yes' if at least one facility contains the keyword,
        otherwise the string 'No'.
    """
    # dict.get(key, []) still yields None when the key exists with a JSON
    # null value, so guard against it here rather than at every call site.
    if not facilities:
        return 'No'

    needle = keyword.lower()  # lower-case once instead of once per facility
    for facility in facilities:
        if isinstance(facility, str) and needle in facility.lower():
            return 'Yes'
    return 'No'

# Function to extract information from room_info
def extract_room_info(room_info):
    """Derive electricity, bathroom and bed-size fields from room info strings.

    Args:
        room_info: Iterable of free-text info strings for one room. ``None``
            is treated as empty; non-string entries are skipped.

    Returns:
        A tuple ``(electricity, bathroom, bed_size)`` where

        * ``electricity`` is 'No' if any entry contains
          'tidak termasuk listrik' (case-insensitive), otherwise 'Yes';
        * ``bathroom`` is 'Indoor' if any entry contains 'kamar mandi dalam'
          (case-insensitive), otherwise 'Outdoor';
        * ``bed_size`` is the text between the first '(' and the following
          ')' of the last entry containing both characters, or '' if there
          is no such entry.
    """
    electricity = 'Yes'
    bathroom = 'Outdoor'
    bed_size = ''

    for info in room_info or ():
        if not isinstance(info, str):
            continue

        lowered = info.lower()
        if 'tidak termasuk listrik' in lowered:
            electricity = 'No'
        # Matched case-insensitively, consistent with the electricity check.
        if 'kamar mandi dalam' in lowered:
            bathroom = 'Indoor'
        # NOTE(review): any parenthesised text is taken as the bed size and
        # the last matching entry wins - confirm that only the bed entry
        # carries parentheses in the source data.
        if '(' in info and ')' in info:
            bed_size = info.split('(')[1].split(')')[0]

    return electricity, bathroom, bed_size

# Create lists to store the transformed data
kost_room_data = []      # one record per room, across all kosts
kost_data_list = []      # one record per kost (name, address parts, gender, ...)
facility_data_list = []  # one record per kost with 'Yes'/'No' facility flags

# Running counter so room ids are unique across all kosts.
room_id_counter = 0

# Iterate over each kost and transform the data
for kost_index, kost in kost_data.items():
    kost_id = kost_index

    # Split the address once. `or ''` covers both a missing key and a JSON
    # null value (dict.get's default only applies when the key is absent).
    address_parts = (kost.get('address') or '').split(',')
    street = address_parts[0]
    subdistrict = address_parts[1].strip() if len(address_parts) > 1 else ''
    if len(address_parts) > 2:
        # Drop a leading "Kecamatan" / "Kec." prefix from the third component.
        district = re.sub(r'^(Kecamatan|Kec\.)\s*', '', address_parts[2].strip())
    else:
        district = ''

    # Normalise the gender label; anything other than the two known values
    # is reported as 'Campur'.
    raw_gender = kost.get('gender')
    if raw_gender == 'Kost Putri':
        gender = 'Putri'
    elif raw_gender == 'Kost Putra':
        gender = 'Putra'
    else:
        gender = 'Campur'

    common_data = {
        'id': kost_id,
        'name': kost.get('name'),
        'street': street,
        'subdistrict': subdistrict,
        'district': district,
        'description': kost.get('description'),
        'gender': gender,
        'furnished': kost.get('furnished')
    }
    kost_data_list.append(common_data)

    # Look the facility list up once and reuse it for every flag.
    facilities = kost.get('facilities') or []
    facility_data = {
        'id': kost_id,
        'parking_area': check_facility(facilities, 'parkir'),
        'kitchen': check_facility(facilities, 'dapur'),
        'wifi': check_facility(facilities, 'wifi'),
        'guest_room': check_facility(facilities, 'tamu'),
        'laundry': check_facility(facilities, 'laundry')
    }
    facility_data_list.append(facility_data)

    for room in kost.get('rooms') or []:
        electricity, bathroom, bed_size = extract_room_info(room.get('room_info') or [])

        # First number found in the size text; '' when the size is missing
        # or contains no digits (previously an IndexError).
        size_match = re.search(r'\d+\.?\d*', room.get('room_size') or '')
        room_size = size_match.group(0) if size_match else ''

        # Remove thousands dots, then the "Rp" prefix and "/bulan" suffix.
        raw_price = (room.get('room_price') or '').replace('.', '')
        room_price = re.sub(r'Rp\s*|/bulan', '', raw_price)

        room_data = {
            'id_room': room_id_counter,
            'kost_id': kost_id,
            'name': room.get('room_name'),
            'size': room_size,
            'including_electricity': electricity,
            'bathroom': bathroom,
            'bedsize': bed_size,
            'facilities': ', '.join(room.get('room_facilities') or []),
            'price': room_price
        }
        kost_room_data.append(room_data)
        room_id_counter += 1

# Save the transformed data into JSON files
output_files = (
    ('../data/kost_room.json', kost_room_data),
    ('../data/kost.json', kost_data_list),
    ('../data/facility.json', facility_data_list),
)
for output_path, records in output_files:
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(records, f, ensure_ascii=False, indent=4)

print("Data successfully split into kost_room.json, kost.json, and facility.json")

Data successfully split into kost_room.json, kost.json, and facility.json
