 Input Data | Output Data |
| --- | --- |
| Region | Tokyo - city/ward, district/town/village |
| Price range (Korean standards) | Accurate price (price) |
| Room Type | Private room, Entire home/apt |
| Accommodates number of guests | Host-written accommodation description (neighborhood_overview) |
| Bathrooms (bathrooms_text) | Host identity verification (host_identity_verified) |
| Bedrooms (bedrooms) | Customer-written accommodation description (description) |
| Beds: number of beds | Minimum number of nights (minimum_nights) |
| Rating (review_scores_rating) range | Overall rating score (review_scores_rating), Number of reviews in the last 30 days (number_of_reviews_l30d), Date of the last review (last_review), Date calendar was last updated (calendar_updated) |

In [1]:
import pandas as pd
import numpy as np

In [1]:
class AirbnbInput:
    def __init__(self, user_id, review_scores_rating, neighbourhood_cleansed, room_type, accommodates, price,minimum_nights , bathrooms_text = None, bedrooms = None, beds = None):
        self.user_id = user_id
        self.review_scores_rating = review_scores_rating
        self.neighbourhood_cleansed = neighbourhood_cleansed
        self.room_type = room_type
        self.accommodates = accommodates
        self.price = price
        self.bathrooms_text = bathrooms_text
        self.bedrooms = bedrooms
        self.beds = beds
        self.minimum_nights = minimum_nights

    def __getitem__(self, key):
        # 인덱싱된 키에 따라 속성 값을 반환
        if key == 'user_id':
            return self.user_id
        elif key == 'listing_url':
            return self.listing_url
        elif key == 'description':
            return self.description
        elif key == 'review_scores_rating':
            return self.review_scores_rating
        elif key == 'neighbourhood_cleansed':
            return self.neighbourhood_cleansed
        elif key == 'room_type':
            return self.room_type
        elif key == 'accommodates':
            return self.accommodates
        elif key == 'bathrooms_text':
            return self.bathrooms_text
        elif key == 'bedrooms':
            return self.bedrooms
        elif key == 'beds':
            return self.beds
        elif key == 'price':
            return self.price
        elif key == 'minimum_nights':
            return self.minimum_nights
        else:
            raise KeyError(f"Invalid key: {key}")
        

    def __str__(self):
        return f"User ID: {self.user_id}, Review Scores Rating: {self.review_scores_rating}, Neighbourhood Cleansed: {self.neighbourhood_cleansed}, Room Type: {self.room_type}, Accommodates: {self.accommodates}, Price: {self.price}, Minimum Nights: {self.minimum_nights}"

# 총 리뷰개수 
class AirbnbOutput:
    def __init__(self, user_id, host_id, listing_url, description, neighborhood_overview, picture_url, amenities, number_of_reviews_l30d, review_scores_rating, instant_bookable, neighbourhood_cleansed, room_type, accommodates, price, bathrooms_text, bedrooms, beds, minimum_nights):
        self.user_id = user_id
        self.host_id = host_id
        self.listing_url = listing_url
        self.description = description
        self.neighborhood_overview = neighborhood_overview
        self.picture_url = picture_url
        self.amenities = amenities
        self.number_of_reviews_l30d = number_of_reviews_l30d
        self.review_scores_rating = review_scores_rating
        self.instant_bookable = instant_bookable
        self.neighbourhood_cleansed = neighbourhood_cleansed
        self.room_type = room_type
        self.accommodates = accommodates
        self.price = price
        self.bathrooms_text = bathrooms_text
        self.bedrooms = bedrooms
        self.beds = beds
        self.minimum_nights = minimum_nights

    def __getitem__(self, key):
        # 인덱싱된 키에 따라 속성 값을 반환
        if key == 'user_id':
            return self.user_id
        elif key == 'host_id':
            return self.host_id
        elif key == 'listing_url':
            return self.listing_url
        elif key == 'description':
            return self.description
        elif key == 'neighbourhood_cleansed':
            return self.neighbourhood_cleansed
        elif key == 'room_type':
            return self.room_type
        elif key == 'accommodates':
            return self.accommodates
        elif key == 'price':
            return self.price
        elif key == 'bathrooms_text':
            return self.bathrooms_text
        elif key == 'bedrooms':
            return self.bedrooms
        elif key == 'beds':
            return self.beds
        elif key == 'minimum_nights':
            return self.minimum_nights
        else:
            raise KeyError(f"Invalid key: {key}")

class HostInfo:
    def __init__(self, host_id, host_url, host_name, host_about, host_thumbnail_url, host_picture_url, host_identity_verified):
        self.host_id =  host_id
        self.host_url =  host_url
        self.host_name =  host_name
        self.host_about =  host_about
        self.host_thumbnail_url =  host_thumbnail_url
        self.host_picture_url =  host_picture_url
        self.host_identity_verified =  host_identity_verified

    def __getitem__(self, key):
        # 인덱싱된 키에 따라 속성 값을 반환
        if key == 'host_id':
            return self.host_id
        elif key == 'host_url':
            return self.host_url
        elif key == 'host_name':
            return self.host_name
        elif key == 'host_about':
            return self.host_about
        elif key == 'host_thumbnail_url':
            return self.host_thumbnail_url
        elif key == 'host_picture_url':
            return self.host_picture_url
        elif key == 'host_identity_verified':
            return self.host_identity_verified
        else:
            raise KeyError(f"Invalid key: {key}")


In [3]:
import csv
import gzip

class AirbnbRecommend:
    def __init__(self, data_file, input_data):
        self.data = []  # 데이터를 저장할 리스트
        self.load_data(data_file)  # 데이터 파일로부터 데이터 로드
        self.input_data = input_data
        self.output_data = []
        self.filter_data()  # 데이터 필터링 및 추천 알고리즘 수행

    def load_data(self, data_file):
        # Gzip 압축된 데이터 파일을 읽어서 데이터를 처리하는 로직 구현
        with gzip.open(data_file, 'rt') as file:
            csv_reader = csv.DictReader(file)  # Gzip 압축된 CSV 파일을 딕셔너리 형태로 읽음
            for row in csv_reader:                
                self.data.append(row)
  

    def filter_data(self):
        for item in self.data:
            # 입력 데이터와 필터링 대상 데이터 간의 일치 여부 확인
            if self.is_match(item):
                airbnb_output = AirbnbOutput(
                    user_id=0000,
                    host_id=item['host_id'],
                    listing_url=item['listing_url'], 
                    description=item['description'],  
                    neighborhood_overview=item['neighborhood_overview'],  
                    picture_url=item['picture_url'],  
                    amenities=item['amenities'],  
                    number_of_reviews_l30d=item['number_of_reviews_l30d'],  
                    review_scores_rating=item['review_scores_rating'],  
                    instant_bookable=item['instant_bookable'], 
                    neighbourhood_cleansed=item['neighbourhood_cleansed'],
                    room_type=item['room_type'],
                    accommodates=item['accommodates'],
                    price=item['price'],
                    bathrooms_text=item['bathrooms_text'],  
                    bedrooms=item['bedrooms'],  
                    beds=item['beds'],  
                    minimum_nights=item['minimum_nights']  
                )
                self.output_data.append(airbnb_output)

    def is_match(self, item):
        # 입력 데이터와 필터링 대상 데이터 간의 일치 여부를 검사하는 함수
        if (
            # 필수컬럼
            item['review_scores_rating'] == self.input_data.review_scores_rating and
            item['neighbourhood_cleansed'] == self.input_data.neighbourhood_cleansed and
            item['room_type'] == self.input_data.room_type and
            item['accommodates'] == self.input_data.accommodates and
            item['price'] == self.input_data.price and
            item['minimum_nights'] == self.input_data.minimum_nights and
            # 선택컬럼 
            (self.input_data.bathrooms_text is None or item['bathrooms_text'] == self.input_data.bathrooms_text) and
            (self.input_data.bedrooms is None or item['bedrooms'] == self.input_data.bedrooms) and
            (self.input_data.beds is None or item['beds'] == self.input_data.beds) 
        ):
            return True
        return False

    def get_recommend_airbnb(self):
        return self.output_data

    def __str__(self):
        return f"Recommend Airbnb List: {len(self.output_data)}"


In [4]:
data_file = '../Tokyo_Airbnb/data/listings.csv.gz'

listing_gz = pd.read_csv(data_file, compression='gzip', header=0, sep=',', quotechar='"')
listing_gz.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,197677,https://www.airbnb.com/rooms/197677,20230629055629,2023-06-29,city scrape,Rental unit in Sumida · ★4.78 · 1 bedroom · 2 ...,<b>The space</b><br />We are happy to welcome ...,,https://a0.muscache.com/pictures/38437056/d27f...,964081,...,4.83,4.53,4.79,M130003350,f,1,1,0,0,1.21
1,776070,https://www.airbnb.com/rooms/776070,20230629055629,2023-06-29,city scrape,Home in Kita-ku · ★4.98 · 1 bedroom · 1 bed · ...,We have been in airbnb since 2011 and it has g...,We love Nishinippori because is nearer to Toky...,https://a0.muscache.com/pictures/efd9f039-dbd2...,801494,...,4.98,4.83,4.91,,f,1,0,1,0,1.89
2,905944,https://www.airbnb.com/rooms/905944,20230629055629,2023-06-29,city scrape,Rental unit in Shibuya · ★4.76 · 2 bedrooms · ...,NEWLY RENOVATED property entirely for you & yo...,Hatagaya is a great neighborhood located 4 min...,https://a0.muscache.com/pictures/miso/Hosting-...,4847803,...,4.9,4.77,4.77,Hotels and Inns Business Act | 渋谷区保健所長 | 31渋健生...,t,5,5,0,0,1.49
3,1016831,https://www.airbnb.com/rooms/1016831,20230629055629,2023-06-29,city scrape,Home in Setagaya · ★4.94 · 1 bedroom · 2 beds ...,"Hi there, I am Wakana and I live with my two f...",The location is walkable distance to famous Sh...,https://a0.muscache.com/pictures/airflow/Hosti...,5596383,...,4.98,4.92,4.89,,f,1,0,1,0,1.96
4,1196177,https://www.airbnb.com/rooms/1196177,20230629055629,2023-06-29,city scrape,Home in 足立区 · ★4.71 · 1 bedroom · 1.5 shared b...,Ｓtay with host.We can help your travel.<br />B...,There are shopping mall near Senjuohashi stati...,https://a0.muscache.com/pictures/72890882/05ec...,5686404,...,4.88,4.67,4.75,,f,1,0,1,0,0.79


In [5]:
input_data = AirbnbInput(
    user_id="12345",
    review_scores_rating=listing_gz['review_scores_rating'][0],
    neighbourhood_cleansed=listing_gz['neighbourhood_cleansed'][0],
    room_type=listing_gz['room_type'][0],
    accommodates=listing_gz['accommodates'][0],
    price=listing_gz['price'][0],
    bathrooms_text=listing_gz['bathrooms_text'][0],
    bedrooms=listing_gz['bedrooms'][0],
    beds=listing_gz['beds'][0],
    minimum_nights=listing_gz['minimum_nights'][0]
)

print(input_data)

# Airbnb 추천 객체 초기화
recommendation = AirbnbRecommend(data_file, input_data)

# Airbnb 추천 목록 가져오기
recommended_listings = recommendation.get_recommend_airbnb()

# 추천된 목록 출력
for listing in recommended_listings:
    print(f"Listing Description: {listing['description']}")
    print(f"Listing Price: {listing['price']}")
    print("------------------------------")

User ID: 12345, Review Scores Rating: 4.78, Neighbourhood Cleansed: Sumida Ku, Room Type: Entire home/apt, Accommodates: 2, Price: $11,000.00, Minimum Nights: 3


In [7]:
class User:
    def __init__(self, unicode, id, password, region = None, price = None, accommodates = None, minimum_nights = None):
        self.unicode = unicode
        self.id = id
        self.password = password
        self.region = region
        self.price = price
        self.price = price
        self.accommodates = accommodates
        self.minimum_nights = minimum_nights

        def __getitem__(self, key):
        # 인덱싱된 키에 따라 속성 값을 반환
            if key == 'unicode':
                return self.unicode
            elif key == 'id':
                return self.id
            elif key == 'password':
                return self.password
            elif key == 'region':
                return self.region
            elif key == 'price':
                return self.price
            elif key == 'accommodates':
                return self.accommodates
            elif key == 'minimum_nights':
                return self.minimum_nights
            else:
                raise KeyError(f"Invalid key: {key}")
        

    def __str__(self):
        return f"User unicode: {self.unicode}, id : {self.id}, password : {self.password}, region : {self.region}, price: {self.price}, accommodates: {self.accommodates}, minimum_nights : {self.minimum_nights}"
    
    def create_csv(self):
        pass



    

In [9]:
user_list = []

for i in range(100):
    user = User(str(i), f'user{i}', '1234', None, None, None, None)
    user_list.append(user)


