In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta 
from dateutil.relativedelta import relativedelta
from src.func_data_engineering import *
from src.func_data_diagnosis import *

pd.set_option('display.max_columns', None)

# auto load modules
%load_ext autoreload
%autoreload

%cd C:\Users\tyler\repos\airbnb-forecast

In [None]:
# Load the listing scrapes: read_data (project helper) is given the data folder,
# a glob-style file pattern and the 'last_scraped' column name, and returns a
# collection of frames (presumably one per matching file — confirm in read_data)
# that is stacked into a single frame with a fresh 0..n-1 index.
path = r'data\test'
listing_name = "listings*.csv"
listings = read_data(path, listing_name, 'last_scraped', listing_data=True)
df_listing = pd.concat(listings, ignore_index=True)

In [None]:
# Read calendar data from the same folder (`path` is set in the listing cell).
# read_data is a project helper; 'date' is passed as its third argument
# (presumably the date column to parse/track — confirm in read_data).
# The result is indexed and iterated below as a sequence of per-scrape frames.
cal_name = "calendar*.csv"
calendars = read_data(path, cal_name, 'date')

# For each monthly scrape, keep only the calendar rows dated before the next
# scrape run. The last scrape has no successor, so it is capped at one calendar
# month after its own scrape date.
# NOTE(review): assumes `calendars` is ordered by scrape date and that `date` /
# SCRAPED_DATE hold ISO 'YYYY-MM-DD' strings, so `<` compares chronologically —
# confirm against read_data.
cal_months = []
num_cal = len(calendars)
for i, df in enumerate(calendars):
    if i < num_cal - 1:
        # .iloc[0] is positional, so it works even if a frame's index does not
        # contain the label 0 (e.g. after filtering without reset_index).
        date_end = calendars[i + 1].SCRAPED_DATE.iloc[0]
        df = df[df.date < date_end]
    else:
        date_start = pd.to_datetime(df.SCRAPED_DATE.iloc[0]).date()
        # pd.DateOffset(months=1) adds one calendar month (clamping to month end),
        # and avoids depending on whether `relativedelta` is bound to the class
        # or to the dateutil module in this namespace.
        date_end = (pd.Timestamp(date_start) + pd.DateOffset(months=1)).date()
        df = df[df.date < str(date_end)]
    cal_months.append(df)

# Stack the trimmed monthly frames into one calendar frame with a fresh index.
df_cal = pd.concat(cal_months, axis=0, ignore_index=True)

In [None]:
# Cleanse listing data: run the project helper cleanse_data on the concatenated
# listings frame and rebind df_listing to the result.
# NOTE(review): df_listing is overwritten, so re-running this cell feeds
# already-cleansed data back in — confirm cleanse_data is safe to apply twice.
df_listing = cleanse_data(df_listing)

In [None]:
# Cleanse calendar data with the same project helper, passing list_data=False
# (presumably selects the calendar-specific cleaning path — confirm in cleanse_data).
# NOTE(review): df_cal is overwritten, so re-running this cell re-cleanses its own output.

df_cal = cleanse_data(df_cal, list_data=False)

In [None]:
# Aggregate calendar data to monthly via the project helper agg_to_monthly
# (presumably one row per ID and YEAR_MONTH, the keys used by the merge cell —
# confirm), then display the resulting shape as a quick sanity check.
# NOTE(review): df_cal is overwritten, so re-running this cell aggregates its own output.
df_cal = agg_to_monthly(df_cal)
df_cal.shape

In [None]:
# Join the monthly calendar frame to the listing frame on listing ID and month.
# The month key is the first seven characters of SCRAPED_DATE (presumably
# 'YYYY-MM' from an ISO date string — confirm); it is written onto df_listing.
# The merge uses pandas' default inner join, so unmatched rows on either side
# are dropped.
df_listing['YEAR_MONTH'] = df_listing['SCRAPED_DATE'].str.slice(stop=7)
df_data = pd.merge(df_cal, df_listing, on=['ID', 'YEAR_MONTH'])
df_data.info()

In [None]:
# Listing features: build df_list from the merged frame with the project helper
# fs_listing (`fs` presumably stands for feature store — confirm), called with
# output_all=True, then print its summary via .info().
df_list = fs_listing(df_data,output_all=True)
df_list.info()

In [None]:
# Price features: build df_price from the merged frame with the project helper
# fs_price. Unlike the other fs_* calls in this notebook it is passed
# monthly=True rather than output_all=True. Then print its summary via .info().
df_price = fs_price(df_data, monthly=True)
df_price.info()

In [None]:
# Time features: build df_time from the merged frame with the project helper
# fs_time (output_all=True), then print its summary via .info().
df_time = fs_time(df_data, output_all=True)
df_time.info()

In [None]:
# Host features: build df_host from the merged frame with the project helper
# fs_host (output_all=True), then print its summary via .info().
df_host = fs_host(df_data, output_all=True)
df_host.info()

In [None]:
# Location features: build df_location from the merged frame with the project
# helper fs_location (output_all=True), then print its summary via .info().
df_location = fs_location(df_data, output_all=True)
df_location.info()

In [None]:
# Review features: build df_review from the merged frame with the project helper
# fs_review (output_all=True), then print its summary via .info().
df_review = fs_review(df_data, output_all=True)
df_review.info()

In [None]:
# Calendar features: build df_calendar from the merged frame with the project
# helper fs_calendar (output_all=True), then print its summary via .info().
df_calendar = fs_calendar(df_data, output_all=True)
df_calendar.info()

In [None]:
# Booking features: build df_booked from the merged frame with the project helper
# fs_booked (output_all=True), then print its summary via .info().
df_booked = fs_booked(df_data, output_all=True)
df_booked.info()

In [None]:
# Create the database handle used by the fs_upload cells below, via the project
# helper connect_my_db. The argument is a relative path (resolved against the
# current working directory), presumably to a file holding the connection
# string — confirm in connect_my_db.
engine = connect_my_db('secrets/db_string')

In [None]:
# Upload the listing features under the name 'FS_LIST' (presumably the target
# table — confirm in fs_upload). The listing feature frame is df_list, assigned
# in the fs_listing cell; no variable named df_ls is defined in this notebook.
fs_upload(engine, df_list, 'FS_LIST')

In [None]:
# Upload the host features (df_host) under the name 'FS_HOST'
# (presumably the target table — confirm in fs_upload).
fs_upload(engine,df_host,'FS_HOST')

In [None]:
# Upload the review features (df_review) under the name 'FS_REVIEW'
# (presumably the target table — confirm in fs_upload).
fs_upload(engine,df_review,'FS_REVIEW')

In [None]:
# Upload the location features (df_location) under the name 'FS_LOCATION_RAW'
# (presumably the target table — confirm in fs_upload).
fs_upload(engine,df_location,'FS_LOCATION_RAW')