### Market Level Forecasting

In [2]:
import os
# NOTE: `imp` is deprecated (removed in Python 3.12); `importlib.reload` is the modern equivalent.
from imp import reload

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import preprocessing
from preprocessing import preprocessingMLS, featureEngineering
from getGoogleTrends import getGoogleTrendsData
import models

### Step 1: Read and preprocess data
Housing-market data, Google Trends, and (optionally) economic indicators

In [3]:
# Root folder of the Denver market inputs. File paths are built by plain string
# concatenation (data_dir + 'file.csv'), so the trailing slash is required.
data_dir = 'index-team-data/denver/'

In [4]:
# --- Market data -------------------------------------------------------
# Monthly sale counts, listing counts and sale prices for 2016-03 .. 2020-09.
process1 = preprocessingMLS(data_dir + 'listing_dates_with_ct.csv')
monthData = process1.get_monthly_data(
    columns=['count_sale', 'count_list', 'sale_price'],
    start_date='2016-03',
    end_date='2020-09',
)


# --- Google Trends -----------------------------------------------------
# The series is read from a cached CSV. The commented lines below show how a
# cache was pulled from the API; NOTE(review): they still carry Atlanta
# settings (geo / city / output file) and must be adapted before reuse here.
#----------------------------------------------------
#gt = getGoogleTrendsData(geo='US-GA',city='Atlanta')
#trend = gt.getData(times=10,date='2016-03-01 2020-09-30',scaled=True)
#trend.to_csv('google_trends_atlanta_scaled.csv')
#----------------------------------------------------
trend = pd.read_csv(data_dir + 'google_trends/google_trends_scaled.csv')


# --- Economic indicators (optional, currently disabled) ----------------
#econ = pd.read_csv('econ_data.csv')

# Join on whatever columns the two frames share (pd.merge default), then
# discard the 'Unnamed: 0' column -- presumably a saved row index from the
# cached CSV; verify.
merged = pd.merge(monthData, trend).drop(columns=['Unnamed: 0'])

In [5]:
# Feature engineering: lagged values, percentage changes and one-hot month
# indicators are derived from every column of `merged` except the first one.
feature_column_names = merged.columns[1:]
fe = preprocessing.featureEngineering(merged, feature_column_names)

fe.create_lag_features(lag_num_list=[1, 3])    # lags of 1 and 3
fe.create_pct_change_features(lag_num_list=[1])  # pct change over lag 1
fe.create_month_one_hot()

# Pull out the engineered frame and the final list of feature names.
df = fe.get_dataFrame()
feature_column_names = fe.get_feature_names()
print(len(feature_column_names))

136


### Step 2: Make predictions

### 2.1 predict 3 months ahead -- google trends

In [6]:
# Rolling linear model on the engineered features, forecasting the monthly
# sale count three months ahead.
target_col = 'count_sale'
horizon = 3
model = models.rollingModel(
    df, horizon, target_col, feature_column_names,
    modelName='linear', lead_target=True,
)
# fit_predict yields four values; within the visible notebook only `pred`
# and `pred_scale` are consumed afterwards (by the blending cell).
pred, pred_boot, pred_scale, pred_samples = model.fit_predict(
    df[feature_column_names],
    df[target_col],
)

<models.linearModel object at 0x7f8fa98a46d0>
<models.linearModel object at 0x7f8f9e48f2d0>
<models.linearModel object at 0x7f8f9e48f290>
prediction_horizon 1


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


prediction_horizon 2


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


prediction_horizon 3


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


### 2.2 predict 3 months ahead -- time series

In [7]:
# Time-series (BTSM) forecast of the same target over the same horizon as the
# rolling feature-based model, so both forecasts can be blended element-wise.
# `target_col` and `horizon` replace the previously hard-coded 'count_sale'
# and 3; in a top-to-bottom run they hold exactly those values at this point,
# and referencing them keeps the two models from drifting apart.
model = models.BTSM()
# NOTE(review): the meaning of the second positional argument (0) is not
# visible in this notebook -- confirm against models.BTSM.fit.
model.fit(df[target_col].values.astype(float), 0)
# Presumably the length of the array handed to predict() sets the number of
# forecast steps -- verify against models.BTSM.predict.
pred_ts, pred_ts_scale, pred_ts_samples = model.predict(np.ones(horizon))

Instructions for updating:
`AffineScalar` bijector is deprecated; please use `tfb.Shift(loc)(tfb.Scale(...))` instead.
Instructions for updating:
Do not call `graph_parents`.
Instructions for updating:
`MultivariateNormalFullCovariance` is deprecated, use `MultivariateNormalTriL(loc=loc, scale_tril=tf.linalg.cholesky(covariance_matrix))` instead.
Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.
Instructions for updating:
Previously, `initial_step` would return the value passed to the constructor, even if that value was not `Tensor`-like. This behavior is now deprecated. In the future, it will always return a `Tensor`-like object.
Instructions for updating:
The signature for `trace_fn`s passed to `minimize` has changed. Trace functions now take a single `traceable_quantities` argument, which is a `tfp.math.MinimizeTraceableQuantities` namedtuple containing `traceable_quantities.loss`, `traceable_quantities.gradients`, etc. Please update your `trace_

### 2.3 predict 3 months ahead -- linear combination

In [8]:
# Blend the two predictions (feature-based model and time-series model).
# The weights are market-specific and may need retuning for another market.
params = [0.6, 0.4]
w_features, w_ts = params

pred_com = w_features * np.array(pred) + w_ts * pred_ts
# NOTE(review): the scales are blended linearly with the same weights; if they
# are standard deviations this treats the two forecast errors as perfectly
# correlated -- confirm this is intended.
pred_com_scale = w_features * pred_scale + w_ts * pred_ts_scale

print(pred_com)

[7878.82040817 6472.99039788 6418.25346059]


### 2.4 predict 6 months ahead  -- number of sales

In [9]:
# Six-month-ahead model for the current `target_col` (sale counts at this
# point of a top-to-bottom run): total horizon of 6 with a predict_horizon of 3.
model = models.rollingCombinedModel(
    df=df,
    predict_horizon_total=6,
    predict_horizon=3,
    feature_column_names=feature_column_names,
    target_column=target_col,
    modelName='linear',
    lead_target=True,
)

<models.linearModel object at 0x7f8fbd70dbd0>
<models.linearModel object at 0x7f8f93b69310>
<models.linearModel object at 0x7f8f91f4cc90>


In [10]:
# Run the six-month sales forecast: point predictions, their scales, and the
# sample draws that are written to disk at the end of the notebook.
(pred6_sale,
 pred6_scale_sale,
 pred6_samples_sale) = model.make_prediction(
    df[feature_column_names],
    df[target_col],
)

prediction_horizon 1


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


prediction_horizon 2


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


prediction_horizon 3


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


In [11]:
# Sanity check on the sales samples: one column per forecast month
# (the recorded run shows (50, 6)).
pred6_samples_sale.shape

(50, 6)

### 2.5 predict 6 months ahead  -- number of new listings

In [12]:
# Switch the target to new-listing counts and build the same six-month-ahead
# model configuration that was used for sales.
target_col = 'count_list'
model = models.rollingCombinedModel(
    df=df,
    predict_horizon_total=6,
    predict_horizon=3,
    feature_column_names=feature_column_names,
    target_column=target_col,
    modelName='linear',
    lead_target=True,
)

<models.linearModel object at 0x7f8f8f7a3790>
<models.linearModel object at 0x7f8f8f7a3c10>
<models.linearModel object at 0x7f8f93616090>


In [13]:
# Run the six-month new-listings forecast: point predictions, their scales,
# and the sample draws that are written to disk at the end of the notebook.
(pred6_listing,
 pred6_scale_listing,
 pred6_samples_listing) = model.make_prediction(
    df[feature_column_names],
    df[target_col],
)

prediction_horizon 1


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


prediction_horizon 2


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


prediction_horizon 3


  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


In [14]:
# Sanity check on the listings samples: one column per forecast month
# (the recorded run shows (50, 6)), matching the sales samples.
pred6_samples_listing.shape

(50, 6)

### Step 3: Save the prediction samples for further use

In [15]:
# Write one CSV per forecast month (1..N months ahead). Each file pairs the
# sales samples with the new-listings samples for that month, one row per draw.
output_dir = 'outputs/denver/'
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before the first write.
os.makedirs(output_dir, exist_ok=True)

# Take the number of months from the sample array instead of hard-coding 6,
# so this cell follows the forecast horizon configured in the model cells.
n_months = pred6_samples_sale.shape[1]
for i in range(n_months):
    save = pd.DataFrame({
        'number of sales': pred6_samples_sale[:, i],
        'number of listings': pred6_samples_listing[:, i],
    })
    save.to_csv(output_dir + str(i + 1) + '_month_market_level_forecast_samples.csv', index=False)