In [1]:
import pandas as pd
import numpy as np
import os, sys, inspect
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# add parent dir to system dir
# Use the kernel's working directory as the notebook location. Inspecting the
# current frame's file is unreliable under Jupyter: the cell is compiled under
# a pseudo/temporary filename, not the notebook path, so its dirname may point
# at a temp directory instead of this folder.
# NOTE(review): assumes the kernel was started in the notebook's own directory
# (the Jupyter default) - confirm if notebooks are launched differently.
currdir = os.getcwd()
rootdir = os.path.dirname(currdir)
# Put the project root first on the import path so `src` resolves to the local package.
sys.path.insert(0, rootdir)
# The bundled seaborn style sheets were renamed to "seaborn-v0_8*" (old names
# deprecated in Matplotlib 3.6 and removed afterwards). Prefer the new name
# when it exists and fall back to the old one on older Matplotlib versions.
mpl.style.use("seaborn-v0_8" if "seaborn-v0_8" in mpl.style.available else "seaborn")
from src.utils import get_temporal_data, get_spatial_data
from src.utils import create_difference_sequence

In [2]:
# Both input tables are read from <rootdir>/data/int.
int_data_dir = os.path.join(rootdir, "data", "int")
ffname_realestate = os.path.join(int_data_dir, "realestate_demographics.csv")
ffname_zipcodes = os.path.join(int_data_dir, "zip_codes.csv")

realestate_df = pd.read_csv(ffname_realestate)
zipcode_df = pd.read_csv(ffname_zipcodes)

# Commented-out clean-up of the zip code file, kept for reference (not executed):
# zipcode_df.insert(loc=0, column="78701", value=78701)
# zipcode_df = zipcode_df.rename(columns={"78701": "Zip Code"})
# zipcode_df.loc[40, "Zip Code"] = 78701
# zipcode_df.astype(int, inplace=True)
# zipcode_df.sort_values(by=["Zip Code"], inplace=True)
# zipcode_df.reset_index(inplace=True)
# zipcode_df.to_csv(ffname_zipcodes, columns=["Zip Code"], index=False)

In [3]:
# Year window used by the plotting cells: rows with period[0] <= Year < period[1] are kept.
period = [2000, 2017]

# Figure geometry and font sizes shared by every trend figure.
figsize = (15, 30)
titlefontsize = 20
xtickfontsize = ytickfontsize = 17
legendfontsize = 19

# Columns to plot, one subplot per column, listed group by group.
columns = [
    # prices
    "Home Value Index",
    "Median Listing Price Per sqf",
    "Median Listing Price",
    "Median Price Cut",
    "Median Price of Reduction",
    "Median Rental Price",
    # market activity
    "Inventory Measure",
    "Sales",
    # mortgage rates
    "Mortgage Rate_15",
    "Mortgage Rate_30",
    # construction
    "Total_NewConstructions",
    "Total_RemodelsRepairs",
    "TotalCost_NewConstructions",
    # demographics
    "Median Income (dollars)",
    "Number of Households",
]


In [4]:
# plot raw trends
# For every zip code: draw one stacked subplot per entry of `columns` with the
# raw values inside `period`, and save the figure as a PNG under Reports/Figures.

# Line colour of each column, defined group by group.
color_groups = {
    "violet": [
        "Home Value Index",
        "Median Listing Price Per sqf",
        "Median Listing Price",
        "Median Price Cut",
        "Median Price of Reduction",
        "Median Rental Price",
    ],
    "coral": ["Inventory Measure", "Sales"],
    "red": ["Mortgage Rate_15", "Mortgage Rate_30"],
    "tan": [
        "Total_NewConstructions",
        "Total_RemodelsRepairs",
        "TotalCost_NewConstructions",
    ],
    "green": ["Median Income (dollars)", "Number of Households"],
}
column_colors = {
    column: color for color, group in color_groups.items() for column in group
}

# Columns whose y tick labels are shown in thousands, with their label format.
y_tick_formats = {
    "Home Value Index": "{:3,.0f}K",
    "Median Listing Price": "{:3,.0f}K",
    "TotalCost_NewConstructions": "{:3,.0f}K",
    "Median Income (dollars)": "{:3,.0f}K",
    "Median Price Cut": "{:3,.0f}K",
    "Number of Households": "{:2.1f}K",
}


def _thousands_formatter(fmt):
    """Return a tick formatter rendering ``tick_value / 1000`` with ``fmt``.

    A formatter (instead of fixed tick label strings) keeps the labels in
    sync with the ticks even if the tick locations change after this call.
    """
    return mpl.ticker.FuncFormatter(lambda value, pos: fmt.format(value / 1000))


# savefig does not create missing directories.
figure_dir = os.path.join(rootdir, "Reports", "Figures")
os.makedirs(figure_dir, exist_ok=True)

for _, row in zipcode_df.iterrows():
    zipcode = row["Zip Code"]
    print(f"ploting raw trends for {zipcode}...", end="")
    df = get_temporal_data(zipcode, realestate_df)
    # keep rows with period[0] <= Year < period[1]
    df = df[(df["Year"] >= period[0]).values & (df["Year"] < period[1]).values]

    n_points = df.shape[0]
    positions = np.arange(0, n_points)
    # One tick every 12 rows.
    # NOTE(review): presumably rows are monthly, i.e. one tick per year - confirm.
    xticks = positions[::12]
    # x tick labels: the year of every row (only every 12th one is displayed)
    time_stamp = df["Year"].astype(str)

    fig = plt.figure(figsize=figsize)
    # squeeze=False keeps a 2-D array of axes even for a single column
    ax = fig.subplots(len(columns), 1, squeeze=False)[:, 0]

    for axis, column in zip(ax, columns):
        axis.plot(positions, df[column].values, color=column_colors[column], label=column)
        axis.set_xlim([0, n_points])
        axis.set_xticks(xticks)
        axis.tick_params(
            axis='x',           # changes apply to the x-axis
            which='both',       # both major and minor ticks are affected
            bottom=True,        # ticks along the bottom edge are on
            top=False,          # ticks along the top edge are off
            labelbottom=False,  # labels along the bottom edge are off
            length=5,
            width=2,
        )
        axis.tick_params(axis='y', labelsize=ytickfontsize)

        if column in y_tick_formats:
            axis.yaxis.set_major_formatter(_thousands_formatter(y_tick_formats[column]))

        axis.legend(loc="upper center", fontsize=legendfontsize, framealpha=0.1)

    # Only the bottom subplot shows x tick labels.
    bottom_axis = ax[-1]
    bottom_axis.set_xticks(xticks)
    bottom_axis.set_xticklabels(time_stamp[::12], rotation=90)
    bottom_axis.tick_params(
        axis='x',
        which='both',
        bottom=True,
        top=False,
        labelbottom=True,
        labelsize=xtickfontsize)
    bottom_axis.tick_params(
        axis='y',
        which='both',
        right=True,
        left=True,
        labelsize=ytickfontsize)

    ax[0].set_title(f"Raw Trend Plots ({zipcode} | {period[0]}-{period[1]})",
        fontsize=titlefontsize,
        fontweight="bold"
    )
    fig.subplots_adjust(hspace=.01)
    plt.tight_layout()
    fname = f"trend-plot-raw-{period[0]}-{period[1]}-{zipcode}.png"
    fig.savefig(os.path.join(figure_dir, fname), transparent=False)
    # close the figure so figures do not accumulate across zip codes
    plt.close(fig)
    print("completed.")


ploting raw trends for 78701...completed.
ploting raw trends for 78702...completed.
ploting raw trends for 78703...completed.
ploting raw trends for 78704...completed.
ploting raw trends for 78705...completed.
ploting raw trends for 78717...completed.
ploting raw trends for 78721...completed.
ploting raw trends for 78722...completed.
ploting raw trends for 78723...completed.
ploting raw trends for 78724...completed.
ploting raw trends for 78725...completed.
ploting raw trends for 78726...completed.
ploting raw trends for 78727...completed.
ploting raw trends for 78728...completed.
ploting raw trends for 78729...completed.
ploting raw trends for 78730...completed.
ploting raw trends for 78731...completed.
ploting raw trends for 78732...completed.
ploting raw trends for 78733...completed.
ploting raw trends for 78734...completed.
ploting raw trends for 78735...completed.
ploting raw trends for 78736...completed.
ploting raw trends for 78737...completed.
ploting raw trends for 78738...com

In [5]:
# plot scaled raw trends
# For every zip code: min-max scale each entry of `columns` inside `period`,
# draw one stacked subplot per column, and save the figure as a PNG under
# Reports/Figures.

# Line colour of each column, defined group by group.
color_groups = {
    "violet": [
        "Home Value Index",
        "Median Listing Price Per sqf",
        "Median Listing Price",
        "Median Price Cut",
        "Median Price of Reduction",
        "Median Rental Price",
    ],
    "coral": ["Inventory Measure", "Sales"],
    "red": ["Mortgage Rate_15", "Mortgage Rate_30"],
    "tan": [
        "Total_NewConstructions",
        "Total_RemodelsRepairs",
        "TotalCost_NewConstructions",
    ],
    "green": ["Median Income (dollars)", "Number of Households"],
}
column_colors = {
    column: color for color, group in color_groups.items() for column in group
}

# savefig does not create missing directories.
figure_dir = os.path.join(rootdir, "Reports", "Figures")
os.makedirs(figure_dir, exist_ok=True)

for _, row in zipcode_df.iterrows():
    zipcode = row["Zip Code"]
    print(f"ploting scaled trends for {zipcode}...", end="")
    df = get_temporal_data(zipcode, realestate_df)
    # keep rows with period[0] <= Year < period[1]
    df = df[(df["Year"] >= period[0]).values & (df["Year"] < period[1]).values]

    # Column-wise min-max scaling. A column that is constant over the period
    # has a zero range and becomes NaN.
    raw_values = df[columns]
    column_min = raw_values.min()
    column_range = raw_values.max() - column_min
    scaled_df = (raw_values - column_min) / column_range

    n_points = scaled_df.shape[0]
    positions = np.arange(0, n_points)
    # One tick every 12 rows.
    # NOTE(review): presumably rows are monthly, i.e. one tick per year - confirm.
    xticks = positions[::12]
    # x tick labels come from THIS zip code's rows, not from whatever an
    # earlier cell left behind in `time_stamp`
    time_stamp = df["Year"].astype(str)

    fig = plt.figure(figsize=figsize)
    # squeeze=False keeps a 2-D array of axes even for a single column
    ax = fig.subplots(len(columns), 1, squeeze=False)[:, 0]

    for axis, column in zip(ax, columns):
        axis.plot(positions, scaled_df[column].values, color=column_colors[column], label=column)
        axis.set_xlim([0, n_points])
        axis.set_xticks(xticks)
        axis.tick_params(
            axis='x',           # changes apply to the x-axis
            which='both',       # both major and minor ticks are affected
            bottom=True,        # ticks along the bottom edge are on
            top=False,          # ticks along the top edge are off
            labelbottom=False,  # labels along the bottom edge are off
            length=5,
            width=2,
        )
        axis.tick_params(axis='y', labelsize=ytickfontsize)
        axis.legend(loc="upper center", fontsize=legendfontsize, framealpha=0.1)

    # Only the bottom subplot shows x tick labels.
    bottom_axis = ax[-1]
    bottom_axis.set_xticks(xticks)
    bottom_axis.set_xticklabels(time_stamp[::12], rotation=90)
    bottom_axis.tick_params(
        axis='x',
        which='both',
        bottom=True,
        top=False,
        labelbottom=True,
        labelsize=xtickfontsize)
    bottom_axis.tick_params(
        axis='y',
        which='both',
        right=True,
        left=True,
        labelsize=ytickfontsize)

    ax[0].set_title(f"Scaled Trend Plots ({zipcode} | {period[0]}-{period[1]})",
        fontsize=titlefontsize,
        fontweight="bold"
    )
    fig.subplots_adjust(hspace=.01)
    plt.tight_layout()
    fname = f"trend-plot-scaled-{period[0]}-{period[1]}-{zipcode}.png"
    fig.savefig(os.path.join(figure_dir, fname), transparent=False)
    # close the figure so figures do not accumulate across zip codes
    plt.close(fig)
    print("completed.")


ploting scaled trends for 78701...completed.
ploting scaled trends for 78702...completed.
ploting scaled trends for 78703...completed.
ploting scaled trends for 78704...completed.
ploting scaled trends for 78705...completed.
ploting scaled trends for 78717...completed.
ploting scaled trends for 78721...completed.
ploting scaled trends for 78722...completed.
ploting scaled trends for 78723...completed.
ploting scaled trends for 78724...completed.
ploting scaled trends for 78725...completed.
ploting scaled trends for 78726...completed.
ploting scaled trends for 78727...completed.
ploting scaled trends for 78728...completed.
ploting scaled trends for 78729...completed.
ploting scaled trends for 78730...completed.
ploting scaled trends for 78731...completed.
ploting scaled trends for 78732...completed.
ploting scaled trends for 78733...completed.
ploting scaled trends for 78734...completed.
ploting scaled trends for 78735...completed.
ploting scaled trends for 78736...completed.
ploting sc