In [None]:
import re

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
"""
 The Zillow csv provides ZHVI for neighborhoods in the US.
 Zillow Home Value Index (ZHVI): A measure of the typical home value and market changes across a given region and housing type. 
 It reflects the typical value for homes in the 35th to 65th percentile range.
 More info about ZHVI: https://www.zillow.com/research/methodology-neural-zhvi-32128/
"""

# Load the Zillow neighborhood table; the path is relative to the notebook's
# working directory, so the CSV must sit next to the notebook.
df = pd.read_csv('Neighborhood_zillow.csv')
# Report the table's dimensions as (rows, columns).
print("shape:", df.shape)

In [None]:
# Show every column label of the raw table
print("Columns:")
print(df.columns.tolist())

# Restrict the table to rows whose City field is exactly "Chicago"
chicago_df = df.loc[df['City'] == "Chicago"]

# Preview the first 15 distinct neighborhood names in the Chicago subset
print()
print("Chicago neighborhoods:")
print(chicago_df['RegionName'].unique()[:15])

# Select the University Village - Little Italy row(s) and print the values
# stored under the '2000-01-31' and '2024-09-30' columns
print()
little_italy = chicago_df.loc[
    chicago_df['RegionName'] == 'University Village - Little Italy'
]
print("Little Italy 2000-01", little_italy['2000-01-31'])
print("Little Italy 2024-09", little_italy['2024-09-30'])


In [None]:
# Unpivot the date columns into rows (wide -> long format) so each row holds a single observation
def reshape_dates(df, id_vars=('RegionID', 'SizeRank', 'RegionName'),
                  date_pattern=r'\d{4}-\d{2}'):
    """Unpivot a wide table with one column per period into long format.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table containing the ``id_vars`` columns plus one column per
        period, each labelled with a date string such as '2000-01-31'.
    id_vars : sequence of str, optional
        Identifier columns carried through unchanged. Defaults to
        ('RegionID', 'SizeRank', 'RegionName'). Columns that are neither
        identifiers nor dates are dropped from the result.
    date_pattern : str, optional
        Regular expression matched against the START of each column label
        to decide whether it is a date column. The default accepts any
        'YYYY-MM...' label. Pass ``'20'`` to reproduce the former
        "label starts with 20" selection.

    Returns
    -------
    pandas.DataFrame
        One row per input row per date column, with the ``id_vars`` columns
        followed by 'Date' (converted with ``pd.to_datetime``) and 'ZHVI'.

    Raises
    ------
    KeyError
        If an ``id_vars`` column is missing from ``df`` (raised by ``melt``).
    """
    # Select date columns by the shape of their label, not a literal '20'
    # prefix: a prefix test silently drops pre-2000 periods and also picks up
    # any unrelated label that merely begins with "20". str() keeps
    # non-string labels from raising.
    is_date_label = re.compile(date_pattern).match
    date_columns = [col for col in df.columns if is_date_label(str(col))]

    # Melt dataframe to convert date columns into rows
    df_melted = df.melt(id_vars=list(id_vars),
                        value_vars=date_columns,
                        var_name='Date', value_name='ZHVI')

    # Convert Date to datetime format
    df_melted['Date'] = pd.to_datetime(df_melted['Date'])

    return df_melted

# Reshape only the Chicago subset: one output row per input row per date column.
reshaped_df = reshape_dates(chicago_df)
# Preview the first rows of the long-format result.
reshaped_df.head()

In [None]:
# Keep only the Lake View rows of the long-format table
lake_view_df = reshaped_df.loc[reshaped_df['RegionName'] == 'Lake View']

# Draw the ZHVI series against Date on an explicitly created 12x6 figure
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=lake_view_df, x='Date', y='ZHVI', ax=ax)
ax.set_title("ZHVI Value Over Time for Lake View")
ax.set_xlabel("Date")
ax.set_ylabel("ZHVI")
plt.show()

In [None]:
# Display the 2010 population CSV exactly as loaded, to inspect its raw layout.
pd.read_csv('Chicago_ZIP_Populations_2010.csv')

In [None]:
# Clean up the population CSVs
def clean_population(year):
    """Load one year's ZIP population CSV and return it as a long table.

    Reads 'Chicago_ZIP_Populations_<year>.csv' from the working directory.
    The file must contain a 'Label (Grouping)' column and exactly one data
    row, because the transposed table is renamed to two columns.

    Parameters
    ----------
    year : int or str
        Year used to build the file name and stored in the 'Year' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'Zip Code', 'Total' and 'Year', one row per original
        non-label column.
    """
    raw = pd.read_csv(f'Chicago_ZIP_Populations_{year}.csv')

    # Flip the table so each original column header becomes a row
    tidy = raw.set_index('Label (Grouping)').T.reset_index()
    tidy.columns = ['Zip Code', 'Total']

    # Drop the first six characters of every header.
    # NOTE(review): presumably a fixed-width prefix in front of the ZIP code;
    # confirm against the raw file.
    tidy['Zip Code'] = tidy['Zip Code'].str.slice(6)
    tidy['Year'] = year

    return tidy

# Build the cleaned table for each of the two years
pop_2010, pop_2020 = (clean_population(yr) for yr in (2010, 2020))

# Stack the two tables and renumber the rows from zero
population_df = pd.concat([pop_2010, pop_2020], ignore_index=True)

population_df