[![lite-badge](https://jupyterlite.rtfd.io/en/latest/_static/badge.svg)](https://jupyter.org/try-jupyter)
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/zyang91/spatial_analysis/blob/main/first_class.ipynb#scrollTo=4c43e7f5)

In [None]:
import pandas as pd

In [None]:
# Build a pandas Series holding three city names.
city_names = pd.Series(["San Francisco", "San Jose", "Binghamton"])

In [None]:
# Series of three population figures, in the same order as city_names.
population = pd.Series([850000, 1010085, 487900])

In [None]:
# A bare expression at the end of a cell is displayed by the notebook.
city_names

In [None]:
# Combine the two Series into a DataFrame; the dict keys become the column labels.
cities_df = pd.DataFrame({"City Name": city_names, "population": population})

In [None]:
cities_df

In [None]:
# Indexing with a column label returns that column as a Series.
cities_df["City Name"]

In [None]:
cities_df["population"]

In [None]:
# Plain Python list with the same three names, to compare list slicing with DataFrame slicing.
cities_list = ["San Francisco", "San Jose", "Binghamton"]

In [None]:
# First two elements of the list.
cities_list[:2]

In [None]:
cities_df

In [None]:
# A [:n] slice on a DataFrame selects rows by position: the first two rows.
cities_df[:2]

In [None]:
# Only the first row (still a DataFrame, not a Series).
cities_df[:1]

In [None]:
# Median of the population column.
cities_df["population"].median()

In [None]:
# Second example: a Series of three place names.
city = pd.Series(["Binghamton", "Vestal", "JC"])

In [None]:
# NOTE: rebinds `population`, replacing the Series that was used to build cities_df.
population = pd.Series([50000, 45000, 30000])

In [None]:
city

In [None]:
# DataFrame with one row per place; the dict keys become the column labels.
city_df = pd.DataFrame({"city name": city, "population": population})

In [None]:
city_df

In [None]:
# Median computed with the pandas Series method.
city_df["population"].median()

In [None]:
import numpy as np

In [None]:
# The same median computed with NumPy on the same column.
np.median(city_df["population"])

In [None]:
def get_large_city(population, threshold=40000):
    """Return whether ``population`` reaches the large-city cutoff.

    Parameters
    ----------
    population
        Population value to classify (anything comparable with ``>=``).
    threshold
        Inclusive lower bound for a "large" city. Defaults to 40000, the
        cutoff this function used before the parameter existed, so existing
        one-argument calls such as ``Series.apply(get_large_city)`` behave
        exactly as before.

    Returns
    -------
    The result of ``population >= threshold``.
    """
    return population >= threshold

In [None]:
# Call get_large_city on every population value; the result is a Series of booleans.
large_city_sel = city_df["population"].apply(get_large_city)

In [None]:
large_city_sel

In [None]:
# Store the flags as a new column of city_df.
city_df["large city"] = large_city_sel

In [None]:
def small_city(population):
    """Return whether ``population`` is strictly below 40000."""
    return population < 40000

In [None]:
# Apply small_city to every population value and store the result as a new column.
city_df["small city"] = city_df["population"].apply(small_city)

In [None]:
city_df

In [None]:
city_df["large city"]

In [None]:
# Passing a boolean Series to .loc keeps only the rows where it is True.
city_df.loc[city_df["large city"]]

In [None]:
# Element-wise comparison: True where population is strictly greater than 45000.
city_df["population"] > 45000

In [None]:
# Rows whose population is strictly below 45000.
city_df.loc[city_df["population"] < 45000]

In [None]:
# ~ inverts the boolean mask: rows that were NOT flagged by get_large_city.
city_df.loc[~large_city_sel]

In [None]:
# The same "< 45000" filter written as a query string.
city_df.query("population<45000")

In [None]:
# Raw string: the backslashes in the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\d" or "\c".
census_df = pd.read_csv(r"C:\data\census\ACS_19_1YR_B25003.csv")

In [None]:
# Preview the first five rows of the loaded table.
census_df.head(n=5)

In [None]:
# Restrict census_df to the three columns listed here.
columns = ["GEO.display-label", "HD01_VD01", "HD01_VD03"]
# .copy() makes the subset an independent DataFrame, so assigning new columns
# to it does not raise pandas' SettingWithCopyWarning.
census_df = census_df[columns].copy()

In [None]:
census_df.head()

In [None]:
# Replace the column labels, by position, with shorter names.
census_df.columns = ["City", "Total", "Rentals"]

In [None]:
census_df.head(n=5)

In [None]:
# Rentals as a percentage of Total, stored as a new column.
census_df["Rental Rate"] = census_df["Rentals"] / census_df["Total"] * 100

In [None]:
census_df.head()

In [None]:
# Number of rows in the table.
len(census_df)

In [None]:
# Names to keep when filtering on the "City" column below. isin() compares
# values exactly, so these strings presumably mirror the spelling used in the
# CSV files -- verify against the data.
top_cities_list = [
    "Philadelphia city, Pennsylvania",
    "New York city, New York",
    "Los Angeles city, California",
    "Houston city, Texas",
    "Chicago city, Illinois",
    "Phoenix city, Arizona",
]

In [None]:
# Boolean mask: True where the City value is one of the names in the list.
census_df["City"].isin(top_cities_list)

In [None]:
# Use the mask with .loc to keep only the matching rows.
census_df.loc[census_df["City"].isin(top_cities_list)]

In [None]:
# The same selection via query(); the @ prefix refers to a Python variable.
census_df.query("City in @top_cities_list")

In [None]:
# Two-digit year kept as a string so it can be spliced into a file name.
year_str = "19"

In [None]:
# rf-string: {year_str} is still interpolated, while the backslashes of the
# Windows path are kept literally instead of being parsed as escape sequences.
path = rf"C:\data\census\ACS_{year_str}_1YR_B25003.csv"

In [None]:
# Show the file path that was just assembled.
print(path)

In [None]:
# Skeleton of the yearly loop: no file is read here, each year in the
# inclusive range start_year..stop_year is only announced.
start_year, stop_year = 2005, 2019
all_rental_data = []  # nothing is appended in this cell
for year in range(start_year, stop_year + 1):
    print(f"loading data for year= {year}...")

In [None]:
# Collect one filtered DataFrame per year in a list, then stack them into a
# single DataFrame once the loop has finished.
all_rental_data = []
start_year = 2005
stop_year = 2019
for year in range(start_year, stop_year + 1):

    # ---------------------------------------------
    # Step 0: Print out year
    # The variable year changes for each iteration of the loop
    # ----------------------------------------------
    print(f"Loading data for year = {year}...")

    # ---------------------------------
    # Step 1: Read data for this year
    # ---------------------------------
    # Last two digits of the year as a string, e.g. 2005 -> "05".
    year_str = str(year)[2:]
    # rf-string: interpolate year_str but keep the path's backslashes literal
    # rather than letting them be parsed as (invalid) escape sequences.
    rental_df = pd.read_csv(rf"C:\data\census\ACS_{year_str}_1YR_B25003.csv")

    # ---------------------------------
    # Step 2: Columns we want
    # ---------------------------------
    columns = ["GEO.display-label", "HD01_VD01", "HD01_VD03"]
    # .copy() so the new column added in Step 4 is written to an independent
    # frame and does not raise SettingWithCopyWarning.
    rental_df = rental_df[columns].copy()

    # ---------------------------------
    # Step 3: Rename columns
    # ---------------------------------
    rental_df.columns = ["City", "Total", "Rentals"]

    # ----------------------------------
    # Step 4: Calculate the rental rate
    # ----------------------------------
    rental_df["Rental Rate"] = rental_df["Rentals"] / rental_df["Total"] * 100

    # ----------------------------------
    # Step 5: Select the cities we want
    # ----------------------------------
    selection = rental_df["City"].isin(top_cities_list)
    top_cities_df = rental_df.loc[selection].copy()

    # ----------------------------------
    # Step 6: Add a column for the year
    # ----------------------------------
    top_cities_df["Year"] = year

    # -----------------------------------
    # Step 7: Save this dataframe in the list
    # -----------------------------------
    all_rental_data.append(top_cities_df)

# ---------------------------------------------------
# Step 8: After the for loop, combine all of the
# data frames into one along the row axis.
# The name is rebound from the list to the combined
# DataFrame; each row keeps the index label it had
# in its yearly file (the index is not renumbered).
# ---------------------------------------------------
all_rental_data = pd.concat(all_rental_data, axis=0)

In [None]:
# Check what kind of object all_rental_data is now.
type(all_rental_data)

In [None]:
# First rows of the table.
all_rental_data.head()

In [None]:
# Last rows of the table.
all_rental_data.tail()

In [None]:
# Total number of rows.
len(all_rental_data)

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Distinct values of the City column, in order of first appearance.
all_rental_data["City"].unique()

In [None]:
# Draw one rental-rate line per city on a shared axis, using the "ggplot"
# style only for the duration of this block.
with plt.style.context("ggplot"):
    fig, ax = plt.subplots(figsize=(10, 6))
    # The loop names are deliberately not `city` / `city_df`: those names are
    # already bound to other objects in this notebook and must not be
    # overwritten by the plotting loop.
    for city_label in all_rental_data["City"].unique():
        print(city_label)
        city_rows = all_rental_data.loc[all_rental_data["City"] == city_label]
        ax.plot(
            city_rows["Year"],
            city_rows["Rental Rate"],
            label=city_label,
            linewidth=4,
        )
    ax.legend(loc=0, ncol=3, fontsize=9)
    ax.set_ylim(30, 72)
    ax.set_ylabel("Rental Rate in Percentage")
    plt.show()

In [None]:
# Boolean mask: True for the rows whose City is Philadelphia.
all_rental_data["City"] == "Philadelphia city, Pennsylvania"

In [None]:
# Keep only the Philadelphia rows.
philly = all_rental_data.loc[
    all_rental_data["City"] == "Philadelphia city, Pennsylvania"
]

philly

In [None]:
# .loc selects by index label.
# NOTE(review): 393 is a data-dependent label -- confirm it exists in philly.
philly.loc[393]

In [None]:
# .iloc selects by position: the third row of philly.
philly.iloc[2]

In [None]:
# NOTE(review): the combined table was concatenated without renumbering its
# index, so label 393 may match more than one row here -- confirm.
all_rental_data.loc[393]

In [None]:
# Returns a renumbered copy (old index dropped); the result is not assigned,
# so philly itself is left unchanged.
philly.reset_index(drop=True)

In [None]:
# Rows of philly whose Year is 2005 (still a DataFrame).
philly_2005 = philly.loc[philly["Year"] == 2005]


philly_2005

In [None]:
# Selecting one column of the DataFrame gives a Series.
philly_2005["Rental Rate"]

In [None]:
# First value of that Series, by position.
philly_2005["Rental Rate"].iloc[0]

In [None]:
# Number of entries in that Series.
len(philly_2005["Rental Rate"])

In [None]:
# squeeze() collapses a one-row selection into a Series, so the column lookup
# on the next line yields a single value.
# NOTE(review): assumes exactly one Philadelphia row for 2019 -- confirm.
philly_2019 = philly.loc[philly["Year"] == 2019].squeeze()
value_2019 = philly_2019["Rental Rate"]

In [None]:
print(value_2019)

In [None]:
# Rebinds philly_2005 (a DataFrame until now) to the squeezed selection.
# NOTE(review): assumes exactly one Philadelphia row for 2005 -- confirm.
philly_2005 = philly.loc[philly["Year"] == 2005].squeeze()
value_2005 = philly_2005["Rental Rate"]
value_2005

In [None]:
# Relative change from the 2005 value to the 2019 value, in percent.
percent_change = (value_2019 - value_2005) / value_2005 * 100

In [None]:
print(f"change in rental rate in Philadelphia from 2005 to 2019= {percent_change}")