# NYC Apartment Search

_[Project prompt](https://docs.google.com/document/d/1BYVyFBDcTywdUlanH0ysfOrNWPgl7UkqXA7NeewTzxA/edit#heading=h.bpxu7uvknnbk)_

_This scaffolding notebook may be used to help setup your final project. It's **totally optional** whether you make use of this or not._

_If you do use this notebook, everything provided is optional as well - you may remove or add code as you wish._

_**All code below should be consider "pseudo-code" - not functional by itself, and only an idea of a possible approach.**_

## Setup

In [28]:
# All import statements needed for the project
import json
import pathlib
import urllib.parse
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import requests
import sqlalchemy as db
import seaborn as sns
import folium
from shapely.geometry import Point

In [29]:
# Any constants you might need; some have been added for you

# Where data files will be read from/written to - this should already exist
DATA_DIR = pathlib.Path("data")
ZIPCODE_DATA_FILE = DATA_DIR / "nyc_zipcodes" / "nyc_zipcodes.shp"
ZILLOW_DATA_FILE = DATA_DIR / "zillow_rent_data.csv"
NYC_DATA_APP_TOKEN = "SMCztjSAKApgTYc1CbAxcdksE"
DB_SCHEMA_FILE = "schema.sql"
# directory where DB queries for Part 3 will be saved
QUERY_DIR = pathlib.Path("queries")

In [30]:
# Make sure the QUERY_DIRECTORY exists
if not QUERY_DIR.exists():
    QUERY_DIR.mkdir()

## Part 1: Data Preprocessing

In [31]:
def download_nyc_geojson_data(url, force=False):
    parsed_url = urllib.parse.urlparse(url)
    url_path = parsed_url.path.strip("/")
    
    filename = DATA_DIR / url_path
    
    if force or not filename.exists():
        print(f"Downloading {url} to {filename}...")
        
        ...
        
        with open(filename, "w") as f:
            json.dump(..., f)
        print(f"Done downloading {url}.")

    else:
        print(f"Reading from {filename}...")

    return filename

In [32]:
# Read the ZIP code data file into a GeoDataFrame, keep the necessary columns: zipcode and geometry,
# and remove any rows that contain missing values. 
def load_and_clean_zipcodes():
    df = gpd.read_file(ZIPCODE_DATA_FILE)
    df['centroid'] = df.geometry.centroid
    df.geometry = df['centroid']
    columns_to_keep = ['ZIPCODE', 'geometry']
    df_subset = df[columns_to_keep].dropna()
    return df_subset

In [33]:
# Download and clean complaint 311 data
def download_and_clean_311_data():
    # Define date range for the SoQL query
    start_date = '2023-12-07T00:00:00'
    end_date = '2023-12-08T23:59:59'
    limit = 50000

    # Build the query URL with SoQL parameters
    base_url = "https://data.cityofnewyork.us/resource/erm2-nwe9.json"
    soql_query = f"?$where=created_date between '{start_date}' and '{end_date}'&$limit={limit}"
    url = base_url + soql_query
    
    headers = {
        'Accept': 'application/json', 
        'X-App-Token': NYC_DATA_APP_TOKEN  
    }

    response = requests.get(url, headers=headers)
    data = response.json()
    df = pd.json_normalize(data)

    # Keep necessary columns: unique key, date, zipcode, complaint type, latitude, and longitude
    # and create point column defined by longitude and latitude
    columns_to_keep = ['unique_key', 'created_date', 'incident_zip', 'complaint_type', 'latitude', 'longitude']
    df_subset = df[columns_to_keep].dropna()
    df_subset['created_date'] = pd.to_datetime(df_subset['created_date'])
    geometry = [Point(xy) for xy in zip(df_subset['longitude'], df_subset['latitude'])]
    df_subset = gpd.GeoDataFrame(df_subset, geometry=geometry, crs='EPSG:4326')

    return df_subset

In [34]:
# Download and clean tree data
def download_and_clean_tree_data():
    url = "https://data.cityofnewyork.us/resource/5rq2-4hqu.json"
    
    # Select necessary columns with limit on number of data
    params = {
        '$SELECT': "tree_id, status, health, zipcode, spc_common, latitude, longitude",
        '$LIMIT': 50000
    }

    # Define headers with token
    headers = {
        'Accept': 'application/json', 
        'X-App-Token': NYC_DATA_APP_TOKEN  
    }

    response = requests.get(url, params=params, headers=headers) 

    # If the request is successful, create point geometry, and convert to GeoDataFrame
    # Otherwise, print the error information and return an empty DataFrame
    if response.status_code == 200:
        data = response.json() 
        df = pd.json_normalize(data).dropna()
        geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
        gdf = gpd.GeoDataFrame(df, geometry=geometry, crs='EPSG:4326') 
        return gdf
    else:
        print("Failed to retrieve data:", response.status_code)
        print("Details:", response.text)
        return pd.DataFrame()

In [35]:
# Download and clean zillow data
def load_and_clean_zillow_data():
    df = pd.read_csv(ZILLOW_DATA_FILE)
    drop_column = ["SizeRank", "RegionID", "RegionType", "StateName", "State", "Metro"] 
    df = df.drop(columns=drop_column).dropna() # Drop unnecessary columns and drop missing values
    df = df.melt(id_vars=["RegionName", "City", "CountyName"], var_name='Date', value_name='Rent') # Melt df to convert to long format
    df = df[df["City"] == "New York"] # Filter to only NY data
    return df

In [36]:
def load_all_data():
    geodf_zipcode_data = load_and_clean_zipcodes()
    geodf_311_data = download_and_clean_311_data()
    geodf_tree_data = download_and_clean_tree_data()
    df_zillow_data = load_and_clean_zillow_data()
    return (
        geodf_zipcode_data,
        geodf_311_data,
        geodf_tree_data,
        df_zillow_data
    )

In [37]:
geodf_zipcode_data, geodf_311_data, geodf_tree_data, df_zillow_data = load_all_data()

In [38]:
# Show basic info about each dataframe
geodf_zipcode_data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 263 entries, 0 to 262
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   ZIPCODE   263 non-null    object  
 1   geometry  263 non-null    geometry
dtypes: geometry(1), object(1)
memory usage: 4.2+ KB


In [39]:
# Show first 5 entries about each dataframe
geodf_zipcode_data.head()

Unnamed: 0,ZIPCODE,geometry
0,11436,POINT (1040696.262 185580.222)
1,11213,POINT (1001912.872 183803.113)
2,11212,POINT (1008365.375 180768.945)
3,11225,POINT (997023.193 180870.353)
4,11218,POINT (990785.291 173656.048)


In [40]:
geodf_311_data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 19795 entries, 0 to 20098
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   unique_key      19795 non-null  object        
 1   created_date    19795 non-null  datetime64[ns]
 2   incident_zip    19795 non-null  object        
 3   complaint_type  19795 non-null  object        
 4   latitude        19795 non-null  object        
 5   longitude       19795 non-null  object        
 6   geometry        19795 non-null  geometry      
dtypes: datetime64[ns](1), geometry(1), object(5)
memory usage: 1.2+ MB


In [41]:
geodf_311_data.head()

Unnamed: 0,unique_key,created_date,incident_zip,complaint_type,latitude,longitude,geometry
0,59678029,2023-12-08 23:59:52,10023,Noise - Residential,40.77381859641822,-73.98791943512036,POINT (-73.98792 40.77382)
1,59684522,2023-12-08 23:59:46,10014,Noise - Commercial,40.72854727656185,-74.00464710009602,POINT (-74.00465 40.72855)
2,59678462,2023-12-08 23:59:23,11369,Blocked Driveway,40.75918996561309,-73.8822493498847,POINT (-73.88225 40.75919)
3,59685881,2023-12-08 23:59:11,11378,Noise - Commercial,40.72763257870112,-73.89248674712505,POINT (-73.89249 40.72763)
4,59683789,2023-12-08 23:59:09,11378,Animal-Abuse,40.7217501174298,-73.90118341338044,POINT (-73.90118 40.72175)


In [42]:
geodf_tree_data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 47589 entries, 0 to 49999
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   tree_id     47589 non-null  object  
 1   status      47589 non-null  object  
 2   health      47589 non-null  object  
 3   zipcode     47589 non-null  object  
 4   spc_common  47589 non-null  object  
 5   latitude    47589 non-null  object  
 6   longitude   47589 non-null  object  
 7   geometry    47589 non-null  geometry
dtypes: geometry(1), object(7)
memory usage: 3.3+ MB


In [43]:
geodf_tree_data.head()

Unnamed: 0,tree_id,status,health,zipcode,spc_common,latitude,longitude,geometry
0,180683,Alive,Fair,11375,red maple,40.72309177,-73.84421522,POINT (-73.84422 40.72309)
1,200540,Alive,Fair,11357,pin oak,40.79411067,-73.81867946,POINT (-73.81868 40.79411)
2,204026,Alive,Good,11211,honeylocust,40.71758074,-73.9366077,POINT (-73.93661 40.71758)
3,204337,Alive,Good,11211,honeylocust,40.71353749,-73.93445616,POINT (-73.93446 40.71354)
4,189565,Alive,Good,11215,American linden,40.66677776,-73.97597938,POINT (-73.97598 40.66678)


In [44]:
df_zillow_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5040 entries, 3 to 61422
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   RegionName  5040 non-null   int64  
 1   City        5040 non-null   object 
 2   CountyName  5040 non-null   object 
 3   Date        5040 non-null   object 
 4   Rent        5040 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 236.2+ KB


In [45]:
df_zillow_data.head()

Unnamed: 0,RegionName,City,CountyName,Date,Rent
3,11226,New York,Kings County,2015-01-31,1944.609891
6,10025,New York,New York County,2015-01-31,3068.951823
10,11206,New York,Kings County,2015-01-31,2482.829299
11,11221,New York,Kings County,2015-01-31,2125.738807
16,11235,New York,Kings County,2015-01-31,1687.789898


## Part 2: Storing Data

In [46]:
!createdb 4501_FINAL_PROJECT

createdb: error: database creation failed: ERROR:  database "4501_FINAL_PROJECT" already exists


In [47]:
!psql --dbname 4501_FINAL_PROJECT -c 'CREATE EXTENSION postgis;'

ERROR:  extension "postgis" already exists


### Creating Tables


These are just a couple of options to creating your tables; you can use one or the other, a different method, or a combination.

In [48]:
engine = db.create_engine('postgresql://postgres:1220@localhost:5432/4501_FINAL_PROJECT')

#### SQL

In [49]:
# Define the SQL statements to create 4 tables using SQL
ZIPCODE_SCHEMA = """
CREATE TABLE IF NOT EXISTS zipcode (
    id INTEGER PRIMARY KEY,
    ZIPCODE VARCHAR(10),
    geometry GEOMETRY(Point, 4326)
);

DROP INDEX IF EXISTS idx_zipcode_geometry;
CREATE INDEX idx_zipcode_geometry ON zipcode USING GIST (geometry);
"""

NYC_311_SCHEMA = """
CREATE TABLE IF NOT EXISTS complaint (
    unique_key INTEGER PRIMARY KEY,
    created_date TIMESTAMP,
    incident_zip VARCHAR(15),
    complaint_type VARCHAR(100),
    latitude DECIMAL,
    longitude DECIMAL,
    geometry GEOMETRY(Point, 4326)
);

DROP INDEX IF EXISTS idx_zipcode_geometry;
CREATE INDEX IF NOT EXISTS idx_complaint_geometry ON complaint USING GIST (geometry);
"""

NYC_TREE_SCHEMA = """
CREATE TABLE IF NOT EXISTS tree (
    tree_id INTEGER PRIMARY KEY,
    status VARCHAR(50),
    health VARCHAR(50),
    zipcode VARCHAR(10),
    spc_common VARCHAR(100),
    latitude DECIMAL,
    longitude DECIMAL,
    geometry GEOMETRY(Point, 4326)
);

DROP INDEX IF EXISTS idx_tree_geometry;
CREATE INDEX idx_tree_geometry ON tree USING GIST (geometry);
"""

ZILLOW_SCHEMA = """
CREATE TABLE IF NOT EXISTS rent (
    id INTEGER PRIMARY KEY,
    RegionName VARCHAR(10),
    City VARCHAR(50),
    CountyName VARCHAR(50),
    Date DATE,
    Rent DECIMAL(10, 2)
);
"""

In [50]:
# create that required schema.sql file
with open(DB_SCHEMA_FILE, "w") as f:
    f.write(ZIPCODE_SCHEMA)
    f.write(NYC_311_SCHEMA)
    f.write(NYC_TREE_SCHEMA)
    f.write(ZILLOW_SCHEMA)

In [51]:
# Execute the schema files to create tables
schema_file = 'schema.sql'

# Execute the schema file
with engine.connect() as connection:
    with open(schema_file, 'r') as file:
        schema_sql = file.read()
        connection.execute(schema_sql)

### Add Data to Database

These are just a couple of options to write data to your tables; you can use one or the other, a different method, or a combination.

#### SQL

In [52]:


# Write DataFrame to table
def write_dataframes_to_table(tablename_to_dataframe, engine):
    for tablename, dataframe in tablename_to_dataframe.items():
        # Check if the dataframe is a GeoDataFrame
        if isinstance(dataframe, gpd.GeoDataFrame):
            try:
                # Transform the CRS to EPSG:4326 if it's different
                if dataframe.crs and dataframe.crs.to_string() != 'EPSG:4326':
                    dataframe = dataframe.to_crs('EPSG:4326')

                # Write GeoDataFrame to PostGIS, append if the table already exists
                dataframe.to_postgis(name=tablename, con=engine, if_exists='replace', index=False)
                print(f"GeoDataFrame written successfully to table {tablename}")
            except Exception as e:
                print(f"Error writing GeoDataFrame to table {tablename}: {e}")
        else:
            try:
                # Write regular DataFrame to SQL
                dataframe.to_sql(name=tablename, con=engine, if_exists='replace', index=False)
                print(f"DataFrame written successfully to table {tablename}")
            except Exception as e:
                print(f"Error writing DataFrame to table {tablename}: {e}")


In [53]:
tablename_to_dataframe = {
    "zipcodes": geodf_zipcode_data,
    "complaints": geodf_311_data,
    "trees": geodf_tree_data,
    "rents": df_zillow_data,
}

In [55]:
write_dataframes_to_table(tablename_to_dataframe, engine)

GeoDataFrame written successfully to table zipcodes
GeoDataFrame written successfully to table complaints
GeoDataFrame written successfully to table trees
DataFrame written successfully to table rents


## Part 3: Understanding the Data

### Query 1

In [56]:
# Helper function to write the queries to file
def write_query_to_file(query, outfile):
    with open(outfile, 'w') as file:
        file.write(query)

In [57]:
QUERY_1_FILENAME = QUERY_DIR / "complaints_by_zipcodes.sql"

QUERY_1 = """
SELECT
    incident_zip,
    COUNT(*) AS num_complaints
FROM
    complaints
GROUP BY
    incident_zip
ORDER BY
    num_complaints DESC;
"""

In [58]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_1))
    for row in result:
        print(row)

('10025', 408)
('11201', 390)
('10458', 346)
('10003', 340)
('10457', 321)
('10467', 311)
('11238', 308)
('11226', 293)
('10456', 290)
('10468', 270)
('10453', 266)
('10452', 251)
('11207', 244)
('11221', 233)
('10031', 230)
('10032', 218)
('11215', 212)
('11213', 212)
('11385', 209)
('10027', 204)
('10011', 199)
('11208', 199)
('11217', 196)
('11231', 195)
('10466', 194)
('10019', 190)
('11225', 189)
('11230', 185)
('11212', 183)
('11373', 182)
('11368', 180)
('11377', 179)
('11209', 179)
('11233', 179)
('11218', 177)
('10016', 176)
('11203', 171)
('11236', 171)
('11220', 165)
('10036', 164)
('11223', 163)
('11235', 163)
('11216', 160)
('11211', 158)
('10451', 158)
('10030', 157)
('10463', 156)
('10128', 155)
('11214', 155)
('10002', 154)
('11206', 151)
('11239', 151)
('11229', 149)
('10024', 149)
('11372', 148)
('10034', 148)
('11357', 148)
('10472', 146)
('11204', 146)
('10029', 145)
('10026', 144)
('11101', 144)
('11234', 144)
('10023', 143)
('10040', 132)
('10462', 128)
('10009', 

In [59]:
write_query_to_file(QUERY_1, QUERY_1_FILENAME)

### Query 2

In [61]:
QUERY_2_FILENAME = QUERY_DIR / "top_10_zipcodes_most_greenery.sql"

QUERY_2 = """
SELECT
    zipcode,
    COUNT(*) AS num_trees
FROM
    trees
GROUP BY
    zipcode
ORDER BY
    num_trees DESC
LIMIT 10;
"""

In [62]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_2))
    for row in result:
        print(row)

('10306', 2355)
('11230', 1545)
('10466', 1400)
('11375', 1215)
('11215', 1172)
('10312', 1115)
('11426', 1107)
('11105', 1056)
('11218', 1011)
('11372', 970)


In [63]:
write_query_to_file(QUERY_2, QUERY_2_FILENAME)

### Query 3

In [64]:
QUERY_3_FILENAME = QUERY_DIR / "average_rent_in_Aug_for_top_10_greenery_zipcode.sql"

QUERY_3 = """
SELECT
    trees.zipcode,
    AVG(rents."Rent") AS average_rent
FROM
    trees
JOIN
    rents ON trees.zipcode = CAST(rents."RegionName" AS text)
WHERE
    rents."Date" BETWEEN '2023-08-01' AND '2023-08-31'
GROUP BY
    trees.zipcode
ORDER BY
    COUNT(trees.tree_id) DESC
LIMIT 10;
"""

In [65]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_3))
    for row in result:
        print(row)

('11375', 2743.403881587553)
('11215', 3575.650073516554)
('10024', 3797.939788729083)
('10025', 4037.7657250597435)
('11222', 4159.847869342067)
('10023', 4370.073658086057)
('11211', 4342.652098659593)
('11231', 4074.7752245799525)
('11226', 2785.3201367435768)
('11238', 3760.1397967521966)


In [66]:
write_query_to_file(QUERY_3, QUERY_3_FILENAME)

### Query 4

In [67]:
QUERY_4_FILENAME = QUERY_DIR / "correlation_between_rent_trees_and_complaints.sql"

QUERY_4 = """
WITH RentData AS (
    SELECT
        r."RegionName" AS zip_code,
        CAST(AVG(r."Rent") AS NUMERIC(10, 2)) AS average_rent
    FROM
        rents r
    WHERE
        r."Date" BETWEEN '2023-01-01' AND '2023-01-31'
    GROUP BY
        r."RegionName"
),
TreeData AS (
    SELECT
        t.zipcode::NUMERIC,  -- Cast to NUMERIC to match the data type in RentData
        COUNT(t.tree_id) AS tree_count
    FROM
        trees t
    GROUP BY
        t.zipcode::NUMERIC  -- Cast to NUMERIC
),
ComplaintData AS (
    SELECT
        c.incident_zip::NUMERIC,  -- Cast to NUMERIC to match the data type in RentData
        COUNT(c.unique_key) AS complaint_count
    FROM
        complaints c
    WHERE
        c.created_date BETWEEN '2023-01-01' AND '2023-01-31'
    GROUP BY
        c.incident_zip::NUMERIC  -- Cast to NUMERIC
),
CombinedData AS (
    SELECT
        rd.zip_code,
        rd.average_rent,
        COALESCE(td.tree_count, 0) AS tree_count,
        COALESCE(cd.complaint_count, 0) AS complaint_count
    FROM
        RentData rd
    LEFT JOIN
        TreeData td ON rd.zip_code::NUMERIC = td.zipcode
    LEFT JOIN
        ComplaintData cd ON rd.zip_code::NUMERIC = cd.incident_zip
)

(SELECT * FROM CombinedData ORDER BY average_rent DESC LIMIT 5)
UNION ALL
(SELECT * FROM CombinedData ORDER BY average_rent LIMIT 5);
"""

In [68]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_4))
    for row in result:
        print(row)

(10013, Decimal('5480.11'), 108, 0)
(10014, Decimal('4617.94'), 301, 0)
(10018, Decimal('4612.25'), 4, 0)
(10001, Decimal('4577.86'), 26, 0)
(11249, Decimal('4518.98'), 80, 0)
(10040, Decimal('2166.27'), 69, 0)
(11235, Decimal('2367.89'), 459, 0)
(10032, Decimal('2568.75'), 344, 0)
(11375, Decimal('2568.96'), 1215, 0)
(11374, Decimal('2584.12'), 182, 0)


In [70]:
write_query_to_file(QUERY_4, QUERY_4_FILENAME)

### Query 5

In [71]:
QUERY_5_FILENAME = QUERY_DIR / "most_greenery_trees_and_zipcodes.sql"

QUERY_5 = """
WITH TreeZipcodeCounts AS (
    SELECT
        zipcodes."ZIPCODE" AS zip_code,
        COUNT(trees.tree_id) AS tree_count
    FROM
        zipcodes
    LEFT JOIN
        trees ON ST_Within(trees.geometry, zipcodes.geometry) and trees."zipcode" = zipcodes."ZIPCODE"
    GROUP BY
        zipcodes."ZIPCODE"
    ORDER BY
        tree_count DESC
    LIMIT 10
)

SELECT
    tzc.zip_code,
    tzc.tree_count
FROM
    TreeZipcodeCounts tzc
ORDER BY
    tzc.tree_count DESC;
"""

In [72]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_5))
    for row in result:
        print(row)

('10001', 0)
('10002', 0)
('10003', 0)
('10004', 0)
('10005', 0)
('10006', 0)
('10007', 0)
('10009', 0)
('10010', 0)
('00083', 0)


In [73]:
write_query_to_file(QUERY_5, QUERY_5_FILENAME)

### Query 6

In [74]:
QUERY_6_FILENAME = QUERY_DIR / "immediate_area.sql"

QUERY_6 = """
SELECT
    t.tree_id AS id,
    t.spc_common AS species,
    t.health,
    t.status,
    ST_AsText(t.geometry) AS coordinate_location
FROM
    trees t
WHERE 
    ST_DWithin(
        t.geometry::geography,
        ST_MakePoint(-73.96253174434912, 40.80737875669467)::geography,
        804.672  -- Distance in meters (½ mile)
    )
"""

In [75]:
with engine.connect() as conn:
    result = conn.execute(db.text(QUERY_6))
    for row in result:
        print(row)

('198514', 'pin oak', 'Good', 'Alive', 'POINT(-73.9620798 40.80230109)')
('209919', 'London planetree', 'Good', 'Alive', 'POINT(-73.96331506 40.80881155)')
('209921', 'London planetree', 'Good', 'Alive', 'POINT(-73.96340334 40.80874458)')
('203887', 'willow oak', 'Good', 'Alive', 'POINT(-73.96071917 40.80572583)')
('196440', 'American elm', 'Fair', 'Alive', 'POINT(-73.96412322 40.81114538)')
('209913', 'pin oak', 'Good', 'Alive', 'POINT(-73.96312072 40.80907709)')
('178550', 'Norway maple', 'Good', 'Alive', 'POINT(-73.95739877 40.80690345)')
('189403', 'Callery pear', 'Fair', 'Alive', 'POINT(-73.95861206 40.80754134)')
('196606', 'honeylocust', 'Good', 'Alive', 'POINT(-73.96719944 40.80732247)')
('198512', 'pin oak', 'Good', 'Alive', 'POINT(-73.96230577 40.80239611)')
('196541', 'honeylocust', 'Good', 'Alive', 'POINT(-73.9654983 40.80929314)')
('203996', 'Callery pear', 'Good', 'Alive', 'POINT(-73.96134249 40.80528416)')
('189414', 'ginkgo', 'Good', 'Alive', 'POINT(-73.95983152 40.8080

In [76]:
write_query_to_file(QUERY_6, QUERY_6_FILENAME)

## Part 4: Visualizing the Data

### Visualization 1

In [None]:
# use a more descriptive name for your function
def plot_visual_1(dataframe):
    figure, axes = plt.subplots(figsize=(20, 10))
    
    values = "..."  # use the dataframe to pull out values needed to plot
    
    # you may want to use matplotlib to plot your visualizations;
    # there are also many other plot types (other 
    # than axes.plot) you can use
    axes.plot(values, "...")
    # there are other methods to use to label your axes, to style 
    # and set up axes labels, etc
    axes.set_title("Some Descriptive Title")
    
    plt.show()

In [None]:
def get_data_for_visual_1():
    # Query your database for the data needed.
    # You can put the data queried into a pandas/geopandas dataframe, if you wish
    raise NotImplementedError()

In [None]:
some_dataframe = get_data_for_visual_1()
plot_visual_1(some_dataframe)