# Importing Data and Libraries 

In [9]:
import json
from pathlib import Path

import folium
import numpy as np
import pandas as pd

In [11]:

# Path to the US-states GeoJSON file.
# Built from the current user's home directory so the notebook does not
# depend on one machine's absolute path; kept as a plain string.
geojson_path = str(Path.home() / 'Downloads' / 'us-states.json')


In [13]:
# Parse the state-boundary GeoJSON into a Python dict.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding.
with open(geojson_path, encoding='utf-8') as geojson_file:
    geojson_data = json.load(geojson_file)

In [15]:
# Peek at the first feature to see how each state is keyed: an 'id' code
# plus a 'properties' -> 'name' entry, followed by the boundary geometry.
geojson_data['features'][0]

{'type': 'Feature',
 'id': 'AL',
 'properties': {'name': 'Alabama'},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-87.359296, 35.00118],
    [-85.606675, 34.984749],
    [-85.431413, 34.124869],
    [-85.184951, 32.859696],
    [-85.069935, 32.580372],
    [-84.960397, 32.421541],
    [-85.004212, 32.322956],
    [-84.889196, 32.262709],
    [-85.058981, 32.13674],
    [-85.053504, 32.01077],
    [-85.141136, 31.840985],
    [-85.042551, 31.539753],
    [-85.113751, 31.27686],
    [-85.004212, 31.003013],
    [-85.497137, 30.997536],
    [-87.600282, 30.997536],
    [-87.633143, 30.86609],
    [-87.408589, 30.674397],
    [-87.446927, 30.510088],
    [-87.37025, 30.427934],
    [-87.518128, 30.280057],
    [-87.655051, 30.247195],
    [-87.90699, 30.411504],
    [-87.934375, 30.657966],
    [-88.011052, 30.685351],
    [-88.10416, 30.499135],
    [-88.137022, 30.318396],
    [-88.394438, 30.367688],
    [-88.471115, 31.895754],
    [-88.241084, 33.796253],
    [-88.098683, 34.8

# Loading and Inspecting the Cleaned Data

In [29]:
# Load the already-cleaned housing dataset.
# The file is located relative to the current user's home directory rather
# than through a machine-specific absolute path.
df = pd.read_csv(Path.home() / 'Desktop' / 'cleaned_kc_house_data.csv')


In [31]:
# Load the US-states GeoJSON.
# NOTE: this repeats the load done at the top of the notebook (same file,
# same variable names); it is kept so this section can be run on its own.
# The path is built from the user's home directory and the encoding is
# stated explicitly so the read is not machine- or locale-dependent.
geojson_path = str(Path.home() / 'Downloads' / 'us-states.json')
with open(geojson_path, encoding='utf-8') as geojson_file:
    geojson_data = json.load(geojson_file)

In [35]:
# List the available columns; 'zipcode' and 'price' are the two used in the
# wrangling and aggregation steps below.
df.columns

Index(['date', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
       'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long',
       'sqft_living15', 'sqft_lot15', 'was_renovated'],
      dtype='object')

In [37]:
# Preview the first rows to sanity-check the loaded values.
df.head()

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,...,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,was_renovated
0,2014-10-13,221900.0,3,1.0,1180,5650,1.0,0,0,3,...,1180,0,1955,0,98178,47.5112,-122.257,1340,5650,0
1,2014-12-09,538000.0,3,2.25,2570,7242,2.0,0,0,3,...,2170,400,1951,1991,98125,47.721,-122.319,1690,7639,1
2,2015-02-25,180000.0,2,1.0,770,10000,1.0,0,0,3,...,770,0,1933,0,98028,47.7379,-122.233,2720,8062,0
3,2014-12-09,604000.0,4,3.0,1960,5000,1.0,0,0,5,...,1050,910,1965,0,98136,47.5208,-122.393,1360,5000,0
4,2015-02-18,510000.0,3,2.0,1680,8080,1.0,0,0,3,...,1680,0,1987,0,98074,47.6168,-122.045,1800,7503,0


# Data Wrangling

In [39]:
# Map the first TWO digits of a ZIP code to an approximate state name.
# The names are later matched against the GeoJSON 'properties.name' values,
# so they must be spelled exactly as the state names appear there.
#
# NOTE(review): a two-digit prefix is only an approximation. Some prefixes
# straddle state lines (e.g. part of the '99' range is Alaska and part of
# the '83' range is Wyoming) — verify before using this with multi-state data.
zip_to_state = {
    '99': 'Washington',
    '98': 'Washington',
    '97': 'Oregon',
    '95': 'California',
    '94': 'California',
    '90': 'California',
    '85': 'Arizona',
    '84': 'Utah',
    '83': 'Idaho',
    '80': 'Colorado',  # fixed: 80xxx ZIP codes are in Colorado, not California
    # Add more if needed based on your dataset — King County = Washington
}


In [41]:
# Derive each row's state from its two-digit ZIP prefix.
# ZIP codes stored as integers lose any leading zeros (e.g. 02134 -> 2134),
# which would make the first two characters the wrong prefix, so pad back to
# five digits before slicing. Prefixes missing from zip_to_state become NaN.
zip_prefix = df['zipcode'].astype(str).str.zfill(5).str[:2]
df['STATE_NAME'] = zip_prefix.map(zip_to_state)


In [43]:
# Count rows per mapped state. NaN is included so that rows whose ZIP prefix
# is missing from zip_to_state show up here instead of being silently
# dropped (they would also be excluded from the groupby that follows).
df['STATE_NAME'].value_counts(dropna=False)


STATE_NAME
Washington    21613
Name: count, dtype: int64

In [49]:
# Mean price per state as a two-column frame (STATE_NAME, avg_price),
# the shape the choropleth expects for its `data` / `columns` arguments.
state_price = (
    df.groupby('STATE_NAME')['price']
    .mean()
    .reset_index(name='avg_price')
)


In [51]:
# Show the aggregated table (one row per state found in the data).
state_price.head()


Unnamed: 0,STATE_NAME,avg_price
0,Washington,540088.141767


# Plotting a Choropleth with Folium

In [53]:

# Create a base map centered on Washington State (the only state present in
# the data), not on the US as a whole.
# NOTE: the name `map` shadows Python's built-in `map`; it is kept because a
# later cell calls `map.save(...)`.
map = folium.Map(location=[47.5, -120.5], zoom_start=6)

# Add choropleth layer.
# The GeoJSON already parsed into `geojson_data` is reused instead of
# pointing at a hard-coded, machine-specific file path a second time.
folium.Choropleth(
    geo_data=geojson_data,
    data=state_price,
    columns=['STATE_NAME', 'avg_price'],   # [key column, value column]
    key_on='feature.properties.name',      # join on the state's full name
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Average House Price'
).add_to(map)

# Display the map
map


## Geospatial Analysis Summary

In this analysis, I created a choropleth map using the `folium` library to visualize the **average house prices by U.S. state**, based on the King County housing dataset.

After importing the necessary libraries and datasets, I performed the following steps:

- Extracted state information from the zip code using a custom mapping.
- Grouped the data by state and calculated the average house price.
- Mapped the results onto a GeoJSON file representing U.S. states.

### Key Insight:
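The only state present in the dataset is **Washington**, as the King County data is limited to that region. The average house price across these King County records is approximately **$540,088**; because the data covers a single county, this figure should not be read as a statewide average for Washington.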

### Further Questions:
- How do house prices vary within different counties of Washington?
- Could this map be expanded with more state-level housing data for a national comparison?


In [60]:
# Write the interactive map to a standalone HTML file on the current user's
# Desktop (path built from the home directory, not a machine-specific one).
map.save(str(Path.home() / 'Desktop' / 'choropleth_house_prices.html'))
