# Exploratory Data Analysis
Initial data exploration
* Update precipitation CSV with new data
* View the first and last observation dates for each station
* View the locations of each station on a map

In [1]:
# Standard library
import os
import sys

# Third-party
import numpy as np
import pandas as pd

# Put the parent of the current working directory on the module search path
# so the project-local `functions` module below can be imported
# (presumably it lives one level above this notebook's folder — confirm).
sys.path.append(os.path.dirname(os.getcwd()))

# Project-local helpers
import functions

## Option 1: Explore new weather stations

In [9]:
# Columns that identify a station and describe where it is.
station_columns = ['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION']

# Read the raw export, keep only the station columns, collapse rows that are
# identical across those columns, and renumber the index from 0.
df = (
    pd.read_csv('../data/raw_temp_chicago_stations.csv')
    .loc[:, station_columns]
    .drop_duplicates()
    .reset_index(drop=True)
)
df

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION
0,US1ILCK0393,"TINLEY PARK 1.2 WNW, IL US",41.582370,-87.821920,215.8
1,US1ILCK0150,"ARLINGTON HEIGHTS 1.2 S, IL US",42.077974,-87.983814,208.8
2,USC00115097,"LISLE MORTON ARBORETUM, IL US",41.812710,-88.072750,206.3
3,US1ILWL0173,"LOCKPORT 5.0 W, IL US",41.579960,-88.143802,192.9
4,US1ILCK0390,"ALSIP 0.9 E, IL US",41.672600,-87.718100,182.0
...,...,...,...,...,...
168,US1ILCK0163,"ARLINGTON HEIGHTS 1.2 SW, IL US",42.081944,-87.997116,215.8
169,US1ILDP0150,"BLOOMINGDALE 1.2 ESE, IL US",41.939720,-88.066570,221.0
170,US1ILCK0278,"HOFFMAN ESTATES 1.6 SE, IL US",42.048122,-88.116188,242.0
171,US1ILWL0149,"BULT FIELD MONEE 4.9 SE, IL US",41.377400,-87.679750,236.8


In [30]:
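# # Optional - add these stations to the geo_data file
# # (index=False stops pandas writing the row index as an extra unnamed column;
# #  note that re-running this appends the same stations again)
# df_geo_data = pd.read_csv('../data/geo_data.csv')
# df_new = pd.concat([df_geo_data, df])
# df_new.to_csv('../data/geo_data.csv', index=False)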

In [25]:
# Map the stations in `df`. The default_* keyword values (41.8, -87.5, zoom 9)
# presumably set the initial map view around the Chicago area — confirm
# against functions.map_stations.
functions.map_stations(df, default_lat=41.8, default_long=-87.5, default_zoom=9)

## Option 2: Add new data to an existing file

In [None]:
# Append new data to an existing file

# Where the freshly downloaded records live, and the dataset they extend.
NEW_DATA_TO_ADD_PATH = '../data/raw_precipitation_97016.csv'
EXISTING_DATA_PATH = '../data/precipitation_97016.csv'

# Presumably combines the new records with the existing file and, with
# overwrite='yes', writes the result back — confirm in functions.update_data.
df = functions.update_data(
    new_data_path=NEW_DATA_TO_ADD_PATH,
    existing_data_path=EXISTING_DATA_PATH,
    overwrite='yes',
)

# Date range covered by each station in the updated data.
station_dates = functions.view_station_date_ranges(df)

# NOTE(review): the same path is passed as both the new and the existing data
# with overwrite='yes'. This looks like a way to simply load geo_data.csv, but
# it may also rewrite that file on every run — verify against update_data.
geo_data = functions.update_data(
    new_data_path='../data/geo_data.csv',
    existing_data_path='../data/geo_data.csv',
    overwrite='yes',
)

# Left join: every station in station_dates is kept, and stations without a
# match in geo_data get missing location values.
# The stations with recent history have geo_data available
station_dates.merge(geo_data, how='left', on=['STATION', 'NAME'])

Unnamed: 0,STATION,NAME,min_date,max_date,LATITUDE,LONGITUDE,ELEVATION
0,US1ORCB0013,"CLATSKANIE 3.0 NE, OR US",1998-06-17,2024-06-28,46.131337,-123.15711,44.5
1,US1ORCB0027,"CLATSKANIE 5.0 NE, OR US",2013-04-13,2016-01-04,,,
2,US1ORCB0039,"BIRKENFELD 3.1 NNW, OR US",2016-09-09,2017-01-18,,,
3,US1ORCB0040,"CLATSKANIE 8.8 SW, OR US",2017-05-30,2025-01-13,46.006917,-123.324535,194.2
4,US1ORCB0041,"CLATSKANIE 6.1 W, OR US",2018-01-06,2025-01-13,46.1157,-123.3313,4.6
5,USC00351643,"CLATSKANIE, OR US",1935-05-01,2024-12-31,46.1081,-123.2058,6.7
6,USC00355838,"MOUNTAIN HOME, OR US",1909-06-01,1917-07-31,,,
7,USC00359121,"WAUNA, OR US",2011-01-01,2016-12-31,,,
8,USR0000OMLL,"MILLER OREGON, OR US",1996-10-17,2025-01-13,46.0228,-123.2711,332.2


In [9]:
# Map the stations in geo_data (assigned in the Option 2 cell above).
# No default_lat / default_long / default_zoom are passed here, unlike the
# Option 1 call, so map_stations uses its own defaults (not visible here).
functions.map_stations(geo_data)