# Exploratory Data Analysis
* Explore new weather stations
    * Date range by station
    * Geospatial location by station (map)
* Add new data to an existing file

In [1]:
import numpy as np
import os
import pandas as pd
import sys
sys.path.append(os.path.dirname(os.getcwd()))

import functions

## Explore new weather stations

### Date range by station

In [12]:
df = pd.read_csv('../data/eugene_temp_data.csv')
station_dates = functions.view_station_date_ranges(df, 'TMIN')
station_dates

Unnamed: 0,STATION,NAME,min_date,max_date
0,USC00352867,"FERN RIDGE VENETA, OR US",2020-01-01,2024-12-31
1,USW00024221,"EUGENE MAHLON SWEET FIELD, OR US",2020-01-01,2024-12-31


In [None]:
# # Check for missing data

# # Which DATE are missing in x?
# missing_dates = pd.date_range(start=x['DATE'].min(), end=x['DATE'].max()).difference(pd.to_datetime(x['DATE']))
# missing_dates

# # Count the number of missing dates by month from missing_dates
# missing_dates_df = pd.DataFrame(missing_dates, columns=['DATE'])
# missing_dates_df['month'] = pd.to_datetime(missing_dates_df['DATE']).dt.month
# missing_counts = missing_dates_df['month'].value_counts().sort_index()
# missing_counts

# # Count the number of missing dates by year from missing_dates
# missing_dates_df = pd.DataFrame(missing_dates, columns=['DATE'])
# missing_dates_df['year'] = pd.to_datetime(missing_dates_df['DATE']).dt.year
# missing_counts = missing_dates_df['year'].value_counts().sort_index()
# missing_counts

### Geospatial location by station

In [None]:
df = pd.read_csv('../data/geo_data.csv')
!pwd

Unnamed: 0.1,Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION
177,171,US1ILWL0149,"BULT FIELD MONEE 4.9 SE, IL US",41.3774,-87.67975,236.8
178,172,USC00111577,"CHICAGO MIDWAY AIRPORT 3 SW, IL US",41.73727,-87.77734,189.0
179,173,USW00012842,"TAMPA INTERNATIONAL AIRPORT, FL US",27.96331,-82.54,1.8
180,174,USW00094261,"PORTLAND HILLSBORO AIRPORT, OR US",45.54762,-122.95572,
181,175,USW00094225,"HOQUIAM BOWERMAN AIRPORT, WA US",46.97288,-123.93074,4.5


In [17]:
!pwd

/Users/alberta/Documents/git_token/weather_python/notebooks


In [None]:
# Map the stations, save the html file
map_html = functions.map_stations(
    df.head(),
    # default_lat=41.8, default_long=-87.5,default_zoom=9 # Chicago
    default_lat=45.5, default_long=-122.6,default_zoom=8 # Northwest Oregon
    )
path = '<enter_path_here>'
map_html.save(path + '/map.html')

## Add new data to an existing file

In [22]:
# Import existing data
df = pd.read_csv('../data/geo_data.csv')
df.tail()

Unnamed: 0.1,Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION
177,171,US1ILWL0149,"BULT FIELD MONEE 4.9 SE, IL US",41.3774,-87.67975,236.8
178,172,USC00111577,"CHICAGO MIDWAY AIRPORT 3 SW, IL US",41.73727,-87.77734,189.0
179,173,USW00012842,"TAMPA INTERNATIONAL AIRPORT, FL US",27.96331,-82.54,1.8
180,174,USW00094261,"PORTLAND HILLSBORO AIRPORT, OR US",45.54762,-122.95572,
181,175,USW00094225,"HOQUIAM BOWERMAN AIRPORT, WA US",46.97288,-123.93074,4.5


In [24]:
# Filter new data to only stations with relevant history
df_new = pd.read_csv('../data/eugene_geo.csv')[[
    'STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION'
]]
df_new = df_new[df_new['STATION'].isin(station_dates['STATION'].unique())].drop_duplicates()
df_new.tail()

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION
10933,USW00024221,"EUGENE MAHLON SWEET FIELD, OR US",44.13311,-123.21563,109.0
51167,USC00352867,"FERN RIDGE VENETA, OR US",44.0494,-123.3702,147.8


In [None]:
# Append the data to existing data and remove duplicates
combined = pd.concat([df, df_new]).drop_duplicates()
combined.tail(10)

Unnamed: 0.1,Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION
174,168.0,US1ILCK0163,"ARLINGTON HEIGHTS 1.2 SW, IL US",42.081944,-87.997116,215.8
175,169.0,US1ILDP0150,"BLOOMINGDALE 1.2 ESE, IL US",41.93972,-88.06657,221.0
176,170.0,US1ILCK0278,"HOFFMAN ESTATES 1.6 SE, IL US",42.048122,-88.116188,242.0
177,171.0,US1ILWL0149,"BULT FIELD MONEE 4.9 SE, IL US",41.3774,-87.67975,236.8
178,172.0,USC00111577,"CHICAGO MIDWAY AIRPORT 3 SW, IL US",41.73727,-87.77734,189.0
179,173.0,USW00012842,"TAMPA INTERNATIONAL AIRPORT, FL US",27.96331,-82.54,1.8
180,174.0,USW00094261,"PORTLAND HILLSBORO AIRPORT, OR US",45.54762,-122.95572,
181,175.0,USW00094225,"HOQUIAM BOWERMAN AIRPORT, WA US",46.97288,-123.93074,4.5
10933,,USW00024221,"EUGENE MAHLON SWEET FIELD, OR US",44.13311,-123.21563,109.0
51167,,USC00352867,"FERN RIDGE VENETA, OR US",44.0494,-123.3702,147.8


In [None]:
# combined[['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION']].to_csv(
#     '../data/geo_data.csv', index=False
# )