**Downloading and Cleaning Meterological Data** <br>
Carolyn Vilter <br>
<br>
Inspiration / Meteorological API approach from Varsha Gopalakrishnan<br>
https://github.com/varsha2509/hyperlocal-aq-prediction/blob/master/Notebooks/Daymet-Data-API-Call.ipynb

In [91]:
# Setup
import daymetpy
import pandas as pd
import geopandas as gpd
import seaborn as sns
import numpy as np

In [92]:
# Basis of eventual met dataset: Vishal's air quality sensor data
# Air quality sensors will be the rows/obs of the final dataset

# Other users: change pathname if needed
air_quality_NO2 = pd.read_csv("/Users/carolynvilter/Documents/MSCAPP_Y1Q3/machine_learning/air-quality-prediction/data/cleaned/air_quality_NO2.csv")
air_quality_NO2

Unnamed: 0.1,Unnamed: 0,locationId,location,city,country,utc,local,parameter,value,unit,latitude,longitude
0,0,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000517,ppm,29.789446,-95.483822
1,1,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000756,ppm,29.750588,-95.419853
2,2,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001427,ppm,29.810590,-95.348540
3,3,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001262,ppm,29.786917,-95.298818
4,4,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000803,ppm,29.604903,-95.364682
...,...,...,...,...,...,...,...,...,...,...,...,...
11529,11529,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000631,ppm,29.806077,-95.393692
11530,11530,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000593,ppm,29.735933,-95.235322
11531,11531,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001280,ppm,29.770637,-95.397326
11532,11532,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001794,ppm,29.801806,-95.341444


In [93]:
# FIRST: tf_no2 data
# Create lat and lon rounded to 2 dp - the sensitivity of the met data API
air_quality_NO2["lat_short"] = air_quality_NO2["latitude"].round(2)
air_quality_NO2["lon_short"] = air_quality_NO2["longitude"].round(2)

# Create a unique list of lat, long pairs rounded to 2 decimal places
# These are the points we need meteorological data for 
air_quality_NO2_short = air_quality_NO2[["lat_short", "lon_short"]]
air_quality_NO2_short.drop_duplicates(inplace = True)
air_quality_NO2_short

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  air_quality_NO2_short.drop_duplicates(inplace = True)


Unnamed: 0,lat_short,lon_short
0,29.79,-95.48
1,29.75,-95.42
2,29.81,-95.35
3,29.79,-95.30
4,29.60,-95.36
...,...,...
3673,29.77,-95.26
3987,29.78,-95.46
4610,29.79,-95.49
5027,29.62,-95.36


In [94]:
met_data = None
for index, row in air_quality_NO2_short.iterrows():
    lat = row["lat_short"]
    lon = row["lon_short"]
    # for each lat, lon pair in the list, get 2017-2018 data
    data = daymetpy.daymet_timeseries(lon=lon, lat=lat, start_year=2017, end_year=2018)
    # then limit to the time range (July 2017 to March 2018, inclusive)
    data = data[((data["yday"] >= 182) & (data["year"] == 2017)) | ((data["yday"] <= 90) & (data["year"] == 2018))]
    # then compress to one row containing the average of each var for all observations
    # removing unnecessary variables
    data = data.mean(axis=0).to_frame().transpose().drop(["year", "dayl", "yday"], axis=1)
    # then add lat lon columns
    data["lat_short"] = lat
    data["lon_short"] = lon
    # then append the new row to the main df
    if index == 0:
        met_data = data
    else:
        met_data = pd.concat([met_data, data])


In [95]:
met_data

Unnamed: 0,prcp,srad,swe,tmax,tmin,vp,lat_short,lon_short
0,6.013942,324.280839,0.0,26.028285,15.117482,1923.620985,29.79,-95.48
0,6.062372,324.512628,0.0,26.011606,15.253796,1939.266788,29.75,-95.42
0,6.364124,323.496971,0.0,25.941131,15.169124,1930.899416,29.81,-95.35
0,6.543102,322.899088,0.0,25.879489,15.221825,1936.849307,29.79,-95.30
0,5.763905,325.528248,0.0,25.994635,15.577007,1976.188212,29.60,-95.36
...,...,...,...,...,...,...,...,...
0,6.648504,321.606569,0.0,25.818942,15.266934,1941.546131,29.77,-95.26
0,6.026204,324.602007,0.0,26.029562,15.162263,1928.930474,29.78,-95.46
0,6.008905,324.609124,0.0,26.041423,15.112956,1922.943285,29.79,-95.49
0,5.852336,325.209818,0.0,25.986642,15.545073,1972.396423,29.62,-95.36


In [101]:
# then join the new, main met df onto vishal's data
master = pd.merge(air_quality_NO2, met_data, on = ["lat_short", "lon_short"])
master

Unnamed: 0.1,Unnamed: 0,locationId,location,city,country,utc,local,parameter,value,unit,latitude,longitude,lat_short,lon_short,prcp,srad,swe,tmax,tmin,vp
0,0,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000517,ppm,29.789446,-95.483822,29.79,-95.48,6.013942,324.280839,0.0,26.028285,15.117482,1923.620985
1,60,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000457,ppm,29.791588,-95.481751,29.79,-95.48,6.013942,324.280839,0.0,26.028285,15.117482,1923.620985
2,202,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000511,ppm,29.789405,-95.483061,29.79,-95.48,6.013942,324.280839,0.0,26.028285,15.117482,1923.620985
3,242,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000579,ppm,29.794780,-95.477146,29.79,-95.48,6.013942,324.280839,0.0,26.028285,15.117482,1923.620985
4,389,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.000370,ppm,29.792618,-95.480138,29.79,-95.48,6.013942,324.280839,0.0,26.028285,15.117482,1923.620985
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11529,8188,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001261,ppm,29.615010,-95.355268,29.62,-95.36,5.852336,325.209818,0.0,25.986642,15.545073,1972.396423
11530,8504,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001300,ppm,29.615940,-95.355437,29.62,-95.36,5.852336,325.209818,0.0,25.986642,15.545073,1972.396423
11531,9467,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001509,ppm,29.616374,-95.355169,29.62,-95.36,5.852336,325.209818,0.0,25.986642,15.545073,1972.396423
11532,9865,61509,,Houston,US,2018-03-31T06:00:00+00:00,2018-03-31T01:00:00-05:00,no2,0.001708,ppm,29.616391,-95.355433,29.62,-95.36,5.852336,325.209818,0.0,25.986642,15.545073,1972.396423
