# OpenWeather DataFrame for ETL Project

We get our Auto Insurance Data, use the zip codes (or cities) in our dataset and create an API to pull weather data for those zip codes

#### Import dependencies

In [49]:
import matplotlib.pyplot as plt
import requests
import pandas as pd
from config import api_key
from pprint import pprint
import os
import pandas as pd
import csv

In [73]:
# # Import Auto Insurance data
auto_data_path = os.path.join("Resources", "Average_Auto_Insurance_Data.csv")
auto_data_df = pd.read_csv(auto_data_path)

In [75]:
# # Set up a dataframe for the Auto insurance data
auto_data_df_v0 = auto_data_df .copy()
auto_data_df['Zip Code']=auto_data_df_v0['Zip Code'].apply(lambda x: '{0:0>5}'.format(x))
auto_data_df

Unnamed: 0,State,Zip Code,City,Average Auto Insurance Rate
0,Michigan,48226,Detroit,"$6,329"
1,New York,11212,New York,"$5,703"
2,Louisiana,70117,New Orleans,"$4,601"
3,Nevada,89101,Las Vegas,"$3,768"
4,California,91605,North Hollywood,"$3,767"
...,...,...,...,...
220,Louisiana,70117,New Orleans,"$9,303.50"
221,District Of Columbia,20002,Washington,"$4,440.79"
222,New Jersey,07102,Newark,"$8,512.59"
223,Delaware,06340,Groton,"$3,896.10"


In [52]:
# # Generate My lists
auto_data_list = auto_data_df.values.tolist()
zip_list = auto_data_df.Zip_Code.tolist() 
city_list1 = auto_data_df.City.tolist()

#### Save configuration and build query URL

In [53]:
# Save config information.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "Imperial"

# Build partial city query URL
query_url = f"{url}appid={api_key}&units={units}&q="
# print(query_url)

# Build partial zip code query URL
query2_url = f"{url}&units={units}&q="
# print(query2_url)

#### Loop through list of zip codes to request weather information for each

In [54]:
# Loop with a list of zipcodes

# set up lists to hold reponse info
lat_list = []
lon_list = []
hum_list = []
temp_list = []
maxTemp_list = []
minTemp_list = []
cloud_list = []
wind_list = []
city_list = []
weather_list = []
description_list =[]
# Loop through the list of cities and perform a request for data on each
for zips in zip_list:
    weather_url = f"{query2_url}{zips},US&appid={api_key}"
#     print(weather_url)
    response = requests.get(weather_url).json()
#     pprint(response)
    try:
        lon_list.append(response['coord']['lon'])
        lat_list.append(response['coord']['lat'])
    except:
        print(f"Oops, looks like we didn't get something (lat or long) from this zip code: {zips} gave us a problem. On to the next one")
        lon_list.append('NaN')
        lat_list.append('NaN')
    try:
        weather_list.append(response['weather'][0]['main'])
        description_list.append(response['weather'][0]['description'])
    except:
        print(f"Oops, looks like we didn't get something (weather or description) from this zip code: {zips} gave us a problem. On to the next one")
        weather_list.append('NaN')
        description_list.append('NaN')
    try:
        temp_list.append(response['main']['temp'])
        maxTemp_list.append(response['main']['temp_max'])
        minTemp_list.append(response['main']['temp_min'])
    except:
        print(f"Oops, looks like we didn't get something (temp, minTemp or maxtemp) from this zip code: {zips} gave us a problem. On to the next one")
        temp_list.append('NaN')
        maxTemp_list.append('NaN')
        minTemp_list.append('NaN')
    try:
        hum_list.append(response['main']['humidity'])
        cloud_list.append(response['clouds']['all'])
        wind_list.append(response['wind']['speed'])
        city_list.append(response['name'])
    except:
        print(f"Oops, looks like we didn't get something from this zip code: {zips} gave us a problem. On to the next one")
        hum_list.append('NaN')
        cloud_list.append('NaN')
        wind_list.append('NaN')
        city_list.append('NaN')

# Create lat lon pairs
latlongList = list(zip(lat_list, lon_list))
# latlongList

Oops, looks like we didn't get something (lat or long) from this zip code: 59089 gave us a problem. On to the next one
Oops, looks like we didn't get something (weather or description) from this zip code: 59089 gave us a problem. On to the next one
Oops, looks like we didn't get something (temp, minTemp or maxtemp) from this zip code: 59089 gave us a problem. On to the next one
Oops, looks like we didn't get something from this zip code: 59089 gave us a problem. On to the next one
Oops, looks like we didn't get something (lat or long) from this zip code: 71439 gave us a problem. On to the next one
Oops, looks like we didn't get something (weather or description) from this zip code: 71439 gave us a problem. On to the next one
Oops, looks like we didn't get something (temp, minTemp or maxtemp) from this zip code: 71439 gave us a problem. On to the next one
Oops, looks like we didn't get something from this zip code: 71439 gave us a problem. On to the next one
Oops, looks like we didn't g

#### Create a DataFrame from the zip code weather data

In [55]:
# Building df with zip code query
weather_dict = {"Zip_Code": zip_list, "City": city_list, "Weather_Forecast": weather_list, "Weather_Description": description_list, "Max_Temperature": maxTemp_list, "Min_Temperature": minTemp_list, "Humidity": hum_list}
weather_df = pd.DataFrame(weather_dict)
weather_df = weather_df[~weather_df.Max_Temperature.str.contains("NaN", na=False)]
weather_df

Unnamed: 0,Zip_Code,City,Weather_Forecast,Weather_Description,Max_Temperature,Min_Temperature,Humidity
0,02119,Boston,Clouds,overcast clouds,48.99,43,57
1,02119,Boston,Clouds,overcast clouds,48.99,43,57
2,02554,Fall River,Clouds,overcast clouds,50,48,61
3,02842,Newport,Clouds,overcast clouds,50,46.4,75
4,02907,Cranston,Clouds,overcast clouds,46.99,44.01,75
...,...,...,...,...,...,...,...
219,98108,Seattle,Clear,clear sky,39.99,33.01,64
220,98118,Seattle,Clear,clear sky,39.99,32,64
221,98368,Everett,Clouds,broken clouds,37.99,32,76
222,99504,Anchorage,Rain,moderate rain,37,33.01,92


#### Merge weather data with Auto insurance data

In [71]:
# Combine the data into a single dataset
Auto_data_weather_v1_df = pd.merge(auto_data_df, weather_df, how='left', on='Zip_Code')
Auto_data_weather_v1a_df = Auto_data_weather_v1_df.rename(columns={'City_x':'City'})
Auto_data_weather_v1a_df = Auto_data_weather_v1a_df.dropna(how='any')
Auto_data_weather_v1a_df

Unnamed: 0,State,Zip_Code,City,Average_Auto_Insurance_Rate,City_y,Weather_Forecast,Weather_Description,Max_Temperature,Min_Temperature,Humidity
0,Massachusetts,02119,Roxbury,"$2,943",Boston,Clouds,overcast clouds,48.99,43,57
1,Massachusetts,02119,Roxbury,"$2,943",Boston,Clouds,overcast clouds,48.99,43,57
2,Massachusetts,02119,Boston,"$5,333.79",Boston,Clouds,overcast clouds,48.99,43,57
3,Massachusetts,02119,Boston,"$5,333.79",Boston,Clouds,overcast clouds,48.99,43,57
4,Massachusetts,02554,Nantucket,"$2,378.90",Fall River,Clouds,overcast clouds,50,48,61
...,...,...,...,...,...,...,...,...,...,...
321,Washington,98368,Port Townsend,"$2,461.54",Everett,Clouds,broken clouds,37.99,32,76
322,Alaska,99504,Anchorage,"$1,877",Anchorage,Rain,moderate rain,37,33.01,92
323,Alaska,99504,Anchorage,"$1,877",Anchorage,Rain,moderate rain,37,33.01,92
324,Alaska,99504,Anchorage,"$3,132.74",Anchorage,Rain,moderate rain,37,33.01,92


#### Outputing data

In [72]:
# Output weather dataset
output_file = os.path.join("Resources", "Weather_Data_v1.csv")
weather_df.to_csv(output_file, index = False)

# Output merged dataset
output_file = os.path.join("Resources", "Average_Auto_Insurance_Weather_Data_v1.csv")
Auto_data_weather_v1a_df.to_csv(output_file, index = False)