In [None]:
# Dependencies and Setup
import json
import os
import pprint
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as st
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

In [None]:
# Import API key
from api_keys import weather_api_key

In [None]:
# Output File (CSV)
# NOTE(review): no other cell visible in this notebook reads this path; the CSV
# export cell writes to 'weather_data.csv' instead - confirm which is intended.
output_data_file = "output_data/cities.csv"

In [None]:
# Inclusive (min, max) bounds used when sampling random coordinates.
lat_range, lng_range = (-90, 90), (-180, 180)

In [None]:
# Sample random coordinates, look up the nearest city for each one, and keep
# every distinct city name once, in first-seen order.
cities = []
# Companion set for the duplicate check: membership is constant-time on average,
# whereas `city not in cities` would rescan the whole list on every iteration.
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
# Materialise the pairs: a bare zip() is a one-shot iterator, so it would be
# empty after the loop below and useless for any later inspection.
lat_lngs = list(zip(lats, lngs))

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Display the city count to confirm sufficient count
len(cities)

In [None]:
# Save config information.
# HTTPS is used because the API key is part of the query string; over plain
# HTTP it would travel in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "Imperial"

# Build partial query URL; callers append the city name after the trailing "q=".
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [None]:
# One-off sanity check: issue a single request and show the decoded JSON payload
# so the response structure can be inspected before looping over every city.
sample_response = requests.get(query_url + "china")
view = sample_response.json()
print(view)
# Pretty-printed alternative: print(json.dumps(view, indent=4, sort_keys=True))

In [None]:
# At this point there is a list of candidate cities and a working query URL.
# For each city we collect latitude, longitude, temperature, humidity,
# cloudiness, wind speed, country and timestamp into parallel lists.
city_lst = []
lat_lst = []
lon_lst = []
temp_lst = []
hum_lst = []
cc_lst = []
ws_lst = []
con_lst = []
date_lst = []
count = 1

# Not every candidate city returns weather data, so `city_lst` (and its sibling
# lists) only receive an entry when a complete record was read.
for city in cities:
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field BEFORE touching any list. Appending while reading
        # would leave the lists with different lengths if a later key were
        # missing, and the DataFrame construction would then fail.
        record = (
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
        )
    except KeyError:
        print(f"{city}, #{count} of {len(cities)} was not found. Skipping")
    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON.
        print(f"{city}, #{count} of {len(cities)} request failed ({err}). Skipping")
    else:
        lat, lon, temp, humidity, cloud_cover, wind_speed, country, date = record
        lat_lst.append(lat)
        lon_lst.append(lon)
        temp_lst.append(temp)
        hum_lst.append(humidity)
        cc_lst.append(cloud_cover)
        ws_lst.append(wind_speed)
        con_lst.append(country)
        date_lst.append(date)
        city_lst.append(city)
        print(f"{city}, #{count} of {len(cities)} has data and it has been added to the data base")
    count += 1

In [None]:
# Assemble the collected lists into a DataFrame. Column names and their source
# lists are paired positionally, so the two sequences must stay in the same order.
column_names = ['city', 'lat', 'lon', 'temp', 'humidity',
                'cloud_cover', 'wind_speed', 'country', 'date']
column_values = [city_lst, lat_lst, lon_lst, temp_lst, hum_lst,
                 cc_lst, ws_lst, con_lst, date_lst]
weather_dict = dict(zip(column_names, column_values))

# Build the frame from the dictionary
weather_data = pd.DataFrame(data=weather_dict)

# Show the first rows as a quick check that everything lined up
weather_data.head()

In [None]:
# Persist the collected records to CSV in the working directory
# (the DataFrame index is written as the first column).
csv_path = 'weather_data.csv'
weather_data.to_csv(path_or_buf=csv_path)

In [None]:
# Summary statistics for the numeric columns, as a quick range check on the data.
weather_data.describe()

In [None]:
# Split the records into two hemispheres; latitude 0 is grouped with the north.
latitudes = weather_data['lat']
north_df = weather_data.loc[latitudes.ge(0)]
south_df = weather_data.loc[latitudes.lt(0)]

In [None]:
# Reusable plotting helper for every scatter plot in this notebook
def make_me_a_graph(x,y,dataframe,head_title,regress=False):
    """Scatter-plot one dataframe column against another and save the figure.

    Parameters
    ----------
    x, y : str
        Names of the columns in ``dataframe`` to put on the x and y axes.
        They are also used verbatim as the axis labels.
    dataframe : pandas.DataFrame
        Table holding both columns.
    head_title : str
        Plot title. It is also the file name (plus ``.png``) under ``images/``.
    regress : bool, default False
        When truthy, fit a linear regression of ``y`` on ``x``, draw it in red
        and show the r-value and line equation in the legend.

    Returns
    -------
    None
        The figure is left open so the notebook can render it, and it is
        written to ``images/<head_title>.png``.
    """
    # Pull out the two columns named by the string arguments
    x_axis = dataframe[x]
    y_axis = dataframe[y]

    # Basic scatter plot with labels and a dash-dot grid
    fig, ax1 = plt.subplots(1,1,figsize=(10,10))
    ax1.scatter(x_axis,y_axis)
    ax1.set(xlabel=x,ylabel=y,title=head_title)
    ax1.grid(linestyle = '-.')

    # Optional regression line
    if regress:
        # Only slope, intercept and r-value are needed; p-value and stderr are ignored
        slope, intercept, r_value, _, _ = st.linregress(x_axis,y_axis)
        nl = '\n'
        eq = f"y={round(slope,2)}*x + {round(intercept,2)}"
        ax1.plot(x_axis, slope*x_axis + intercept, color = 'r', label=f"R-value = {r_value}{nl} {eq}")
        ax1.legend(loc='best')

    # savefig does not create missing directories, so make sure the target exists
    os.makedirs("images", exist_ok=True)
    fig.savefig(f"images/{head_title}.png")

In [None]:
make_me_a_graph(x="lat", y="temp", dataframe=weather_data, head_title="Temperature (F) vs. Latitude")
# The first thing I notice is that there are more points at more positive latitudes, which makes sense:
# more of the land mass is in the northern hemisphere.
# Second, the temperature does appear to depend on latitude, but it peaks around 20.
# This is probably a result of oceanic currents, along with the fact that it is spring in the north and fall in the south.

In [None]:
make_me_a_graph(x="lat", y="humidity", dataframe=weather_data, head_title="Humidity (%) vs. Latitude")
# Humidity appears to dip at latitudes between 20 and 40 degrees on either side of the equator.

In [None]:
make_me_a_graph(x="lat", y="cloud_cover", dataframe=weather_data, head_title="Cloudiness (%) vs. Latitude")
# There does not appear to be much of a correlation between cloud cover and latitude.
# However, the points congregate around 100, 90, 75, 40, 20, and 0.
# This leads me to think that some of the data is collected by a person making a rough estimate.

In [None]:
make_me_a_graph(x="lat", y="wind_speed", dataframe=weather_data, head_title="Wind Speed (mph) vs. Latitude")
# There are a few outliers in this group that seem safe to ignore.
# There is a slight tendency toward higher wind speeds further from the equator.

In [None]:
make_me_a_graph(x="lat", y="temp", dataframe=north_df, head_title="Northern Hemisphere - Temperature (F) vs. Latitude", regress=True)
# There is certainly a decent amount of correlation based on the r-value.
# As expected, the further from the equator we get, the cooler the temperatures.

In [None]:
make_me_a_graph(x="lat", y="humidity", dataframe=north_df, head_title="Northern Hemisphere - Humidity (%) vs. Latitude", regress=True)
# The r-value supports the claim that there is very little correlation between humidity and latitude.
# It should be noted that the areas of lowest humidity are between 20 and 40.

In [None]:
make_me_a_graph(x="lat", y="cloud_cover", dataframe=north_df, head_title="Northern Hemisphere - Cloudiness (%) vs. Latitude", regress=True)
# There is basically zero correlation between cloud cover and latitude.

In [None]:
make_me_a_graph(x="lat", y="wind_speed", dataframe=north_df, head_title="Northern Hemisphere - Wind Speed (mph) vs. Latitude", regress=True)
# There is basically zero correlation between wind speed and latitude.

In [None]:
make_me_a_graph(x="lat", y="temp", dataframe=south_df, head_title="Southern Hemisphere - Temperature (F) vs. Latitude", regress=True)
# There is certainly a decent amount of correlation based on the r-value.
# As expected, the further from the equator we get, the cooler the temperatures.

In [None]:
make_me_a_graph(x="lat", y="humidity", dataframe=south_df, head_title="Southern Hemisphere - Humidity (%) vs. Latitude", regress=True)
# The r-value supports the claim that there is very little correlation between humidity and latitude.
# It should be noted that the areas of lowest humidity are between -20 and -40.

In [None]:
make_me_a_graph(x="lat", y="cloud_cover", dataframe=south_df, head_title="Southern Hemisphere - Cloudiness (%) vs. Latitude", regress=True)
# There is basically zero correlation between cloud cover and latitude.

In [None]:
make_me_a_graph(x="lat", y="wind_speed", dataframe=south_df, head_title="Southern Hemisphere - Wind Speed (mph) vs. Latitude", regress=True)
# There is basically zero correlation between wind speed and latitude.