In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pprint
import requests
import time
import json
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

In [2]:
# Import API key
from api_keys import weather_api_key

In [3]:
# Output File (CSV)
# Relative path of the CSV output file.
# NOTE(review): no visible cell reads or writes this path - confirm whether a
# CSV export step is missing from the notebook.
output_data_file = "output_data/cities.csv"

In [4]:
# (min, max) bounds used when sampling random latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

In [5]:
# Holders for the coordinate pairs and the de-duplicated city names
lat_lngs = []
cities = []

# Draw 1500 random latitudes, then 1500 random longitudes, and pair them up
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. Dict keys keep only the
# first occurrence of each name, in encounter order, which drops the repeats.
nearest_names = (citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs)
cities = list(dict.fromkeys(nearest_names))

# Show the number of unique cities to confirm the sample is large enough
len(cities)

619

In [6]:
# Save config information.
# HTTPS is used so the API key carried in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "Imperial"

# Build partial query URL; the city name is appended after the trailing "q="
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [7]:
# Smoke-test the query URL with a single lookup and print the decoded JSON payload
probe = requests.get(query_url + "china")
view = probe.json()
print(view)
# Pretty-printed alternative:
#print(json.dumps(view,indent=4, sort_keys=True))

{'coord': {'lon': -99.23, 'lat': 25.7}, 'weather': [{'id': 800, 'main': 'Clear', 'description': 'clear sky', 'icon': '01d'}], 'base': 'stations', 'main': {'temp': 98.11, 'feels_like': 97.05, 'temp_min': 98.11, 'temp_max': 98.11, 'pressure': 1010, 'humidity': 29, 'sea_level': 1010, 'grnd_level': 993}, 'wind': {'speed': 7.94, 'deg': 81}, 'clouds': {'all': 0}, 'dt': 1588708230, 'sys': {'country': 'MX', 'sunrise': 1588679816, 'sunset': 1588727407}, 'timezone': -18000, 'id': 3530839, 'name': 'China', 'cod': 200}


In [13]:
#So far I have a list of cities and access to the API. I'm not sure if I have lat stored, but I can easily collect it.
#I also need longitude, temperature, humidity, cloudiness, wind speed, country, and time, so I need empty lists for them.
city_lst = []
lat_lst = []
lon_lst = []
temp_lst = []
hum_lst = []
cc_lst = []
ws_lst = []
con_lst = []
date_lst = []

#Next I loop through all the cities in my original list. Not all of them will have data; those come back as an error payload.
#To handle this I build a new list of cities that only gets populated if there is data.
for city in cities:
    try:
        # A timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failure or a non-JSON body: report it and move on to the next city
        print(f"{city} could not be retrieved: {err}")
        continue

    try:
        # Read every field BEFORE appending anything. If any key is missing,
        # no list is touched, so all columns stay the same length.
        record = (
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
        )
    except KeyError:
        print(f"{city} was not found")
        continue

    lat_lst.append(record[0])
    lon_lst.append(record[1])
    temp_lst.append(record[2])
    hum_lst.append(record[3])
    cc_lst.append(record[4])
    ws_lst.append(record[5])
    con_lst.append(record[6])
    date_lst.append(record[7])
    city_lst.append(city)

illoqqortoormiut was not found
belushya guba was not found
ishlei was not found
ardistan was not found
marcona was not found
nizhneyansk was not found
grand river south east was not found
aflu was not found
haibowan was not found
ust-bolsheretsk was not found
taolanaro was not found
solsvik was not found
kegayli was not found
sedelnikovo was not found
mutsamudu was not found
wahran was not found
umzimvubu was not found
barentsburg was not found
kuche was not found
bajo baudo was not found
vaitupu was not found
tsihombe was not found
samusu was not found
duz was not found
cam pha was not found
amderma was not found
saleaula was not found
tumannyy was not found
mys shmidta was not found
paucartambo was not found
sentyabrskiy was not found
addi ugri was not found
sinkat was not found
louisbourg was not found
utiroa was not found
warqla was not found
safwah was not found
fort saint john was not found
tabiauea was not found
halalo was not found
saryshagan was not found
falealupo was not fou

In [15]:
#With the lists collected, pair each column name with its list to form the dictionary the dataframe is built from
column_names = ['city', 'lat', 'lon', 'temp', 'humidity', 'cloud_cover', 'wind_speed', 'country', 'date']
column_values = [city_lst, lat_lst, lon_lst, temp_lst, hum_lst, cc_lst, ws_lst, con_lst, date_lst]
weather_dict = dict(zip(column_names, column_values))

#Build the dataframe from that dictionary
weather_data = pd.DataFrame(weather_dict)

#Preview the first rows
weather_data.head()

Unnamed: 0,city,lat,lon,temp,humidity,cloud_cover,wind_speed,country,date
0,barrow,71.29,-156.79,14.0,78,1,11.41,US,1588714222
1,vaini,-21.2,-175.2,78.8,83,75,6.93,TO,1588714222
2,yanam,16.73,82.22,80.37,92,13,1.92,IN,1588714222
3,esperance,-33.87,121.9,57.2,58,90,33.33,AU,1588714222
4,qaanaaq,77.48,-69.36,17.91,88,98,5.57,GL,1588714192


In [17]:
#Summary statistics (count, mean, std, min, quartiles, max) for weather_data
weather_data.describe()

Unnamed: 0,lat,lon,temp,humidity,cloud_cover,wind_speed,date
count,566.0,566.0,566.0,566.0,566.0,566.0,566.0
mean,20.279912,18.776307,62.198799,69.90636,49.772085,8.364735,1588714000.0
std,32.761958,90.183979,18.896526,22.352685,38.342786,6.650517,127.2606
min,-54.8,-179.17,-2.83,1.0,0.0,0.16,1588714000.0
25%,-6.0075,-61.3725,49.4575,57.25,10.0,4.0,1588714000.0
50%,25.425,22.725,63.525,76.0,45.5,6.93,1588714000.0
75%,46.5725,99.7725,78.6375,87.0,90.0,10.81,1588714000.0
max,78.22,179.32,100.87,100.0,100.0,61.07,1588715000.0


In [24]:
#Split the dataframe into two hemispheres by latitude; a latitude of exactly 0 goes to the north
lat_col = weather_data['lat']
north_df = weather_data.loc[lat_col.ge(0)]
south_df = weather_data.loc[lat_col.lt(0)]

In [28]:
#Now to make graphs I am going to create a function
def make_me_a_graph(x,y,dataframe,regress=False):
    """Draw a scatter plot of dataframe[y] against dataframe[x] on a new 10x10 figure.

    Parameters:
        x: key of the values plotted on the x axis (also used as the x label).
        y: key of the values plotted on the y axis (also used as the y label).
        dataframe: any object indexable by ``x`` and ``y``.
        regress: when True, overlay the least-squares line fitted with
            ``scipy.stats.linregress`` and show a legend with its equation.
            The default (False) draws the scatter plot only.

    Returns:
        None. The figure is created through ``plt.subplots``.
    """
    x_axis = dataframe[x]
    y_axis = dataframe[y]

    fig, (ax1) = plt.subplots(1,1,figsize=(10,10))
    ax1.scatter(x_axis,y_axis,label=x)
    ax1.set(xlabel=x,ylabel=y,title=f"{y} vs {x}")
    ax1.grid(linestyle = '-.')

    if regress:
        x_vals = np.asarray(x_axis, dtype=float)
        y_vals = np.asarray(y_axis, dtype=float)

        # Only fit on pairs where both coordinates are real numbers
        usable = np.isfinite(x_vals) & np.isfinite(y_vals)
        x_vals = x_vals[usable]
        y_vals = y_vals[usable]

        # A line fit needs at least two points with different x values;
        # otherwise the overlay is skipped and the scatter plot stands alone.
        if x_vals.size >= 2 and x_vals.min() != x_vals.max():
            fit = linregress(x_vals, y_vals)
            # Two end points are enough to draw a straight line
            x_ends = np.array([x_vals.min(), x_vals.max()])
            y_ends = fit.slope * x_ends + fit.intercept
            ax1.plot(
                x_ends,
                y_ends,
                color="red",
                label=f"y = {fit.slope:.2f}x + {fit.intercept:.2f} (r^2 = {fit.rvalue ** 2:.2f})",
            )
            ax1.legend()

In [35]:
#make_me_a_graph("lat","temp",weather_data)
#The first thing I notice is that there are more points the more positive the latitude, which makes sense:
#more of the land mass is in the northern hemisphere.
#Second, the temperature does appear to be latitude dependent but hits its peak around 20.
#This is probably a result of the oceanic currents, along with the fact that it is springtime in the north and fall in the south.

In [31]:
#make_me_a_graph("lat","humidity",weather_data)

In [32]:
#make_me_a_graph("lat","cloud_cover",weather_data)

In [33]:
#make_me_a_graph("lat","wind_speed",weather_data)