In [1]:
# Import the dependencies.
import json
import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

from config import weather_api_key

In [2]:
# Sample 1,500 random latitudes and longitudes that together span the globe.
lats = np.random.uniform(low=-90, high=90, size=1500)
lngs = np.random.uniform(low=-180, high=180, size=1500)

# Pair each latitude with its longitude; zip() builds the (lat, lng) pairs lazily.
lats_lngs = zip(lats, lngs)
lats_lngs

<zip at 0x2187e2ec8c8>

In [3]:
# Materialize the (latitude, longitude) pairs into a list.
# A zip() iterator can be consumed only once, so re-running this cell without
# first re-running the cell that builds lats_lngs yields an empty list.
coordinates = list(lats_lngs)


In [4]:
# Using the citipy module, look up the nearest city for every coordinate pair
# and keep each city name only once, in first-seen order.
# dict.fromkeys() de-duplicates with constant-time lookups while preserving
# insertion order, avoiding a scan of the whole list for every new coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))

# Report how many unique cities were found.
print(len(cities))

597


In [5]:
# Base URL for the OpenWeatherMap current-weather endpoint (imperial units).
# HTTPS keeps the API key in the query string from crossing the network in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [6]:
# Create an empty list to hold the weather data
city_data = []

# Print the beginning of the logging
print("Beginning Data Retrieval")
print("------------------------")

# Create counters
record_count = 1
set_Count = 1

# Loop through all the cities in our list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_Count += 1
        record_count = 1
        # Pause for a minute between sets.
        # NOTE(review): presumably to stay under the API's rate limit — confirm.
        time.sleep(60)

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ", "+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_Count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(city_url, timeout=10)
        city_weather = response.json()

        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        # Convert the Unix timestamp to a UTC "YYYY-MM-DD HH:MM:SS" string.
        # time.gmtime() replaces the deprecated datetime.utcfromtimestamp().
        city_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(city_weather["dt"]))

        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Latitude": city_lat,
                          "Longitude": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Clouds": city_clouds,
                          "Wind": city_wind,
                          "Country": city_country,
                          "Date": city_date})
    except KeyError:
        # The response lacks the expected fields, which is how a lookup for an
        # unknown city shows up here.
        print("City not found. Skipping...")
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout, or a body that is not valid JSON.
        # Only the exception type is printed: the full message can contain the
        # request URL, which includes the API key.
        print(f"Request failed ({type(exc).__name__}). Skipping...")


# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")
       

Beginning Data Retrieval
------------------------
Processing Record 1 of Set 1 | rikitea
Processing Record 2 of Set 1 | raudeberg
Processing Record 3 of Set 1 | tiksi
Processing Record 4 of Set 1 | margate
Processing Record 5 of Set 1 | hermanus
Processing Record 6 of Set 1 | launceston
Processing Record 7 of Set 1 | barrow
Processing Record 8 of Set 1 | bluff
Processing Record 9 of Set 1 | mar del plata
Processing Record 10 of Set 1 | vardo
Processing Record 11 of Set 1 | saint-philippe
Processing Record 12 of Set 1 | upernavik
Processing Record 13 of Set 1 | taolanaro
City not found. Skipping...
Processing Record 14 of Set 1 | ilulissat
Processing Record 15 of Set 1 | ponta do sol
Processing Record 16 of Set 1 | macusani
Processing Record 17 of Set 1 | sakakah
Processing Record 18 of Set 1 | carnarvon
Processing Record 19 of Set 1 | sitka
Processing Record 20 of Set 1 | pangnirtung
Processing Record 21 of Set 1 | ushuaia
Processing Record 22 of Set 1 | yambio
Processing Record 23 of 

Processing Record 37 of Set 4 | cockburn town
Processing Record 38 of Set 4 | lorengau
Processing Record 39 of Set 4 | slave lake
Processing Record 40 of Set 4 | puerto cabezas
Processing Record 41 of Set 4 | ust-kamchatsk
City not found. Skipping...
Processing Record 42 of Set 4 | dezhou
Processing Record 43 of Set 4 | inyonga
Processing Record 44 of Set 4 | narsaq
Processing Record 45 of Set 4 | ribeira grande
Processing Record 46 of Set 4 | chokurdakh
Processing Record 47 of Set 4 | anchorage
Processing Record 48 of Set 4 | zhigansk
Processing Record 49 of Set 4 | kirakira
Processing Record 50 of Set 4 | vastervik
City not found. Skipping...
Processing Record 1 of Set 5 | bima
Processing Record 2 of Set 5 | the valley
Processing Record 3 of Set 5 | mahadday weyne
City not found. Skipping...
Processing Record 4 of Set 5 | marsabit
Processing Record 5 of Set 5 | moerai
Processing Record 6 of Set 5 | hermon
Processing Record 7 of Set 5 | hambantota
Processing Record 8 of Set 5 | mocuba

KeyboardInterrupt: 

In [None]:
# Number of cities for which a weather record was appended to city_data.
len(city_data)

In [None]:
# Convert the list of per-city dictionaries into a DataFrame
city_data_df = pd.DataFrame(city_data)
# Preview the first ten rows.
city_data_df.head(10)

In [None]:
# Reorder the columns as City, Country, Date, Latitude, Longitude, Max Temp,
# Humidity, Clouds, and Wind (the same keys used when city_data was built).
new_columns_list = ["City","Country","Date","Latitude","Longitude","Max Temp","Humidity","Clouds","Wind"]

city_data_df = city_data_df[new_columns_list]
# Show the resulting column order.
city_data_df.columns

In [None]:
# Display the first ten rows of the reordered DataFrame
city_data_df.head(10)

In [None]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"

# Make sure the target folder exists first; to_csv() does not create
# missing directories and would otherwise fail on a fresh checkout.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)

# Write the DataFrame, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")

## We'll create scatter plots for latitude vs. maximum temperature, humidity, cloudiness, and wind speed. 

In [None]:
# List the available columns before extracting the fields used for plotting.
city_data_df.columns

In [None]:
# Pull out the five columns used by the scatter plots in one pass.
# Note that `lats` is rebound here: it previously held the random latitudes.
lats, max_temps, humidity, cloudiness, wind_speed = (
    city_data_df[column]
    for column in ("Latitude", "Max Temp", "Humidity", "Clouds", "Wind")
)

## Create a scatter plot for the latitude vs. maximum temperature.

In [None]:
# Date stamp for the title, in the locale's date format.
today = time.strftime("%x")

# Scatter each city's latitude against its maximum temperature.
fig, ax = plt.subplots()
ax.scatter(lats, max_temps, label="Cities", edgecolors="k", alpha=0.8)
ax.set(
    title="City Latitude vs. Max Temperature " + today,
    xlabel="Latitude",
    ylabel="Max Temperature (F)",
)
ax.grid()

# Write the figure to disk before displaying it.
fig.savefig("weather_data/Fig1.png")

plt.show()


## Create a scatter plot that compares the latitude vs. the humidity.

In [None]:
# Date stamp for the title, in the locale's date format.
today = time.strftime("%x")

# Scatter each city's latitude against its humidity.
fig, ax = plt.subplots()
ax.scatter(lats, humidity, label="Cities", edgecolors="k", alpha=0.8)
ax.set(
    title="City Latitude vs. Humidity " + today,
    xlabel="Latitude",
    ylabel="Humidity (%)",
)
ax.grid()

# Write the figure to disk before displaying it.
fig.savefig("weather_data/Fig2.png")

plt.show()

## Create a scatter plot that compares the latitude vs. the cloudiness.

In [None]:
# Date stamp for the title, in the locale's date format.
today = time.strftime("%x")

# Scatter each city's latitude against its cloud cover.
fig, ax = plt.subplots()
ax.scatter(lats, cloudiness, label="Cities", edgecolors="k", alpha=0.8)
ax.set(
    title="City Latitude vs. Cloudiness " + today,
    xlabel="Latitude",
    ylabel="Cloudiness (%)",
)
ax.grid()

# Write the figure to disk before displaying it.
fig.savefig("weather_data/Fig3.png")

plt.show()

## Create a scatter plot that compares the latitude vs. the wind speed.

In [None]:
# Date stamp for the title, in the locale's date format.
today = time.strftime("%x")

# Scatter each city's latitude against its wind speed.
fig, ax = plt.subplots()
ax.scatter(lats, wind_speed, label="Cities", edgecolors="k", alpha=0.8)
ax.set(
    title="City Latitude vs. Wind Speed " + today,
    xlabel="Latitude",
    ylabel="Wind Speed (mph)",
)
ax.grid()

# Write the figure to disk before displaying it.
fig.savefig("weather_data/Fig4.png")

plt.show()

### Create a function to perform linear regression on the weather data and plot the regression line and its equation with the data.

In [None]:
# Import dependencies.
from scipy.stats import linregress

In [None]:
def plot_linear_regression(x_values,y_values,title,ylabel,text_coordinates):
    """Scatter-plot y against x, overlay the least-squares line, and show it.

    Parameters
    ----------
    x_values : sequence of numbers
        Independent variable; the x-axis is always labelled "Latitude".
    y_values : sequence of numbers
        Dependent variable, paired element-wise with ``x_values``.
    title : str
        Title placed above the plot.
    ylabel : str
        Label for the y-axis.
    text_coordinates : tuple
        (x, y) position passed to ``plt.annotate`` for the line equation.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    # Fit once; only the slope and intercept are needed here.
    fit = linregress(x_values, y_values)
    slope, intercept = fit.slope, fit.intercept

    # Build the equation label. The sign is chosen explicitly so a negative
    # intercept reads "x -1.5" instead of "x +-1.5".
    sign = "-" if intercept < 0 else "+"
    line_eq = "y=" + str(round(slope,2)) + "x " + sign + str(round(abs(intercept),2))

    # Get a list of regression line values from the slope and intercept
    regress_values = [(x * slope + intercept) for x in x_values]

    # Create a scatter plot with x_values and y_values
    plt.scatter(x_values, y_values)

    # Plot the regression line in the scatter plot
    plt.plot(x_values, regress_values, "r")

    # Annotate the text for the line equation
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")

    # Add labels
    plt.title(title)
    plt.xlabel("Latitude")
    plt.ylabel(ylabel)

    plt.show()
    

### Create the Hemisphere DataFrames

In [None]:
# Preview the rows that fall in the Northern Hemisphere (latitude >= 0).
city_data_df.loc[(city_data_df["Latitude"] >= 0)].head()

In [None]:
# Split the cities at the equator: latitude 0 counts as northern.
northern_hemi_df = city_data_df[city_data_df["Latitude"].ge(0)]
southern_hemi_df = city_data_df[city_data_df["Latitude"].lt(0)]

### Perform Linear Regression on the Maximum Temperature for the Northern Hemisphere

In [None]:
# Linear regression on the Northern Hemisphere
# x_values/y_values are used (as in the other regression cells) so that the
# all-cities `max_temps` series plotted by the latitude-vs-temperature
# scatter cell is not overwritten with a hemisphere subset.
x_values = northern_hemi_df["Latitude"]
y_values = northern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       "Latitudes Vs Maximum Temperatures For Northern Hemisphere",
                       "Maximum Temperature (F)", (10,40))

### Perform Linear Regression on the Maximum Temperature for the Southern Hemisphere

In [None]:
# Linear regression on the Southern Hemisphere
# x_values/y_values are used (as in the other regression cells) so that the
# all-cities `max_temps` series plotted by the latitude-vs-temperature
# scatter cell is not overwritten with a hemisphere subset.
x_values = southern_hemi_df["Latitude"]
y_values = southern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere for Maximum Temperature',
                       "Maximum Temperature (F)", (-50,90))

In [None]:
# Northern Hemisphere: regress humidity on latitude.
x_values, y_values = northern_hemi_df["Latitude"], northern_hemi_df["Humidity"]

# Draw the scatter, the fitted line, and the equation label.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere for % Humidity',
    ylabel='% Humidity',
    text_coordinates=(40, 10),
)

In [None]:
# Southern Hemisphere: regress humidity on latitude.
x_values, y_values = southern_hemi_df["Latitude"], southern_hemi_df["Humidity"]

# Draw the scatter, the fitted line, and the equation label.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere for % Humidity',
    ylabel='% Humidity',
    text_coordinates=(-50, 15),
)


In [None]:
# Northern Hemisphere: regress cloud cover on latitude.
x_values, y_values = northern_hemi_df["Latitude"], northern_hemi_df["Clouds"]

# Draw the scatter, the fitted line, and the equation label.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere for cloudiness',
    ylabel='Cloudiness',
    text_coordinates=(40, 10),
)


In [None]:
# Southern Hemisphere: regress cloud cover on latitude.
x_values, y_values = southern_hemi_df["Latitude"], southern_hemi_df["Clouds"]

# Draw the scatter, the fitted line, and the equation label.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere for % Cloudiness',
    ylabel='% Cloudiness',
    text_coordinates=(-50, 30),
)


In [None]:
# Northern Hemisphere: regress wind speed on latitude.
x_values, y_values = northern_hemi_df["Latitude"], northern_hemi_df["Wind"]

# Draw the scatter, the fitted line, and the equation label.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere for Wind Speed',
    ylabel='Wind Speed',
    text_coordinates=(10, 25),
)


In [None]:
# Southern Hemisphere: regress wind speed on latitude.
x_values, y_values = southern_hemi_df["Latitude"], southern_hemi_df["Wind"]

# Draw the scatter, the fitted line, and the equation label.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere for Wind Speed',
    ylabel='Wind Speed',
    text_coordinates=(-50, 25),
)