In [5]:
# Import the dependencies.
import json
import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

from config import weather_api_key

In [6]:
# Draw 1,500 random latitudes from [-90, 90) and 1,500 random longitudes
# from [-180, 180).
lats = np.random.uniform(low=-90, high=90, size=1500)
lngs = np.random.uniform(low=-180, high=180, size=1500)

# Pair each latitude with the longitude at the same position. zip() returns a
# lazy iterator of (lat, lng) tuples, so the cell output is only its repr.
lats_lngs = zip(lats, lngs)
lats_lngs

<zip at 0x240520df288>

In [7]:
# Add the latitudes and longitudes to a list of (lat, lng) tuples.
# Zip lats and lngs directly instead of consuming the one-shot lats_lngs
# iterator, so re-running this cell does not silently produce an empty list.
coordinates = list(zip(lats, lngs))


In [8]:
# Using the citipy module, look up the nearest city for every (lat, lng) pair
# in coordinates and keep each city name only once.
# cities keeps the names in first-seen order.
cities = []
# Names already collected; a set makes the duplicate check O(1) instead of
# scanning the whole cities list for every coordinate.
seen_cities = set()

for lat, lng in coordinates:

    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Report how many unique cities were found.
print(len(cities))

599


In [9]:
# Base URL for the OpenWeatherMap weather API call, requesting imperial units.
# HTTPS is used so the API key in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [None]:
# Create an empty list to hold the weather data (one dictionary per city).
city_data = []

# Print the beginning of the logging
print("Beginning Data Retrieval")
print("------------------------")

# Create counters
record_count = 1
set_Count = 1

# Loop through all the cities in our list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes. The 60-second pause
    # between sets is presumably there to stay under the API rate limit.
    if (i % 50 == 0 and i >= 50):
        set_Count += 1
        record_count = 1
        time.sleep(60)

    # Create endpoint URL with each city (spaces become "+").
    city_url = url + "&q=" + city.replace(" ", "+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_Count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    # Exception handling: a failed request or an incomplete response skips
    # the city instead of aborting the whole run.
    try:
        # The timeout keeps one stalled connection from hanging the loop.
        response = requests.get(city_url, timeout=30)
        city_weather = response.json()

        # Parse out the needed data. A response without these keys raises
        # KeyError and is handled below as "not found".
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        # Convert the Unix timestamp to a "YYYY-MM-DD HH:MM:SS" UTC string.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime("%Y-%m-%d %H:%M:%S")

        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Latitude": city_lat,
                          "Longitude": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Clouds": city_clouds,
                          "Wind": city_wind,
                          "Country": city_country,
                          "Date": city_date})
    except requests.exceptions.RequestException as err:
        # Network-level failure (connection error, timeout, ...). Only the
        # exception type is printed: the message can embed the request URL,
        # which contains the API key.
        print(f"Request failed ({type(err).__name__}). Skipping...")
    except (KeyError, TypeError, ValueError):
        # The response was not valid JSON or lacks the expected fields.
        # Unlike a bare except, this lets KeyboardInterrupt stop the loop.
        print("City not found. Skipping...")


# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")
       

Beginning Data Retrieval
------------------------
Processing Record 1 of Set 1 | mataura
Processing Record 2 of Set 1 | dikson
Processing Record 3 of Set 1 | kenai
Processing Record 4 of Set 1 | fort nelson
Processing Record 5 of Set 1 | chapais
Processing Record 6 of Set 1 | nampula
Processing Record 7 of Set 1 | busselton
Processing Record 8 of Set 1 | rikitea
Processing Record 9 of Set 1 | albany
Processing Record 10 of Set 1 | hasaki
Processing Record 11 of Set 1 | mar del plata
Processing Record 12 of Set 1 | iwanai
Processing Record 13 of Set 1 | taolanaro
City not found. Skipping...
Processing Record 14 of Set 1 | artyom
Processing Record 15 of Set 1 | san jose
Processing Record 16 of Set 1 | bethel
Processing Record 17 of Set 1 | vaini
Processing Record 18 of Set 1 | nikel
Processing Record 19 of Set 1 | mahebourg
Processing Record 20 of Set 1 | bredasdorp
Processing Record 21 of Set 1 | laguna
Processing Record 22 of Set 1 | nikolskoye
Processing Record 23 of Set 1 | victoria


Processing Record 38 of Set 4 | margate
Processing Record 39 of Set 4 | dukat
Processing Record 40 of Set 4 | paraopeba
Processing Record 41 of Set 4 | kobojango
City not found. Skipping...
Processing Record 42 of Set 4 | tacuati
Processing Record 43 of Set 4 | vardo
Processing Record 44 of Set 4 | denpasar
Processing Record 45 of Set 4 | sao filipe
Processing Record 46 of Set 4 | buqayq
City not found. Skipping...
Processing Record 47 of Set 4 | adrar
Processing Record 48 of Set 4 | salalah
Processing Record 49 of Set 4 | alta floresta
Processing Record 50 of Set 4 | krasnofarfornyy
Processing Record 1 of Set 5 | atsiki
City not found. Skipping...
Processing Record 2 of Set 5 | donegal
Processing Record 3 of Set 5 | sinnamary
Processing Record 4 of Set 5 | arraial do cabo
Processing Record 5 of Set 5 | isangel
Processing Record 6 of Set 5 | yarada
Processing Record 7 of Set 5 | georgetown
Processing Record 8 of Set 5 | jumla
Processing Record 9 of Set 5 | norman wells
Processing Recor

In [None]:
# Number of cities for which weather data was successfully retrieved and parsed.
len(city_data)

In [None]:
# Build a DataFrame from the list of per-city dictionaries; each dictionary
# becomes one row and its keys become the column labels.
city_data_df = pd.DataFrame(data=city_data)
# Preview the first ten rows.
city_data_df.head(n=10)

In [None]:
# Reorder the columns as City, Country, Date, Latitude, Longitude, Max Temp,
# Humidity, Clouds, and Wind.
new_columns_list = [
    "City", "Country", "Date",
    "Latitude", "Longitude",
    "Max Temp", "Humidity", "Clouds", "Wind",
]

# Select all rows with the columns in the new order, then show the labels.
city_data_df = city_data_df.loc[:, new_columns_list]
city_data_df.columns

In [None]:
# Display the first 10 rows of the DataFrame.
city_data_df.head(10)

In [None]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"

# to_csv() does not create missing folders, so make sure the target directory
# exists first; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)

# Write the DataFrame, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")

## We'll create scatter plots for latitude vs. maximum temperature, humidity, cloudiness, and wind speed. 

In [None]:
# Extract relevant fields from the DataFrame for plotting.
city_data_df.columns