# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output file (CSV) path.
output_data_file = "output_data/cities.csv"

# Lower and upper bounds used when drawing the random latitude and
# longitude samples.
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Draw 1500 random latitudes and 1500 random longitudes and pair them up.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair and keep each name once,
# in order of first appearance (dict keys are unique and insertion-ordered).
cities = list(
    dict.fromkeys(
        citipy.nearest_city(point[0], point[1]).city_name
        for point in lat_lngs
    )
)

# Show the city count to confirm there are enough cities to work with.
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information.
# NOTE(review): this endpoint is plain HTTP, so the API key in the query
# string is sent unencrypted -- consider switching to https after confirming
# the service accepts it.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL; the city name is appended for each request.
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Parallel lists holding response info: one entry per city whose response
# contained every field read below.
compcities = []
lat = []
lng = []
tempmax = []
humidity = []
clouds = []
winds = []
countries = []
dates = []
count = 1

# Seconds to wait for a response before giving up on that city, so a single
# stalled connection cannot hang the whole run.
request_timeout = 10

# Loop through the list of cities and perform a request for data on each
print('Data Retrieval In Process')
print('------------------------------')
for city in cities:
    try:
        response = requests.get(query_url + city, timeout=request_timeout).json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # Only the exception type is printed: the exception message may embed
        # the request URL, which contains the API key.
        print(f"Request for city {city} failed ({type(err).__name__}). Skipping.")
        continue

    try:
        # Read every field before touching the result lists, so a response
        # that lacks any one field cannot leave the lists with unequal
        # lengths (which would break building the DataFrame later).
        city_lat = response['coord']['lat']
        city_lng = response['coord']['lon']
        city_tempmax = response['main']['temp_max']
        city_humidity = response['main']['humidity']
        city_clouds = response['clouds']['all']
        city_wind = response['wind']['speed']
        city_country = response['sys']['country']
        city_date = response['dt']
    except (KeyError, TypeError):
        print(f"City {city} not found. Skipping.")
        continue

    lat.append(city_lat)
    lng.append(city_lng)
    tempmax.append(city_tempmax)
    humidity.append(city_humidity)
    clouds.append(city_clouds)
    winds.append(city_wind)
    countries.append(city_country)
    dates.append(city_date)
    compcities.append(city)
    print(f"Retrieving City #{count}: {city}.")
    count = count + 1
print('------------------------------')
print('Data Retrieved')
print('------------------------------')

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Gather the per-city lists into one table, one column per measurement.
weather_columns = {
    'City': compcities,
    "Lat": lat,
    "Lng": lng,
    "Max Temp": tempmax,
    "Humidity": humidity,
    "Cloudiness": clouds,
    "Wind Speed": winds,
    "Country": countries,
    "Date": dates,
}
weather_df = pd.DataFrame(weather_columns)

# Write the table to CSV without the index column, then preview the first rows.
weather_df.to_csv('citydata.csv', index=False)
weather_df.head()

In [None]:
# Summary statistics of the weather table, as returned by DataFrame.describe().
weather_summary_df = weather_df.describe()
weather_summary_df

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Check if there are cities with humidity of more than 100%
print("Analyzing data for humidity values over 100%")
print('-----------------------------------------')
humidvals = weather_df['Humidity'].to_list()
# Count the readings above 100. (The previous loop used `==`, a comparison,
# where an assignment was intended, so the counter never left zero.)
humidcount = sum(1 for value in humidvals if value > 100)
if humidcount == 0:
    print("There are no cities with more than 100% humidity, sorry!")
else:
    print(f"There are {humidcount} cities with more than 100% humidity")
# The closing banner is the same for both outcomes.
print('-----------------------------------------')
print("Analysis done")
print('-----------------------------------------')

In [None]:
#  Get the indices of cities that have humidity over 100%.
if humidcount == 0:
    print("Again, there are no cities with more than 100% humidity, sorry!")
    # Define the name in this branch too, so later code can rely on it.
    humidindexes = []
else:
    # Filter on the 'Humidity' column; the table has no 'Track Name' column,
    # so the previous filter raised KeyError whenever this branch ran.
    humid_weather_df = weather_df[weather_df['Humidity'] > 100]
    humidindexes = humid_weather_df.index.tolist()
    # A bare expression inside an if/else block is not displayed, so print it.
    print(humidindexes)

In [None]:
# Build "clean_city_data": the weather table without the rows whose index
# labels were collected as humidity outliers. DataFrame.drop returns a new
# frame, so weather_df itself is left untouched. When no outliers were
# counted, the table is used as-is.
if humidcount != 0:
    clean_city_data = weather_df.drop(index=humidindexes)
else:
    print("Again, there are no cities with more than 100% humidity, sorry!")
    clean_city_data = weather_df
clean_city_data.head()

In [None]:
# Obtain dates in correct format.
# The raw "Date" column is renamed to "Epoch Time", since it is converted
# below as a count of seconds.
clean_city_data = clean_city_data.rename(columns={"Date": "Epoch Time"})
# Convert the whole column at once so each row keeps its own timestamp.
# (The previous per-value loop assigned a single scalar to the entire column
# on every pass, leaving every row with the last city's timestamp.)
clean_city_data['Timestamp'] = pd.to_datetime(clean_city_data['Epoch Time'], unit='s')
clean_city_data['Date'] = clean_city_data['Timestamp'].dt.date
clean_city_data

In [None]:
# Save the date of the first row as a string, for use in the plot titles.
# Positional access (.iloc) is used because a label lookup of 0 fails if the
# row labelled 0 was dropped from the table.
retrievedate = str(clean_city_data['Date'].iloc[0])
retrievedate

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of maximum temperature against latitude for every city.
lattempscatter = clean_city_data[['City','Lat','Max Temp']]
plt.scatter(lattempscatter['Lat'],lattempscatter['Max Temp'],c='orchid',edgecolors='black')
plt.grid()
plt.xlabel("Latitude")
# Axis label spelling corrected (was "Farenheit").
plt.ylabel("Max Temperature (Fahrenheit)")
plt.title(f'City Comparison: Latitude vs. Max Temperature ({retrievedate})', y=1.05)
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude for every city.
humtempscatter = clean_city_data.loc[:, ['City', 'Lat', 'Humidity']]
plt.scatter(
    x=humtempscatter['Lat'],
    y=humtempscatter['Humidity'],
    c='paleturquoise',
    edgecolors='black',
)
plt.grid()
plt.title(f'City Comparison: Latitude vs. Humidity ({retrievedate})', y=1.05)
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude for every city.
clotempscatter = clean_city_data.loc[:, ['City', 'Lat', 'Cloudiness']]
plt.scatter(
    x=clotempscatter['Lat'],
    y=clotempscatter['Cloudiness'],
    c='mistyrose',
    edgecolors='black',
)
plt.grid()
plt.title(f'City Comparison: Latitude vs. Cloudiness ({retrievedate})', y=1.05)
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude for every city.
wsptempscatter = clean_city_data.loc[:, ['City', 'Lat', 'Wind Speed']]
plt.scatter(
    x=wsptempscatter['Lat'],
    y=wsptempscatter['Wind Speed'],
    c='skyblue',
    edgecolors='black',
)
plt.grid()
plt.title(f'City Comparison: Latitude vs. Wind Speed ({retrievedate})', y=1.05)
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.show()

## Linear Regression

In [None]:
# Split the cleaned table by hemisphere using the sign of the latitude:
# rows with latitude >= 0 go to the northern frame, rows with latitude < 0
# to the southern frame.
city_latitudes = clean_city_data['Lat']
north_df = clean_city_data[city_latitudes.ge(0)]
south_df = clean_city_data[city_latitudes.lt(0)]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of max temperature on latitude, northern-hemisphere rows.
x_values = north_df['Lat']
y_values = north_df['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='orchid',edgecolors='magenta')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(5,15),fontsize=12,color="black")
plt.annotate(f'R-squared: {r_squared}',(7,21),fontsize=12,color="magenta")
plt.xlabel("Latitude")
# Axis label spelling corrected (was "Farenheit").
plt.ylabel("Max Temperature (Fahrenheit)")
plt.title(f'Northern Hemisphere Comparison: Latitude vs. Max Temperature ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of max temperature on latitude, southern-hemisphere rows.
x_values = south_df['Lat']
y_values = south_df['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='orchid',edgecolors='black')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(-20,42),fontsize=12,color="magenta")
plt.annotate(f'R-squared: {r_squared}',(-18.5,45),fontsize=12,color="magenta")
plt.xlabel("Latitude")
# Axis label spelling corrected (was "Farenheit").
plt.ylabel("Max Temperature (Fahrenheit)")
plt.title(f'Southern Hemisphere Comparison: Latitude vs. Max Temperature ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of humidity on latitude, northern-hemisphere rows.
x_values = north_df['Lat']
y_values = north_df['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='paleturquoise',edgecolors='black')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(50,12),fontsize=12,color="teal")
plt.annotate(f'R-squared: {r_squared}',(53,17),fontsize=12,color="teal")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f'Northern Hemisphere Comparison: Latitude vs. Humidity ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of humidity on latitude, southern-hemisphere rows.
x_values = south_df['Lat']
y_values = south_df['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='paleturquoise',edgecolors='black')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(-18,30),fontsize=12,color="teal")
plt.annotate(f'R-squared: {r_squared}',(-17,35),fontsize=12,color="teal")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f'Southern Hemisphere Comparison: Latitude vs. Humidity ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of cloudiness on latitude, northern-hemisphere rows.
x_values = north_df['Lat']
y_values = north_df['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='mistyrose',edgecolors='black')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(53,10),fontsize=12,color="mediumvioletred")
plt.annotate(f'R-squared: {r_squared}',(54,19),fontsize=12,color="mediumvioletred")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title(f'Northern Hemisphere Comparison: Latitude vs. Cloudiness ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of cloudiness on latitude, southern-hemisphere rows.
x_values = south_df['Lat']
y_values = south_df['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='mistyrose',edgecolors='black')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(-55,10),fontsize=12,color="mediumvioletred")
plt.annotate(f'R-squared: {r_squared}',(-52,19),fontsize=12,color="mediumvioletred")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title(f'Southern Hemisphere Comparison: Latitude vs. Cloudiness ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of wind speed on latitude, northern-hemisphere rows.
x_values = north_df['Lat']
y_values = north_df['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='skyblue',edgecolors='black')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(53,23),fontsize=12,color="royalblue")
plt.annotate(f'R-squared: {r_squared}',(54,25),fontsize=12,color="royalblue")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f'Northern Hemisphere Comparison: Latitude vs. Wind Speed ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of wind speed on latitude, southern-hemisphere rows.
x_values = south_df['Lat']
y_values = south_df['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Points on the fitted line, evaluated at each observed latitude.
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
# The figure reported is rvalue squared, so it is labelled R-squared rather
# than "R Value".
r_squared = round(rvalue**2, 5)
plt.scatter(x_values,y_values,c='skyblue',edgecolors='black')
plt.plot(x_values,regress_values,"black")
plt.annotate(line_eq,(-55,23),fontsize=12,color="royalblue")
plt.annotate(f'R-squared: {r_squared}',(-52,25),fontsize=12,color="royalblue")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f'Southern Hemisphere Comparison: Latitude vs. Wind Speed ({retrievedate})', y=1.05)
plt.show()
print(f'R-squared: {r_squared}')