# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
# len(cities)
cities

['port-gentil',
 'flinders',
 'rikitea',
 'uchkent',
 'norman wells',
 'chapais',
 'port alfred',
 'fort nelson',
 'east london',
 'miraflores',
 'bluff',
 'punta arenas',
 'sur',
 'marang',
 'kahului',
 'atbasar',
 'busselton',
 'mamallapuram',
 'hermanus',
 'saint-georges',
 'hofn',
 'bilma',
 'kapaa',
 'brae',
 'mataura',
 'camabatela',
 'atuona',
 'barrow',
 'taolanaro',
 'souillac',
 'thompson',
 'port lincoln',
 'ribeira grande',
 'longyearbyen',
 'dembi dolo',
 'hilo',
 'saleaula',
 'worthington',
 'saint-philippe',
 'mount gambier',
 'broome',
 'hami',
 'bambous virieux',
 'samarai',
 'chernyy yar',
 'na wa',
 'new norfolk',
 'katsuura',
 'mar del plata',
 'basco',
 'vila velha',
 'provideniya',
 'jega',
 'petropavlovsk-kamchatskiy',
 'pangai',
 'del rio',
 'glenwood springs',
 'soto la marina',
 'verkhnyaya inta',
 'faanui',
 'karauzyak',
 'vestmannaeyjar',
 'portland',
 'slave lake',
 'katangli',
 'vaitupu',
 'comodoro rivadavia',
 'yellowknife',
 'cravo norte',
 'elko',
 'hi

In [3]:
# My Code

# city_coordinates = pd.DataFrame(cities, lat_lng)

cities_df = pd.DataFrame(cities)
cities_df.columns = ['City']
# cities_df['City'].astype(str)
# cities_df['City'].dtype
# cities_df.head()
cities_df["City"][0]


'port-gentil'

In [4]:
# Define variables and request url

url = f'https://api.openweathermap.org/data/2.5/weather?q={cities_df["City"][0]}&appid={weather_api_key}&units=imperial'
results = requests.get(url).json()
print(results)
# print(json.dumps(results, indent=4, sort_keys=True))


{'coord': {'lon': 8.7815, 'lat': -0.7193}, 'weather': [{'id': 501, 'main': 'Rain', 'description': 'moderate rain', 'icon': '10n'}], 'base': 'stations', 'main': {'temp': 77.76, 'feels_like': 79.27, 'temp_min': 77.76, 'temp_max': 77.76, 'pressure': 1011, 'humidity': 86, 'sea_level': 1011, 'grnd_level': 1010}, 'visibility': 5738, 'wind': {'speed': 14.18, 'deg': 170, 'gust': 16.58}, 'rain': {'1h': 2.87}, 'clouds': {'all': 100}, 'dt': 1668918493, 'sys': {'country': 'GA', 'sunrise': 1668920764, 'sunset': 1668964510}, 'timezone': 3600, 'id': 2396518, 'name': 'Port-Gentil', 'cod': 200}


In [None]:
test = results["wind"]['speed']
test

In [5]:
# cities_df['Lat'] = ""
# cities_df['Lng'] = ""
# cities_df['Max_Temp'] = ""
# cities_df['Humidity'] = ""
# cities_df['Cloudiness'] = ""
# cities_df['Wind_Speed'] = ""
# cities_df['Country'] = ""
# cities_df['Date'] = ""
cities_df.head()


Unnamed: 0,City
0,port-gentil
1,flinders
2,rikitea
3,uchkent
4,norman wells


In [6]:
# city_list = []
# for x in cities_df:
#     city_list.append(cities_df[x])
# print(city_list)
# cities = []
# for x in 

[0       port-gentil
1          flinders
2           rikitea
3           uchkent
4      norman wells
           ...     
612            arak
613       ketchikan
614         iqaluit
615       maiduguri
616       taoudenni
Name: City, Length: 617, dtype: object]


In [16]:
# url = f'https://api.openweathermap.org/data/2.5/weather?q={cities_df['City'][index]}&appid={weather_api_key}&units=imperial'



Lat = []
Lng = []
Temp = []
Hum = []
Cloud =[]
Wind = []
Country = []
Date = []
city_loop = []

# url = f'https://api.openweathermap.org/data/2.5/weather?q={city_list}&appid={weather_api_key}&units=imperial'
set_count = 1
record_count = 1
# print(city_list)
for i, city_loop in enumerate(cities):
    
    if (i%50 == 0 and i >= 50):
        set_count += 1
        record_count = 0
    
    url = f'https://api.openweathermap.org/data/2.5/weather?q={city_loop}&appid={weather_api_key}&units=imperial'
    print(f'Processing Record {record_count} of set {set_count} | {city_loop}')
    results = requests.get(url).json()
    # results = response.json()
    # print(url)
    record_count += 1
    try:
        Lat.append(results['coord']['lat'])
        Lng.append(results['coord']['lon'])
        Temp.append(results['main']['temp_max'])
        Hum.append(results['main']['humidity'])
        Cloud.append(results['clouds']['all'])
        Wind.append(results['wind']['speed'])
        Country.append(results['sys']['country'])
        Date.append(results['dt'])

    except(KeyError):
        print('City not found. Skipping...')
        pass
    
print("------------")  


[0       port-gentil
1          flinders
2           rikitea
3           uchkent
4      norman wells
           ...     
612            arak
613       ketchikan
614         iqaluit
615       maiduguri
616       taoudenni
Name: City, Length: 617, dtype: object]
Processing Record 1 of set 1 | port-gentil
Processing Record 2 of set 1 | flinders
Processing Record 3 of set 1 | rikitea
Processing Record 4 of set 1 | uchkent
Processing Record 5 of set 1 | norman wells
Processing Record 6 of set 1 | chapais
Processing Record 7 of set 1 | port alfred
Processing Record 8 of set 1 | fort nelson
Processing Record 9 of set 1 | east london
Processing Record 10 of set 1 | miraflores
Processing Record 11 of set 1 | bluff
Processing Record 12 of set 1 | punta arenas
Processing Record 13 of set 1 | sur
Processing Record 14 of set 1 | marang
Processing Record 15 of set 1 | kahului
Processing Record 16 of set 1 | atbasar
Processing Record 17 of set 1 | busselton
Processing Record 18 of set 1 | mamallapura

SSLError: HTTPSConnectionPool(host='api.openweathermap.org', port=443): Max retries exceeded with url: /data/2.5/weather?q=nizhneudinsk&appid=55249fdd31523ceb5650c30248c397b8&units=imperial (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))

[1668919732,
 1668919732,
 1668919733,
 1668919734,
 1668919734,
 1668919735,
 1668919735,
 1668919526,
 1668919736,
 1668919737,
 1668919738,
 1668919646,
 1668919738,
 1668919739,
 1668919574,
 1668919740,
 1668919741,
 1668919741,
 1668919742,
 1668919743,
 1668919743,
 1668919744,
 1668919745,
 1668919746,
 1668919746,
 1668919747,
 1668919748,
 1668919748,
 1668919749,
 1668919750,
 1668919751,
 1668919751,
 1668919752,
 1668919753,
 1668919753,
 1668919755,
 1668919755,
 1668919756,
 1668919757,
 1668919757,
 1668919758,
 1668919759,
 1668919760,
 1668919760,
 1668919761,
 1668919762,
 1668919718,
 1668919763,
 1668919763,
 1668919764,
 1668919765,
 1668919525,
 1668919766,
 1668919767,
 1668919768,
 1668919570,
 1668919769,
 1668919769,
 1668919771,
 1668919664,
 1668919531,
 1668919773,
 1668919774,
 1668919774,
 1668919775,
 1668919776,
 1668919507,
 1668919777,
 1668919777,
 1668919778,
 1668919779,
 1668919779,
 1668919780,
 1668919781,
 1668919781,
 1668919782,
 1668919575,

In [None]:

# for index, row in cities_df.iterrows():

#     current_city = row['City']

#     print(f'Processing Record {index} of Set {row} | {current_city}')
    
#     # current_city = row['City']
#     # results = requests.get(url).json()

#     print(results['name'])
    # response

    # # results = response
    # try:
        
        

    #     cities_df.loc[index, 'Lat'] = results['coord']['lat']
    #     cities_df.loc[index, 'Lng'] = results['coord']['lon']
    #     cities_df.loc[index, 'Max_Temp'] = results['main']['temp_max']
    #     cities_df.loc[index, 'Humidity'] = results['main']['humidity']
    #     cities_df.loc[index, 'Cloudiness'] = results['clouds']['all']
    #     cities_df.loc[index, 'Wind_speed'] = results['wind']['speed']
    #     cities_df.loc[index, 'Country'] = results['sys']['country']
    #     cities_df.loc[index, 'Date'] = results['dt']

    # except (KeyError, IndexError):
    #     print('City not found. Skipping...')
        
    # print("------------")

In [None]:
cities_df.head()

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [28]:
# url = f'https://api.openweathermap.org/data/2.5/weather?q={(cities_df['City'])}&appid={weather_api_key}&units=imperial'

# for index, row in cities_df.iterrows():

#     current_city = row['City']
#     print(f'Processing Record {index} of Set {row} | {current_city}')
#     response = requests.get(url).json()
#     results = response['results']
    
#     try:
        
        

#         cities_df.loc[index, 'Lat'] = results['coord']['lat']
#         cities_df.loc[index, 'Lng'] = results['coord']['lon']
#         cities_df.loc[index, 'Max_Temp'] = results['main']['temp_max']
#         cities_df.loc[index, 'Humidity'] = results['main']['humidity']
#         cities_df.loc[index, 'Cloudiness'] = results['clouds']['all']
#         cities_df.loc[index, 'Wind_speed'] = results['wind']['speed']
#         cities_df.loc[index, 'Country'] = results['sys']['country']
#         cities_df.loc[index, 'Date'] = results['dt']

#     except (KeyError):
        
#         print('City not found. Skipping...')
cities.dtype


AttributeError: 'list' object has no attribute 'dtype'

In [37]:
zipped = list(zip(cities, Cloud, Country, Date, Hum, Lat, Lng, Temp, Wind))
cities_df_new = pd.DataFrame(zipped, columns= ['City', 'Cloudiness','Country','Date','Humidity','Lat','Lng','Max Temp','Wind Speed'])


# cities_df_new = pd.DataFrame([cities,
#                               Cloud,
#                               Country,
#                               Date,
#                               Hum,
#                               Lat, 
#                               Lng,
#                               Temp,
#                               Wind])

# cities_df_new = pd.DataFrame(['City', cities,
#                               'Cloudiness', Cloud,
#                               'Country', Country,
#                               'Date', Date,
#                               'Humidity', Hum,
#                               'Lat', Lat, 
#                               'Lng', Lng,
#                               'Max Temp', Temp,
#                               'Wind Speed', Wind])
cities_df_new

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,port-gentil,100,GA,1668919732,85,-0.7193,8.7815,77.92,14.27
1,flinders,0,AU,1668919732,40,-34.5833,150.8552,78.19,18.01
2,rikitea,100,PF,1668919733,67,-23.1203,-134.9692,71.87,9.06
3,uchkent,95,RU,1668919734,62,43.1143,47.0826,53.69,3.31
4,norman wells,75,CA,1668919734,93,65.2820,-126.8329,24.84,9.22
...,...,...,...,...,...,...,...,...,...
477,shimoda,0,US,1668919979,57,37.3394,-121.8950,55.90,4.00
478,lampazos de naranjo,0,US,1668920042,68,37.2001,-94.3502,25.56,2.59
479,celestun,12,MN,1668920028,78,47.9077,106.8832,-0.27,4.47
480,ayacucho,30,BR,1668920043,87,-10.4056,-36.4344,75.47,8.30


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [38]:
cities_df.to_csv('cities_test.csv')
cities_df_new.to_csv('new_cities_df')

In [None]:
path = 'output_data/cities.csv'
city_test = pd.read_csv(path, encoding='utf-8')
# city_test = pd.DataFrame('/output_data/cities.csv')
city_test.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
for humid in city_test:
    number = city_test.loc[city_test['Humidity'] > 100].index
city_test.count()


In [None]:
#  Get the indices of cities that have humidity over 100%.
cities_clean_data = city_test.loc[city_test['City_ID'] != 129]
cities_clean_data.head()

# cities_clean.to_csv('cities_clean.csv')

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


In [None]:
cities_clean_data.sort_values(['Lat'])


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
plt.scatter(cities_clean_data['Lat'], cities_clean_data['Max Temp'])
plt.title('City Latitude vs. Max Temperature(Date)')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.grid()
plt.show()

## Latitude vs. Humidity Plot

In [None]:
plt.scatter(cities_clean_data['Lat'], cities_clean_data['Humidity'])
plt.title('City Latitude vs. Humidity (Date)')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.grid()
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
plt.scatter(cities_clean_data['Lat'], cities_clean_data['Cloudiness'])
plt.title('City Latitude vs. Cloudiness (Date)')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.grid()
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
plt.scatter(cities_clean_data['Lat'], cities_clean_data['Wind Speed'])
plt.title('City Latitude vs. Wind Speed (Date)')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.grid()
plt.show()

## Linear Regression

In [None]:
north_hem = cities_clean_data.loc[cities_clean_data['Lat'] > 0]
south_hem = cities_clean_data.loc[cities_clean_data['Lat'] < 0]
north_hem.head()



####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(north_hem['Lat'], north_hem['Max Temp'])
regress_values = north_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(north_hem['Lat'], north_hem['Max Temp'])
plt.plot(north_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (21, 15), fontsize=12, color="red")
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.legend(['Max Temperature'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.savefig('scatter_reg.png')
plt.show()
print(f'The r-value is {round(rvalue,2)}')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(south_hem['Lat'], south_hem['Max Temp'])
regress_values = south_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(south_hem['Lat'], south_hem['Max Temp'])
plt.plot(south_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (-28, 40), fontsize=12, color="red")
plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.legend(['Max Temperature'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.show()
print(f'The r-value is {round(rvalue,2)}')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(north_hem['Lat'], north_hem['Humidity'])
regress_values = north_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(north_hem['Lat'], north_hem['Humidity'])
plt.plot(north_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (50, 5), fontsize=12, color="red")
plt.title('Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
plt.legend(['Humidity'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.savefig('scatter_reg.png')
plt.show()
print(f'The r-value is {round(rvalue,2)}')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(south_hem['Lat'], south_hem['Humidity'])
regress_values = south_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(south_hem['Lat'], south_hem['Humidity'])
plt.plot(south_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (-50, 20), fontsize=12, color="red")
plt.title('Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
plt.legend(['Humidity (%)'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.show()
print(f'The r-value is {round(rvalue,2)}')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(north_hem['Lat'], north_hem['Cloudiness'])
regress_values = north_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(north_hem['Lat'], north_hem['Cloudiness'])
plt.plot(north_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (0, 25), fontsize=12, color="red")
plt.title('Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
plt.legend(['Cloudiness (%)'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.savefig('scatter_reg.png')
plt.show()
print(f'The r-value is {round(rvalue,2)}')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(south_hem['Lat'], south_hem['Cloudiness'])
regress_values = south_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(south_hem['Lat'], south_hem['Cloudiness'])
plt.plot(south_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (-50, 24), fontsize=12, color="red")
plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
plt.legend(['Cloudiness (%)'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.show()
print(f'The r-value is {round(rvalue,2)}')

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(north_hem['Lat'], north_hem['Wind Speed'])
regress_values = north_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(north_hem['Lat'], north_hem['Wind Speed'])
plt.plot(north_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (0, 28), fontsize=12, color="red")
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.legend(['Wind Speed (mph)'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.savefig('scatter_reg.png')
plt.show()
print(f'The r-value is {round(rvalue,2)}')

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
slope, inter, rvalue, pvalue, stderr = linregress(south_hem['Lat'], south_hem['Wind Speed'])
regress_values = south_hem['Lat'] * slope + inter
line_eq = f'y = {str(round(slope,2))}x + {str(round(inter,2))}'
plt.scatter(south_hem['Lat'], south_hem['Wind Speed'])
plt.plot(south_hem['Lat'],regress_values,"r-")
plt.annotate(line_eq, (-28, 40), fontsize=12, color="red")
plt.title('Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
plt.legend(['Wind Speed (mph)'], loc='upper right')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.show()
print(f'The r-value is {round(rvalue,2)}')