# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
pip install citipy

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import datetime
now = datetime.datetime.now()

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Relative path of the CSV output file
output_data_file = "output_data/cities.csv"

# (min, max) bounds for latitude and longitude values
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [2]:
# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city once.
# dict.fromkeys de-duplicates with a hash lookup while preserving first-seen order,
# instead of scanning the whole `cities` list for every coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    for lat_lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

610

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Progress-log counters: `x` is the record number within the current set and
# `count` is the set number (a new set starts after every 50 logged records).
x = 1
count = 1

# Parallel column lists: each successfully retrieved city adds exactly one
# entry to every list, so they always stay the same length.
city_df = []
lat = []
long = []
Max_Temp = []
Humidity = []
Cloudiness = []
Wind_Speed = []
Country = []
Date = []
columns = (city_df, lat, long, Max_Temp, Humidity, Cloudiness, Wind_Speed, Country, Date)

# Endpoint only: the query string is passed through `params` so that requests
# URL-encodes every city name.
url = "http://api.openweathermap.org/data/2.5/weather"
city_responses = []

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Beginning Data Retrieval")
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

for city in cities:
    if x >= 51:
        count += 1
        x = 1
    try:
        city_data = requests.get(
            url,
            params={"units": "Imperial", "appid": weather_api_key, "q": city},
            timeout=10,  # a stalled request must not hang the whole run
        ).json()
        # Read every field BEFORE appending anything: if any key is missing,
        # nothing is appended and the column lists cannot get out of step.
        record = (
            city_data['name'],
            city_data['coord']['lat'],
            city_data['coord']['lon'],
            city_data['main']['temp_max'],
            city_data['main']["humidity"],
            city_data["clouds"]['all'],
            city_data["wind"]['speed'],
            city_data['sys']['country'],
            city_data['dt'],
        )
    except requests.RequestException as err:
        # Log only the exception type: the full message may include the
        # request URL, whose query string carries the API key.
        print(f"Request failed for {city} ({type(err).__name__}). Skipping... ")
        continue
    except (KeyError, TypeError, ValueError):
        # The payload did not contain the expected weather fields.
        print("City not found. Skipping... ")
        continue

    for column, value in zip(columns, record):
        column.append(value)
    city_responses.append(city_data)
    print(f"Processing Record {x} of set {count}|{city}")
    x += 1

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Data Retrieval Complete ")
print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Relative path for the CSV output file; the commented-out line below is an
# unused alternative location.
# NOTE(review): no visible cell writes to this path (the export cell writes
# "Clean_City_Data.csv") - confirm whether this variable is still needed.
#output_data_file = ".../WeatherPy/cities.csv"
output_data_file = "output_data/cities.csv"


In [None]:
# Assemble one row per retrieved city from the parallel column lists filled
# by the API retrieval loop.
city_weather = pd.DataFrame({
    "City": city_df,
    "Latitude": lat,
    "Longitude": long,
    "Max Temperature": Max_Temp,
    "Humidity": Humidity,
    "Cloudiness": Cloudiness,
    "Wind Speed": Wind_Speed,
    "Country": Country,
    "Date": Date,
})

city_weather.head()
                 

##### Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#city_weather.count()

In [None]:
#city_weather=city_weather[city_weather["Humidity"]>=100]
#city_weather.count()

In [None]:
# Keep only the rows whose humidity value is at most 100.
humidity_ok = city_weather["Humidity"].le(100)
city_weather = city_weather.loc[humidity_ok]

# Summary statistics of the rows that remain
city_weather.describe()

In [None]:
# Drop the humidity outliers (> 100%) and keep the cleaned frame under the name
# the later cells read. The frame is filtered rather than rebuilt from the raw
# column lists, so the outlier removal actually takes effect and the existing
# column names are preserved. The index is renumbered 0..n-1 after filtering.
city_weather = city_weather[city_weather["Humidity"] <= 100].reset_index(drop=True)

city_weather.head()



In [None]:
# Export the city weather table to a CSV file (relative path).
clean_csv_name = "Clean_City_Data.csv"
city_weather.to_csv(clean_csv_name)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of max temperature against latitude for every city in city_weather.
latitude = city_weather["Latitude"]
Max_temp = city_weather["Max Temperature"]

plt.scatter(latitude, Max_temp, marker="o", facecolors="steelblue", edgecolor="black")
plt.grid()

# Stamp the title with the run date (`now` is set in the setup cell) instead of
# a hard-coded date that goes stale whenever the notebook is re-run.
plt.title(f"City Latitude vs Max Temperature {now.strftime('%m/%d/%Y')}")
plt.xlabel("City Latitude")
plt.ylabel("Max Temperature")
plt.savefig("Temp_vs_Lat.png")
plt.show()



## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for every city in city_weather.
latitude = city_weather["Latitude"]
# Lowercase name on purpose: rebinding `Humidity` would clobber the raw list
# that the DataFrame-building cell reads.
humidity = city_weather["Humidity"]

plt.scatter(latitude, humidity, marker="o", facecolors="steelblue", edgecolor="black")
plt.grid()

# Stamp the title with the run date (`now` is set in the setup cell) instead of
# a hard-coded date that goes stale whenever the notebook is re-run.
plt.title(f"City Latitude vs Humidity {now.strftime('%m/%d/%Y')}")
plt.xlabel("City Latitude")
plt.ylabel("Humidity")
plt.savefig("Humidity_vs_Lat.png")
plt.show()


## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude for every city in city_weather.
latitude = city_weather["Latitude"]
# Lowercase name on purpose: rebinding `Cloudiness` would clobber the raw list
# that the DataFrame-building cell reads.
cloudiness = city_weather["Cloudiness"]

plt.scatter(latitude, cloudiness, marker="o", facecolors="steelblue", edgecolor="black")
plt.grid()

# Stamp the title with the run date (`now` is set in the setup cell) instead of
# a hard-coded date that goes stale whenever the notebook is re-run.
plt.title(f"City Latitude vs Cloudiness {now.strftime('%m/%d/%Y')}")
plt.xlabel("City Latitude")
plt.ylabel("Cloudiness")
plt.savefig("Cloudiness_vs_Lat.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every city in city_weather.
latitude = city_weather["Latitude"]
# Lowercase name on purpose: rebinding `Wind_Speed` would clobber the raw list
# that the DataFrame-building cell reads.
wind_speed = city_weather["Wind Speed"]

plt.scatter(latitude, wind_speed, marker="o", facecolors="steelblue", edgecolor="black")
plt.grid()

# Stamp the title with the run date (`now` is set in the setup cell) instead of
# a hard-coded date that goes stale whenever the notebook is re-run.
plt.title(f"City Latitude vs Wind Speed {now.strftime('%m/%d/%Y')}")
plt.xlabel("City Latitude")
plt.ylabel("Wind Speed")
plt.savefig("Wind Speed_vs_Lat.png")
plt.show()

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots (this was a nightmare)

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
# Latitude == 0 is assigned to the Northern frame so that a city sitting exactly
# on the equator is not silently left out of both.
Northern_df = city_weather[city_weather["Latitude"] >= 0]
Southern_df = city_weather[city_weather["Latitude"] < 0]

In [None]:
def plot_linear_regression(x, y, title_coordinates, text_coordinates, hemisphere=None):
    """Scatter ``y`` against latitude ``x`` and overlay the least-squares line.

    The plot is drawn with pyplot on the current axes and is neither shown nor
    saved here, so the calling cell can keep adjusting it (e.g. ``plt.xlim``).
    The r-squared value of the fit is printed.

    Parameters
    ----------
    x : pandas.Series
        Latitudes, used as the x-axis values.
    y : pandas.Series
        Values to regress on latitude.
    title_coordinates : str
        Name of the y variable (e.g. "Humidity"); used for the y-axis label
        and in the plot title.
    text_coordinates : tuple
        (x, y) position, in data coordinates, for the regression equation.
        If that point lies outside the plotted range the equation is placed
        in the top-left corner of the axes instead, because matplotlib does
        not draw a data-coordinate annotation whose anchor is outside the axes.
    hemisphere : str, optional
        Hemisphere name for the title (e.g. "Northern"). When omitted it is
        inferred from the sign of the latitudes; if they span both signs the
        title carries no hemisphere prefix.

    Returns
    -------
    None
    """
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
    regress_values = x * slope + intercept

    # Sign-aware formatting so a negative intercept reads "- 3.20", not "+ -3.2".
    sign = "+" if intercept >= 0 else "-"
    line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

    if hemisphere is None:
        if min(x) >= 0:
            hemisphere = "Northern"
        elif max(x) <= 0:
            hemisphere = "Southern"
    title_prefix = f"{hemisphere} Hemisphere - " if hemisphere else ""

    plt.scatter(x, y, marker="o", facecolors="steelblue", edgecolor='black')
    plt.plot(x, regress_values, color="blueviolet")

    # Use the requested position only when it falls inside the plotted range;
    # otherwise anchor the text to the axes so it is always visible.
    axes = plt.gca()
    x_low, x_high = sorted(axes.get_xlim())
    y_low, y_high = sorted(axes.get_ylim())
    text_x, text_y = text_coordinates
    if x_low <= text_x <= x_high and y_low <= text_y <= y_high:
        plt.annotate(line_eq, (text_x, text_y), color="blueviolet", fontsize=15)
    else:
        plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction",
                     color="blueviolet", fontsize=15)

    plt.xlabel("Latitude")
    plt.ylabel(title_coordinates)
    plt.title(f"{title_prefix}{title_coordinates} vs. Latitude Linear Regression")
    # Grid kept for visual continuity with the earlier scatter plots.
    plt.grid()

    print(f"The r-squared IS: {rvalue**2}")
              

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: max temperature regressed on latitude, then fixed axis limits.
plot_linear_regression(
    x=Northern_df["Latitude"],
    y=Northern_df["Max Temperature"],
    title_coordinates="Max Temperature",
    text_coordinates=(10, 30),
)
plt.ylim(25, 120)
plt.xlim(-5, 82.5)

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: max temperature regressed on latitude.
plot_linear_regression(
    x=Southern_df["Latitude"],
    y=Southern_df["Max Temperature"],
    title_coordinates="Max Temperature",
    text_coordinates=(10, 30),
)

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: humidity regressed on latitude.
plot_linear_regression(
    x=Northern_df["Latitude"],
    y=Northern_df["Humidity"],
    title_coordinates="Humidity",
    text_coordinates=(10, 30),
)

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: humidity regressed on latitude.
plot_linear_regression(
    x=Southern_df["Latitude"],
    y=Southern_df["Humidity"],
    title_coordinates="Humidity",
    text_coordinates=(10, 30),
)

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: cloudiness regressed on latitude.
plot_linear_regression(
    x=Northern_df["Latitude"],
    y=Northern_df["Cloudiness"],
    title_coordinates="Cloudiness",
    text_coordinates=(10, 30),
)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: cloudiness regressed on latitude.
plot_linear_regression(
    x=Southern_df["Latitude"],
    y=Southern_df["Cloudiness"],
    title_coordinates="Cloudiness",
    text_coordinates=(10, 30),
)

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: wind speed regressed on latitude.
plot_linear_regression(
    x=Northern_df["Latitude"],
    y=Northern_df["Wind Speed"],
    title_coordinates="Wind Speed",
    text_coordinates=(10, 30),
)

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: wind speed regressed on latitude.
plot_linear_regression(
    x=Southern_df["Latitude"],
    y=Southern_df["Wind Speed"],
    title_coordinates="Wind Speed",
    text_coordinates=(10, 30),
)

In [None]:
#If anyone reads this can you explain why the linear regression equation only shows up on some of the graphs
#and not any of the others?