In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress
import gmaps
import os

# Import API key
from api_key import gkey

In [None]:
# Locations of the four input CSV files, relative to the notebook's working directory.
csv_path_happiness = "Resources/2017_happiness.csv"
csv_path_libraries = "Resources/lmw_data_09_Feb_2021.csv"
csv_path_population = "Resources/API_SP.POP.TOTL_DS2_en_csv_v2_2017895.csv"
csv_path_capitals = "Resources/concap.csv"

# Later cells save their figures into Images/; create the folder up front so that
# plt.savefig does not fail on a checkout where the folder does not exist yet.
os.makedirs("Images", exist_ok=True)

# The libraries export is read as latin-1; the other files use the default encoding.
happiness_df = pd.read_csv(csv_path_happiness)
libraries_df = pd.read_csv(csv_path_libraries, encoding='latin-1')
population_df = pd.read_csv(csv_path_population)
capitals_df = pd.read_csv(csv_path_capitals)

In [None]:
# Keep just the country name and the capital's coordinates
capital_columns = ["CountryName", "CapitalLatitude", "CapitalLongitude"]
capitals_df = capitals_df[capital_columns]

capitals_df

In [None]:
# Reduce the happiness table to country, rank and score
happiness_columns = ["Country", "Happiness.Rank", "Happiness.Score"]
happiness_df = happiness_df[happiness_columns]

happiness_df.head()

In [None]:
# Keep only the rows whose "Data Year" equals the string "2017", then discard
# every row that still has a missing value in any column.
is_2017 = libraries_df["Data Year"] == "2017"
libraries_df = libraries_df.loc[is_2017, :].dropna()

libraries_df

In [None]:
# Convert "Metric Data" to numbers; values that cannot be parsed become NaN
# (errors="coerce"). assign() returns a new frame rather than writing into the
# filtered libraries_df slice, which avoids pandas' SettingWithCopyWarning and
# keeps the cell safe to re-run.
libraries_df = libraries_df.assign(
    **{"Metric Data": pd.to_numeric(libraries_df["Metric Data"], errors="coerce")}
)

In [None]:
# Keep the country name with its 2017 figure and give that column a descriptive name
population_df = (
    population_df[["Country Name", "2017"]]
    .rename(columns={"2017": "Population Amount"})
)
population_df.head()

In [None]:
# Attach library rows, then population, to the happiness table. Both are left
# joins, so happiness rows without a match are kept with NaN in the added columns.
combined_df = happiness_df.merge(
    libraries_df,
    how="left",
    left_on=["Country"],
    right_on=["Country or Area"],
)
combined_df = combined_df.merge(
    population_df,
    how="left",
    left_on=["Country"],
    right_on=["Country Name"],
)
combined_df

In [None]:
# Discard every row that is missing any value after the joins
combined_df = combined_df.dropna(axis=0, how="any")
combined_df.head()

In [None]:
# library_visits_df = libraries_df.loc[libraries_df["Metric"]=="Visits", :]

# #library_visits_df.dropna()
# #library_visits_df["Metric Data (INT)"] = library_visits_df["Metric Data"].astype(int)


# #library_visits_df.head(100)
# type_group_df = library_visits_df.groupby(["Library Type"]).sum()

# type_group_df

In [None]:
# PAM CODE HERE - start

In [None]:
# 2017 rows only, restricted to the columns used by the library-type analysis,
# with incomplete rows removed
library_columns = ["Library Type", "Region", "Country or Area", "Metric", "Metric Data"]
from_2017 = libraries_df["Data Year"] == "2017"

libraries_2017_df = libraries_df.loc[from_2017, library_columns].dropna()

libraries_2017_df.head()

In [None]:
# Add each country's population to the 2017 library rows (left join on country name)
lib_pop_2017_df = libraries_2017_df.merge(
    population_df,
    how="left",
    left_on=["Country or Area"],
    right_on=["Country Name"],
)

lib_pop_2017_df

In [None]:
# Rows whose Metric is "Visits", without any incomplete rows
visit_rows = lib_pop_2017_df["Metric"] == "Visits"
lib_pop_2017_visits_df = lib_pop_2017_df.loc[visit_rows, :].dropna()

lib_pop_2017_visits_df

In [None]:
# Total visits and total population per library type.
# numeric_only=True restricts the sum to the numeric columns; without it the text
# columns are either concatenated or dropped depending on the pandas version.
visits_by_type_df = lib_pop_2017_visits_df.groupby(["Library Type"]).sum(numeric_only=True)

# Summed visits divided by summed population for each library type
visits_by_type_df["Visits Per Capita"] = (
    visits_by_type_df["Metric Data"] / visits_by_type_df["Population Amount"]
)

# Turn "Library Type" back into a regular column for plotting
visits_by_type_df = visits_by_type_df.reset_index()
visits_by_type_df

In [None]:
# Pie chart: share of visits per capita for each library type
labels = visits_by_type_df["Library Type"]

library_types = visits_by_type_df["Visits Per Capita"]

# No wedge is pulled out. The offsets are sized from the data because plt.pie
# requires exactly one explode entry per wedge and raises otherwise.
explode = [0.0] * len(library_types)

title = "Library Visits Per Library Type\n"
plt.title(title)
plt.pie(library_types, labels=labels, explode=explode,
        autopct="%1.1f%%", shadow=False, startangle=210)
plt.axis("equal")
fig = plt.gcf()
fig.set_size_inches(8,8)

# Save figure
plt.savefig('Images/VisitsPerLibraryType.png')

plt.show()

In [None]:
# Rows whose Metric is "Libraries", without any incomplete rows
library_count_rows = lib_pop_2017_df["Metric"] == "Libraries"
lib_pop_2017_amount_df = lib_pop_2017_df.loc[library_count_rows, :].dropna()

lib_pop_2017_amount_df

In [None]:
# Sum the "Libraries" metric (and population) per library type.
# numeric_only=True restricts the sum to the numeric columns; without it the text
# columns are either concatenated or dropped depending on the pandas version.
amount_by_type_df = lib_pop_2017_amount_df.groupby(["Library Type"]).sum(numeric_only=True)

# Turn "Library Type" back into a regular column for plotting
amount_by_type_df = amount_by_type_df.reset_index()

amount_by_type_df

In [None]:
# Pie chart: share of the summed "Libraries" metric for each library type
labels = amount_by_type_df["Library Type"]

library_types = amount_by_type_df["Metric Data"]

# Pull the fifth wedge out by 0.05 and leave the rest in place. The offsets are
# sized from the data because plt.pie requires one explode entry per wedge.
explode = [0.05 if position == 4 else 0.0 for position in range(len(library_types))]

title = "Amount of Libraries Per Library Type\n\n\n"
plt.title(title)
# Percentages and labels are pushed outside the pie (pctdistance / labeldistance > 1)
plt.pie(library_types, labels=labels, explode=explode, pctdistance=1.2, labeldistance=1.3,
        autopct="%1.1f%%", shadow=False, startangle=115)
plt.axis("equal")
fig = plt.gcf()
fig.set_size_inches(8,8)

# Save figure
plt.savefig('Images/AmountOfLibrariesPerLibraryType.png')
plt.show()

In [None]:
# PAM CODE HERE - end

In [None]:
# ZOE CODE HERE - start

In [None]:
## Overall happiness index 2017
# Countries ranked 1 through 20
in_top_20 = happiness_df["Happiness.Rank"] <= 20
happiest_countries = happiness_df.loc[in_top_20]

# Bar labels and bar heights
countries = happiest_countries["Country"]
happiness = happiest_countries["Happiness.Score"]

# Draw the chart
plt.figure(figsize=(15, 6))
happy_bar = plt.bar(countries, happiness, align="center")
plt.title("20 happiest countries by score, 2017")
plt.xticks(rotation="vertical")
plt.tight_layout()

# Write the figure to disk, then show it
plt.savefig('Images/happiest_countries.png')
plt.show()

In [None]:
## Happiness index for countries which we also have library data for
# combined_df can hold several rows per country (it is grouped by Country and
# summed in later cells), so collapse it to one row per country and take the 20
# highest scores. This matches the chart title instead of relying on a
# hand-tuned score threshold.
happiest_library_countries = (
    combined_df
    .drop_duplicates(subset="Country")
    .nlargest(20, "Happiness.Score")
)

# set x axis and tick locations
countries = happiest_library_countries["Country"]
happiness = happiest_library_countries["Happiness.Score"]

# Create chart
plt.figure(figsize=(15,6))
plt.bar(countries, happiness, align="center")
plt.xticks(rotation="vertical")
plt.title("20 happiest countries with library usage data by score, 2017")
plt.tight_layout()

# Save figure
plt.savefig('Images/happiest_library_countries.png')

# Display
plt.show()

In [None]:
## Scatter plot of physical visits and happiness
# Remove, by index label, every row whose Metric is not "Visits"
non_visit_index = combined_df[combined_df["Metric"] != "Visits"].index
visits_df = combined_df.drop(non_visit_index)

# Group once per country; the sum and the max below both come from this grouping
total_visits = visits_df.groupby("Country")
total_visits_sum = total_visits.sum()

# Per-country maximum of every column; only "Population Amount" is used from it
population = total_visits.max()

# Single-column frames (indexed by Country) for the two quantities, then join them
visits_sum_df = total_visits_sum["Metric Data"].to_frame()
pop_df = population["Population Amount"].to_frame()

pop_visits_df = visits_sum_df.merge(pop_df, how="left", on=["Country"])

# Total visits divided by population
pop_visits_df["Visits per Capita"] = (
    pop_visits_df["Metric Data"] / pop_visits_df["Population Amount"]
)

# Mean happiness score of each country's rows, aligned on the Country index
pop_visits_df["Happiness Score"] = total_visits["Happiness.Score"].mean()

# Scatter: visits per capita (x) against happiness score (y)
plt.scatter(pop_visits_df["Visits per Capita"], pop_visits_df["Happiness Score"])
plt.title("Number of library visits vs. Country happiness score")
plt.xlabel("Number of visits per capita")
plt.ylabel("Happiness Score")

# Write the figure to disk, then show it
plt.savefig('Images/physicalvisits_v_happiness.png')
plt.show()

In [None]:
# Least-squares line of happiness score against visits per capita
x_values_visits = pop_visits_df["Visits per Capita"]
y_values_visits = pop_visits_df["Happiness Score"]

slope, intercept, rvalue, pvalue, stderr = st.linregress(x_values_visits, y_values_visits)

# Fitted values and the equation text shown on the chart
regress_values = intercept + slope * x_values_visits
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Data points, fitted line in red, and the equation at data coordinates (2, 3.5)
plt.scatter(x_values_visits, y_values_visits)
plt.plot(x_values_visits, regress_values, "r-")
plt.annotate(line_eq, (2, 3.5), fontsize=15, color="red")

plt.title("Number of library visits vs. Country happiness score")
plt.xlabel("Number of visits per capita")
plt.ylabel("Happiness Score")

plt.savefig('Images/physicalvisits_v_happiness_regress.png')

plt.show()

In [None]:
## Scatter plot of loans and happiness
# Remove, by index label, every row whose Metric is not "Loans_books"
non_loan_index = combined_df[combined_df["Metric"] != "Loans_books"].index
loans_df = combined_df.drop(non_loan_index)

# Group once per country; the sum and the max below both come from this grouping
total_loans = loans_df.groupby("Country")
total_loans_sum = total_loans.sum()

# Per-country maximum of every column; only "Population Amount" is used from it
population = total_loans.max()

# Single-column frames (indexed by Country) for the two quantities, then join them
loans_sum_df = total_loans_sum["Metric Data"].to_frame()
pop_df = population["Population Amount"].to_frame()

pop_loans_df = loans_sum_df.merge(pop_df, how="left", on=["Country"])

# Total loans divided by population
pop_loans_df["Loans per Capita"] = (
    pop_loans_df["Metric Data"] / pop_loans_df["Population Amount"]
)

# Mean happiness score of each country's rows, aligned on the Country index
pop_loans_df["Happiness Score"] = total_loans["Happiness.Score"].mean()

# Scatter: loans per capita (x) against happiness score (y)
plt.scatter(pop_loans_df["Loans per Capita"], pop_loans_df["Happiness Score"])
plt.title("Number of physical loans vs. Country happiness score")
plt.xlabel("Number of physical loans per capita")
plt.ylabel("Happiness Score")

# Write the figure to disk, then show it
plt.savefig('Images/physicalloans_v_happiness.png')
plt.show()

In [None]:
# Least-squares line of happiness score against loans per capita
x_values_loans = pop_loans_df["Loans per Capita"]
y_values_loans = pop_loans_df["Happiness Score"]

slope, intercept, rvalue, pvalue, stderr = st.linregress(x_values_loans, y_values_loans)

# Fitted values and the equation text shown on the chart
regress_values = intercept + slope * x_values_loans
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Data points, fitted line in red, and the equation at data coordinates (100, 6)
plt.scatter(x_values_loans, y_values_loans)
plt.plot(x_values_loans, regress_values, "r-")
plt.annotate(line_eq, (100, 6), fontsize=15, color="red")

plt.title("Number of physical loans vs. Country happiness score")
plt.xlabel("Number of physical loans per capita")
plt.ylabel("Happiness Score")

plt.savefig('Images/physicalloans_v_happiness_regress.png')

plt.show()

In [None]:
# ZOE CODE HERE - end

In [None]:
# MALLORY CODE HERE - start

In [None]:
# Rows of the combined table whose Metric is "Libraries"
is_library_metric = combined_df["Metric"] == "Libraries"
number_libraries_df = combined_df.loc[is_library_metric, :]

In [None]:
# Aggregate the "Libraries" rows per country: summed metric and maximum population,
# each kept as a single-column frame indexed by Country
number_group = number_libraries_df.groupby(["Country"])
num_lib_df = number_group.sum()["Metric Data"].to_frame()
num_pop_df = number_group.max()["Population Amount"].to_frame()

# Join the two on Country and derive libraries per person
pop_lib_df = num_lib_df.merge(num_pop_df, how="left", on=["Country"])
pop_lib_df["Libraries Per Capita"] = (
    pop_lib_df["Metric Data"] / pop_lib_df["Population Amount"]
)
pop_lib_df

In [None]:
# One integer position per row of pop_lib_df, and tick positions 0.4 to the right of each
x_axis = np.arange(len(pop_lib_df))
tick_locations = list(x_axis + 0.4)

In [None]:
# Bar chart of libraries per capita, one bar per country
plt.figure(figsize = (20,20))
plt.bar(x_axis,pop_lib_df["Libraries Per Capita"],color='b',alpha=0.5, align='edge')
# Tick labels come from the same frame that supplies the bar heights
plt.xticks(tick_locations, pop_lib_df.index,rotation="vertical")

# Small margins: a little room left of the first bar and above the tallest one
plt.xlim(-0.25,len(x_axis))
plt.ylim(0,max(pop_lib_df["Libraries Per Capita"])+.00001)

plt.title("Libraries per Capita")
plt.xlabel("Country")
# The plotted value is a per-capita ratio, not a raw count
plt.ylabel("Number of libraries per capita")

# Apply the layout before saving so the saved image matches the displayed one
plt.tight_layout()
plt.savefig("Images/libraries_per_country_bar.png")

plt.show()

In [None]:
# MALLORY CODE HERE - end

In [None]:
#ELLIE CODE HERE - start

In [None]:
# Make sure "Metric Data" is numeric before the visit totals are computed; values
# that cannot be parsed become NaN (errors="coerce"). assign() returns a new frame
# rather than writing into a filtered slice, which avoids pandas'
# SettingWithCopyWarning and keeps the cell safe to re-run.
libraries_df = libraries_df.assign(
    **{"Metric Data": pd.to_numeric(libraries_df["Metric Data"], errors="coerce")}
)

In [None]:
#create DF to show only country and number of visits?
visits_df = libraries_df.loc[libraries_df["Metric"]=="Visits", :]
visits_df.dtypes

In [None]:
# Sum the numeric columns per country and keep the first 20 countries in group order.
# numeric_only=True keeps text columns out of the sum (their handling otherwise
# depends on the pandas version). The GroupBy object itself is not printed: its
# repr is only a type name and memory address.
grouped_visits_df = (
    visits_df
    .groupby(['Country or Area'])
    .sum(numeric_only=True)
    .head(20)
)

In [None]:
# grouped_visits_df is already one row per country, so no further grouping is
# needed; keep just the visit totals as a single-column frame indexed by country.
new_visits_df = pd.DataFrame(grouped_visits_df["Metric Data"])
new_visits_df

In [None]:
# One integer position per row of new_visits_df, and tick positions 0.5 to the right of each
x_axis = np.arange(len(new_visits_df))
tick_locations = list(x_axis + 0.5)

In [None]:
# Library Visits per Country - Bar Graph
plt.figure(figsize = (20,20))

# Bars start at the integer positions (align='edge'); labels sit at tick_locations
plt.bar(x_axis,new_visits_df["Metric Data"],color='b',alpha=0.5, align='edge')
plt.xticks(tick_locations, new_visits_df.index,rotation="vertical")

plt.xlabel ("Country")
plt.ylabel ("Visits")
plt.title ("Library Visits per Country")

# Apply the layout before saving so the saved image matches the displayed one
plt.tight_layout()
plt.savefig('Images/visits_per_country.png')

plt.show()

In [None]:
# ELLIE CODE HERE - end

In [None]:
# Heatmaps

In [None]:
# Hand the API key to gmaps
gmaps.configure(api_key=gkey)

# Pair each happiness row with its capital's coordinates. The outer join keeps
# unmatched rows from both tables; dropna() then removes every row that has a
# missing value.
map_df = happiness_df.merge(
    capitals_df,
    how="outer",
    left_on="Country",
    right_on="CountryName",
).dropna()

map_df

In [None]:
# Base map
fig1 = gmaps.figure(center=(20, 5), zoom_level=2)

# Capital coordinates, and the happiness score used as each point's weight
locations = map_df[["CapitalLatitude", "CapitalLongitude"]]
happiness = map_df["Happiness.Score"]

# Heat layer weighted by happiness score
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=happiness,
    dissipating=False,
    max_intensity=7.6,
    point_radius=3.5,
)

# Attach the layer and display the map
fig1.add_layer(heat_layer)
fig1

In [None]:
# Join the per-country visit totals onto the map table by country name
visits_df = map_df.merge(new_visits_df, left_on="Country", right_on="Country or Area")

# Marker coordinates as floats, and the matching visit totals as a plain list
coordinate_columns = ["CapitalLatitude", "CapitalLongitude"]
visit_locations = visits_df[coordinate_columns].astype(float)
visits = visits_df["Metric Data"].tolist()

In [None]:
fig2 = gmaps.figure(center=(20, 5), zoom_level=2)

# One info-box caption per entry of visits
visit_captions = [f"Visit count: {visit}" for visit in visits]

# Symbol layer at the capital coordinates
visits_layer = gmaps.symbol_layer(
    visit_locations,
    fill_color='blue',
    stroke_color='rgba(0, 0, 150, 0)',
    scale=5,
    info_box_content=visit_captions,
)

# Add the happiness heat layer first, then the visit symbols
fig2.add_layer(heat_layer)
fig2.add_layer(visits_layer)

fig2

In [None]:
# Add markers for number of libraries per capita
# Join on Country and keep the first 10 rows. .copy() makes that subset an
# independent frame, so the column assignments below do not write into a slice
# of the merge result (which triggers pandas' SettingWithCopyWarning).
lib_amt_df = pd.merge(map_df, pop_lib_df, on='Country').head(10).copy()

# Marker coordinates as floats
lib_locations = lib_amt_df[["CapitalLatitude","CapitalLongitude"]].astype(float)

# Libraries per 100,000 people, rounded to two decimals
lib_amt_df["Lib Per 100000"] = (lib_amt_df["Libraries Per Capita"]*100000).round(2)

# Rename to dot-free column names so they can be used as str.format field names
lib_amt_df = lib_amt_df.rename(columns={"Happiness.Rank": "HappinessRank",
                          "Happiness.Score":"HappinessScore"})

lib_amt_df["HappinessRank"] = lib_amt_df["HappinessRank"].astype(int)


In [None]:
# HTML for one marker's info box; each {placeholder} is filled from the
# lib_amt_df column of the same name
info_box_template = """
<dl>
<dt>Country</dt><dd>{Country}</dd>
<dt>Happiness Rank</dt><dd>{HappinessRank}</dd>
<dt>Libraries Per 100000 People</dt><dd>{Lib Per 100000}</dd>
</dl>
"""

# Render the template once per row of lib_amt_df
library_info = [
    info_box_template.format(**fields)
    for _label, fields in lib_amt_df.iterrows()
]

library_info

In [None]:
fig3 = gmaps.figure(center=(20, 30), zoom_level=2)

# Marker layer at the capital coordinates, with the rendered info boxes attached
libraries_layer = gmaps.marker_layer(
    lib_locations,
    info_box_content=library_info,
)

# Add the happiness heat layer first, then the library markers
fig3.add_layer(heat_layer)
fig3.add_layer(libraries_layer)

fig3