# 6.3 Geographic Visualization
### This script contains the following:
#### 1. Import data and libraries
#### 2. Data wrangling
#### 3. Data cleaning
#### 4. Plotting a choropleth
#### 5. Discuss the results and what they mean in a markdown section.

#### 1. Import data and libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json

In [None]:
# This command prompts matplotlib visuals to appear inline in the notebook

%matplotlib inline

In [None]:
# Location of the ".geojson" file with the country boundaries.
# Adjacent raw-string literals are joined at compile time, so this is the
# same single path string, only wrapped for readability.
Countries_geo = (
    r"C:\Users\acisk\Dropbox\My PC (DESKTOP-SO8P63N)\Desktop\Data Analytics"
    r"\Achievement 6 Project- Advanced Analytics & Dashboard Design"
    r"/countries.geojson"
)

In [None]:
# Root folder of the project; data files are addressed relative to it.
# The adjacent raw-string literals concatenate into one path string.
path = (
    r"C:\Users\acisk\Dropbox\My PC (DESKTOP-SO8P63N)\Desktop\Data Analytics"
    r"\Achievement 6 Project- Advanced Analytics & Dashboard Design"
)

In [None]:
# That's just in case you want to look at the GEOJSON file contents here too.
# The context manager closes the file handle as soon as parsing is done
# (it was previously left open), and the explicit UTF-8 encoding keeps
# non-ASCII country names intact regardless of the platform default encoding.
with open(r"C:\Users\acisk\Dropbox\My PC (DESKTOP-SO8P63N)\Desktop\Data Analytics\Achievement 6 Project- Advanced Analytics & Dashboard Design/countries.geojson", encoding="utf-8") as f:
    # json.load returns the GEOJSON object as a dictionary
    data = json.load(f)

# Iterating through the geojson feature list and printing each feature
for i in data['features']:
    print(i)

In [None]:
# Load the prepared FIFA 21 player ratings data into a DataFrame
ratings_csv = os.path.join(
    path, 'FIFA 21 Ratings Project', 'Data', 'Prepared Data', 'FIFA21_ratings.csv'
)
fifa_21 = pd.read_csv(ratings_csv, index_col=False)

In [None]:
# Preview the first rows of the raw ratings data
fifa_21.head()

In [None]:
# List the available column names to decide which ones to keep
fifa_21.columns

#### 2. Data Wrangling

In [None]:
# Keep only the columns needed for the analysis (drops everything else).
# .copy() makes fifa_21_new an independent DataFrame, so adding a column to it
# later does not trigger pandas' SettingWithCopyWarning or write to a
# temporary slice of fifa_21.
fifa_21_new = fifa_21[['sofifa_id', 'age', 'height_cm', 'weight_kg',
                       'nationality', 'player_rating', 'value_eur', 'wage_eur']].copy()

In [None]:
# Preview the first rows of the reduced data set
fifa_21_new.head()

In [None]:
# Inspect the data type of each remaining column
fifa_21_new.dtypes

In [None]:
# Number of rows (players) per nationality value
fifa_21_new['nationality'].value_counts()

#### 3. Data cleaning

In [None]:
# Check for missing values: count of null entries in each column

fifa_21_new.isnull().sum()

In [None]:
# Duplicate check: boolean Series flagging rows that repeat an earlier row
dups = fifa_21_new.duplicated()

In [None]:
# Total number of duplicated rows (0 means no duplicates).
# Looking only at the first five flags with head() cannot confirm that the
# rest of the data set is free of duplicates, so the flags are summed instead.
dups.sum()

#### Extreme values check

In [None]:
# Histogram (20 bins, with KDE curve) of 'player_rating' to reveal extreme values
ratings = fifa_21_new['player_rating']
sns.histplot(ratings, bins=20, kde=True)

#### 4. Plotting a choropleth

#### I created a new column for the count of players in each country

In [None]:
# Add a column holding, for every player row, the number of players that
# share that row's nationality
nationality_groups = fifa_21_new.groupby(['nationality'])['nationality']
fifa_21_new['Player_counts'] = nationality_groups.transform('count')

In [None]:
# Preview the data with the new 'Player_counts' column
fifa_21_new.head()

In [None]:
# Build the plotting frame: only the nationality and its player count

plot_columns = ['nationality', 'Player_counts']
data_to_plot = fifa_21_new[plot_columns]
data_to_plot.head()

In [None]:
# Setup a folium map at a high-level (world) zoom.
# Latitude must lie within [-90, 90]; the previous centre [100, 0] was outside
# that range, so the map is now centred on a valid point (20 N, 0 E).
# NOTE: the name `map` shadows the Python builtin; it is kept because the
# following cell calls map.save(...).
map = folium.Map(location=[20, 0], zoom_start=1.5)

# Choropleth maps bind pandas DataFrames and json geometries. This allows us
# to quickly visualize data combinations.
# NOTE(review): 'nationality' values have to match the geojson ADMIN names
# exactly to be coloured - verify spellings such as country name variants.
folium.Choropleth(
    geo_data=Countries_geo,
    data=data_to_plot,
    columns=['nationality', 'Player_counts'],
    # this part is very important - check your json file to see where the KEY is located
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr',
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Count of players",
).add_to(map)
folium.LayerControl().add_to(map)

map

In [None]:
# Write the interactive map to a standalone HTML file (relative path)
output_html = 'plot_data.html'
map.save(output_html)

### 5. Discuss the results and what they mean in a markdown section.

##### Does the analysis answer any of your existing research questions?
##### Yes, the analysis shows which countries have more or fewer players.

##### Does the analysis lead you to any new research questions?
##### I would like to know why some nationalities have more players than others.

In [None]:
# Save the data set, including the 'Player_counts' column, to the prepared data folder
export_path = os.path.join(
    path, 'FIFA 21 Ratings Project', 'Data', 'Prepared Data', 'fifa_21_updated.csv'
)
fifa_21_new.to_csv(export_path)