# Project: Data Visualisation with Plotly

I will be using a dataset from the World Bank Data Catalog. This [dataset](https://datacatalog.worldbank.org/search/dataset/0041754/Ghana---Electricity-Customers-in-Southern-Ghana) contains details about electricity customers in Southern Ghana.

I have uploaded this dataset to GitHub, and I will load it from there into a dataframe in this notebook.

### Prerequisites

In [None]:
# Standard library: environment access and a hidden prompt for credentials
import os
from getpass import getpass

# Import pandas for data manipulation
import pandas as pd

# Import numpy for scientific computations
import numpy as np

# Import plotly library for data visualisation
# ---
# `plotly.express` contains plotly.py's core functionality
# ----
#
import plotly.express as px

### Loading the dataset

In [None]:
# Read the CSV straight from its raw GitHub URL and preview the first rows
dataset_url = "https://raw.githubusercontent.com/wambasisamuel/DE_Week03_Wednesday/main/electricity-customers-southern-ghana.csv"
electricity_df = pd.read_csv(filepath_or_buffer=dataset_url)
electricity_df.head(n=5)

Unnamed: 0,ID,REGION,LOCATION,METRO/MUNICIPAL/DISTRICT,HV VOLTAGE,NUMBER OF CUSTOMERS,OPERATIONAL AREA
0,1,Greater Accra Region,MAMPONG,Akwapim North,11.0,17522.0,ACCRA EAST
1,2,Greater Accra Region,TESHIEE,Ledzokuku,11.0,28537.0,ACCRA EAST
2,3,Greater Accra Region,KWABENYA,Ga East,11.0,31132.0,ACCRA EAST
3,4,Greater Accra Region,MAKOLA,Accra Metropolitan Assembly,11.0,46943.0,ACCRA EAST
4,5,Greater Accra Region,ROMAN RIDGE,Accra Metropolitan Assembly,11.0,51866.0,Tamale


## Data Exploration

In [None]:
# Dataset dimensions as a (rows, columns) tuple
electricity_df.shape

(72, 7)

In [None]:
# Column labels as loaded from the CSV
electricity_df.columns

Index(['ID', 'REGION', 'LOCATION', 'METRO/MUNICIPAL/DISTRICT', 'HV VOLTAGE',
       'NUMBER OF CUSTOMERS', 'OPERATIONAL AREA'],
      dtype='object')

In [None]:
# Normalise column names. Surrounding whitespace is trimmed FIRST: once spaces
# have been replaced with underscores there is nothing left for strip() to
# remove, so stripping last would leave leading/trailing underscores behind.
# After trimming, names are lowercased and spaces and '/' become underscores.
electricity_df.columns = (
    electricity_df.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
    .str.replace('/', '_')
)
electricity_df.columns

Index(['id', 'region', 'location', 'metro_municipal_district', 'hv_voltage',
       'number_of_customers', 'operational_area'],
      dtype='object')

In [None]:
# Summarise each column's dtype and non-null count
electricity_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        72 non-null     int64  
 1   region                    72 non-null     object 
 2   location                  72 non-null     object 
 3   metro_municipal_district  72 non-null     object 
 4   hv_voltage                67 non-null     float64
 5   number_of_customers       62 non-null     float64
 6   operational_area          72 non-null     object 
dtypes: float64(2), int64(1), object(4)
memory usage: 4.1+ KB


In [None]:
# Count the missing (NaN) values in each column
electricity_df.isna().sum()

id                           0
region                       0
location                     0
metro_municipal_district     0
hv_voltage                   5
number_of_customers         10
operational_area             0
dtype: int64

In [None]:
# Missing values per column (`isnull` is pandas' alias for `isna`)
electricity_df.isnull().sum()

id                           0
region                       0
location                     0
metro_municipal_district     0
hv_voltage                   5
number_of_customers         10
operational_area             0
dtype: int64

The dataset has 5 missing values in the HV Voltage column and 10 missing values in the Number of Customers column.

In [None]:
# Count rows that exactly repeat an earlier row
electricity_df.duplicated().sum()

0

There are no duplicate records in the dataset.

In [None]:
# Keep only fully populated rows: discard any row with at least one missing value
electricity_df = electricity_df.dropna(axis=0, how="any")
electricity_df.shape

(58, 7)

## Data Visualization

### 1. Pie Chart

#### <font color="green"> Supply Voltage</font>

In [None]:
# Number of records for each HV supply voltage, indexed by voltage
supply_df = (
    electricity_df.loc[:, ["hv_voltage"]]
    .groupby("hv_voltage")
    .agg(count_hv=("hv_voltage", "count"))
)
supply_df

Unnamed: 0_level_0,count_hv
hv_voltage,Unnamed: 1_level_1
11.0,37
33.0,21


In [None]:
# Render a donut chart of how many records fall under each HV supply voltage.
# Slice labels are built from the grouped index rather than a hand-typed list,
# so every label stays aligned with the count in the same row of `supply_df`
# (e.g. 11.0 -> '11-kV').
names = [f"{voltage:g}-kV" for voltage in supply_df.index]
fig = px.pie(
    supply_df,
    values  = 'count_hv',
    names   = names,
    color_discrete_sequence=px.colors.sequential.RdBu,
    hole = 0.5
  )

# Tweak and Label
fig.update_layout(
    title = {
        'text': 'Distribution of Power Supplies',
        'x': 0.45,
        'xanchor': 'center',
        'yanchor': 'top'}
  )
# Show the percentage and the label inside each slice
fig.update_traces(textposition='inside', textinfo='percent+label')

# Display
fig.show()

#### <font color="green"> Customers per region</font>

In [None]:
# Total number of customers in each region, indexed by region
summary_df = electricity_df.groupby("region")[["number_of_customers"]].sum()
summary_df

Unnamed: 0_level_0,number_of_customers
region,Unnamed: 1_level_1
Ashanti Region,325383.0
Central Region,202246.0
Eastern Region,182480.0
Greater Accra Region,858425.0
Volta Region,143946.0
Western Region,101460.0


In [None]:
# Render a donut chart of the total customers in each region.
# Region labels come from the index of `summary_df` instead of a hard-coded
# list, so they cannot drift out of step with the aggregated values if the
# set of regions in the data changes.
names = summary_df.index.tolist()
fig = px.pie(
    summary_df,
    values  = 'number_of_customers',
    names   = names,
    color_discrete_sequence=px.colors.sequential.RdBu,
    hole = 0.5
  )

# Tweak and Label
fig.update_layout(
    title = {
        'text': 'Distribution of Customers Per Region',
        'x': 0.45,
        'xanchor': 'center',
        'yanchor': 'top'}
  )
# Show the percentage and the label inside each slice
fig.update_traces(textposition='inside', textinfo='percent+label')

# Display
fig.show()

### Exporting Plotly Visualisations

In [None]:
# install chart studio
!pip install chart_studio

# import chart studio
import chart_studio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting chart_studio
  Downloading chart_studio-1.1.0-py3-none-any.whl (64 kB)
[K     |████████████████████████████████| 64 kB 1.8 MB/s 
Collecting retrying>=1.3.3
  Downloading retrying-1.3.3.tar.gz (10 kB)
Building wheels for collected packages: retrying
  Building wheel for retrying (setup.py) ... [?25l[?25hdone
  Created wheel for retrying: filename=retrying-1.3.3-py3-none-any.whl size=11447 sha256=7fa7cd3ed4b3fb797b0f9ee1ebd499906da83ae7ad434fb4af296d45ded26e7d
  Stored in directory: /root/.cache/pip/wheels/f9/8d/8d/f6af3f7f9eea3553bc2fe6d53e4b287dad18b06a861ac56ddf
Successfully built retrying
Installing collected packages: retrying, chart-studio
Successfully installed chart-studio-1.1.0 retrying-1.3.3


In [None]:
# Chart Studio credentials are read from the environment so the API key is
# never stored in the notebook; if the key is not set, it is requested through
# a hidden prompt instead.
# SECURITY: an API key was previously committed in this cell. Treat it as
# compromised and regenerate it (profile > settings > regenerate key).
username = os.environ.get("CHART_STUDIO_USERNAME", "swambasi")  # your username
api_key = os.environ.get("CHART_STUDIO_API_KEY") or getpass("Chart Studio API key: ")
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

In [None]:
# Upload the current `fig` to Chart Studio under the given filename.
# NOTE(review): `fig` is whichever figure was assigned last — the
# customers-per-region chart if the cells were run in order; confirm that is
# the chart intended for this filename.
import chart_studio.plotly as py
py.plot(fig, filename = 'Electricity Distribution', auto_open=True)

'https://plotly.com/~swambasi/1/'

In [None]:
# Build the HTML <iframe> snippet used to embed the hosted chart.
# NOTE(review): the URL is hard-coded; presumably it should match the URL
# returned by the upload step — confirm before re-publishing.
import chart_studio.tools as tls
tls.get_embed('https://plotly.com/~swambasi/1/')

'<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~swambasi/1.embed" height="525" width="100%"></iframe>'