In [44]:
import pandas as pd
import numpy as np

In [2]:
#read data 

#sheet_url = "../../NESIP_FIELD_DATA/data.xlsx"

#sheet_ea_passed = "2. Energy Access(Passed)"

#ea_passed = pd.read_excel(sheet_url, sheet_ea_passed)


In [75]:
# Read data
sheet_url = "../../NESIP_FIELD_DATA/data.xlsx"

sheet_ea_passed = "2. Energy Access(Passed)"

# Load the sheet into a DataFrame
energy_access_data = pd.read_excel(sheet_url, sheet_name=sheet_ea_passed, dtype=str)


#### 1. Capacity

#### 2. Availability

###### Daytime Availability

In [83]:
# Define mapping dictionary
tier_mapping = {
    "0 hour": "Tier 0",
    "1 hour": "Tier 1",
    "2 hours": "Tier 2",
    "3 hours": "Tier 3",
    "4 hours": "Tier 4 / Tier 5",
    "> 4 hours": "Tier 4 / Tier 5"
}

In [84]:
# Strip spaces and map values
energy_access_data["Daytime Availability"] = energy_access_data["2G. During the day (6 am–6 pm), how many hours of electricity do you get from the primary sources?"].str.strip().map(tier_mapping)

# Fill NaN values with "No Tier"
energy_access_data["Daytime Availability"].fillna("No Tier", inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  energy_access_data["Daytime Availability"].fillna("No Tier", inplace=True)


In [85]:
# Display result
print(energy_access_data[["Daytime Availability"]].head())

  Daytime Availability
0      Tier 4 / Tier 5
1              No Tier
2               Tier 2
3               Tier 2
4               Tier 3


###### Evening Availability

In [78]:
# Define mapping dictionary
tier_mapping = {
    "0 hour": "Tier 0",
    "1 hour": "Tier 1",
    "2 hours": "Tier 2",
    "3 hours": "Tier 3",
    "4 hours": "Tier 4 / Tier 5",
    "> 4 hours": "Tier 4 / Tier 5"
}

In [None]:
# Strip spaces and map values
energy_access_data["Evening Availability"] = energy_access_data["2H. At Night (6 pm–6 am), how many hours of electricity do you get from the primary sources?"].str.strip().map(tier_mapping)

# Fill NaN values with "No Tier"
energy_access_data["Evening Availability"].fillna("No Tier", inplace=True)


In [81]:
# Display result
print(energy_access_data[["Evening Availability"]].head())

  Evening Availability
0      Tier 4 / Tier 5
1              No Tier
2      Tier 4 / Tier 5
3               Tier 2
4               Tier 1


##### DAILY AVAILABILITY

#### 3. Reliability

#### 4. Quality

In [76]:
# Function to determine the tier
def map_quality(row):
    challenges = row["2R. If yes, what are the main challenges you face with your primary electricity source? (Outages, cost, low voltage, etc.)"]
    quality = row["2U. How would you describe the quality of energy supply to your household"]
    
    if pd.isna(challenges) or pd.isna(quality) or (challenges.strip() == "" and quality.strip() == ""):
        return "No Tier"
    
    if "Low Voltage" in challenges:
        if quality == "Good quality of energy supply, Voltage does not affect use of appliances":
            return "Tier 2"

    if quality == "Good quality of energy supply, Voltage does not affect use of appliances":
        return "Tier 3 / Tier 4 / Tier 5"
    
    if quality == "Poor Quality (Damages or cannot operate appliances)":
        return "Tier 0 / Tier 1"

    return "No Tier"



In [77]:
# Apply transformation
energy_access_data["Quality"] = energy_access_data.apply(map_quality, axis=1)

# Display the first few rows to verify
print(energy_access_data[["Quality"]].head())


                    Quality
0  Tier 3 / Tier 4 / Tier 5
1                   No Tier
2           Tier 0 / Tier 1
3           Tier 0 / Tier 1
4                    Tier 2


##### 5. Affordability

In [52]:
# Convert numerical column to numeric type, forcing errors to NaN for empty values
energy_access_data["2P. how much do you pay for the primary source of electricity for the period mentioned earlier? (Exact Amount Specified n ₦)"] = pd.to_numeric(
    energy_access_data["2P. how much do you pay for the primary source of electricity for the period mentioned earlier? (Exact Amount Specified n ₦)"], errors="coerce"
)

# Mapping of frequency to yearly multiplier
multipliers = {
    "Daily": 365,
    "Weekly": 52,
    "Bi-Weekly": 26,
    "Monthly": 12,
    "Quarterly": 4,
    "Yearly": 1
}

# Compute yearly and monthly payments, keeping NaNs
energy_access_data["yearly grid payment"] = energy_access_data["2N. How often do you pay for the primary source of energy?"].map(multipliers) * energy_access_data["2P. how much do you pay for the primary source of electricity for the period mentioned earlier? (Exact Amount Specified n ₦)"]
energy_access_data["monthly grid payment"] = energy_access_data["yearly grid payment"] / 12  # Convert yearly to monthly


In [57]:
energy_access_data["1L(iii). What is your total monthly household income?, Specify in ₦."] = pd.to_numeric(energy_access_data["1L(iii). What is your total monthly household income?, Specify in ₦."], errors="coerce")

energy_access_data["monthly grid payment"].head()

0     8666.666667
1             NaN
2    12000.000000
3     5000.000000
4     2000.000000
Name: monthly grid payment, dtype: float64

In [56]:
# Function to determine "Legality"
def determine_affordability(monthly_pay, monthly_allowance):
    if monthly_pay >= (0.15 * monthly_allowance):
        return "Tier 0 / Tier 1"
    elif monthly_pay < (0.15 * monthly_allowance):
        return "Tier 2 / Tier 3 / Tier 4 / Tier 5"
    else:
        return "No Tier"  # Return blank if none of the conditions match

In [59]:
monthly_grid_spend = "monthly grid payment"
household_income = "1L(iii). What is your total monthly household income?, Specify in ₦."

# Apply function to create "Legality" column
energy_access_data["Affordability"] = energy_access_data.apply(
    lambda row: determine_affordability(row[monthly_grid_spend], row[household_income]), axis=1
)

# Display the first few rows to verify
print(energy_access_data[["Affordability"]].head())

                       Affordability
0  Tier 2 / Tier 3 / Tier 4 / Tier 5
1                            No Tier
2  Tier 2 / Tier 3 / Tier 4 / Tier 5
3  Tier 2 / Tier 3 / Tier 4 / Tier 5
4  Tier 2 / Tier 3 / Tier 4 / Tier 5


#### 5. Legality

In [21]:
# Column names
metered_col = "5D. Are you metered?"
payment_col = "5E. If not metered, How do you pay for electricity?"


In [22]:
# Clean and normalize the data
energy_access_data[metered_col] = energy_access_data[metered_col].astype(str).str.strip().str.lower()
energy_access_data[payment_col] = energy_access_data[payment_col].astype(str).str.strip().str.lower()

In [30]:
# Function to determine "Legality"
def determine_legality(metered, payment):
    if metered in ["yes, with a prepaid meter", "yes, with a postpaid meter"]:
        return "Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5"
    elif payment in [
        "through community billing arrangements (e.g., shared connections in the neighborhood)",
        "through a flat rate or estimated billing system provided by the electricity company"
    ]:
        return "Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5"
    elif payment == "do not pay for electricity":
        return "Tier 0"
    else:
        return "No Tier"  # Return blank if none of the conditions match

In [31]:
# Apply function to create "Legality" column
energy_access_data["Legality"] = energy_access_data.apply(
    lambda row: determine_legality(row[metered_col], row[payment_col]), axis=1
)

# Display the first few rows to verify
print(energy_access_data[["Legality"]].head(20))


                                      Legality
0   Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
1                                      No Tier
2   Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
3   Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
4   Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
5                                      No Tier
6                                      No Tier
7   Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
8   Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
9                                      No Tier
10  Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
11  Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
12                                     No Tier
13  Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
14  Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
15  Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
16                                     No Tier
17  Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
18                                     No Tier
19  Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5


#### 6. Health and Safety

In [32]:
# Column name
col_name = "6B. Have you or anyone in your household experienced an incident caused by an electricity connection (e.g., shock, fire)?"

# Clean and normalize values
energy_access_data[col_name] = (
    energy_access_data[col_name]
    .str.strip()  # Remove leading/trailing spaces
    .str.lower()  # Convert to lowercase for consistency
)

In [33]:
# Mapping function
def map_health_safety(value):
    if value == "true":
        return "Tier 0 / Tier 1 / Tier 2 / Tier 3"
    elif value == "false":
        return "Tier 4 / Tier 5"
    else:
        return "No Tier"  # Keep NaN as an empty string

In [34]:
# Apply transformation
energy_access_data["Health and Safety"] = energy_access_data[col_name].apply(map_health_safety)

# Display the first few rows to verify
print(energy_access_data[["Health and Safety"]].head())


  Health and Safety
0   Tier 4 / Tier 5
1           No Tier
2   Tier 4 / Tier 5
3   Tier 4 / Tier 5
4   Tier 4 / Tier 5


##### Dashboard Analysis - PER HOUSEHOLD

In [35]:
# Create an empty DataFrame
per_household_summary = pd.DataFrame()

##### 1. State and LGA

In [36]:
# Assign specific columns (with their values)
per_household_summary['State'] = energy_access_data['State']
per_household_summary['LGA'] = energy_access_data['LGA']

print(per_household_summary.head())


         State                LGA
0  Cross River  Calabar Municipal
1  Cross River  Calabar Municipal
2        Delta      Ughelli North
3      Bayelsa              Nembe
4  Cross River  Calabar Municipal


##### 2. Health and Safety

In [37]:
# Assign specific columns (with their values)
per_household_summary['Health and Safety'] = energy_access_data['Health and Safety']

print(per_household_summary.head())


         State                LGA Health and Safety
0  Cross River  Calabar Municipal   Tier 4 / Tier 5
1  Cross River  Calabar Municipal           No Tier
2        Delta      Ughelli North   Tier 4 / Tier 5
3      Bayelsa              Nembe   Tier 4 / Tier 5
4  Cross River  Calabar Municipal   Tier 4 / Tier 5


##### 3. Legality

In [38]:
# Assign specific columns (with their values)
per_household_summary['Legality'] = energy_access_data['Legality']

print(per_household_summary['Legality'].head())


0    Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
1                                       No Tier
2    Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
3    Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
4    Tier 1 / Tier 2 / Tier 3 / Tier 4 / Tier 5
Name: Legality, dtype: object


##### 4. Affordability

In [None]:
# Cost of standard consumption package is less than 15% of household income per year

2N. How often do you pay for the primary source of energy?	   
2P. how much do you pay for the primary source of electricity for the period mentioned earlier? (Exact Amount Specified n ₦)