In [71]:
import pandas as pd
import os
import ast  # To safely evaluate string as list

folder_path = "_recoveryData-20250216T194447Z-001/_recoveryData"

combined_data = []

for file in os.listdir(folder_path):
    if file.endswith(".txt"):
        file_path = os.path.join(folder_path, file)

        # Open and read the content
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read().strip()

            # Safely convert string to list (like JSON but for Python lists)
            try:
                data_row = ast.literal_eval(content)
                combined_data.append(data_row)
            except Exception as e:
                print(f"Failed to parse {file}: {e}")

# Convert to DataFrame
combined_df = pd.DataFrame(combined_data)

print(combined_df.head())

combined_df.to_csv('combined_data.csv', index=False)

  0         1   2   3   4   5   6                    7            8    \
0   2  b7537bb6                      02/12/2025 09:35:18  Cross River   
1   3  55fe6700                      02/13/2025 19:23:51  Cross River   
2   2  904a7682                      02/14/2025 14:55:35  Cross River   
3   3  f67cfea0                      02/14/2025 15:07:42  Cross River   
4   4  9df886bc                      02/14/2025 15:16:24  Cross River   

                 9    ... 140 141 142  \
0  Calabar Municipal  ...   Y           
1  Calabar Municipal  ...   N   N       
2  Calabar Municipal  ...   N   N       
3  Calabar Municipal  ...   Y   N       
4  Calabar Municipal  ...   Y   N       

                                                 143 144 145          146  \
0  We need improvement in energy supply not limit...          Silverstone   
1                 I need to be connected to the grid          Silverstone   
2                          The light supply is okay.          Silverstone   
3     

In [1]:
import pandas as pd

In [3]:
sheet_ea_day = "0. Energy Access(LP)"
sheet_ea_dump = "1. Energy Access Dump"
sheet_ea_passed = "2. Energy Access(Passed)"
sheet_ea_bad = "3. Energy Access(Bad)"


energy_access_day = pd.read_excel("data/data.xlsx", sheet_ea_day)
ea_dump = pd.read_excel("data/data.xlsx", sheet_ea_dump)
ea_passed = pd.read_excel("data/data.xlsx", sheet_ea_passed)
ea_bad = pd.read_excel("data/data.xlsx", sheet_ea_bad)

In [5]:
# Create Urban_Collected and Rural_Collected based on "Area Description"
ea_passed["Urban_Collected"] = (ea_passed["Area Description"] == "Urban").astype(int)
ea_passed["Rural_Collected"] = (ea_passed["Area Description"] == "Rural").astype(int)

# Group by State and LGA, sum Urban and Rural collected
summary_collected = ea_passed.groupby(["State", "LGA"])[["Urban_Collected", "Rural_Collected"]].sum().reset_index()

In [6]:
summary_collected

Unnamed: 0,State,LGA,Urban_Collected,Rural_Collected
0,Akwa-Ibom,Itu,2,2
1,Bayelsa,Ekeremor,3,6
2,Bayelsa,Kolokuma/Opokuma,0,1
3,Bayelsa,Nembe,1,0
4,Bayelsa,Ogbia,2,7
...,...,...,...,...
66,River's,Eleme,1,0
67,River's,Emohua,6,6
68,River's,Obio-Akpor,8,6
69,River's,Ogba-Egbema-Ndoni,0,6


In [16]:
sampling_sheet = "Sampling Numbers"
sampling_numbers = pd.read_excel("data/data.xlsx", sampling_sheet)

In [17]:
sampling_numbers

Unnamed: 0,State,LGA,Urban_Target,Rural_Target
0,Abia,Aba North,3,8
1,Abia,Aba South,3,8
2,Abia,Arochukwu,3,8
3,Abia,Bende,3,8
4,Abia,Ikwuano,3,8
...,...,...,...,...
752,Zamfara,Maradun,3,7
753,Zamfara,Maru,3,7
754,Zamfara,Shinkafi,3,7
755,Zamfara,Talata-Mafara,3,7


In [18]:
# Outer Join on 'LGA'
merged_collection_summ = pd.merge(sampling_numbers, summary_collected, on=['State', 'LGA'] , how='outer')

In [24]:
merged_collection_summ.fillna(0, inplace=True)


In [25]:
merged_collection_summ[merged_collection_summ['State'] == 'Akwa-Ibom'].head(30)

Unnamed: 0,State,LGA,Urban_Target,Rural_Target,Urban_Collected,Rural_Collected,Urban_Completion,Rural_Completion
38,Akwa-Ibom,Abak,3.0,6.0,0.0,0.0,0.0,0.0
39,Akwa-Ibom,Eastern Obolo,3.0,6.0,0.0,0.0,0.0,0.0
40,Akwa-Ibom,Eket,3.0,6.0,0.0,0.0,0.0,0.0
41,Akwa-Ibom,Esit Eket,3.0,6.0,0.0,0.0,0.0,0.0
42,Akwa-Ibom,Essien Udim,3.0,6.0,0.0,0.0,0.0,0.0
43,Akwa-Ibom,Etim Ekpo,3.0,6.0,0.0,0.0,0.0,0.0
44,Akwa-Ibom,Etinan,3.0,6.0,0.0,0.0,0.0,0.0
45,Akwa-Ibom,Ibeno,3.0,6.0,0.0,0.0,0.0,0.0
46,Akwa-Ibom,Ibesikpo Asutan,3.0,6.0,0.0,0.0,0.0,0.0
47,Akwa-Ibom,Ibiono-Ibom,3.0,6.0,0.0,0.0,0.0,0.0


In [26]:
# Calculate Completion Percentage per LGA
merged_collection_summ["Urban_Completion"] = (merged_collection_summ["Urban_Collected"] / merged_collection_summ["Urban_Target"]) * 100
merged_collection_summ["Rural_Completion"] = (merged_collection_summ["Rural_Collected"] / merged_collection_summ["Rural_Target"]) * 100

# Aggregate to State Level
state_completion = merged_collection_summ.groupby("State").agg(
    Urban_Completion=("Urban_Completion", "mean"),
    Rural_Completion=("Rural_Completion", "mean")
).reset_index()

# Calculate Overall Completion per State
state_completion["Overall_Completion"] = (state_completion["Urban_Completion"] + state_completion["Rural_Completion"]) / 2


In [28]:
state_completion

Unnamed: 0,State,Urban_Completion,Rural_Completion,Overall_Completion
0,Abia,0.0,0.0,0.0
1,Adamawa,0.0,0.0,0.0
2,Akwa-Ibom,2.150538,1.075269,1.612903
3,Anambra,0.0,0.0,0.0
4,Bauchi,0.0,0.0,0.0
5,Bayelsa,70.833333,44.642857,57.738095
6,Benue,0.0,0.0,0.0
7,Borno,0.0,0.0,0.0
8,Cross River,12.962963,7.638889,10.300926
9,Delta,7.2,5.6,6.4


In [31]:

# Calculate the Deficit (Data not yet collected)
merged_collection_summ["Urban_Deficit"] = merged_collection_summ["Urban_Target"] - merged_collection_summ["Urban_Collected"]
merged_collection_summ["Rural_Deficit"] = merged_collection_summ["Rural_Target"] - merged_collection_summ["Rural_Collected"]

# If collected >= target, set deficit to 0 (i.e., already completed)
merged_collection_summ["Urban_Deficit"] = merged_collection_summ["Urban_Deficit"].apply(lambda x: x if x > 0 else 0)
merged_collection_summ["Rural_Deficit"] = merged_collection_summ["Rural_Deficit"].apply(lambda x: x if x > 0 else 0)

# Sum the deficits and targets across all states and LGAs
total_urban_deficit = merged_collection_summ["Urban_Deficit"].sum()
total_rural_deficit = merged_collection_summ["Rural_Deficit"].sum()

total_urban_target = merged_collection_summ["Urban_Target"].sum()
total_rural_target = merged_collection_summ["Rural_Target"].sum()

# Total Deficit (Urban + Rural)
total_deficit = total_urban_deficit + total_rural_deficit

# Total Target (Urban + Rural)
total_target = total_urban_target + total_rural_target

# Overall Percentage Completion
overall_completion = round((1 - (total_deficit / total_target)) * 100,2)

print(f"Total Urban Deficit: {total_urban_deficit}")
print(f"Total Rural Deficit: {total_rural_deficit}")
print(f"Overall Deficit: {total_deficit}")
print(f"Total Target: {total_target}")
print(f"Overall Project Completion: {overall_completion:.2f}%")


Total Urban Deficit: 3370.0
Total Rural Deficit: 3449.0
Overall Deficit: 6819.0
Total Target: 7405.0
Overall Project Completion: 7.91%


In [29]:
import pandas as pd

# Sample collected data from the previous step
data_collected = {
    "State": [
        "Cross River", "Cross River", "Delta", "Bayelsa", "River's", "Lagos", "Ondo", "Ekiti"
    ],
    "LGA": [
        "Calabar Municipal", "Calabar South", "Ughelli North", "Yenagoa", "Obio-Akpor", "Ifako-Ijaiye", "Akure South", "Ikere"
    ],
    "Urban_Target": [3, 3, 3, 3, 3, 3, 3, 3],
    "Rural_Target": [8, 8, 8, 8, 8, 8, 8, 8],
    "Urban_Collected": [3, 0, 1, 4, 2, 1, 1, 5],
    "Rural_Collected": [1, 1, 0, 0, 0, 0, 0, 1],
}

merged_collection_summ = pd.DataFrame(data_collected)

# Calculate the Deficit (Data not yet collected)
merged_collection_summ["Urban_Deficit"] = merged_collection_summ["Urban_Target"] - merged_collection_summ["Urban_Collected"]
merged_collection_summ["Rural_Deficit"] = merged_collection_summ["Rural_Target"] - merged_collection_summ["Rural_Collected"]

# If collected >= target, set deficit to 0 (i.e., already completed)
merged_collection_summ["Urban_Deficit"] = merged_collection_summ["Urban_Deficit"].apply(lambda x: x if x > 0 else 0)
merged_collection_summ["Rural_Deficit"] = merged_collection_summ["Rural_Deficit"].apply(lambda x: x if x > 0 else 0)

# Sum the deficits and targets across all states and LGAs
total_urban_deficit = merged_collection_summ["Urban_Deficit"].sum()
total_rural_deficit = merged_collection_summ["Rural_Deficit"].sum()

total_urban_target = merged_collection_summ["Urban_Target"].sum()
total_rural_target = merged_collection_summ["Rural_Target"].sum()

# Total Deficit (Urban + Rural)
total_deficit = total_urban_deficit + total_rural_deficit

# Total Target (Urban + Rural)
total_target = total_urban_target + total_rural_target

# Overall Percentage Completion
overall_completion = (1 - (total_deficit / total_target)) * 100

print(f"Total Urban Deficit: {total_urban_deficit}")
print(f"Total Rural Deficit: {total_rural_deficit}")
print(f"Overall Deficit: {total_deficit}")
print(f"Total Target: {total_target}")
print(f"Overall Project Completion: {overall_completion:.2f}%")


Total Urban Deficit: 10
Total Rural Deficit: 61
Overall Deficit: 71
Total Target: 88
Overall Project Completion: 19.32%


In [33]:
state_completion[state_completion["Overall_Completion"] >= 100]

Unnamed: 0,State,Urban_Completion,Rural_Completion,Overall_Completion
30,Oyo,inf,inf,inf


In [36]:
import pandas as pd

# Sample Data
data = {
    "Enumerator Name": [
        "Alex Esuong", "Alex Esuong", "Ello Utoro", "Faith Roland", "EDIDIONG OKON", "EDIDIONG OKON",
        "Ubong Godwin Aaron", "Tariere Yerikema", "Tariere Yerikema", "Faith Roland", "Kalada Pascal",
        "Kurofuo Zilayefa Dorothy", "Kalada Pascal", "Efe Mitchell"
    ],
    "vista_remark": [
        "Good", "Good", "Good", "Good", "Good", "Good", "Good", "Good", "Good", "Good", "Good", "Bad", "Bad", "Good"
    ]
}

df = pd.DataFrame(data)

# Create Summary Table
summary = df.pivot_table(
    index="Enumerator Name",
    columns="vista_remark",
    aggfunc="size",
    fill_value=0
).reset_index()

# Ensure "Good" and "Bad" columns are present even if some are missing
if "Good" not in summary.columns:
    summary["Good"] = 0
if "Bad" not in summary.columns:
    summary["Bad"] = 0

# Reorder columns for readability
summary = summary[['Enumerator Name', 'Good', 'Bad']]

print(summary)


vista_remark           Enumerator Name  Good  Bad
0                          Alex Esuong     2    0
1                        EDIDIONG OKON     2    0
2                         Efe Mitchell     1    0
3                           Ello Utoro     1    0
4                         Faith Roland     2    0
5                        Kalada Pascal     1    1
6             Kurofuo Zilayefa Dorothy     0    1
7                     Tariere Yerikema     2    0
8                   Ubong Godwin Aaron     1    0


In [37]:
summary[['Enumerator Name', 'Good', 'Bad']]

vista_remark,Enumerator Name,Good,Bad
0,Alex Esuong,2,0
1,EDIDIONG OKON,2,0
2,Efe Mitchell,1,0
3,Ello Utoro,1,0
4,Faith Roland,2,0
5,Kalada Pascal,1,1
6,Kurofuo Zilayefa Dorothy,0,1
7,Tariere Yerikema,2,0
8,Ubong Godwin Aaron,1,0


In [43]:
urban_target = sampling_numbers['Urban_Target'].sum()
rural_target = sampling_numbers['Rural_Target'].sum()

total_target = urban_target + rural_target 

In [44]:
total_target

7405

In [45]:
ea_passed.shape[0]

620

In [46]:
pd.to_datetime(ea_passed['Timestamp'], format='%m/%d/%Y %H:%M:%S')

0     2025-02-12 15:55:29
1     2025-02-12 16:35:33
2     2025-02-12 16:12:54
3     2025-02-12 16:51:19
4     2025-02-12 17:34:36
              ...        
615   2025-02-15 14:42:14
616   2025-02-15 16:05:10
617   2025-02-15 17:03:29
618   2025-02-15 17:38:23
619   2025-02-15 12:52:47
Name: Timestamp, Length: 620, dtype: datetime64[ns]

In [47]:
ea_passed['Timestamp'].dt.date

0      2025-02-12
1      2025-02-12
2      2025-02-12
3      2025-02-12
4      2025-02-12
          ...    
615    2025-02-15
616    2025-02-15
617    2025-02-15
618    2025-02-15
619    2025-02-15
Name: Timestamp, Length: 620, dtype: object

In [49]:
daily_collection_combined = ea_passed.groupby('date').size()

In [50]:
daily_avg_combined = daily_collection_combined.mean()

In [51]:
daily_avg_combined

124.0

In [52]:
# Remaining surveys to be collected
remaining_total = max(total_target - current_total, 0)

In [53]:
remaining_total

6785

In [54]:
days_to_complete = remaining_total / daily_avg_combined if daily_avg_combined > 0 else float('inf')

In [55]:
days_to_complete

54.71774193548387

In [56]:
today = datetime.date.today()
completion_date = today + datetime.timedelta(days=round(days_to_complete))

In [57]:
completion_date

datetime.date(2025, 4, 12)

In [41]:
import pandas as pd
import datetime

# Assuming ea_passed is your DataFrame
ea_passed['Timestamp'] = pd.to_datetime(ea_passed['Timestamp'], format='%m/%d/%Y %H:%M:%S')
ea_passed['date'] = ea_passed['Timestamp'].dt.date

# Daily collection count
daily_collection_combined = ea_passed.groupby('date').size()

# Average daily collection
daily_avg_combined = daily_collection_combined.mean()

# Current total
current_total = ea_passed.shape[0]

# Your project target (set this accordingly)
total_target = 15000

# Remaining surveys to be collected
remaining_total = max(total_target - current_total, 0)

# Calculate estimated completion time
days_to_complete = remaining_total / daily_avg_combined if daily_avg_combined > 0 else float('inf')

# Estimated completion date
today = datetime.date.today()
completion_date = today + datetime.timedelta(days=round(days_to_complete))

print(f"Estimated Completion Date: {completion_date.strftime('%Y-%m-%d')}")


Estimated Completion Date: 2025-06-12


In [58]:
# Total Target (Urban + Rural)
total_target = total_urban_target + total_rural_target

# Overall Percentage Completion
overall_completion = round((1 - (total_deficit / total_target)) * 100,2)
perc_deficit =  round((100  - overall_completion), 2)

############################################################################
# completion date
urban_target = sampling_numbers['Urban_Target'].sum()
rural_target = sampling_numbers['Rural_Target'].sum()

total_target = urban_target + rural_target 

current_total  = ea_passed.shape[0]
# Assuming ea_passed is your DataFrame
ea_passed['Timestamp'] = pd.to_datetime(ea_passed['Timestamp'], format='%m/%d/%Y %H:%M:%S')
ea_passed['date'] = ea_passed['Timestamp'].dt.date

# Daily collection count
daily_collection_combined = ea_passed.groupby('date').size()

# Average daily collection
daily_avg_combined = daily_collection_combined.mean()

# Exclude the latest date
latest_date = daily_collection_combined.index.max()
daily_collection_excluding_latest = daily_collection_combined[daily_collection_combined.index != latest_date]

# Average daily collection (excluding the latest date)
prev_daily_avg_combined = daily_collection_excluding_latest.mean()

perc_inc_dec_avg_col = round((daily_avg_combined - prev_daily_avg_combined)/prev_daily_avg_combined * 100, 0)

# Remaining surveys to be collected
remaining_total = max(total_target - current_total, 0)

# Calculate estimated completion time
days_to_complete = remaining_total / daily_avg_combined  # if daily_avg_combined > 0 else float('inf')

# Estimated completion date
today = datetime.date.today()
completion_date = today + datetime.timedelta(days=round(days_to_complete))

completion_date_text = print(f"Estimated Completion Date: {completion_date.strftime('%Y-%m-%d')}")


Estimated Completion Date: 2025-04-12


In [59]:
completion_date

datetime.date(2025, 4, 12)

In [60]:
completion_date_text = completion_date.strftime('%Y-%m-%d')

In [61]:
completion_date_text

'2025-04-12'