# Analysis of Total Eclipse Sightings across Various States in the United States.

## 
In this analysis, we will meticulously refine the original dataset acquired for this project, leveraging it to unveil compelling trends which we will subsequently visualize in our HTML file. Our objective is todeterminen the average duration of the Total Eclipse per state within its path. Furthermore, we aim to compute the percentage of states in the US that witnessed the Total Eclipse in its entirety compared to those that experienced it only partially. Lastly, we will employ HTML, CSS, and Javascript to map the eclipse's path along with its duration timings, providing an immersive visual representation of these sightings trends.

# 

# 1. Cleanup of our Data

In [1]:
import pandas as pd
from pathlib import Path
import csv
import json

In [None]:
# Import Resource 1
# cvs_file = Path("Resources/Eclipse_Cities_Totality_duration.csv")

In [2]:
# Import Resource 2 (Original Data)
cvs_file2 = Path("static/Eclipse_Cities_Totality.csv")

In [None]:
# Read our data file with the Pandas library
# Not every CSV requires an encoding, but be aware this can come up
# csv_file_df = pd.read_csv(cvs_file, encoding="ISO-8859-1")

In [3]:
# Read our data file with the Pandas library
# Not every CSV requires an encoding, but be aware this can come up
csv_file2_df = pd.read_csv(cvs_file2, encoding="ISO-8859-1")

In [None]:
# Show the first five rows of Resource 1
# csv_file_df.head()

In [4]:
# Show the first five rows of Resource 2
csv_file2_df.head()

Unnamed: 0,ï»¿X,Y,STATE,NAME,LAT,LON,Start_Partial,Start_Total,End_Total,End_Partial,Midpoint_Totality,ObjectId,TestTime
0,-94.200112,34.638786,AR,Acorn,34.638786,-94.200112,2024/02/18 17:30:40+00,2024/02/18 18:47:35+00,2024/02/18 18:51:37+00,2024/02/18 20:08:30+00,2024/02/18 18:49:36+00,1,12:30:40
1,-91.826194,35.113627,AR,McRae,35.113627,-91.826194,2024/02/18 17:35:10+00,2024/02/18 18:52:42+00,2024/02/18 18:55:26+00,2024/02/18 20:12:30+00,2024/02/18 18:54:04+00,2,12:35:10
2,-92.899132,35.03993,AR,Adona,35.03993,-92.899132,2024/02/18 17:33:20+00,2024/02/18 18:50:08+00,2024/02/18 18:54:22+00,2024/02/18 20:10:50+00,2024/02/18 18:52:15+00,3,12:33:20
3,-92.451217,34.618588,AR,Alexander,34.618588,-92.451217,2024/02/18 17:33:20+00,2024/02/18 18:51:09+00,2024/02/18 18:53:38+00,2024/02/18 20:11:10+00,2024/02/18 18:52:24+00,4,12:33:20
4,-91.08345,35.892907,AR,Alicia,35.892907,-91.08345,2024/02/18 17:37:30+00,2024/02/18 18:54:29+00,2024/02/18 18:58:05+00,2024/02/18 20:14:10+00,2024/02/18 18:56:17+00,5,12:37:30


In [5]:
# Convert the 'Start_Total' and 'End_Total' columns to datetime with the appropriate format
csv_file2_df['Start_Total'] = pd.to_datetime(csv_file2_df['Start_Total'], format='%Y/%m/%d %H:%M:%S%z')
csv_file2_df['End_Total'] = pd.to_datetime(csv_file2_df['End_Total'], format='%Y/%m/%d %H:%M:%S%z')

# Calculate the duration by subtracting 'Start_Total' from 'End_Total' and store it in a new column 'Total_Duration'
csv_file2_df['Total_Duration'] = csv_file2_df['End_Total'] - csv_file2_df['Start_Total']

# Show the DataFrame to see the results
csv_file2_df.head()

Unnamed: 0,ï»¿X,Y,STATE,NAME,LAT,LON,Start_Partial,Start_Total,End_Total,End_Partial,Midpoint_Totality,ObjectId,TestTime,Total_Duration
0,-94.200112,34.638786,AR,Acorn,34.638786,-94.200112,2024/02/18 17:30:40+00,2024-02-18 18:47:35+00:00,2024-02-18 18:51:37+00:00,2024/02/18 20:08:30+00,2024/02/18 18:49:36+00,1,12:30:40,0 days 00:04:02
1,-91.826194,35.113627,AR,McRae,35.113627,-91.826194,2024/02/18 17:35:10+00,2024-02-18 18:52:42+00:00,2024-02-18 18:55:26+00:00,2024/02/18 20:12:30+00,2024/02/18 18:54:04+00,2,12:35:10,0 days 00:02:44
2,-92.899132,35.03993,AR,Adona,35.03993,-92.899132,2024/02/18 17:33:20+00,2024-02-18 18:50:08+00:00,2024-02-18 18:54:22+00:00,2024/02/18 20:10:50+00,2024/02/18 18:52:15+00,3,12:33:20,0 days 00:04:14
3,-92.451217,34.618588,AR,Alexander,34.618588,-92.451217,2024/02/18 17:33:20+00,2024-02-18 18:51:09+00:00,2024-02-18 18:53:38+00:00,2024/02/18 20:11:10+00,2024/02/18 18:52:24+00,4,12:33:20,0 days 00:02:29
4,-91.08345,35.892907,AR,Alicia,35.892907,-91.08345,2024/02/18 17:37:30+00,2024-02-18 18:54:29+00:00,2024-02-18 18:58:05+00:00,2024/02/18 20:14:10+00,2024/02/18 18:56:17+00,5,12:37:30,0 days 00:03:36


In [6]:
# Convert the Total_Duration column to a custom string format of "mm:ss"
csv_file2_df['Total_Duration'] = csv_file2_df['Total_Duration'].dt.components.minutes.map("{:02d}".format) + ":" + csv_file2_df['Total_Duration'].dt.components.seconds.map("{:02d}".format)

# Show the DataFrame to see the results
csv_file2_df

Unnamed: 0,ï»¿X,Y,STATE,NAME,LAT,LON,Start_Partial,Start_Total,End_Total,End_Partial,Midpoint_Totality,ObjectId,TestTime,Total_Duration
0,-94.200112,34.638786,AR,Acorn,34.638786,-94.200112,2024/02/18 17:30:40+00,2024-02-18 18:47:35+00:00,2024-02-18 18:51:37+00:00,2024/02/18 20:08:30+00,2024/02/18 18:49:36+00,1,12:30:40,04:02
1,-91.826194,35.113627,AR,McRae,35.113627,-91.826194,2024/02/18 17:35:10+00,2024-02-18 18:52:42+00:00,2024-02-18 18:55:26+00:00,2024/02/18 20:12:30+00,2024/02/18 18:54:04+00,2,12:35:10,02:44
2,-92.899132,35.039930,AR,Adona,35.039930,-92.899132,2024/02/18 17:33:20+00,2024-02-18 18:50:08+00:00,2024-02-18 18:54:22+00:00,2024/02/18 20:10:50+00,2024/02/18 18:52:15+00,3,12:33:20,04:14
3,-92.451217,34.618588,AR,Alexander,34.618588,-92.451217,2024/02/18 17:33:20+00,2024-02-18 18:51:09+00:00,2024-02-18 18:53:38+00:00,2024/02/18 20:11:10+00,2024/02/18 18:52:24+00,4,12:33:20,02:29
4,-91.083450,35.892907,AR,Alicia,35.892907,-91.083450,2024/02/18 17:37:30+00,2024-02-18 18:54:29+00:00,2024-02-18 18:58:05+00:00,2024/02/18 20:14:10+00,2024/02/18 18:56:17+00,5,12:37:30,03:36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3325,-73.257254,44.307338,VT,West Charlotte,44.307338,-73.257254,2024/02/18 18:14:20+00,2024-02-18 19:26:13+00:00,2024-02-18 19:29:08+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:40+00,3326,07:14:20,02:55
3326,-73.015994,44.603324,VT,Westford,44.603324,-73.015994,2024/02/18 18:14:50+00,2024-02-18 19:26:18+00:00,2024-02-18 19:29:40+00:00,2024/02/18 20:37:30+00,2024/02/18 19:27:59+00,3327,07:14:50,03:22
3327,-73.184857,44.495587,VT,Winooski,44.495587,-73.184857,2024/02/18 18:14:30+00,2024-02-18 19:26:08+00:00,2024-02-18 19:29:25+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:46+00,3328,07:14:30,03:17
3328,-72.470074,44.551809,VT,Wolcott,44.551809,-72.470074,2024/02/18 18:15:30+00,2024-02-18 19:27:12+00:00,2024-02-18 19:30:05+00:00,2024/02/18 20:37:50+00,2024/02/18 19:28:38+00,3329,07:15:30,02:53


In [7]:
# Rename the column with gibberish characters
csv_file2_df = csv_file2_df.rename(columns={'ï»¿X': 'X'})

# Show the DataFrame to see the results
csv_file2_df

Unnamed: 0,X,Y,STATE,NAME,LAT,LON,Start_Partial,Start_Total,End_Total,End_Partial,Midpoint_Totality,ObjectId,TestTime,Total_Duration
0,-94.200112,34.638786,AR,Acorn,34.638786,-94.200112,2024/02/18 17:30:40+00,2024-02-18 18:47:35+00:00,2024-02-18 18:51:37+00:00,2024/02/18 20:08:30+00,2024/02/18 18:49:36+00,1,12:30:40,04:02
1,-91.826194,35.113627,AR,McRae,35.113627,-91.826194,2024/02/18 17:35:10+00,2024-02-18 18:52:42+00:00,2024-02-18 18:55:26+00:00,2024/02/18 20:12:30+00,2024/02/18 18:54:04+00,2,12:35:10,02:44
2,-92.899132,35.039930,AR,Adona,35.039930,-92.899132,2024/02/18 17:33:20+00,2024-02-18 18:50:08+00:00,2024-02-18 18:54:22+00:00,2024/02/18 20:10:50+00,2024/02/18 18:52:15+00,3,12:33:20,04:14
3,-92.451217,34.618588,AR,Alexander,34.618588,-92.451217,2024/02/18 17:33:20+00,2024-02-18 18:51:09+00:00,2024-02-18 18:53:38+00:00,2024/02/18 20:11:10+00,2024/02/18 18:52:24+00,4,12:33:20,02:29
4,-91.083450,35.892907,AR,Alicia,35.892907,-91.083450,2024/02/18 17:37:30+00,2024-02-18 18:54:29+00:00,2024-02-18 18:58:05+00:00,2024/02/18 20:14:10+00,2024/02/18 18:56:17+00,5,12:37:30,03:36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3325,-73.257254,44.307338,VT,West Charlotte,44.307338,-73.257254,2024/02/18 18:14:20+00,2024-02-18 19:26:13+00:00,2024-02-18 19:29:08+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:40+00,3326,07:14:20,02:55
3326,-73.015994,44.603324,VT,Westford,44.603324,-73.015994,2024/02/18 18:14:50+00,2024-02-18 19:26:18+00:00,2024-02-18 19:29:40+00:00,2024/02/18 20:37:30+00,2024/02/18 19:27:59+00,3327,07:14:50,03:22
3327,-73.184857,44.495587,VT,Winooski,44.495587,-73.184857,2024/02/18 18:14:30+00,2024-02-18 19:26:08+00:00,2024-02-18 19:29:25+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:46+00,3328,07:14:30,03:17
3328,-72.470074,44.551809,VT,Wolcott,44.551809,-72.470074,2024/02/18 18:15:30+00,2024-02-18 19:27:12+00:00,2024-02-18 19:30:05+00:00,2024/02/18 20:37:50+00,2024/02/18 19:28:38+00,3329,07:15:30,02:53


In [8]:
# Drop the columns X and Y, because they are already in the Lat and Lon columns
csv_file2_df = csv_file2_df.drop(columns=['X', 'Y'])

# Show the DataFrame to see the results
csv_file2_df


Unnamed: 0,STATE,NAME,LAT,LON,Start_Partial,Start_Total,End_Total,End_Partial,Midpoint_Totality,ObjectId,TestTime,Total_Duration
0,AR,Acorn,34.638786,-94.200112,2024/02/18 17:30:40+00,2024-02-18 18:47:35+00:00,2024-02-18 18:51:37+00:00,2024/02/18 20:08:30+00,2024/02/18 18:49:36+00,1,12:30:40,04:02
1,AR,McRae,35.113627,-91.826194,2024/02/18 17:35:10+00,2024-02-18 18:52:42+00:00,2024-02-18 18:55:26+00:00,2024/02/18 20:12:30+00,2024/02/18 18:54:04+00,2,12:35:10,02:44
2,AR,Adona,35.039930,-92.899132,2024/02/18 17:33:20+00,2024-02-18 18:50:08+00:00,2024-02-18 18:54:22+00:00,2024/02/18 20:10:50+00,2024/02/18 18:52:15+00,3,12:33:20,04:14
3,AR,Alexander,34.618588,-92.451217,2024/02/18 17:33:20+00,2024-02-18 18:51:09+00:00,2024-02-18 18:53:38+00:00,2024/02/18 20:11:10+00,2024/02/18 18:52:24+00,4,12:33:20,02:29
4,AR,Alicia,35.892907,-91.083450,2024/02/18 17:37:30+00,2024-02-18 18:54:29+00:00,2024-02-18 18:58:05+00:00,2024/02/18 20:14:10+00,2024/02/18 18:56:17+00,5,12:37:30,03:36
...,...,...,...,...,...,...,...,...,...,...,...,...
3325,VT,West Charlotte,44.307338,-73.257254,2024/02/18 18:14:20+00,2024-02-18 19:26:13+00:00,2024-02-18 19:29:08+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:40+00,3326,07:14:20,02:55
3326,VT,Westford,44.603324,-73.015994,2024/02/18 18:14:50+00,2024-02-18 19:26:18+00:00,2024-02-18 19:29:40+00:00,2024/02/18 20:37:30+00,2024/02/18 19:27:59+00,3327,07:14:50,03:22
3327,VT,Winooski,44.495587,-73.184857,2024/02/18 18:14:30+00,2024-02-18 19:26:08+00:00,2024-02-18 19:29:25+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:46+00,3328,07:14:30,03:17
3328,VT,Wolcott,44.551809,-72.470074,2024/02/18 18:15:30+00,2024-02-18 19:27:12+00:00,2024-02-18 19:30:05+00:00,2024/02/18 20:37:50+00,2024/02/18 19:28:38+00,3329,07:15:30,02:53


In [9]:
# Make a copy of the DataFrame
cleaned_ttl_duration = csv_file2_df.copy()

# Show the DataFrame to see the results
cleaned_ttl_duration

Unnamed: 0,STATE,NAME,LAT,LON,Start_Partial,Start_Total,End_Total,End_Partial,Midpoint_Totality,ObjectId,TestTime,Total_Duration
0,AR,Acorn,34.638786,-94.200112,2024/02/18 17:30:40+00,2024-02-18 18:47:35+00:00,2024-02-18 18:51:37+00:00,2024/02/18 20:08:30+00,2024/02/18 18:49:36+00,1,12:30:40,04:02
1,AR,McRae,35.113627,-91.826194,2024/02/18 17:35:10+00,2024-02-18 18:52:42+00:00,2024-02-18 18:55:26+00:00,2024/02/18 20:12:30+00,2024/02/18 18:54:04+00,2,12:35:10,02:44
2,AR,Adona,35.039930,-92.899132,2024/02/18 17:33:20+00,2024-02-18 18:50:08+00:00,2024-02-18 18:54:22+00:00,2024/02/18 20:10:50+00,2024/02/18 18:52:15+00,3,12:33:20,04:14
3,AR,Alexander,34.618588,-92.451217,2024/02/18 17:33:20+00,2024-02-18 18:51:09+00:00,2024-02-18 18:53:38+00:00,2024/02/18 20:11:10+00,2024/02/18 18:52:24+00,4,12:33:20,02:29
4,AR,Alicia,35.892907,-91.083450,2024/02/18 17:37:30+00,2024-02-18 18:54:29+00:00,2024-02-18 18:58:05+00:00,2024/02/18 20:14:10+00,2024/02/18 18:56:17+00,5,12:37:30,03:36
...,...,...,...,...,...,...,...,...,...,...,...,...
3325,VT,West Charlotte,44.307338,-73.257254,2024/02/18 18:14:20+00,2024-02-18 19:26:13+00:00,2024-02-18 19:29:08+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:40+00,3326,07:14:20,02:55
3326,VT,Westford,44.603324,-73.015994,2024/02/18 18:14:50+00,2024-02-18 19:26:18+00:00,2024-02-18 19:29:40+00:00,2024/02/18 20:37:30+00,2024/02/18 19:27:59+00,3327,07:14:50,03:22
3327,VT,Winooski,44.495587,-73.184857,2024/02/18 18:14:30+00,2024-02-18 19:26:08+00:00,2024-02-18 19:29:25+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:46+00,3328,07:14:30,03:17
3328,VT,Wolcott,44.551809,-72.470074,2024/02/18 18:15:30+00,2024-02-18 19:27:12+00:00,2024-02-18 19:30:05+00:00,2024/02/18 20:37:50+00,2024/02/18 19:28:38+00,3329,07:15:30,02:53


In [10]:
# Convert the dataframe into a new CSV file to use if needed
csv_file = "Resources/cleaned_TotalE_Duration.csv"

# Save the DataFrame to a CSV file
csv_file2_df.to_csv(csv_file, index=False)

print("DataFrame saved as CSV file:", csv_file)

DataFrame saved as CSV file: Resources/cleaned_TotalE_Duration.csv


In [11]:
# Convert the DataFrame to a JSON object
json_data = cleaned_ttl_duration.to_json(orient='records')

# Print the JSON object
print(json_data)

[{"STATE":"AR","NAME":"Acorn","LAT":34.638786,"LON":-94.200112,"Start_Partial":"2024\/02\/18 17:30:40+00","Start_Total":1708282055000,"End_Total":1708282297000,"End_Partial":"2024\/02\/18 20:08:30+00","Midpoint_Totality":"2024\/02\/18 18:49:36+00","ObjectId":1,"TestTime":"12:30:40","Total_Duration":"04:02"},{"STATE":"AR","NAME":"McRae","LAT":35.113627,"LON":-91.826194,"Start_Partial":"2024\/02\/18 17:35:10+00","Start_Total":1708282362000,"End_Total":1708282526000,"End_Partial":"2024\/02\/18 20:12:30+00","Midpoint_Totality":"2024\/02\/18 18:54:04+00","ObjectId":2,"TestTime":"12:35:10","Total_Duration":"02:44"},{"STATE":"AR","NAME":"Adona","LAT":35.03993,"LON":-92.899132,"Start_Partial":"2024\/02\/18 17:33:20+00","Start_Total":1708282208000,"End_Total":1708282462000,"End_Partial":"2024\/02\/18 20:10:50+00","Midpoint_Totality":"2024\/02\/18 18:52:15+00","ObjectId":3,"TestTime":"12:33:20","Total_Duration":"04:14"},{"STATE":"AR","NAME":"Alexander","LAT":34.618588,"LON":-92.451217,"Start_Par

In [12]:
# Pretty-Print the json_data (Option 1)
# Convert the JSON object to a formatted string 
pretty_json = json.dumps(json_data, indent=4)

# Print the pretty-printed JSON
print(pretty_json)

"[{\"STATE\":\"AR\",\"NAME\":\"Acorn\",\"LAT\":34.638786,\"LON\":-94.200112,\"Start_Partial\":\"2024\\/02\\/18 17:30:40+00\",\"Start_Total\":1708282055000,\"End_Total\":1708282297000,\"End_Partial\":\"2024\\/02\\/18 20:08:30+00\",\"Midpoint_Totality\":\"2024\\/02\\/18 18:49:36+00\",\"ObjectId\":1,\"TestTime\":\"12:30:40\",\"Total_Duration\":\"04:02\"},{\"STATE\":\"AR\",\"NAME\":\"McRae\",\"LAT\":35.113627,\"LON\":-91.826194,\"Start_Partial\":\"2024\\/02\\/18 17:35:10+00\",\"Start_Total\":1708282362000,\"End_Total\":1708282526000,\"End_Partial\":\"2024\\/02\\/18 20:12:30+00\",\"Midpoint_Totality\":\"2024\\/02\\/18 18:54:04+00\",\"ObjectId\":2,\"TestTime\":\"12:35:10\",\"Total_Duration\":\"02:44\"},{\"STATE\":\"AR\",\"NAME\":\"Adona\",\"LAT\":35.03993,\"LON\":-92.899132,\"Start_Partial\":\"2024\\/02\\/18 17:33:20+00\",\"Start_Total\":1708282208000,\"End_Total\":1708282462000,\"End_Partial\":\"2024\\/02\\/18 20:10:50+00\",\"Midpoint_Totality\":\"2024\\/02\\/18 18:52:15+00\",\"ObjectId\":3

In [13]:
# Pretty-Print the json_data (Option 2)
# Import Pretty Print
from pprint import pprint

In [14]:
# Pretty Print the JSON object
pprint(json_data)

('[{"STATE":"AR","NAME":"Acorn","LAT":34.638786,"LON":-94.200112,"Start_Partial":"2024\\/02\\/18 '
 '17:30:40+00","Start_Total":1708282055000,"End_Total":1708282297000,"End_Partial":"2024\\/02\\/18 '
 '20:08:30+00","Midpoint_Totality":"2024\\/02\\/18 '
 '18:49:36+00","ObjectId":1,"TestTime":"12:30:40","Total_Duration":"04:02"},{"STATE":"AR","NAME":"McRae","LAT":35.113627,"LON":-91.826194,"Start_Partial":"2024\\/02\\/18 '
 '17:35:10+00","Start_Total":1708282362000,"End_Total":1708282526000,"End_Partial":"2024\\/02\\/18 '
 '20:12:30+00","Midpoint_Totality":"2024\\/02\\/18 '
 '18:54:04+00","ObjectId":2,"TestTime":"12:35:10","Total_Duration":"02:44"},{"STATE":"AR","NAME":"Adona","LAT":35.03993,"LON":-92.899132,"Start_Partial":"2024\\/02\\/18 '
 '17:33:20+00","Start_Total":1708282208000,"End_Total":1708282462000,"End_Partial":"2024\\/02\\/18 '
 '20:10:50+00","Midpoint_Totality":"2024\\/02\\/18 '
 '18:52:15+00","ObjectId":3,"TestTime":"12:33:20","Total_Duration":"04:14"},{"STATE":"AR","NAME"

In [15]:
# Specify the file path where you want to save the JSON file
converted_file = "Resources/cleaned_TotalE_Duration.json"

# Save the DataFrame to a JSON file
cleaned_ttl_duration.to_json(converted_file, orient='records')

# Confirm that the JSON file has been saved
print("DataFrame saved as JSON file:", converted_file)

DataFrame saved as JSON file: Resources/cleaned_TotalE_Duration.json


# 

# The following section encompasses the analysis for the information required to plot charts in our Javascript file.
.



## This section will identify the states that experienced the total eclipse, enabling us to determine how many states were in the total eclipse's path and the average duration of this event in its entirety. Specifically, it will focus on the duration of the Total Eclipse phase in each state along the path. This analysis excludes states that only experienced partial eclipse phases. 
### Please note that we are only considering the 50 states within the United States and not the 16 insular areas it considers as territories.

# 2. Analysis of our Cleaned Data

In [16]:
# Make a list of all the states that had Total Eclipse sightings
unique_states = csv_file2_df['STATE'].unique()
print(unique_states)

['AR' 'IL' 'IN' 'MO' 'NY' 'NH' 'OH' 'KY' 'ME' 'MI' 'PA' 'TX' 'OK' 'VT']


In [23]:
# Make a copy of the DataFrame
avrg_duration_df = csv_file2_df.copy()

# Show the DataFrame to see the results
avrg_duration_df

Unnamed: 0,STATE,NAME,LAT,LON,Start_Partial,Start_Total,End_Total,End_Partial,Midpoint_Totality,ObjectId,TestTime,Total_Duration
0,AR,Acorn,34.638786,-94.200112,2024/02/18 17:30:40+00,2024-02-18 18:47:35+00:00,2024-02-18 18:51:37+00:00,2024/02/18 20:08:30+00,2024/02/18 18:49:36+00,1,12:30:40,04:02
1,AR,McRae,35.113627,-91.826194,2024/02/18 17:35:10+00,2024-02-18 18:52:42+00:00,2024-02-18 18:55:26+00:00,2024/02/18 20:12:30+00,2024/02/18 18:54:04+00,2,12:35:10,02:44
2,AR,Adona,35.039930,-92.899132,2024/02/18 17:33:20+00,2024-02-18 18:50:08+00:00,2024-02-18 18:54:22+00:00,2024/02/18 20:10:50+00,2024/02/18 18:52:15+00,3,12:33:20,04:14
3,AR,Alexander,34.618588,-92.451217,2024/02/18 17:33:20+00,2024-02-18 18:51:09+00:00,2024-02-18 18:53:38+00:00,2024/02/18 20:11:10+00,2024/02/18 18:52:24+00,4,12:33:20,02:29
4,AR,Alicia,35.892907,-91.083450,2024/02/18 17:37:30+00,2024-02-18 18:54:29+00:00,2024-02-18 18:58:05+00:00,2024/02/18 20:14:10+00,2024/02/18 18:56:17+00,5,12:37:30,03:36
...,...,...,...,...,...,...,...,...,...,...,...,...
3325,VT,West Charlotte,44.307338,-73.257254,2024/02/18 18:14:20+00,2024-02-18 19:26:13+00:00,2024-02-18 19:29:08+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:40+00,3326,07:14:20,02:55
3326,VT,Westford,44.603324,-73.015994,2024/02/18 18:14:50+00,2024-02-18 19:26:18+00:00,2024-02-18 19:29:40+00:00,2024/02/18 20:37:30+00,2024/02/18 19:27:59+00,3327,07:14:50,03:22
3327,VT,Winooski,44.495587,-73.184857,2024/02/18 18:14:30+00,2024-02-18 19:26:08+00:00,2024-02-18 19:29:25+00:00,2024/02/18 20:37:20+00,2024/02/18 19:27:46+00,3328,07:14:30,03:17
3328,VT,Wolcott,44.551809,-72.470074,2024/02/18 18:15:30+00,2024-02-18 19:27:12+00:00,2024-02-18 19:30:05+00:00,2024/02/18 20:37:50+00,2024/02/18 19:28:38+00,3329,07:15:30,02:53


In [24]:
# Getting the average of Total_Duration per state
# Splitting the 'Total_Duration' column into minutes and seconds due to being in a string format
minutes_seconds = avrg_duration_df['Total_Duration'].str.split(':', expand=True)  # Split by the colon, expand=True separeates Minutes and Seconds in DF

# Convert minutes and seconds to integers to calculate average. Because there are new columns. Index 0 is minutes, and index 1 is seconds.
minutes_seconds[0] = minutes_seconds[0].astype(int)
minutes_seconds[1] = minutes_seconds[1].astype(int)

# Convert to timedelta object (multiply minutes by 60 to convert to seconds)
# This line multiplies minutes by 60, to add them to the seconds in index 1, and implies that the imput unit is in seconds 's'
avrg_duration_df['Total_Duration'] = pd.to_timedelta(minutes_seconds[0] * 60 + minutes_seconds[1], unit='s')

# Group the unique values of the states with the Total_duration column, and get the average
average_duration_per_state = avrg_duration_df.groupby('STATE')['Total_Duration'].mean()
average_duration_per_state

STATE
AR   0 days 00:03:15.638356164
IL   0 days 00:03:16.301587301
IN   0 days 00:03:09.551194539
KY   0 days 00:01:45.447368421
ME   0 days 00:02:21.857142857
MI   0 days 00:00:39.333333333
MO   0 days 00:03:04.535714285
NH      0 days 00:02:14.500000
NY   0 days 00:03:02.168734491
OH   0 days 00:03:00.567099567
OK   0 days 00:02:29.268292682
PA   0 days 00:02:32.281690140
TX   0 days 00:03:14.636209813
VT   0 days 00:02:41.298850574
Name: Total_Duration, dtype: timedelta64[ns]

In [26]:
# Reset index to include 'STATE' as a regular column
average_duration_per_state_reset = average_duration_per_state.reset_index()

# Convert average duration per state to seconds
average_duration_per_state_seconds = average_duration_per_state_reset['Total_Duration'].dt.total_seconds()

# Convert seconds to MM:SS format
average_duration_per_state_MM_SS = average_duration_per_state_seconds.apply(lambda x: '{:02d}:{:02d}'.format(int(x // 60), int(x % 60)))

# Create a new DataFrame with state and duration in MM:SS format
average_duration_per_state_df = pd.DataFrame({'STATE': average_duration_per_state_reset['STATE'], 'Average_Duration_MM_SS': average_duration_per_state_MM_SS})

average_duration_per_state_df

   STATE Average_Duration_MM_SS
0     AR                  03:15
1     IL                  03:16
2     IN                  03:09
3     KY                  01:45
4     ME                  02:21
5     MI                  00:39
6     MO                  03:04
7     NH                  02:14
8     NY                  03:02
9     OH                  03:00
10    OK                  02:29
11    PA                  02:32
12    TX                  03:14
13    VT                  02:41


In [27]:
average_duration_per_state_df

Unnamed: 0,STATE,Average_Duration_MM_SS
0,AR,03:15
1,IL,03:16
2,IN,03:09
3,KY,01:45
4,ME,02:21
5,MI,00:39
6,MO,03:04
7,NH,02:14
8,NY,03:02
9,OH,03:00


In [39]:
# Change the order of the rows so that the states display in the order the sun passed over them during the Total Eclipse
# Define the order of states
desired_order = ['TX', 'OK', 'AR', 'MO', 'KY', 'IL', 'IN', 'OH', 'MI', 'PA', 'NY', 'VT', 'NH', 'ME']

# Reindex the DataFrame to display rows in the defined order
average_duration_per_state_df_ordered = average_duration_per_state_df.set_index('STATE').reindex(desired_order).reset_index()

# Show the Data Frame
average_duration_per_state_df_ordered

Unnamed: 0,STATE,Average_Duration_MM_SS
0,TX,03:14
1,OK,02:29
2,AR,03:15
3,MO,03:04
4,KY,01:45
5,IL,03:16
6,IN,03:09
7,OH,03:00
8,MI,00:39
9,PA,02:32


In [40]:
# Rename the column 'Average_Duration_MM_SS' to 'Average Duration'
average_duration_per_state_df_ordered = average_duration_per_state_df_ordered.rename(columns={'Average_Duration_MM_SS': 'Average Duration'})

# Show the Data Frame
average_duration_per_state_df_ordered

Unnamed: 0,STATE,Average Duration
0,TX,03:14
1,OK,02:29
2,AR,03:15
3,MO,03:04
4,KY,01:45
5,IL,03:16
6,IN,03:09
7,OH,03:00
8,MI,00:39
9,PA,02:32


In [42]:
# List of all 50 states in the US
all_states = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware',
    'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
    'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
    'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina',
    'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
    'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'
]

# List of US states with total eclipse viewings
total_eclipse_states = [
    'Texas', 'Oklahoma', 'Arkansas', 'Missouri', 'Kentucky', 'Illinois', 'Indiana', 'Ohio',
    'Michigan', 'Pennsylvania', 'New York', 'Vermont', 'New Hampshire', 'Maine'
]

# Extract the states with ONLY partial eclipse viewings
partial_eclipse_states = [state for state in all_states if state not in total_eclipse_states]

# Output the lists
print("States with ONLY Partial Eclipse Viewings:")
print(partial_eclipse_states)

print("\nStates with Total Eclipse Viewings(in the order the sun passed over each one):")
print(total_eclipse_states)


States with ONLY Partial Eclipse Viewings:
['Alabama', 'Alaska', 'Arizona', 'California', 'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Iowa', 'Kansas', 'Louisiana', 'Maryland', 'Massachusetts', 'Minnesota', 'Mississippi', 'Montana', 'Nebraska', 'Nevada', 'New Jersey', 'New Mexico', 'North Carolina', 'North Dakota', 'Oregon', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Utah', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']

States with Total Eclipse Viewings(in the order the sun passed over each one):
['Texas', 'Oklahoma', 'Arkansas', 'Missouri', 'Kentucky', 'Illinois', 'Indiana', 'Ohio', 'Michigan', 'Pennsylvania', 'New York', 'Vermont', 'New Hampshire', 'Maine']


In [43]:
# Calculate the percentage of states with total eclipse viewings out of all the United States
percentage_total_eclipse_states = (len(total_eclipse_states) / len(all_states)) * 100

print("Percentage of states with Total Eclipse Viewings: {:.2f}%".format(percentage_total_eclipse_states))


Percentage of states with Total Eclipse Viewings: 28.00%


## Now we will take this information to create our plots on the JavaScript file within the repository for this project.