<a href="https://colab.research.google.com/github/wjvlno/python_bootcamp/blob/main/session5_bonus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Aggregating and Visualizing Presidential Polling Trends

In [None]:
## Step 1: Import required libraries
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

In [None]:
## Step 2: Define the URL for the polling data
url = 'https://projects.fivethirtyeight.com/polls/data/presidential_general_averages.csv'

## Step 3: Fetch the data
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
# raise_for_status() stops here on an HTTP error instead of saving an error
# page to disk and handing it to pd.read_csv as if it were polling data.
response = requests.get(url, timeout=30)
response.raise_for_status()
with open('president_general_polls.csv', 'wb') as file:
    file.write(response.content)

## Step 4: Load the data into a pandas DataFrame
polls_df = pd.read_csv('president_general_polls.csv')

In [None]:
## Step 5: Explore the data
# Show the first rows so the column names and value formats can be checked.
preview_rows = polls_df.head()
print(preview_rows)

In [None]:
## Step 6: Filter the data for Trump vs. Biden matchups
# Keep rows whose candidate name mentions either candidate.
# na=False treats a missing name as "no match"; without it the mask would
# contain NaN, which boolean indexing rejects.
# .copy() makes the result an independent DataFrame so that adding columns to
# it afterwards does not raise SettingWithCopyWarning.
is_matchup_candidate = polls_df['candidate'].str.contains('Trump|Biden', na=False)
polls_df = polls_df[is_matchup_candidate].copy()

In [None]:
## Step 7: Convert the existing date column to datetime
polls_df['date'] = pd.to_datetime(polls_df['date'])

## Step 8: Extract month and year from the date
polls_df['year_month'] = polls_df['date'].dt.to_period('M')

## Step 9: Define the date of Trump's conviction
# The verdict date is May 30, 2024. This is the same cutoff the
# "Accounting for Polling Error" part of this notebook uses and prints in its
# plot title, so both halves split the data at the same point.
conviction_date = datetime(2024, 5, 30)

## Step 10: Separate data into before and after the conviction
# Polls dated on the conviction day itself are counted as "after".
is_before_conviction = polls_df['date'] < conviction_date
before_conviction = polls_df[is_before_conviction]
after_conviction = polls_df[polls_df['date'] >= conviction_date]

In [None]:
## Step 11: Group by month, state, and candidate to calculate average percentages
# Collapse every matching candidate name onto one of two fixed labels first.
# The rows were selected by substring, so the raw column may hold several
# spellings per person; fixed labels guarantee exactly two candidate groups.
candidate_names = polls_df['candidate']
mentions_trump = candidate_names.str.contains('Trump', na=False)
mentions_biden = candidate_names.str.contains('Biden', na=False)
labelled_df = polls_df[mentions_trump | mentions_biden].assign(
    candidate=mentions_trump.map({True: 'Trump', False: 'Biden'})
)

# Named aggregation yields flat, explicitly named columns directly.
grouped_df = (
    labelled_df.groupby(['year_month', 'state', 'candidate'])['pct_estimate']
    .agg(mean_pct_estimate='mean', std_pct_estimate='std')
    .reset_index()
)

## Step 12: Create a pivot table for plotting
wide_df = grouped_df.pivot_table(
    index=['year_month', 'state'],
    columns='candidate',
    values=['mean_pct_estimate', 'std_pct_estimate'],
)

# Select the columns by (statistic, candidate) name rather than by position.
# pivot_table sorts the candidate labels, so 'Biden' comes before 'Trump';
# assigning names positionally in Trump-then-Biden order would attach each
# candidate's numbers to the other candidate. reindex also restores (as NaN)
# any column pivot_table dropped for being entirely NaN, e.g. a std column
# when every group has a single observation.
wide_df = wide_df.reindex(columns=pd.MultiIndex.from_tuples([
    ('mean_pct_estimate', 'Trump'),
    ('mean_pct_estimate', 'Biden'),
    ('std_pct_estimate', 'Trump'),
    ('std_pct_estimate', 'Biden'),
]))
wide_df.columns = ['Trump_mean', 'Biden_mean', 'Trump_std', 'Biden_std']
pivot_df = wide_df.reset_index()

In [None]:
## Step 13: Define swing states
swing_states = ['Georgia', 'Michigan', 'Pennsylvania', 'Wisconsin']

## Step 14: Plot the results for swing states
# One entry per line drawn: (legend label, colour, mean column, std column).
candidate_series = (
    ('Donald Trump', 'red', 'Trump_mean', 'Trump_std'),
    ('Joe Biden', 'blue', 'Biden_mean', 'Biden_std'),
)

for state in swing_states:
    state_df = pivot_df[pivot_df['state'] == state]
    # The monthly periods are converted to strings for the x axis.
    month_labels = state_df['year_month'].astype(str)

    plt.figure(figsize=(10, 5))
    for legend_label, line_color, mean_col, std_col in candidate_series:
        # Points are the monthly mean; error bars are the monthly std.
        plt.errorbar(
            month_labels,
            state_df[mean_col],
            yerr=state_df[std_col],
            label=legend_label,
            color=line_color,
            fmt='-o',
        )
    plt.title(f'Polling Trends in {state}')
    plt.xlabel('Month')
    plt.ylabel('Average Polling Percentage')
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()




In [None]:
## Step 15: Calculate and plot national averages
# For each month, take the mean and std of the per-state monthly means.
# NOTE(review): this is an unweighted average over every value of 'state'
# present in pivot_df, not a dedicated national series - confirm that this is
# the intended definition of "national".
national_df = (
    pivot_df.groupby('year_month')
    .agg(
        Trump_national_mean=('Trump_mean', 'mean'),
        Trump_national_std=('Trump_mean', 'std'),
        Biden_national_mean=('Biden_mean', 'mean'),
        Biden_national_std=('Biden_mean', 'std'),
    )
    .reset_index()
)

# The monthly periods are converted to strings for the x axis.
national_months = national_df['year_month'].astype(str)

plt.figure(figsize=(12, 6))
for legend_label, line_color, column_prefix in (
    ('Donald Trump', 'red', 'Trump_national'),
    ('Joe Biden', 'blue', 'Biden_national'),
):
    plt.errorbar(
        national_months,
        national_df[f'{column_prefix}_mean'],
        yerr=national_df[f'{column_prefix}_std'],
        label=legend_label,
        color=line_color,
        fmt='-o',
    )
plt.title('National Polling Trends')
plt.xlabel('Month')
plt.ylabel('Average Polling Percentage')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Accounting for Polling Error

In [None]:
# Python Bootcamp: Aggregating and Visualizing 2024 Presidential Election Polling Data with Custom Weights

## Step 1: Import required libraries
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

## Step 2: Define the URL for the polling data
url = 'https://projects.fivethirtyeight.com/polls/data/presidential_general_averages.csv'

## Step 3: Fetch the data
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
# raise_for_status() stops here on an HTTP error instead of saving an error
# page to disk and handing it to pd.read_csv as if it were polling data.
response = requests.get(url, timeout=30)
response.raise_for_status()
with open('presidential_general_averages.csv', 'wb') as file:
    file.write(response.content)

## Step 4: Load the data into a pandas DataFrame
polls_df = pd.read_csv('presidential_general_averages.csv')

## Step 5: Explore the data
print(polls_df.head())

## Step 6: Filter the data for Trump vs. Biden matchups
# .copy() makes the filtered result an independent DataFrame so the column
# assignments below do not raise SettingWithCopyWarning.
polls_df = polls_df[polls_df['candidate'].isin(['Trump', 'Biden'])].copy()

## Step 7: Convert the existing date column to datetime
polls_df['date'] = pd.to_datetime(polls_df['date'])

## Step 8: Define the date of Trump's conviction
conviction_date = datetime(2024, 5, 30)

## Step 9: Create a column to distinguish before and after the conviction
# Vectorised comparison instead of a per-row lambda. The conviction day itself
# counts as 'After', and so does a missing date (it compares as not-before).
is_before_conviction = polls_df['date'] < conviction_date
polls_df['period'] = is_before_conviction.map({True: 'Before', False: 'After'})

## Step 10: Define swing states
swing_states = ['Arizona', 'Florida', 'Georgia', 'Michigan', 'North Carolina', 'Pennsylvania', 'Wisconsin']

## Step 11: Filter the data for swing states
polls_df = polls_df[polls_df['state'].isin(swing_states)].copy()

## Step 12: Calculate the confidence interval width and weights
polls_df['ci_width'] = polls_df['hi'] - polls_df['lo']
# Narrower intervals get larger weights. A zero-width interval would give an
# infinite weight, which makes the mean infinite and turns every standardized
# weight into 0 or NaN. Non-positive widths are therefore treated as missing
# for the division only; ci_width itself keeps the raw value.
positive_width = polls_df['ci_width'].where(polls_df['ci_width'] > 0)
polls_df['ci_weight'] = 1 / positive_width
# Dividing by the mean rescales the weights so that they average to 1.
polls_df['standardized_weight'] = polls_df['ci_weight'] / polls_df['ci_weight'].mean()




In [None]:
# Plotting average polling data over time for four select states with error ribbons

# Define the four states to plot
states_to_plot = ['Arizona', 'Florida', 'Georgia', 'Michigan']

# Build the per-date, per-state table for this cell directly from polls_df.
# When the notebook is run top to bottom, the pivot_df in scope here is the
# monthly table from the first part, which has no 'date', '*_pct' or
# '*_weight' columns; those only exist after the Step 13 cell that follows.
selected_polls = polls_df[polls_df['state'].isin(states_to_plot)]
wide_df = selected_polls.pivot_table(
    index=['date', 'state'],
    columns='candidate',
    values=['pct_estimate', 'standardized_weight'],
    aggfunc={'pct_estimate': 'mean', 'standardized_weight': 'sum'},
)
# Select columns by (value, candidate) name so the labels cannot drift from
# the data; reindex restores (as NaN) any column pivot_table dropped for
# being entirely NaN.
wide_df = wide_df.reindex(columns=pd.MultiIndex.from_tuples([
    ('pct_estimate', 'Biden'),
    ('pct_estimate', 'Trump'),
    ('standardized_weight', 'Biden'),
    ('standardized_weight', 'Trump'),
]))
wide_df.columns = ['Biden_pct', 'Trump_pct', 'Biden_weight', 'Trump_weight']
filtered_df = wide_df.reset_index()

# Plot the data
fig, axes = plt.subplots(2, 2, figsize=(15, 10), sharex=True, sharey=True)
fig.suptitle('Polling Averages Over Time for Selected States')

# axes.flat walks the 2x2 grid row by row, one panel per state.
for ax, state in zip(axes.flat, states_to_plot):
    state_df = filtered_df[filtered_df['state'] == state]

    # NOTE(review): the ribbon half-width is the summed standardized weight
    # (derived from the inverse CI width), not the hi/lo interval itself -
    # confirm this is the intended error band.
    for legend_label, line_color, pct_col, weight_col in (
        ('Donald Trump', 'red', 'Trump_pct', 'Trump_weight'),
        ('Joe Biden', 'blue', 'Biden_pct', 'Biden_weight'),
    ):
        ax.plot(state_df['date'], state_df[pct_col], label=legend_label, color=line_color)
        ax.fill_between(
            state_df['date'],
            state_df[pct_col] - state_df[weight_col],
            state_df[pct_col] + state_df[weight_col],
            color=line_color,
            alpha=0.3,
        )

    ax.set_title(state)
    ax.set_xlabel('Date')
    ax.set_ylabel('Polling Percentage')
    ax.legend()

# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()


In [None]:
## Step 13: Calculate the weighted average difference between Trump and Biden percentages
# One row per (date, state, period): mean estimate and summed weight for each
# candidate.
wide_df = polls_df.pivot_table(
    index=['date', 'state', 'period'],
    columns='candidate',
    values=['pct_estimate', 'standardized_weight'],
    aggfunc={'pct_estimate': 'mean', 'standardized_weight': 'sum'},
)
# Select columns by (value, candidate) name instead of relying on the order
# pivot_table happens to emit. reindex also restores (as NaN) any column that
# pivot_table dropped for being entirely NaN, so the rename below cannot fail
# with a length mismatch.
wide_df = wide_df.reindex(columns=pd.MultiIndex.from_tuples([
    ('pct_estimate', 'Biden'),
    ('pct_estimate', 'Trump'),
    ('standardized_weight', 'Biden'),
    ('standardized_weight', 'Trump'),
]))
wide_df.columns = ['Biden_pct', 'Trump_pct', 'Biden_weight', 'Trump_weight']
pivot_df = wide_df.reset_index()

# Each candidate's estimate is scaled by its own weight before subtracting;
# the result is normalised by the combined weight. Positive means Trump ahead.
trump_weighted = pivot_df['Trump_pct'] * pivot_df['Trump_weight']
biden_weighted = pivot_df['Biden_pct'] * pivot_df['Biden_weight']
combined_weight = pivot_df['Trump_weight'] + pivot_df['Biden_weight']
pivot_df['weighted_diff'] = (trump_weighted - biden_weighted) / combined_weight

## Step 14: Plot the results as violin plots
# One split violin per state: the two halves are the 'Before' and 'After'
# periods, with quartile lines drawn inside each half.
period_colors = {'Before': 'lightblue', 'After': 'lightcoral'}

plt.figure(figsize=(15, 10))
sns.violinplot(
    data=pivot_df,
    x='state',
    y='weighted_diff',
    hue='period',
    split=True,
    inner='quart',
    palette=period_colors,
)
plt.title("Polling Averages Before vs. After Trump's Conviction (5/30/24)")
plt.xlabel('State')
plt.ylabel('Difference in Polling Percentage (Trump - Biden)')
plt.legend(title='Period')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## Step 15: Plot the average difference with color based on who is ahead
# Positive differences mean Trump leads; zero or negative go to Biden.
# Rows with no computable difference (a candidate missing on that date) are
# left unlabelled rather than being counted as a Biden lead.
diff_values = pivot_df['weighted_diff']
leader_labels = (diff_values > 0).map({True: 'Trump', False: 'Biden'})
pivot_df['ahead'] = leader_labels.where(diff_values.notna())

plt.figure(figsize=(15, 10))
sns.violinplot(
    data=pivot_df,
    x='state',
    y='weighted_diff',
    hue='ahead',
    # Fixing the hue levels keeps the side assigned to each candidate stable
    # and keeps split=True valid even if one candidate never leads.
    hue_order=['Trump', 'Biden'],
    split=True,
    inner='quart',
    palette={'Trump': 'red', 'Biden': 'blue'},
)
# The halves of each violin are grouped by leading candidate, not by period,
# so the title describes that grouping.
plt.title('Average Difference in Polling Percentages (Trump vs. Biden) by Leading Candidate')
plt.xlabel('State')
plt.ylabel('Difference in Polling Percentage (Trump - Biden)')
plt.legend(title='Leading Candidate')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()