### Latest survey exploration
## Based on live data from dawum.de

In [None]:
# credit to: https://github.com/kennell/dawum-to-csv/tree/master

import csv
import io
import requests
from slugify import slugify
import pandas as pd


# Endpoint that is requested once to download the dataset as JSON.
DAWUM_API_URL = 'https://api.dawum.de/'

# [character, replacement] pairs passed to slugify(..., replacements=...) so that
# German umlauts are transliterated before the slug is built.
SLUGIFY_GERMAN_UMLAUTS = [
    list(pair)
    for pair in (
        ('ä', 'ae'),
        ('Ä', 'Ae'),
        ('ö', 'oe'),
        ('Ö', 'Oe'),
        ('ü', 'ue'),
        ('Ü', 'Ue'),
    )
]

# Download the complete dataset in a single request.
# - timeout: without one, requests waits indefinitely and the notebook cell hangs
#   if the server never answers.
# - raise_for_status: surface HTTP errors (4xx/5xx) here instead of failing later
#   with a confusing JSON decode error or a missing-key error.
rsp = requests.get(DAWUM_API_URL, timeout=30)
rsp.raise_for_status()
data = rsp.json()

# Lookup tables keyed by the ids used in the API response. Each entry holds the
# columns that are later merged into every survey row that references the id.

# Parliaments: slug of the shortcut plus the full name.
parliaments = {
    parliament_key: {
        'parliament_id': slugify(entry['Shortcut'], replacements=SLUGIFY_GERMAN_UMLAUTS),
        'parliament_name': entry['Name'],
    }
    for parliament_key, entry in data['Parliaments'].items()
}

# Institutes: slug of the name plus the name itself.
institutes = {
    institute_key: {
        'institute_id': slugify(entry['Name'], replacements=SLUGIFY_GERMAN_UMLAUTS),
        'institute_name': entry['Name'],
    }
    for institute_key, entry in data['Institutes'].items()
}

# Taskers: slug of the name plus the name itself.
taskers = {
    tasker_key: {
        'tasker_id': slugify(entry['Name'], replacements=SLUGIFY_GERMAN_UMLAUTS),
        'tasker_name': entry['Name'],
    }
    for tasker_key, entry in data['Taskers'].items()
}

# Parties: slug of the shortcut (used later to name the result_* columns) plus the full name.
parties = {
    party_key: {
        'party_id': slugify(entry['Shortcut'], replacements=SLUGIFY_GERMAN_UMLAUTS),
        'party_name': entry['Name'],
    }
    for party_key, entry in data['Parties'].items()
}

# Flatten every survey into one dict: survey metadata, then the parliament,
# tasker and institute columns looked up by id, then one result_<party> column
# per party reported in that survey.
surveys = {}
result_fieldnames = set()  # every result_* column name seen across all surveys
for survey_key, survey in data['Surveys'].items():
    period = survey['Survey_Period']
    record = {
        'survey_id': survey_key,
        'survey_date': survey['Date'],
        'survey_persons': survey['Surveyed_Persons'],
        'survey_start': period['Date_Start'],
        'survey_end': period['Date_End'],
        # Merge order (parliament, tasker, institute) fixes the column order downstream.
        **parliaments[survey['Parliament_ID']],
        **taskers[survey['Tasker_ID']],
        **institutes[survey['Institute_ID']],
    }
    surveys[survey_key] = record
    for party_key, share in survey['Results'].items():
        column = 'result_' + parties[party_key]['party_id']
        result_fieldnames.add(column)
        record[column] = float(share)

# displaying latest survey data

In [None]:
# One row per survey, indexed by survey id.
surveys_df = pd.DataFrame.from_dict(surveys, orient='index')
surveys_df['survey_date'] = pd.to_datetime(surveys_df['survey_date'])

# Define the filtering conditions (BSW 2025): Bundestag surveys dated 2024-01-01 or later
filter_condition = (surveys_df['parliament_id'] == 'bundestag') & (surveys_df['survey_date'] >= '2024-01-01')

# Apply the filter. The explicit .copy() makes filtered_surveys an independent
# frame, so the column assignment below is not a chained assignment on a slice
# of surveys_df (which triggers pandas' SettingWithCopyWarning).
filtered_surveys = surveys_df[filter_condition].copy()
filtered_surveys['survey_persons'] = pd.to_numeric(filtered_surveys['survey_persons'])
#filtered_surveys.head()

In [None]:
import pandas as pd
import plotly.graph_objects as go

# Work on a private copy so the helper column added below does not end up in filtered_surveys
df = filtered_surveys.copy()

# Linearly map survey_persons onto [min_size, max_size] for the bubble size.
min_size, max_size = 2, 20
persons_min = df['survey_persons'].min()
persons_range = df['survey_persons'].max() - persons_min
if persons_range > 0:
    df['scaled_size'] = ((df['survey_persons'] - persons_min) /
                         persons_range) * (max_size - min_size) + min_size
else:
    # All surveys have the same sample size (or there is no usable value at all):
    # the min-max formula would divide by zero and turn every size into NaN,
    # so fall back to one uniform mid-sized bubble.
    df['scaled_size'] = (min_size + max_size) / 2

# Marker color per result column
party_colors = {
    "result_cdu-csu": "#000000",  # black
    "result_afd": "#0047AB",  # dark blue
    "result_spd": "#E3001B",  # red
    "result_gruene": "#1A7F22",  # green
    "result_sonstige": "#800080",  # purple
    "result_linke": "#C60084",  # magenta
    "result_bsw": "#5E1D4D",  # dark purple
    "result_fdp": "#FFD700",  # yellow
}

# Result columns (parties) to plot, in the order of party_colors.
# The result_* columns are generated from the live API data, so a party listed
# above may be missing from df; skipping it avoids a KeyError in df[party_columns]
# and when the traces are built.
party_columns = [column for column in party_colors if column in df.columns]

# Highest survey result across the plotted parties, used as the top of the Y-axis range
max_value = df[party_columns].max().max()

# Create figure
fig = go.Figure()

# The hover text is the same for every trace, so it is built once outside the loop.
# The last line is labelled "Auftraggeber" (client) because the text channel carries
# tasker_name, not the institute name; the previous "Institut" label mislabelled it.
# NOTE(review): if the polling institute was meant, pass df['institute_name'] as
# text instead and restore the "Institut" label.
hover_template = (
    "<b>%{x}</b><br>"
    "Umfrageergebnis: %{y:.1f}%<br>"  # Show 1 decimal place
    "Befragte Personen: %{customdata:,}<br>"  # Show real value with thousand separator
    "Auftraggeber: %{text}"  # Tasker name
)

# Add one scatter trace per party (initially hide all except the first)
for i, col in enumerate(party_columns):
    fig.add_trace(go.Scatter(
        x=df['survey_date'],
        y=df[col],
        mode='markers',
        marker=dict(
            size=df['scaled_size'],  # Bubble size scaled from the number of surveyed persons
            opacity=0.7,
            color=party_colors[col],  # Color assigned to this party
            line=dict(width=1, color='black')  # Outline for better visibility
        ),
        name=col.replace("result_", "").upper(),  # Legend entry without the column prefix
        visible=(i == 0),  # Only show first party initially
        hovertemplate=hover_template,
        text=df['tasker_name'],  # Pass tasker_name to hover info
        customdata=df['survey_persons'],  # Use original, unscaled survey_persons for hover
    ))

# One dropdown entry per party; selecting it makes that party's trace the only visible one.
dropdown_buttons = []
for selected, column in enumerate(party_columns):
    # Boolean mask over all traces: True only at the selected party's position
    visibility = [position == selected for position in range(len(party_columns))]
    dropdown_buttons.append({
        "label": column.replace("result_", "").upper(),  # Same text as the legend entry
        "method": "update",
        "args": [{"visible": visibility}],  # Toggle visibility only
    })

# Dropdown menu for choosing the displayed party, positioned via x/y and their anchors
party_dropdown = dict(
    buttons=dropdown_buttons,
    direction="down",
    showactive=True,
    x=0.1,
    xanchor="left",
    y=1.25,
    yanchor="top",
)

# Titles, background, size and axis range of the figure
layout_options = {
    "title": "Umfrageergebnisse im Zeitverlauf – Neueste Daten von dawum.de",
    "xaxis_title": "Umfragedatum",
    "yaxis_title": "Umfrageergebnis (%)",
    "plot_bgcolor": "white",
    "height": 700,
    "yaxis": dict(range=[0, max_value]),  # Y-axis runs from 0 to the highest result
    "updatemenus": [party_dropdown],
}
fig.update_layout(**layout_options)

# Render the figure
fig.show()


In [None]:
# Export the figure to an HTML file; include_plotlyjs="cdn" is passed so the
# page references plotly.js from a CDN rather than embedding the library.
export_path = "index_survey_data.html"
fig.write_html(export_path, include_plotlyjs="cdn")

In [None]:
# Look up the current working directory, i.e. the directory that a relative
# path such as the exported HTML file name is resolved against.
# The call is left as a bare expression; presumably it is the last statement
# of a notebook cell so that its value gets displayed.
import os
os.getcwd()