Data: https://www.kaggle.com/datasets/catherinerasgaitis/mxmh-survey-results

In [None]:
import kagglehub
import pandas as pd
import os
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [None]:
# Fetch the survey dataset through kagglehub; the call returns the directory
# that holds the downloaded files.
path = kagglehub.dataset_download("catherinerasgaitis/mxmh-survey-results")
# The survey responses are read from a single CSV file inside that directory.
file_path = os.path.join(path, "mxmh_survey_results.csv")
# `df` is the module-level DataFrame that the plotting functions below read.
df = pd.read_csv(file_path)
# Preview the first rows in the notebook output.
df.head()

Using Colab cache for faster access to the 'mxmh-survey-results' dataset.


Unnamed: 0,Timestamp,Age,Primary streaming service,Hours per day,While working,Instrumentalist,Composer,Fav genre,Exploratory,Foreign languages,...,Frequency [R&B],Frequency [Rap],Frequency [Rock],Frequency [Video game music],Anxiety,Depression,Insomnia,OCD,Music effects,Permissions
0,8/27/2022 19:29:02,18.0,Spotify,3.0,Yes,Yes,Yes,Latin,Yes,Yes,...,Sometimes,Very frequently,Never,Sometimes,3.0,0.0,1.0,0.0,,I understand.
1,8/27/2022 19:57:31,63.0,Pandora,1.5,Yes,No,No,Rock,Yes,No,...,Sometimes,Rarely,Very frequently,Rarely,7.0,2.0,2.0,1.0,,I understand.
2,8/27/2022 21:28:18,18.0,Spotify,4.0,No,No,No,Video game music,No,Yes,...,Never,Rarely,Rarely,Very frequently,7.0,7.0,10.0,2.0,No effect,I understand.
3,8/27/2022 21:40:40,61.0,YouTube Music,2.5,Yes,No,Yes,Jazz,Yes,Yes,...,Sometimes,Never,Never,Never,9.0,7.0,3.0,3.0,Improve,I understand.
4,8/27/2022 21:54:47,18.0,Spotify,4.0,Yes,No,No,R&B,Yes,No,...,Very frequently,Very frequently,Never,Rarely,7.0,2.0,5.0,9.0,Improve,I understand.


In [None]:
# Figure dimensions passed as width/height to the plotly layouts below
FIG_WIDTH_BASE, FIG_HEIGHT_BASE = 1600, 900
FIG_WIDTH_WIDE = 1800
FIG_HEIGHT_TALL = 1000

# Survey columns holding the condition scores, in the order they are plotted
DISEASES = ['Anxiety', 'Depression', 'Insomnia', 'OCD']

# Display name for each condition column (currently identical to the column name)
DISEASE_NAMES = {column: column for column in DISEASES}

# Semi-transparent colour used for each condition
DISEASE_COLORS = dict(zip(
    DISEASES,
    (
        'rgba(244, 67, 54, 0.7)',
        'rgba(33, 150, 243, 0.7)',
        'rgba(76, 175, 80, 0.7)',
        'rgba(156, 39, 176, 0.7)',
    ),
))

Create bubble plot showing platform usage by age, hours, and genre

In [None]:
def create_bubble_plot():
    """Build a bubble chart of streaming platforms by age, hours and genre.

    Each bubble is one (streaming service, favourite genre) pair from the
    module-level ``df``: x is the mean age, y the mean hours per day, and the
    bubble size is the number of non-null ``Timestamp`` values in the pair.

    Returns:
        The plotly figure.
    """
    # One row per (service, genre) pair with its means and respondent count
    group_keys = ['Primary streaming service', 'Fav genre']
    aggregations = {
        'Age': 'mean',
        'Hours per day': 'mean',
        'Timestamp': 'count'
    }
    grouped = df.groupby(group_keys).agg(aggregations).reset_index()
    bubble_data = grouped.rename(columns={'Timestamp': 'Count'})

    # Fields shown in the hover box and their number formats
    hover_fields = {
        'Age': ':.1f',
        'Hours per day': ':.1f',
        'Count': True,
        'Primary streaming service': True
    }
    axis_labels = {
        'Age': 'Average age',
        'Hours per day': 'Hours per day',
        'Count': 'Number of respondents',
        'Primary streaming service': 'Streaming service'
    }

    fig = px.scatter(
        bubble_data,
        x='Age',
        y='Hours per day',
        size='Count',
        color='Primary streaming service',
        hover_name='Fav genre',
        hover_data=hover_fields,
        title='Platform usage by age, listening hours and genre',
        labels=axis_labels,
        size_max=60
    )

    fig.update_layout(
        hovermode='closest',
        showlegend=True,
        xaxis={'title': 'Average age', 'showgrid': True},
        yaxis={'title': 'Hours per day', 'showgrid': True},
        width=FIG_WIDTH_BASE,
        height=FIG_HEIGHT_BASE
    )

    return fig

Create line plot showing disease level by listening hours

In [None]:
def create_music_effects_lineplot():
    """Build a line plot of mean condition scores against daily listening hours.

    Rows of the module-level ``df`` are grouped by ``Hours per day`` and
    ``Music effects``; groups with fewer than three non-null ``Age`` values
    are dropped. One trace is drawn per (condition, effect) pair: colour
    encodes the reported effect, line dash and marker symbol encode the
    condition.

    Returns:
        The plotly figure, carrying one dropdown that filters by condition and
        one that filters by effect.
    """
    # Mean score per condition plus a respondent count for every (hours, effect) group
    aggregations = {disease: 'mean' for disease in DISEASES}
    aggregations['Age'] = 'count'
    plot_data = (
        df.groupby(['Hours per day', 'Music effects'])
        .agg(aggregations)
        .reset_index()
        .rename(columns={'Age': 'Count'})
    )

    # Drop sparsely populated groups so single respondents do not dominate a point
    plot_data = plot_data[plot_data['Count'] >= 3]

    effect_colors = {
        'Improve': '#56B4E9',
        'No effect': '#D55E00',
        'Worsen': '#999999'
    }

    disease_styles = {
        'Anxiety': {
            'line_style': 'solid',
            'marker': 'circle'
        },
        'Depression': {
            'line_style': 'solid',
            'marker': 'circle-open'
        },
        'Insomnia': {
            'line_style': 'dash',
            'marker': 'square-open'
        },
        'OCD': {
            'line_style': 'dot',
            'marker': 'triangle-up'
        }
    }

    # The rows for an effect do not depend on the condition, so sort them once
    rows_by_effect = {
        effect: plot_data[plot_data['Music effects'] == effect].sort_values('Hours per day')
        for effect in effect_colors
    }

    fig = go.Figure()

    # Trace order is condition-major, effect-minor; the dropdown menus rely on it
    for disease in DISEASES:
        disease_style = disease_styles[disease]
        display_label = DISEASE_NAMES[disease]

        for effect, color in effect_colors.items():
            filtered_data = rows_by_effect[effect]

            # Always add the trace, even when it has no points left after the
            # Count filter: the dropdown menus address traces by position, so
            # skipping an empty one would shift every later index and make the
            # menus toggle the wrong traces.
            fig.add_trace(
                go.Scatter(
                    x=filtered_data['Hours per day'],
                    y=filtered_data[disease],
                    mode='lines+markers',
                    name=f"{display_label} - {effect}",
                    legendgroup=disease,
                    line=dict(
                        color=color,
                        dash=disease_style['line_style'],
                        width=3
                    ),
                    marker=dict(
                        symbol=disease_style['marker'],
                        size=8,
                        color=color,
                        line=dict(
                            # Open markers get a thicker outline
                            width=2 if 'open' in disease_style['marker'] else 1,
                            color=color
                        )
                    ),
                    hovertemplate=(
                        f"<b>{display_label}</b><br>"
                        f"Effect: {effect}<br>"
                        "Hours: %{x:.1f}<br>"
                        "Severity: %{y:.2f}<br>"
                        "Respondents: %{customdata:.0f}<br>"
                        "<extra></extra>"
                    ),
                    customdata=filtered_data['Count'],
                    visible=True
                )
            )

    # Each menu sets the complete visibility list, so the most recent selection wins
    disease_dropdown = create_disease_dropdown(DISEASES, DISEASE_NAMES)
    effect_dropdown = create_effect_dropdown()

    fig.update_layout(
        title=dict(
            text='Disease severity by listening hours',
            x=0.05,
            xanchor='left',
            font=dict(size=16)
        ),
        height=700,
        width=1200,
        showlegend=True,
        legend=dict(
            title=dict(text='<b>Disease - Effect</b>'),
            orientation="v",
            yanchor="top",
            y=0.98,
            xanchor="left",
            x=1.02,
            bgcolor='rgba(255,255,255,0.9)',
            bordercolor='gray',
            font=dict(size=10)
        ),
        plot_bgcolor='white',
        margin=dict(l=80, r=200, t=100, b=80),
        updatemenus=[
            disease_dropdown,
            effect_dropdown
        ]
    )

    fig.update_xaxes(
        title_text="Listening hours per day",
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgray',
        zeroline=True,
        zerolinewidth=1,
        zerolinecolor='black'
    )

    fig.update_yaxes(
        title_text="Disease level",
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgray'
    )

    return fig

def create_disease_dropdown(diseases, disease_names, traces_per_disease=3):
    """Create the dropdown menu that filters figure traces by disease.

    The figure's traces are expected to be laid out disease-major: the first
    ``traces_per_disease`` traces belong to ``diseases[0]``, the next
    ``traces_per_disease`` to ``diseases[1]``, and so on.

    Args:
        diseases: Ordered sequence of disease keys.
        disease_names: Mapping from disease key to the button label.
        traces_per_disease: Number of consecutive traces per disease.
            Defaults to 3, the number of music effects plotted per disease.

    Returns:
        A dict usable as an entry of the layout's ``updatemenus`` list, with
        an "All Diseases" button followed by one button per disease.
    """
    total_traces = len(diseases) * traces_per_disease

    buttons = [dict(
        label="All Diseases",
        method="update",
        args=[{"visible": [True] * total_traces}]
    )]

    for i, disease in enumerate(diseases):
        # Only the contiguous run of traces belonging to this disease stays visible
        start_idx = i * traces_per_disease
        end_idx = start_idx + traces_per_disease
        visibility = [start_idx <= j < end_idx for j in range(total_traces)]

        buttons.append(dict(
            label=disease_names[disease],
            method="update",
            args=[{"visible": visibility}]
        ))

    return dict(
        type="dropdown",
        direction="down",
        x=0.05,
        y=0.95,
        xanchor="left",
        yanchor="top",
        buttons=buttons,
        bgcolor='rgba(255,255,255,0.9)',
        bordercolor='gray',
        borderwidth=1,
        font=dict(size=12)
    )

def create_effect_dropdown(effects=('Improve', 'No effect', 'Worsen'), num_diseases=4):
    """Create the dropdown menu that filters figure traces by music effect.

    The figure's traces are expected to be laid out disease-major and
    effect-minor, so the trace for effect ``i`` of disease ``j`` sits at
    index ``i + j * len(effects)``.

    Args:
        effects: Ordered effect labels, one trace per effect within each
            disease. Defaults to the three effects used by the line plot.
        num_diseases: Number of diseases plotted. Defaults to 4.

    Returns:
        A dict usable as an entry of the layout's ``updatemenus`` list, with
        an "All Effects" button followed by one button per effect.
    """
    num_effects = len(effects)
    total_traces = num_effects * num_diseases

    buttons = [dict(
        label="All Effects",
        method="update",
        args=[{"visible": [True] * total_traces}]
    )]

    for i, effect in enumerate(effects):
        # Show the i-th trace inside every disease's run of traces
        visibility = [j % num_effects == i for j in range(total_traces)]

        buttons.append(dict(
            label=effect,
            method="update",
            args=[{"visible": visibility}]
        ))

    return dict(
        type="dropdown",
        direction="down",
        x=0.05,
        y=0.85,
        xanchor="left",
        yanchor="top",
        buttons=buttons,
        bgcolor='rgba(255,255,255,0.9)',
        bordercolor='gray',
        borderwidth=1,
        font=dict(size=12)
    )


Create interactive BPM distribution plot with genre filtering

In [None]:
def create_bpm_genre_plot():
    """Build side-by-side BPM box plots per favourite genre with a genre filter.

    One subplot column is created per entry of ``DISEASES``. Every column
    draws one box per genre from the same BPM values; the columns differ in
    colour and in the per-condition average shown in the hover text.

    Returns:
        The plotly figure with a dropdown that toggles genres.
    """
    # Only rows with both a BPM and a favourite genre can be plotted
    plot_data = df.dropna(subset=['BPM', 'Fav genre'])
    bpm_values = plot_data['BPM']
    all_genres = plot_data['Fav genre'].unique()

    # The y-axis is limited to the 5th-95th percentile band of BPM
    bpm_min, bpm_max = np.percentile(bpm_values, [5, 95])

    panel_titles = [DISEASE_NAMES[d] for d in DISEASES]
    fig = make_subplots(
        rows=1, cols=4,
        subplot_titles=panel_titles,
        horizontal_spacing=0.05,
        shared_yaxes=True
    )

    # One box per (condition, genre); traces are added condition by condition
    for col, disease in enumerate(DISEASES, 1):
        fill_color = DISEASE_COLORS[disease]
        outline_color = fill_color.replace('0.7', '1.0')

        for genre in all_genres:
            genre_data = plot_data.loc[plot_data['Fav genre'] == genre, 'BPM']
            if genre_data.empty:
                continue

            box = go.Box(
                y=genre_data,
                name=genre,
                showlegend=False,
                boxpoints='outliers',
                marker_color=fill_color,
                line_color=outline_color,
                hovertemplate=create_bpm_hover_template(genre, disease, plot_data)
            )
            fig.add_trace(box, row=1, col=col)

        # Label the x-axis of this panel
        fig.update_xaxes(
            title_text="Genres",
            tickangle=45,
            row=1, col=col
        )

    # Buttons of the genre filter menu
    genre_buttons = create_genre_dropdown(plot_data, all_genres, len(DISEASES))
    genre_menu = dict(
        type="dropdown",
        direction="down",
        x=0.27,
        y=1.13,
        xanchor="left",
        buttons=genre_buttons,
        bgcolor='rgba(255,255,255,0.9)',
        bordercolor='gray',
        borderwidth=1,
        font=dict(size=12)
    )

    fig.update_layout(
        title=dict(
            text='BPM distribution by genre for different diseases',
            x=0.05,
            xanchor='left',
            y=0.95,
            font=dict(size=16)
        ),
        height=FIG_HEIGHT_BASE,
        width=FIG_WIDTH_WIDE,
        showlegend=False,
        plot_bgcolor='white',
        margin=dict(l=80, r=150, t=120, b=80),
        updatemenus=[genre_menu]
    )

    # Only the first panel carries the y-axis title and the clipped range
    fig.update_yaxes(
        title_text="BPM",
        range=[bpm_min, bpm_max],
        row=1, col=1
    )

    # Light grid on every axis
    grid_style = dict(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_xaxes(**grid_style)
    fig.update_yaxes(**grid_style)

    return fig

def create_genre_dropdown(plot_data, all_genres, num_diseases):
    """Create the dropdown buttons that filter box-plot traces by genre.

    The figure's traces are expected to be laid out disease-major: for each
    disease there is one trace per entry of ``all_genres``, in that order.

    Args:
        plot_data: DataFrame with a ``'Fav genre'`` column, used to rank
            genres by number of rows.
        all_genres: Ordered genres, one trace per genre within each disease.
        num_diseases: Number of diseases (subplot columns).

    Returns:
        A list of button dicts: "All Genres", one button per genre, then
        "Top 1 Genres" up to "Top 5 Genres", where "Top i" shows only the
        ``i`` most frequent genres.
    """
    genres = list(all_genres)
    total_traces = len(genres) * num_diseases

    def visibility_for(selected):
        # The per-disease pattern repeats once for every disease
        return [genre in selected for genre in genres] * num_diseases

    # "All Genres" button
    genre_buttons = [
        dict(label="All Genres", method="update", args=[{"visible": [True] * total_traces}])
    ]

    # Buttons for each individual genre
    for genre in genres:
        genre_buttons.append(
            dict(label=genre, method="update", args=[{"visible": visibility_for({genre})}])
        )

    # Cumulative "Top i" buttons: each one shows only the i most frequent genres
    top_genres = list(plot_data['Fav genre'].value_counts().head(5).index)
    for i in range(1, len(top_genres) + 1):
        selected = set(top_genres[:i])
        genre_buttons.append(
            dict(label=f"Top {i} Genres", method="update", args=[{"visible": visibility_for(selected)}])
        )

    return genre_buttons

def create_dropdown_menu(buttons):
    """Wrap ``buttons`` in a dropdown-menu configuration dict.

    Args:
        buttons: The button definitions to place in the menu.

    Returns:
        A dict describing a downward-opening dropdown anchored at its
        top-left corner at (0.1, 1.15).
    """
    menu = {
        "type": "dropdown",
        "direction": "down",
        "x": 0.1,
        "y": 1.15,
        "xanchor": "left",
        "yanchor": "top",
        "buttons": buttons,
        "bgcolor": 'rgba(255,255,255,0.9)',
        "bordercolor": 'gray',
        "borderwidth": 1,
        "font": {"size": 12},
    }
    return menu

def create_bpm_hover_template(genre, disease, plot_data):
    """Build the hover text for one genre's BPM box in one disease panel.

    Args:
        genre: Value of ``'Fav genre'`` the box represents.
        disease: Column name of the condition; also the key into ``DISEASE_NAMES``.
        plot_data: DataFrame containing ``'Fav genre'``, ``'BPM'`` and the
            disease column.

    Returns:
        An HTML hover template showing the genre, the disease, the hovered BPM
        value, the genre's mean disease score and its number of rows.
    """
    rows = plot_data[plot_data['Fav genre'] == genre]
    label = DISEASE_NAMES[disease]
    mean_score = rows[disease].mean()
    n_observations = len(rows['BPM'])

    parts = [
        f"<b>Genre:</b> {genre}<br>",
        f"<b>Disease:</b> {label}<br>",
        # %{y:.0f} is left for plotly to fill in with the hovered value
        "<b>BPM:</b> %{y:.0f}<br>",
        f"<b>Average {label.lower()}:</b> {mean_score:.2f}<br>",
        f"<b>Observations:</b> {n_observations}<br>",
        "<extra></extra>",
    ]
    return "".join(parts)

Create correlation heatmap between genre frequency and mental health

In [None]:
def create_genre_correlation_heatmap():
    """Build a heatmap of correlations between genre frequency and conditions.

    Every ``Frequency [...]`` column of the module-level ``df`` is mapped to an
    ordinal score (Never=0 ... Very frequently=3). Rows missing any frequency
    or condition value are dropped, and ``DataFrame.corr`` with its default
    method is computed between each genre score and each condition score.

    Returns:
        The plotly figure, with each cell annotated with its coefficient.
    """
    # Identify frequency columns and derive the bare genre names from them
    frequency_cols = [col for col in df.columns if 'Frequency' in col]
    genre_names = [col.replace('Frequency [', '').replace(']', '') for col in frequency_cols]

    # Convert the textual answers to an ordinal scale so they can be correlated
    frequency_mapping = {'Never': 0, 'Rarely': 1, 'Sometimes': 2, 'Very frequently': 3}
    heatmap_data = df.copy()
    for col in frequency_cols:
        heatmap_data[col] = heatmap_data[col].map(frequency_mapping)

    heatmap_data = heatmap_data.dropna(subset=frequency_cols + DISEASES)

    # Keep only the genre-by-condition part of the correlation matrix
    correlation_matrix = heatmap_data[frequency_cols + DISEASES].corr()
    genre_correlations = correlation_matrix.loc[frequency_cols, DISEASES]
    genre_correlations.index = genre_names

    fig = px.imshow(
        genre_correlations,
        x=genre_correlations.columns,
        y=genre_correlations.index,
        color_continuous_scale='RdBu_r',
        aspect="auto",
        title="Correlation between music genre frequency and mental health conditions",
        labels=dict(x="Mental health conditions", y="Music genres", color="Correlation coefficient"),
        zmin=-1, zmax=1
    )

    fig.update_traces(
        hovertemplate=(
            "<b>Genre:</b> %{y}<br>"
            "<b>Condition:</b> %{x}<br>"
            "<b>Correlation:</b> %{z:.3f}<br>"
            "<extra></extra>"
        )
    )

    # Print each coefficient on its cell; white text on the strongly coloured cells
    annotations = [
        dict(
            x=col_pos, y=row_pos, text=f"{corr_value:.2f}", showarrow=False,
            font=dict(color="white" if abs(corr_value) > 0.3 else "black", size=10)
        )
        for row_pos, row in enumerate(genre_correlations.to_numpy())
        for col_pos, corr_value in enumerate(row)
    ]

    fig.update_layout(
        height=800, width=700,
        xaxis=dict(tickangle=45),
        coloraxis_colorbar=dict(
            # The flat `titleside` property is rejected by newer plotly
            # releases; the nested title form is accepted by old and new alike.
            title=dict(text="Correlation", side="right"),
            tickvals=[-1, -0.5, 0, 0.5, 1], ticktext=["-1.0", "-0.5", "0", "0.5", "1.0"]
        ),
        annotations=annotations
    )

    return fig

Create heatmap showing BPM distribution by disease severity

In [None]:
def create_bpm_disease_heatmap():
    """Build a 2x2 grid of heatmaps of BPM range against condition severity.

    For each entry of ``DISEASES``, respondents from the module-level ``df``
    are binned by BPM and by severity score. Each heatmap row (severity level)
    is normalised to percentages, so a cell shows the share of respondents at
    that severity level whose BPM falls in the given range. All four heatmaps
    share one colour scale.

    Returns:
        The plotly figure.
    """
    # Only rows with a BPM and all four condition scores are used
    heatmap_data = df.dropna(subset=['BPM'] + DISEASES).copy()

    # Define bins for grouping. BPM values outside (0, 300] fall outside
    # every bin and are therefore left out of the counts.
    bpm_bins = [0, 60, 80, 100, 120, 140, 160, 180, 200, 300]
    bpm_labels = ['<60', '60-80', '80-100', '100-120', '120-140', '140-160', '160-180', '180-200', '>200']
    disease_bins = [0, 2, 4, 6, 8, 10]
    disease_labels = ['0-2', '2-4', '4-6', '6-8', '8-10']

    # The BPM grouping is identical for every condition, so compute it once
    heatmap_data['BPM_group'] = pd.cut(heatmap_data['BPM'], bins=bpm_bins, labels=bpm_labels)

    # Build the count and percentage tables once per condition
    counts_by_disease = {}
    shares_by_disease = {}
    for disease in DISEASES:
        # include_lowest=True keeps a score of exactly 0 in the '0-2' level;
        # without it pd.cut leaves those respondents unbinned and they are lost.
        severity_level = pd.cut(
            heatmap_data[disease], bins=disease_bins, labels=disease_labels, include_lowest=True
        )
        binned = heatmap_data.assign(Disease_level=severity_level)

        pivot_table = binned.groupby(['Disease_level', 'BPM_group'], observed=True).size().unstack(fill_value=0)
        counts_by_disease[disease] = pivot_table
        # Normalise each severity level (row) to percentages
        shares_by_disease[disease] = pivot_table.div(pivot_table.sum(axis=1), axis=0) * 100

    # Unified colour scale across the four heatmaps
    zmin = 0
    zmax = max(shares.values.max() for shares in shares_by_disease.values())

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[DISEASE_NAMES[d] for d in DISEASES],
        vertical_spacing=0.08, horizontal_spacing=0.05,
        shared_xaxes=False, shared_yaxes=True
    )

    for i, disease in enumerate(DISEASES):
        row, col = i // 2 + 1, i % 2 + 1
        pivot_table = counts_by_disease[disease]
        pivot_table_normalized = shares_by_disease[disease]

        fig.add_trace(
            go.Heatmap(
                z=pivot_table_normalized.values,
                x=pivot_table_normalized.columns,
                y=pivot_table_normalized.index,
                colorscale='Viridis',
                zmin=zmin, zmax=zmax,
                # One colour bar is enough because the scale is shared
                showscale=(i == 3),
                colorbar=dict(
                    # Nested title form: the flat `titleside` property is
                    # rejected by newer plotly releases.
                    title=dict(text="Percentage of respondents", side="right"),
                    len=0.8, y=0.5, yanchor='middle'
                ),
                hovertemplate=create_bpm_disease_hover_template(disease, pivot_table),
                # Raw counts, shown as "Observations" in the hover text
                customdata=pivot_table.values
            ),
            row=row, col=col
        )

    fig.update_layout(
        title=dict(text='BPM distribution by disease severity levels', x=0.05, xanchor='left'),
        height=FIG_HEIGHT_TALL, width=FIG_WIDTH_BASE, showlegend=False
    )

    for row in (1, 2):
        for col in (1, 2):
            fig.update_xaxes(title_text="BPM range", row=row, col=col)
        # Only the left-hand panel of each row carries the y-axis title
        fig.update_yaxes(title_text="Disease severity level", row=row, col=1)

    return fig

def create_bpm_disease_hover_template(disease, pivot_table):
    """Build the hover text for one condition's BPM/severity heatmap.

    Args:
        disease: Key into ``DISEASE_NAMES``; its display name heads the text.
        pivot_table: Accepted for the caller's convenience; not used here.

    Returns:
        An HTML hover template whose ``%{...}`` placeholders are left for
        plotly to fill in.
    """
    header = f"<b>{DISEASE_NAMES[disease]}</b><br>"
    placeholder_lines = (
        "Disease Level: %{y}<br>",
        "BPM: %{x}<br>",
        "Observations: %{customdata}<br>",
        "Percentage: %{z:.1f}%<br>",
        "<extra></extra>",
    )
    return header + "".join(placeholder_lines)

Create stacked bar plot of genre listening frequency with separate traces for each age group

In [None]:
def create_genre_frequency_plot():
    """Build a stacked bar chart of listening frequency per genre, by age group.

    The ``Frequency [...]`` columns of the module-level ``df`` are melted into
    (genre, frequency) answers and counted. One set of four bar traces (one
    per frequency level) is created for all respondents and for each age
    group; a dropdown switches which set is visible.

    Returns:
        The plotly figure.
    """
    plot_data = df.copy()

    # Age groups. The bins are left-closed (right=False), so the labels name
    # the ages each bin really contains; the last bin is open-ended to match "45+".
    age_bins = [0, 18, 25, 35, 45, float('inf')]
    age_labels = ['<18', '18-24', '25-34', '35-44', '45+']
    plot_data['Age_group'] = pd.cut(plot_data['Age'], bins=age_bins, labels=age_labels, right=False)

    frequency_cols = [col for col in plot_data.columns if 'Frequency' in col]
    frequency_order = ['Never', 'Rarely', 'Sometimes', 'Very frequently']
    frequency_colors = {
        'Never': '#EF553B',
        'Rarely': '#FECB52',
        'Sometimes': '#AB63FA',
        'Very frequently': '#00CC96'
    }

    # "All Ages" comes first; the dropdown relies on this trace-group order
    group_names = ['All Ages'] + age_labels

    # Count the answers per (genre, frequency) for every age group
    age_group_data = {}
    for age_group in group_names:
        if age_group == 'All Ages':
            age_data = plot_data
        else:
            age_data = plot_data[plot_data['Age_group'] == age_group]

        melted_data = pd.melt(
            age_data,
            value_vars=frequency_cols,
            var_name='Genre',
            value_name='Frequency'
        )
        # Strip the "Frequency [" prefix and "]" suffix to get the genre name
        melted_data['Genre'] = melted_data['Genre'].str.replace(r'Frequency \[', '', regex=True).str.replace(r'\]', '', regex=True)
        melted_data = melted_data.dropna(subset=['Frequency'])

        frequency_counts = melted_data.groupby(['Genre', 'Frequency']).size().reset_index(name='Count')
        frequency_counts['Frequency'] = pd.Categorical(
            frequency_counts['Frequency'],
            categories=frequency_order,
            ordered=True
        )

        # Order genres by their total number of answers.
        # NOTE(review): each respondent contributes one answer per genre
        # column, so these totals are presumably almost equal and the order is
        # close to arbitrary - confirm whether another popularity measure
        # (e.g. the "Very frequently" count) was intended.
        genre_totals = frequency_counts.groupby('Genre')['Count'].sum().sort_values(ascending=False)
        frequency_counts['Genre'] = pd.Categorical(
            frequency_counts['Genre'],
            categories=genre_totals.index,
            ordered=True
        )
        frequency_counts = frequency_counts.sort_values(['Genre', 'Frequency'])

        age_group_data[age_group] = frequency_counts

    fig = go.Figure()

    # One block of traces per age group; only the "All Ages" block starts visible
    for age_group in group_names:
        group_counts = age_group_data[age_group]

        for freq in frequency_order:
            freq_data = group_counts[group_counts['Frequency'] == freq]
            fig.add_trace(
                go.Bar(
                    x=freq_data['Genre'],
                    y=freq_data['Count'],
                    name=freq,
                    legendgroup=freq,
                    marker_color=frequency_colors[freq],
                    visible=(age_group == 'All Ages'),
                    showlegend=True,
                    hovertemplate=(
                        f"<b>Genre:</b> %{{x}}<br>"
                        f"<b>Frequency:</b> {freq}<br>"
                        f"<b>Count:</b> %{{y}}<br>"
                        f"<b>Age Group:</b> {age_group}<br>"
                        "<extra></extra>"
                    )
                )
            )

    # Dropdown menu: one button per trace block
    traces_per_group = len(frequency_order)
    total_traces = traces_per_group * len(group_names)
    # The same string is used for the initial title and the "All Ages" button
    all_ages_title = "Music genre popularity by listening frequency - all age groups"

    dropdown_buttons = []
    for position, age_group in enumerate(group_names):
        visibility = [idx // traces_per_group == position for idx in range(total_traces)]
        if age_group == 'All Ages':
            title_text = all_ages_title
        else:
            title_text = f"Music genre popularity by listening frequency - {age_group} age group"

        dropdown_buttons.append(
            dict(
                label=age_group,
                method="update",
                args=[
                    {"visible": visibility},
                    # Update only the title text so that the title's position
                    # and font size set in the layout below are kept
                    {"title.text": title_text}
                ]
            )
        )

    # Final layout
    fig.update_layout(
        title=dict(
            text=all_ages_title,
            x=0.05,
            xanchor='left',
            y=0.95,
            font=dict(size=16)
        ),
        height=650,
        width=1400,
        xaxis=dict(
            title=dict(text='Genre', font=dict(size=14)),
            tickangle=45,
            # Leave the right-hand part of the figure for the menu and legend
            domain=[0, 0.70]
        ),
        yaxis=dict(
            title=dict(text='Number of responses', font=dict(size=14)),
            domain=[0.05, 0.95]
        ),
        barmode='stack',
        plot_bgcolor='white',
        showlegend=True,
        legend=dict(
            title=dict(
                text='<b>Listening frequency</b>',
                font=dict(size=13)
            ),
            orientation="v",
            yanchor="middle",
            y=0.65,
            xanchor="left",
            x=0.72,
            bgcolor='rgba(255,255,255,0.9)',
            bordercolor='darkgray',
            borderwidth=1,
            font=dict(size=12),
            itemsizing='constant'
        ),
        updatemenus=[
            dict(
                type="dropdown",
                direction="down",
                x=0.72,
                y=0.95,
                xanchor="left",
                yanchor="top",
                buttons=dropdown_buttons,
                bgcolor='rgba(255,255,255,0.95)',
                bordercolor='darkgray',
                borderwidth=1,
                font=dict(size=12)
            )
        ],
        margin=dict(r=280, l=80, t=100, b=120)
    )

    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(211,211,211,0.5)'
    )
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(211,211,211,0.5)'
    )

    return fig

Generic function to create heatmaps for grouped data (age and listening hours)

In [None]:
def create_grouped_heatmap(data_column, bins, labels, title, xaxis_title):
    """Build a heatmap of mean condition scores per bin of a numeric column.

    Args:
        data_column: Name of the column of the module-level ``df`` to bin.
        bins: Bin edges passed to ``pd.cut``; bins are left-closed
            (``right=False``).
        labels: One label per bin.
        title: Figure title.
        xaxis_title: Name of the grouping. Despite its name it labels the
            heatmap's y-axis (the groups are the rows) and the hover text.

    Returns:
        The plotly figure, with each cell annotated with its mean value.
    """
    heatmap_data = df.copy()
    group_column = f"{data_column}_group"

    # Create groups and keep only rows that have a group and all condition scores
    heatmap_data[group_column] = pd.cut(heatmap_data[data_column], bins=bins, labels=labels, right=False)
    heatmap_data = heatmap_data.dropna(subset=[group_column] + DISEASES)

    # Mean score per group (rows) and condition (columns)
    group_means = heatmap_data.groupby(group_column, observed=True)[DISEASES].mean()
    group_means.columns = [DISEASE_NAMES[d] for d in DISEASES]

    fig = px.imshow(
        group_means, x=group_means.columns, y=group_means.index,
        color_continuous_scale='RdYlBu_r', aspect="auto", title=title,
        labels=dict(x="Diseases", y=xaxis_title, color="Average disease level")
    )

    fig.update_traces(
        hovertemplate=(
            f"<b>{xaxis_title}:</b> %{{y}}<br>"
            "<b>Disease:</b> %{x}<br>"
            "<b>Average Level:</b> %{z:.3f}<br>"
            "<extra></extra>"
        )
    )

    # Cells above the overall mean get white text; computed once, outside the loop
    overall_mean = group_means.values.mean()

    annotations = [
        dict(
            x=col_pos, y=row_pos, text=f"{mean_value:.2f}", showarrow=False,
            font=dict(color="white" if mean_value > overall_mean else "black", size=11)
        )
        for row_pos, row in enumerate(group_means.to_numpy())
        for col_pos, mean_value in enumerate(row)
    ]

    fig.update_layout(
        height=600, width=800,
        xaxis=dict(tickangle=45),
        # Nested title form: the flat `titleside` property is rejected by
        # newer plotly releases.
        coloraxis_colorbar=dict(title=dict(text="Disease level", side="right")),
        annotations=annotations
    )

    return fig

def create_age_heatmap():
    """Create the heatmap of mean disease levels by age group.

    Returns:
        The figure produced by ``create_grouped_heatmap`` for the ``'Age'`` column.
    """
    # create_grouped_heatmap bins with right=False (left-closed bins), so the
    # labels name the ages each bin really contains: 25 belongs to "25-34",
    # not to the group before it. The last bin is open-ended to match "45+".
    age_bins = [0, 18, 25, 35, 45, float('inf')]
    age_labels = ['<18', '18-24', '25-34', '35-44', '45+']
    return create_grouped_heatmap('Age', age_bins, age_labels, "Average disease levels by age groups", "Age groups")

def create_hours_heatmap():
    """Create the heatmap of mean disease levels by daily listening hours.

    Returns:
        The figure produced by ``create_grouped_heatmap`` for the
        ``'Hours per day'`` column.
    """
    # create_grouped_heatmap bins with right=False (left-closed bins). With a
    # last edge of 24, a respondent reporting exactly 24 hours would fall
    # outside every bin and be dropped, so the last bin is open-ended. Its
    # label says "6h+" because exactly 6 hours belongs to it.
    hours_bins = [0, 1, 2, 3, 4, 5, 6, float('inf')]
    hours_labels = ['<1h', '1-2h', '2-3h', '3-4h', '4-5h', '5-6h', '6h+']
    return create_grouped_heatmap('Hours per day', hours_bins, hours_labels, "Average disease levels by music listening hours", "Listening hours per day")

In [None]:
# Build and display the platform-usage bubble chart.
create_bubble_plot().show()

1. The graph shows the popularity of specific streaming services depending on age.  
2. It is also noticeable that the older the listener, the fewer hours a day they listen to music. Some platforms correlate with longer daily listening time, suggesting that the design or content of the platform may influence engagement levels.
3. Bubble sizes show which combinations of genres and platforms have the largest user base.

In [None]:
# Build and display the heatmap of average disease levels by age group.
create_age_heatmap().show()

1. The graph shows that mental health problems manifest themselves differently in different age groups.

In [None]:
# Build and display the heatmap of average disease levels by listening hours.
create_hours_heatmap().show()

1. The graph shows how the level of mental illness increases with increasing listening hours.
2. Together with the previous heat map and the very first distribution graph, a pattern emerges linking the respondents' age, listening hours, and mental state.

In [None]:
# Build and display the stacked bar chart of genre listening frequency.
create_genre_frequency_plot().show()

1. The graph shows the hierarchy of popularity of genres.
2. For different ages, the popularity of genres varies.
3. The distribution shows which genres attract casual listeners and which ones attract devoted ones.

In [None]:
# Build and display the genre-frequency / mental-health correlation heatmap.
create_genre_correlation_heatmap().show()

1. The values of the correlation coefficient between mental illnesses and the popularity of the musical genre fluctuate around zero, which suggests that there is no linear relationship between these values.

In [None]:
# Build and display the line plot of disease severity by listening hours.
create_music_effects_lineplot().show()

1. The graph shows that there is some relationship between the reported effect of music and the severity of the mental health conditions.
2. In general, the noted positive effect of music is typical for respondents with a higher level of mental illness, compared with a neutral effect.
3. For the same listening hours, the degree of illness is more severe for the group of respondents who note the negative effect of listening.
4. The graph shows that anxiety has the highest severity level and OCD the lowest.

In [None]:
# Build and display the BPM-by-genre box plots.
create_bpm_genre_plot().show()

1. The graphs do not show a clear tendency for particular BPM levels to be associated with particular mental illnesses.
2. At the same time, there are different distributions of average BPM values between different genres.

In [None]:
# Build and display the heatmaps of BPM range by disease severity.
create_bpm_disease_heatmap().show()

1. The heat map shows some relationship between BPM and the level of mental stability.
2. For each type of disease, there is a proportion of respondents in a certain BPM range. For depression, for example, the distribution of respondents is biased towards lower BPM compared to OCD.