# Statista data
Statista Consumer Insights: https://www.statista.com/global-consumer-survey 

In [9]:
import pandas as pd
import plotly.express as px
import glob
import numpy as np
from io import StringIO
from typing import List

from plotly.subplots import make_subplots
from plotly import graph_objects as go

In [10]:
# Folder that holds the CSV exports downloaded from Statista.
data_root = '../data/statista/'

# All CSV exports currently present in that folder.
file_lst = glob.glob(f'{data_root}*.csv')


In [11]:
# Countries kept by default when the caller does not pass its own selection.
_DEFAULT_COUNTRIES = ('Finland', 'France', 'Italy', 'Spain', 'Germany', 'Switzerland', 'Sweden', 'Austria', 'Poland')


def _parse_update_section(lines: List[str], update_start: int, update_end: int) -> pd.DataFrame:
    """Parse the table located between two consecutive ' - Update' marker lines.

    Args:
        lines (List[str]): All lines of the export file.
        update_start (int): Index of the marker line that opens the section.
        update_end (int): Index of the next marker line.

    Returns:
        pd.DataFrame: One row per country, indexed by (Year, Update, Country).
            'Base' holds the absolute value, every other column is expressed
            as a percentage of 'Base'.
    """
    # The marker line yields "<year> - <update>" once the quotes and the
    # word "Update" are stripped.
    year_txt, update_txt = (
        lines[update_start].strip().replace('Update ', '').replace('"', '').split('-')
    )
    year = int(year_txt)
    update = int(update_txt)

    # The table starts two lines below the marker and stops two lines above
    # the next marker.
    csv_str = ''.join(lines[update_start + 2:update_end - 2])

    # Transpose so that the original rows (answers) become columns.
    df = pd.read_csv(StringIO(csv_str), sep=';', header=None).T

    # Keep the label row (NaN in column 1) and the rows with absolute values.
    df = df[(df[1] == "absolute") | (df[1].isna())]
    df = df.drop(columns=[1])

    # Promote the label row to the header.
    df.loc[0, 0] = 'Country'
    df.columns = df.iloc[0]
    # Explicit copy: the new columns below must not be written to a slice of
    # the intermediate frame (avoids pandas chained-assignment warnings).
    df = df[1:].copy()
    df['Year'] = year
    df['Update'] = update
    df = df.set_index(['Year', 'Update', 'Country'])

    for col in df.columns:
        # Keep the part before ' / ', drop thousands separators; float so that
        # missing values can be represented as NaN.
        df[col] = df[col].str.split(' / ').str[0].str.replace(',', '').astype(float)

    # Express every answer as a percentage of the base.
    for col in df.columns:
        if col != 'Base':
            df[col] = df[col] / df['Base'] * 100
    return df


def parse_statista_csv(filepath: str, countries: List[str] = None) -> pd.DataFrame:
    """Reads a csv file downloaded from statista.com and returns a pandas dataframe

    The file is split into sections delimited by lines containing ' - Update'.
    Each section is parsed into a frame indexed by (Year, Update, Country) and
    all sections are concatenated. The content of the fifth line of the file
    is printed as the survey question.

    NOTE(review): sections are built from consecutive marker pairs, so any
    content after the last marker line is never parsed - confirm the exports
    always end with a trailing marker.
    NOTE(review): the file is opened with the platform default encoding.

    Args:
        filepath (str): Filepath of the csv file
        countries (List[str], optional): Countries to keep. Defaults to the
            nine European countries used throughout this notebook.

    Returns:
        pd.DataFrame: Pandas dataframe with the data

    Raises:
        ValueError: If the file contains no update section with data.
    """
    if countries is None:
        countries = list(_DEFAULT_COUNTRIES)

    with open(filepath, 'r') as f:
        lines = f.readlines()

    # print question:
    if len(lines) > 4:
        print(f'Question: {lines[4]}')

    update_indices = [idx for idx, line in enumerate(lines) if ' - Update' in line]
    # Pair each marker with the next one; short pairs are filtered out because
    # they do not contain any data.
    sections = [
        (start, end)
        for start, end in zip(update_indices, update_indices[1:])
        if end - start > 4
    ]
    if not sections:
        raise ValueError(f'No update section with data found in {filepath!r}')

    df = pd.concat([_parse_update_section(lines, start, end) for start, end in sections])
    return df[df.index.get_level_values('Country').isin(countries)]

In [12]:
def create_bar_plot(df: pd.DataFrame, title: str, cols_to_plot: List[str] = None, barmode: str = 'group') -> go.Figure:
    """Draws one bar-chart subplot per country with the years on the x axis

    Args:
        df (pd.DataFrame): Frame whose index (or columns) provides 'Year' and
            'Country'; the remaining columns hold the values to plot.
        title (str): Figure title
        cols_to_plot (List[str], optional): Columns drawn as bar traces.
            Defaults to every column except 'Year', 'Update', 'Country' and
            'Base'.
        barmode (str, optional): Plotly bar mode, e.g. 'group' or 'stack'.
            Defaults to 'group'.

    Returns:
        go.Figure: Subplot grid, four columns wide, with a default height of 600
    """
    plot_df = df.reset_index()
    if cols_to_plot is None:
        # Select by name rather than by position so that frames without a
        # 'Base' column do not silently lose their first answer column.
        non_answer_cols = {'Year', 'Update', 'Country', 'Base'}
        cols_to_plot = [col for col in plot_df.columns if col not in non_answer_cols]

    countries = plot_df['Country'].unique()
    max_cols = 4
    n_rows = max(1, int(np.ceil(len(countries) / max_cols)))
    fig = make_subplots(rows=n_rows, cols=max_cols, subplot_titles=countries.tolist(), shared_yaxes=True, shared_xaxes=True)

    color_palette = px.colors.qualitative.Dark24
    for position, country in enumerate(countries):
        # Fill the grid row by row; plotly rows/cols are 1-based.
        grid_row, grid_col = divmod(position, max_cols)
        grid_row += 1
        grid_col += 1
        country_df = plot_df[plot_df['Country'] == country]
        for i, col in enumerate(cols_to_plot):
            fig.add_trace(
                go.Bar(
                    x=country_df['Year'],
                    y=country_df[col],
                    name=col,
                    # Wrap around instead of failing when there are more
                    # columns than palette entries.
                    marker=dict(color=color_palette[i % len(color_palette)]),
                    # One legend entry per answer: only the first subplot
                    # contributes, the others join its legend group.
                    showlegend=(position == 0),
                    legendgroup=col,
                ),
                row=grid_row, col=grid_col)
        # Shared axes hide the tick labels of inner subplots; re-enable them
        # on every populated subplot. Addressing the axes by row/col avoids
        # the axis-name arithmetic that previously skipped the last country.
        fig.update_xaxes(title_text="Year", showticklabels=True, row=grid_row, col=grid_col)
        fig.update_yaxes(title_text="Percentage", showticklabels=True, row=grid_row, col=grid_col)

    fig.update_layout(title=title, legend_title='Answer', barmode=barmode, margin=dict(l=20, r=20, t=100, b=20), height=600)

    return fig

In [13]:
def create_scatter_plot(df: pd.DataFrame, title: str, cols_to_plot: List[str] = None) -> go.Figure:
    """Draws one line-and-marker subplot per country with the years on the x axis

    Args:
        df (pd.DataFrame): Frame whose index (or columns) provides 'Year' and
            'Country'; the remaining columns hold the values to plot.
        title (str): Figure title
        cols_to_plot (List[str], optional): Columns drawn as line traces.
            Defaults to every column except 'Year', 'Update', 'Country' and
            'Base'.

    Returns:
        go.Figure: Subplot grid, four columns wide, with a default height of 600
    """
    plot_df = df.reset_index()
    if cols_to_plot is None:
        # Select by name rather than by position so that frames without a
        # 'Base' column do not silently lose their first answer column.
        non_answer_cols = {'Year', 'Update', 'Country', 'Base'}
        cols_to_plot = [col for col in plot_df.columns if col not in non_answer_cols]

    # make multiple line subplots for each country
    countries = plot_df['Country'].unique()
    max_cols = 4
    n_rows = max(1, int(np.ceil(len(countries) / max_cols)))
    fig = make_subplots(rows=n_rows, cols=max_cols, subplot_titles=countries.tolist(), shared_yaxes=True, shared_xaxes=True)

    color_palette = px.colors.qualitative.Dark24
    for position, country in enumerate(countries):
        # Fill the grid row by row; plotly rows/cols are 1-based.
        grid_row, grid_col = divmod(position, max_cols)
        grid_row += 1
        grid_col += 1
        country_df = plot_df[plot_df['Country'] == country]
        for i, col in enumerate(cols_to_plot):
            fig.add_trace(
                go.Scatter(
                    x=country_df['Year'],
                    y=country_df[col],
                    name=col,
                    mode='markers+lines',
                    # Wrap around instead of failing when there are more
                    # columns than palette entries.
                    marker=dict(color=color_palette[i % len(color_palette)]),
                    # One legend entry per answer: only the first subplot
                    # contributes, the others join its legend group.
                    showlegend=(position == 0),
                    legendgroup=col,
                ),
                row=grid_row, col=grid_col)
        # Shared axes hide the tick labels of inner subplots; re-enable them
        # on every populated subplot. Addressing the axes by row/col avoids
        # the axis-name arithmetic that previously skipped the last country.
        fig.update_xaxes(title_text="Year", showticklabels=True, row=grid_row, col=grid_col)
        if grid_col == 1:
            # Only the first subplot of each row carries the y-axis title.
            fig.update_yaxes(title_text="Percentage", showticklabels=True, row=grid_row, col=grid_col)
        else:
            fig.update_yaxes(showticklabels=True, row=grid_row, col=grid_col)

    fig.update_layout(title=title, legend_title='Answer', margin=dict(l=20, r=20, t=100, b=20), height=600)
    return fig

## Attitudes towards mobility

In [14]:
# Parse the export and keep only the rows whose Update level equals 1.
mobility_attitude_df = (
    parse_statista_csv('../data/statista/Attitudes towards mobility.csv')
    .query('Update == 1')
)
mobility_attitude_df.info()

Question: Which of these statements about cars and mobility do you agree with? (multi-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 54 entries, (2018, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 17 columns):
 #   Column                                                                           Non-Null Count  Dtype  
---  ------                                                                           --------------  -----  
 0   Base                                                                             54 non-null     float64
 1   A car is just a means of transportation                                          9 non-null      float64
 2   A car reflects its owner's personality                                           18 non-null     float64
 3   Cars represent independence and freedom                                          9 non-null      float64
 4   Driving cars is bad for the environment                  

In [15]:
# Display the parsed table: Base holds absolute values, the answers are percentages of Base.
mobility_attitude_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Base,A car is just a means of transportation,A car reflects its owner's personality,Cars represent independence and freedom,Driving cars is bad for the environment,I am a car enthusiast,I can live well without a private car,I would like to switch to more environmentally-friendly means of transportation,Owning a car is important to me,None of the above,I spend too much time commuting,The public transportation system in my area is good,I can imagine using a self-driving taxi,There are not enough parking spaces where I live,"Fuel prices are making me opt for other forms of transportation (e.g., bike)",I try to opt for more environmentally-friendly means of transportation,The electric infrastructure prevents me from getting an electric car
Year,Update,Country,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018,1,Austria,2044.0,53.816047,14.823875,51.272016,25.244618,22.358121,17.465753,17.612524,54.549902,2.348337,,,,,,,
2018,1,Finland,1044.0,46.551724,18.199234,31.704981,24.712644,22.030651,32.950192,22.89272,28.639847,6.992337,,,,,,,
2018,1,France,2041.0,42.381186,16.805488,53.846154,14.600686,12.248898,15.923567,25.183733,42.234199,5.830475,,,,,,,
2018,1,Germany,2080.0,43.317308,17.355769,52.836538,24.038462,24.951923,17.740385,17.163462,55.384615,3.942308,,,,,,,
2018,1,Italy,2074.0,37.56027,21.9865,47.637416,10.02893,14.320154,11.137898,23.818708,44.310511,3.230473,,,,,,,
2018,1,Poland,1041.0,43.419789,21.517771,31.508165,17.002882,22.382325,19.788665,15.754083,53.794428,7.300672,,,,,,,
2018,1,Spain,2037.0,32.106038,19.194894,54.344624,13.254786,21.894944,19.243986,25.822288,47.029946,3.387334,,,,,,,
2018,1,Sweden,1039.0,39.172281,13.666987,45.909528,25.409047,20.981713,27.141482,19.249278,35.418672,7.218479,,,,,,,
2018,1,Switzerland,1727.0,47.944412,16.444702,50.665895,24.551245,21.771859,23.161552,16.328894,47.770701,3.300521,,,,,,,
2019,1,Austria,2072.0,,17.519305,,29.247104,25.144788,19.498069,20.945946,58.590734,2.123552,8.735521,22.683398,,,,,


In [16]:
# Grouped bars (the default barmode), one subplot per country.
fig = create_bar_plot(mobility_attitude_df, 'Attitudes towards mobility')
# Override the default figure height of 600 set by create_bar_plot.
fig.update_layout(height=900)
fig.show()

In [17]:
# Same data as lines with markers, one subplot per country.
fig = create_scatter_plot(mobility_attitude_df, 'Attitudes towards mobility')
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=900)
fig.show()

In [18]:
# commute_countries = commute_duration_df.reset_index()['Country'].unique().tolist()
# frequent_modes_of_transport_countries = frequent_modes_of_transport_df.reset_index()['Country'].unique().tolist()
# commute_mode_countries = commute_modes_of_transport_df.reset_index()['Country'].unique().tolist()
# transportation_usage_public_countries = frequency_public_transport_df.reset_index()['Country'].unique().tolist()
# transportation_usage_cars_countries = frequency_car_df.reset_index()['Country'].unique().tolist()
# transportation_usage_combined_countries = frequency_combined_df.reset_index()['Country'].unique().tolist()
# # find common elements
# common_countries = list(set(commute_countries) & set(frequent_modes_of_transport_countries) & set(commute_mode_countries) & set(transportation_usage_public_countries) & set(transportation_usage_cars_countries) & set(transportation_usage_combined_countries))
# common_countries

## Duration of daily commute

In [19]:
# Parse the export and keep only the rows whose Update level equals 1.
commute_duration_df = (
    parse_statista_csv('../data/statista/Duration of daily commute.csv')
    .query('Update == 1')
)
commute_duration_df.info()

Question: On average, how long is your daily commute to work/school/university (one way)? (single-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 45 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Base                  45 non-null     float64
 1   Less than 15 minutes  45 non-null     float64
 2   15 to 29 minutes      45 non-null     float64
 3   30 to 59 minutes      45 non-null     float64
 4   60 to 119 minutes     45 non-null     float64
 5   120 minutes and more  45 non-null     float64
 6   I do not commute      45 non-null     float64
dtypes: float64(7)
memory usage: 3.9+ KB


In [20]:
# Display the parsed table: Base holds absolute values, the answers are percentages of Base.
commute_duration_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Base,Less than 15 minutes,15 to 29 minutes,30 to 59 minutes,60 to 119 minutes,120 minutes and more,I do not commute
Year,Update,Country,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019,1,Austria,1050.0,20.571429,27.904762,22.571429,7.047619,1.333333,20.571429
2019,1,Finland,1043.0,24.065197,30.393097,20.134228,4.506232,1.05465,19.846596
2019,1,France,1047.0,26.647564,32.760267,19.006686,4.202483,1.623687,15.759312
2019,1,Germany,1047.0,17.956065,32.378223,20.821394,4.680038,0.95511,23.209169
2019,1,Italy,1051.0,29.019981,35.394862,18.363463,5.233111,1.617507,10.371075
2019,1,Poland,1048.0,23.950382,35.209924,19.465649,4.675573,0.763359,15.935115
2019,1,Spain,1040.0,27.692308,39.807692,18.269231,3.846154,0.192308,10.192308
2019,1,Sweden,1052.0,24.239544,30.323194,21.102662,6.273764,1.140684,16.920152
2019,1,Switzerland,1057.0,21.570482,30.274361,19.48912,6.717124,1.608325,20.340587
2020,1,Austria,2090.0,21.818182,29.138756,22.440191,5.454545,1.674641,19.473684


In [21]:
# Stacked bars: the commute-duration answers are stacked per year, one subplot per country.
fig = create_bar_plot(commute_duration_df, 'Duration of daily commute', barmode='stack')
# Override the default figure height of 600 set by create_bar_plot.
fig.update_layout(height=1000)
fig.show()

In [22]:
# Same data with the bars placed side by side instead of stacked.
fig = create_bar_plot(commute_duration_df, 'Duration of daily commute', barmode='group')
# Override the default figure height of 600 set by create_bar_plot.
fig.update_layout(height=1000)
fig.show()

In [23]:
# Same data as lines with markers, one subplot per country.
fig = create_scatter_plot(commute_duration_df, 'Duration of daily commute')
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=1000)
fig.show()


## Frequent users of modes of transportation

In [24]:
# Parse the export and keep only the rows whose Update level equals 1.
frequent_modes_of_transport_df = (
    parse_statista_csv('../data/statista/Frequent users of modes of transportation.csv')
    .query('Update == 1')
)
frequent_modes_of_transport_df.info()

Question: Recode based on Transportation usage frequency (multi-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 43 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 15 columns):
 #   Column                                                                         Non-Null Count  Dtype  
---  ------                                                                         --------------  -----  
 0   Base                                                                           43 non-null     float64
 1   Bike rentals / bike sharing                                                    43 non-null     float64
 2   Car sharing (short periods, e.g., by the minute, not traditional car rental)   43 non-null     float64
 3   Local public transportation                                                    43 non-null     float64
 4   Own bicycle                                                                    43 non-null 

In [25]:
# Grouped bars, one subplot per country; the returned figure is the cell's output.
create_bar_plot(frequent_modes_of_transport_df, 'Frequent users of modes of transportation', barmode='group')

In [26]:
# Stacked variant of the same chart.
fig = create_bar_plot(frequent_modes_of_transport_df, 'Frequent users of modes of transportation', barmode='stack')
# Override the default figure height of 600 set by create_bar_plot.
fig.update_layout(height=1000)
fig.show()

In [27]:
# Same data as lines with markers; the default height of 600 is kept here.
fig = create_scatter_plot(frequent_modes_of_transport_df, 'Frequent users of modes of transportation')
# fig.update_layout(height=700)
fig.show()

## Modes of transportation for commuting

In [28]:
# Parse the export and keep only the rows whose Update level equals 1.
commute_modes_of_transport_df = (
    parse_statista_csv('../data/statista/Modes of transportation for commuting.csv')
    .query('Update == 1')
)
commute_modes_of_transport_df.info()

Question: Which modes of transportation do you use for your daily commute to work/school/university? (multi-pick) Base: respondents who commute (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 43 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 15 columns):
 #   Column                                                                         Non-Null Count  Dtype  
---  ------                                                                         --------------  -----  
 0   Base                                                                           43 non-null     float64
 1   Bike rentals / bike sharing                                                    43 non-null     float64
 2   Car sharing (short periods, e.g., by the minute, not traditional car rental)   43 non-null     float64
 3   Own / household car                                                            43 non-null     float64
 4   Own bicycle                            

In [29]:
# Grouped bars, one subplot per country; the returned figure is the cell's output.
create_bar_plot(commute_modes_of_transport_df, 'Modes of transportation for commuting', barmode='group')

In [30]:
# Stacked variant of the same chart.
fig = create_bar_plot(commute_modes_of_transport_df, 'Modes of transportation for commuting', barmode='stack')
# Override the default figure height of 600 set by create_bar_plot.
fig.update_layout(height=1000)

fig.show()

In [31]:
# Same data as lines with markers; the returned figure is the cell's output.
create_scatter_plot(commute_modes_of_transport_df, 'Modes of transportation for commuting')

## Bus user (local)

In [32]:
# Parse the export and keep only the rows whose Update level equals 1.
bus_user_df = (
    parse_statista_csv('../data/statista/Bus user (local).csv')
    .query('Update == 1')
)
bus_user_df.info()

Question: Recode based on Mobility service usage (detailed) (single-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 45 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Base              45 non-null     float64
 1   Bus user (local)  45 non-null     float64
 2   Non-user          45 non-null     float64
dtypes: float64(3)
memory usage: 2.4+ KB


In [33]:
# fig = create_bar_plot(bus_user_df, 'Bus user (local)', barmode='group')
# fig.update_layout(height=900)
# fig.show()

In [34]:
# fig = create_bar_plot(bus_user_df, 'Bus user (local)', barmode='stack')
# fig.update_layout(height=900)
# fig.show()

In [35]:
# Lines with markers for users vs. non-users, one subplot per country.
fig = create_scatter_plot(bus_user_df, 'Bus user (local)')
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=900)
fig.show()

## Train user (local)

In [36]:
# Parse the export and keep only the rows whose Update level equals 1.
train_user_df = (
    parse_statista_csv('../data/statista/Train user (local).csv')
    .query('Update == 1')
)
train_user_df.info()

Question: Recode based on Mobility service usage (detailed) (single-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 45 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Base                45 non-null     float64
 1   Train user (local)  45 non-null     float64
 2   Non-user            45 non-null     float64
dtypes: float64(3)
memory usage: 2.4+ KB


In [37]:
# fig = create_bar_plot(train_user_df, 'Train user (local)', barmode='group')
# fig.update_layout(height=900)
# fig.show()

In [38]:
# fig = create_bar_plot(train_user_df, 'Train user (local)', barmode='stack')
# fig.update_layout(height=900)
# fig.show()

In [39]:
# Lines with markers for users vs. non-users, one subplot per country.
fig = create_scatter_plot(train_user_df, 'Train user (local)')
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=900)
fig.show()

## Transportation usage frequency Local public transportation

In [56]:
# Parse the export and keep only the rows whose Update level equals 1.
frequency_public_transport_df = (
    parse_statista_csv('../data/statista/Transportation usage frequency Local public transportation.csv')
    .query('Update == 1')
)
frequency_public_transport_df.info()

Question: How often do you use the following modes of transportation? (single-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 43 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Base                 43 non-null     float64
 1   (Almost) daily       43 non-null     float64
 2   2-5 times per week   43 non-null     float64
 3   1-4 times per month  43 non-null     float64
 4   Every 2-3 months     43 non-null     float64
 5   Less often           43 non-null     float64
 6   Not at all           43 non-null     float64
dtypes: float64(7)
memory usage: 3.4+ KB


In [57]:
# Stacked bars, one subplot per country; the returned figure is the cell's output.
# The title uses the same "frequency: <mode>" form as the grouped and line plots of this data.
create_bar_plot(frequency_public_transport_df, 'Transportation usage frequency: Local public transportation', barmode='stack')

In [42]:
# Grouped variant of the same chart; the returned figure is the cell's output.
create_bar_plot(frequency_public_transport_df, 'Transportation usage frequency: Local public transportation', barmode='group')

In [43]:
# Same data as lines with markers, one subplot per country.
fig = create_scatter_plot(frequency_public_transport_df, 'Transportation usage frequency: Local public transportation')
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=1000)
fig.show()

## Transportation usage frequency Own bicycle

In [44]:
# Parse the export and keep only the rows whose Update level equals 1.
frequency_bicycle_df = (
    parse_statista_csv('../data/statista/Transportation usage frequency Own bicycle.csv')
    .query('Update == 1')
)
frequency_bicycle_df.info()

Question: How often do you use the following modes of transportation? (single-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 43 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Base                 43 non-null     float64
 1   (Almost) daily       43 non-null     float64
 2   2-5 times per week   43 non-null     float64
 3   1-4 times per month  43 non-null     float64
 4   Every 2-3 months     43 non-null     float64
 5   Less often           43 non-null     float64
 6   Not at all           43 non-null     float64
dtypes: float64(7)
memory usage: 3.4+ KB


In [45]:
# Stacked bars, one subplot per country; the returned figure is the cell's output.
create_bar_plot(frequency_bicycle_df, 'Transportation usage frequency: Own bicycle', barmode='stack')

In [46]:
# Grouped variant of the same chart; the returned figure is the cell's output.
create_bar_plot(frequency_bicycle_df, 'Transportation usage frequency: Own bicycle', barmode='group')

In [47]:
# Same data as lines with markers, one subplot per country.
fig = create_scatter_plot(frequency_bicycle_df, 'Transportation usage frequency: Own bicycle')
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=700)
fig.show()

## Transportation usage frequency Own or household car

In [48]:
# Parse the export and keep only the rows whose Update level equals 1.
frequency_car_df = (
    parse_statista_csv('../data/statista/Transportation usage frequency Own or household car.csv')
    .query('Update == 1')
)
frequency_car_df.info()


Question: How often do you use the following modes of transportation? (single-pick) Base: all respondents (Mobility, Travel)

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 43 entries, (2019, 1, 'Austria') to (2023, 1, 'Switzerland')
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Base                 43 non-null     float64
 1   (Almost) daily       43 non-null     float64
 2   2-5 times per week   43 non-null     float64
 3   1-4 times per month  43 non-null     float64
 4   Every 2-3 months     43 non-null     float64
 5   Less often           43 non-null     float64
 6   Not at all           43 non-null     float64
dtypes: float64(7)
memory usage: 3.4+ KB


In [49]:
# Stacked bars, one subplot per country; the returned figure is the cell's output.
create_bar_plot(frequency_car_df, 'Transportation usage frequency: Own or household car', barmode='stack')

In [50]:
# Grouped variant of the same chart; the returned figure is the cell's output.
create_bar_plot(frequency_car_df, 'Transportation usage frequency: Own or household car', barmode='group')

In [51]:
# Same data as lines with markers, one subplot per country.
fig = create_scatter_plot(frequency_car_df, 'Transportation usage frequency: Own or household car')
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=1000)
fig.show()

## Compare frequency - public transport, bicycle and car

This is done by joining the three frequency dataframes and plotting the result. Only a subset of the columns is plotted to keep the figures uncluttered.

In [52]:
# Combine the three usage-frequency tables side by side on their shared
# (Year, Update, Country) index. 'Base' is dropped from each table and the
# remaining columns are prefixed with the mode of transportation so they stay
# distinguishable. Outer joins keep index entries that are missing from one
# of the tables.
frequency_combined_df = (
    frequency_public_transport_df.drop(columns="Base")
    .add_prefix("Public Transport ")
    .join(frequency_bicycle_df.drop(columns="Base").add_prefix("Bicycle "), how="outer")
    .join(frequency_car_df.drop(columns="Base").add_prefix("Car "), how="outer")
)
# Display the combined table.
frequency_combined_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Public Transport (Almost) daily,Public Transport 2-5 times per week,Public Transport 1-4 times per month,Public Transport Every 2-3 months,Public Transport Less often,Public Transport Not at all,Bicycle (Almost) daily,Bicycle 2-5 times per week,Bicycle 1-4 times per month,Bicycle Every 2-3 months,Bicycle Less often,Bicycle Not at all,Car (Almost) daily,Car 2-5 times per week,Car 1-4 times per month,Car Every 2-3 months,Car Less often,Car Not at all
Year,Update,Country,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2019,1,Austria,21.238095,11.238095,13.142857,10.285714,15.047619,29.047619,7.714286,13.428571,18.190476,14.095238,20.285714,26.285714,49.619048,23.333333,9.333333,1.619048,1.142857,14.952381
2019,1,France,10.601719,8.978032,8.213945,4.775549,6.399236,61.031519,6.590258,9.455587,13.849093,7.640879,14.613181,47.851003,52.148997,22.254059,8.500478,1.528176,1.814709,13.753582
2019,1,Germany,13.849093,12.511939,16.427889,11.365807,14.51767,31.327603,10.983763,19.770774,19.866285,8.978032,13.658071,26.743075,50.716332,23.30468,6.78128,1.146132,1.719198,16.332378
2019,1,Italy,11.512845,8.372978,19.124643,10.56137,16.365366,34.062797,6.850618,14.557564,17.221694,10.466223,15.794481,35.10942,63.273073,21.313035,7.992388,1.236917,1.236917,4.947669
2019,1,Poland,14.408397,13.740458,19.179389,12.21374,12.5,27.958015,14.217557,19.847328,22.232824,7.538168,14.503817,21.660305,50.0,20.896947,9.160305,3.148855,2.862595,13.931298
2019,1,Spain,16.25,14.711538,22.019231,9.038462,12.307692,25.673077,5.673077,12.5,14.711538,5.865385,14.807692,46.442308,54.326923,20.0,10.769231,2.115385,1.826923,10.961538
2019,1,Sweden,18.91635,12.357414,16.064639,9.885932,13.688213,29.087452,14.068441,16.254753,15.494297,8.65019,17.015209,28.51711,38.688213,24.904943,11.596958,2.186312,2.946768,19.676806
2020,1,Austria,20.861244,11.770335,12.392344,10.382775,14.258373,30.334928,10.813397,16.794258,18.708134,10.334928,17.033493,26.315789,49.425837,23.062201,9.569378,1.244019,1.483254,15.215311
2020,1,Finland,15.642994,11.708253,19.289827,10.268714,13.53167,29.558541,10.460653,17.466411,16.314779,9.884837,22.072937,23.800384,44.62572,24.184261,7.197697,0.671785,1.34357,21.976967
2020,1,France,9.212411,8.257757,9.976134,5.059666,8.114558,59.379475,5.966587,11.837709,16.27685,7.064439,12.362768,46.491647,52.458234,22.529833,8.257757,1.718377,1.479714,13.556086


In [53]:

# fig = create_scatter_plot(frequency_combined_df, 'Transportation usage frequency', cols_to_plot=[col for col in frequency_combined_df.columns.to_list() if 'daily' in col or 'Not at all' in col or '2-5 times per week' in col])
# Plot only the columns containing 'daily' for the three modes of transportation.
# cols_to_plot is passed explicitly because the combined frame has no 'Base' column.
fig = create_scatter_plot(frequency_combined_df, 'Transportation usage frequency', cols_to_plot=[col for col in frequency_combined_df.columns.to_list() if 'daily' in col])
# Override the default figure height of 600 set by create_scatter_plot.
fig.update_layout(height=1000)
# fig.show(renderer='browser')

In [54]:
# Stacked bars limited to the columns containing 'daily', '2-5 times per week' or 'Not at all'.
create_bar_plot(frequency_combined_df, 'Transportation usage frequency', cols_to_plot=[col for col in frequency_combined_df.columns.to_list() if 'daily' in col or 'Not at all' in col or '2-5 times per week' in col], barmode='stack')


In [55]:
# Grouped variant of the same column subset.
fig = create_bar_plot(frequency_combined_df, 'Transportation usage frequency', cols_to_plot=[col for col in frequency_combined_df.columns.to_list() if 'daily' in col or 'Not at all' in col or '2-5 times per week' in col], barmode='group')
fig.show()