# Canadian Social Harmony Index Visualization Project

Note: The data used in this notebook is from the Canadian Social Harmony Study by RA2 and Cause and Effect Marketing. The survey was internally funded by RA2 and Cause and Effect Marketing.

[1. Data Exploration](#section1) 

[2. Find value and value group scores for each person](#section2) 

[3. Merge responses with value group](#section3)

[4. Figure Widget](#section4) 

In [1]:
# import all the required packages
import pandas as pd
pd.set_option('display.max_columns', None)

import plotly.express as px
import numpy as np

from IPython.display import clear_output, HTML
from ipywidgets import interact, interact_manual, widgets, Layout, Box

<a id="section1"></a>

## 1. Data Exploration

#### Read .csv file

In [2]:
# Load the three input tables: the survey responses, the key that links each
# question to its value / value group, and the rating scale.
df_responses, df_values, df_scale = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/dfg_survey_responses.csv',
        './data/dfg_values_key-question-key.csv',
        './data/sixPointScale.csv',
    )
)

In [3]:
# Use the 'ID' column as the row index. The guard keeps this cell re-runnable:
# once 'ID' has become the index it is no longer a column, so an unconditional
# set_index('ID') would raise KeyError on a second execution.
if df_responses.index.name != 'ID':
    df_responses = df_responses.set_index('ID')
#df_responses.head(3)

In [4]:
#df_values.head(3)

In [5]:
#df_scale

## 1.1 Widget
#### - Explore each column

In [6]:
@interact
def bar_chart(col_name=list(df_responses.columns[1:])):
    """Show, for one response column, the row count and summed weight per category.

    Parameters
    ----------
    col_name : str
        Column of ``df_responses`` to group by (chosen from the dropdown that
        ``@interact`` builds from the default list).

    The chart has two grouped bars per category: 'Total' (number of rows) and
    'Total_Wt' (sum of 'WT'). When ``col_name`` is a question key listed in
    ``df_values['Key']``, the x-axis is labelled with the question text.
    """
    grouped = df_responses.groupby(col_name).agg({'WT': ['sum', 'count']}).reset_index()
    # Flatten the two-level column labels, e.g. ('WT', 'sum') -> 'WTsum'.
    grouped.columns = [''.join(parts) for parts in grouped.columns]
    grouped = grouped.rename(columns={'WTsum': 'Total_Wt', 'WTcount': 'Total'})

    plot_kwargs = {'x': col_name, 'y': ['Total', 'Total_Wt'], 'barmode': 'group'}
    question_text = df_values.loc[df_values['Key'] == col_name, 'Question'].values
    if len(question_text):
        plot_kwargs['labels'] = {col_name: question_text[0]}

    px.bar(grouped, **plot_kwargs).show()

interactive(children=(Dropdown(description='col_name', options=('FSA', 'Rural_FSA', 'Region', 'YOB', 'Age', 'G…

In [7]:
#@interact
#def bar_chart(col_name=list(df_values.columns)):
#    fig = px.bar(df_values.groupby(col_name)[col_name].count())
#    fig.show()

<a id="section2"></a>

## 2. Find value and value group scores for each person

#### 2.1. Replace the rating in responses with values for calculating the net score

In [8]:
# Independent copy of every column from 'SV_01' through the last column, so that
# later edits to df_questions never touch df_responses.
df_questions = df_responses.loc[:, slice('SV_01', None)].copy()

In [9]:
# Lookup from each rating label ('Rating') to its numeric position on the
# scale ('Scale order').
scale_dict = df_scale.set_index('Rating')['Scale order'].to_dict()
scale_dict

{'Not like me at all': 1,
 'Not like me': 2,
 'Somewhat like me': 3,
 'A little like me': 4,
 'Like me': 5,
 'Very much like me': 6}

In [10]:
# Convert the text ratings of the columns 'SV_01'..'SV_20' to their numeric scale order.
# Whole columns are replaced (df[cols] = ...) instead of being written through
# .loc[:, cols] = ...: recent pandas versions write a .loc slice assignment into
# the existing object-dtype columns, so the integer cast would not stick.
sv_cols = df_questions.loc[:, 'SV_01':'SV_20'].columns
sv_scores = df_responses[sv_cols].apply(lambda ratings: ratings.map(scale_dict))
if sv_scores.isna().any().any():
    # A missing answer or a rating that is not a key of scale_dict cannot be cast to int.
    raise ValueError("Columns 'SV_01'..'SV_20' contain ratings that are not in scale_dict")
df_questions[sv_cols] = sv_scores.astype(int)
#df_questions.head(3)

#### 2.2. Create new dataframes for values and value group for each id

In [12]:
# Two empty frames (weighted and unweighted), one row per response and one column
# per value; the last three entries of df_values['Value'].unique() are left out.
df_answers, df_answers_unweighted = (
    pd.DataFrame(index=df_responses.index, columns=df_values['Value'].unique()[:-3])
    for _ in range(2)
)

In [13]:
# Score each value as the mean of the first two questions listed for it,
# stored once multiplied by the row's 'WT' and once unweighted.
for value_name in df_values['Value'].unique()[:-3]:
    question_keys = df_values.loc[df_values['Value'] == value_name, 'Key'].values
    mean_score = (df_questions[question_keys[0]] + df_questions[question_keys[1]]) / 2
    df_answers.loc[:, value_name] = mean_score * df_responses['WT']
    df_answers_unweighted.loc[:, value_name] = mean_score

In [13]:
#df_answers.head(3)

In [14]:
# Two empty frames (weighted and unweighted), one row per response and one column
# per value group; the last entry of df_values['Value Group'].unique() is left out.
df_answers_group, df_answers_group_unweighted = (
    pd.DataFrame(index=df_responses.index, columns=df_values['Value Group'].unique()[:-1])
    for _ in range(2)
)

In [15]:
# A value group's score is the row-wise sum of the scores of the values that
# df_values assigns to that group (done for the weighted and unweighted frames).
for group_name in df_answers_group.columns:
    member_values = df_values.loc[df_values['Value Group'] == group_name, 'Value'].unique()
    for group_frame, value_frame in (
        (df_answers_group, df_answers),
        (df_answers_group_unweighted, df_answers_unweighted),
    ):
        group_frame.loc[:, group_name] = value_frame[member_values].sum(axis=1)

In [19]:
#df_answers_unweighted.head(3)

<a id="section3"></a>

## 3. Merge responses with value group

In [21]:
# Both merged tables start as independent copies of the responses, so adding
# score columns to them leaves df_responses unchanged.
df_all, df_all_unweighted = (df_responses.copy() for _ in range(2))

In [22]:
# Attach the per-response scores to the response tables.
# All four score frames are cast to float before they are copied in: they were
# created as empty frames, so without the cast their columns can remain object
# dtype. Previously only the value frames were cast, not the value-group frames.
df_all.loc[:,df_answers_group.columns] = df_answers_group.astype(float)
df_all.loc[:,df_answers.columns] = df_answers.astype(float)
df_all_unweighted.loc[:,df_answers_group_unweighted.columns] = df_answers_group_unweighted.astype(float)
df_all_unweighted.loc[:,df_answers_unweighted.columns] = df_answers_unweighted.astype(float)
#df_all.head(3)

In [24]:
#df_all_unweighted.head(3)

<a id="section4"></a>

## 4. Figure Widget

In [49]:
def on_button_clicked(arg):
    """Redraw the bar chart inside ``out`` from the current widget selections.

    Reads the notebook-level widgets:

    * ``feature_dropdown`` - column of the merged tables to group by
      ('YOB' is rejected with the message "Wrong input");
    * ``label_dropdown``   - 'Value' plots the value scores, any other choice
      plots the value-group scores;
    * ``weight_dropdown``  - checked: weighted mean per category,
      sum(WT * score) / sum(WT), taken from ``df_all``;
      unchecked: plain mean per category, taken from ``df_all_unweighted``;
    * ``check_dropdown``   - checked: every score column is divided by its total
      over all categories, so the bars show each category's share.

    Parameters
    ----------
    arg :
        The object passed by ``Button.on_click``; not used.
    """
    with out:
        clear_output()

        feature = feature_dropdown.value
        if feature == "YOB":
            display("Wrong input")
            return

        if label_dropdown.value == 'Value':
            score_cols = df_values['Value'].unique()[:-3].tolist()
        else:
            score_cols = df_values['Value Group'].unique()[:-1].tolist()

        # The grouping column is deliberately NOT part of the selected columns:
        # groupby already returns it as the index, and selecting it as well makes
        # a numeric feature get aggregated and then collide on reset_index().
        if weight_dropdown.value:
            # df_all stores WT * score, so the sums give the numerator and the
            # summed 'WT' the denominator of the weighted mean.
            totals = df_all.groupby(feature)[['WT'] + score_cols].sum()
            df_plot = totals[score_cols].div(totals['WT'], axis=0)
        else:
            # Plain mean of the unweighted scores; it must not be divided by the
            # mean weight, otherwise the result would still depend on 'WT'.
            df_plot = df_all_unweighted.groupby(feature)[score_cols].mean()

        if check_dropdown.value:
            # Share of each category within every score column.
            df_plot = df_plot.div(df_plot.sum(axis=0), axis=1)

        # One x position per score, one bar series per category.
        df_plot = df_plot.T
        fig = px.bar(df_plot, x=df_plot.index, y=df_plot.columns)
        fig.show()

# Shared layout: a centred, full-width row.
box_layout = Layout(display="flex", flex_flow='row', align_items='center', width='100%', justify_content = 'center')

# Column to group by; the options are a positional slice of the response columns.
feature_dropdown = widgets.Dropdown(
    options = df_responses.columns[1:-23],
    description = 'Feature: ',
    value = 'Region',
    style={'description_width': 'initial'},
    layout={'width': 'initial'}, 
)

# Whether to plot individual values or value groups.
label_dropdown = widgets.Dropdown(
    options = ['Value', 'Value group'],
    description = 'Label: ',
    value = 'Value',
    style={'description_width': 'initial'},
    layout={'width': 'initial'}, 
)

# Show each category's share instead of the raw score.
check_dropdown = widgets.Checkbox(
    value=False,
    description='Percentage',
    disabled=False,
    indent=False
)

# Use the weighted scores instead of the unweighted ones.
weight_dropdown = widgets.Checkbox(
    value=False,
    description='Weighted',
    disabled=False,
    indent=False
)

go_button = widgets.Button(
    description='Submit',
    disabled=False,
    button_style='success',
)

go_button.on_click(on_button_clicked)
out = widgets.Output()
# `layout` is an argument of Box, not of display(): passed to display() it is
# not applied to the box, so the first two rows were never centred.
display(Box(children = [feature_dropdown, label_dropdown, go_button], layout = box_layout))
display(Box(children = [check_dropdown, weight_dropdown], layout = box_layout))
display(Box(children = [out], layout=box_layout))

Box(children=(Dropdown(description='Feature: ', index=2, layout=Layout(width='initial'), options=('FSA', 'Rura…

Box(children=(Checkbox(value=False, description='Percentage', indent=False), Checkbox(value=False, description…

Box(children=(Output(),), layout=Layout(align_items='center', display='flex', flex_flow='row', justify_content…