# World Values Survey Data Notebook - Child Traits

In [58]:
import numpy as np
import pandas as pd

from itertools import combinations
from math import factorial
from mlxtend.frequent_patterns import apriori
from kmodes.kmodes import KModes
from sklearn.metrics import silhouette_score

import pyreadr

import matplotlib.pyplot as plt

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

## Acquiring Data
The data was acquired from the World Values Survey at https://www.worldvaluessurvey.org/wvs.jsp. Wave 1 through 6 were in .rds format while Wave 7 was in .rdata format.

In [2]:
# Use pyreadr to read .rds files to DataFrame
wv1_6 = [ pyreadr.read_r(f'../rdata/wv{i}.rds')[None] for i in range(1, 7) ]

In [2]:
# RData converted from .rdata file to CSV with RStudio
wv7 = pd.read_csv('../rdata/wv7.csv', index_col=0)

  interactivity=interactivity, compiler=compiler, result=result)


### Traits that should be encouraged in children (Wave 7, Q7-17)
- List of Traits (choose 5): good manners, independence, hard work, feeling of responsibility, imagination, tolerance and respect for other people, thrift/saving money, determination/perseverance, religious faith, not being selfish, obedience

In [9]:
df = wv7[['B_COUNTRY_ALPHA', 'Q7', 'Q8', 'Q9', 'Q10', 'Q11', 'Q12', 'Q13', 'Q14', 'Q15', 'Q16', 'Q17']].copy()

In [10]:
# Create dictionary from questionnaire
traits_dict = {
    'B_COUNTRY_ALPHA': 'country',
    'Q7': 'manners',
    'Q8': 'independence',
    'Q9': 'hard work',
    'Q10': 'responsibility',
    'Q11': 'imagination',
    'Q12': 'tolerance',
    'Q13': 'thrift',
    'Q14': 'determination',
    'Q15': 'faith',
    'Q16': 'unselfishness',
    'Q17': 'obedience',
}

# Function to boolean
def convert_bool(x):
    if x == 1:
        return True
    else:
        return False

In [11]:
# Clean column names and convert to boolean
df.columns = df.columns.map(traits_dict)
df.iloc[:, 1:] = df.iloc[:, 1:].applymap(convert_bool)

In [12]:
traits = [ col for col in df.columns if col != 'country' ]

In [14]:
df['num'] = df[traits].sum(axis=1)

In [24]:
# NaN all selected number of traits not equal to 5 (standardize measurements)
df['num'] = df[traits].sum(axis=1)
df['num'] = np.where(df['num'] == 5, df['num'], np.nan)

# Drop NaN
df.dropna(inplace=True)
df.drop(columns='num', inplace=True)

In [64]:
# Set data columns
X = df.drop(columns='country')

In [66]:
possible_combos = list(combinations(traits, 5))

In [68]:
# Function to map traits onto values
def map_trait(frame, trait):
    return np.where(frame[trait] == True, trait, np.nan)

# Function to concat traits
def list_traits(x):
    return [ trait for trait in x if trait != 'nan' ]

In [83]:
# Map traits onto values
tdf = pd.DataFrame()
for i in range(len(X.columns)):
    trait = X.columns[i]
    tdf[i] = map_trait(X, trait)

In [84]:
# Apply concat function row-wise
tdf['traits'] = tdf.apply(list_traits, axis=1)

In [85]:
tdf['country'] = df.reset_index(drop=True)['country']
tdf = tdf[['country', 'traits']]

In [89]:
tdf['traits'].value_counts(normalize=True)

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1709, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[manners, independence, hard work, responsibility, tolerance]                0.045944
[manners, responsibility, tolerance, faith, obedience]                       0.027689
[manners, hard work, responsibility, tolerance, faith]                       0.025348
[manners, hard work, responsibility, tolerance, thrift]                      0.023098
[manners, independence, hard work, responsibility, imagination]              0.022792
[manners, hard work, responsibility, tolerance, determination]               0.022396
[manners, independence, responsibility, tolerance, determination]            0.017895
[manners, independence, responsibility, tolerance, faith]                    0.017481
[manners, hard work, tolerance, faith, obedience]                            0.017247
[manners, hard work, responsibility, tolerance, obedience]                   0.016797
[manners, independence, responsibility, tolerance, thrift]                   0.014907
[manners, independence, hard work, responsibility, thr

In [91]:
df['manners'].value_counts(normalize=True)

True     0.816927
False    0.183073
Name: manners, dtype: float64

In [93]:
# Run apriori algorithm
trait_sets = apriori(X, 0.25)

In [94]:
trait_sets['itemsets'] = [ list(value) for value in trait_sets['itemsets'].values ]

In [95]:
item2trait = {
    i : traits[i]
    for i in range(11)
}

In [96]:
trait_sets['traits'] = trait_sets['itemsets'].map(lambda x: [ item2trait[ele] for ele in x ])

In [98]:
trait_sets.sort_values('support', ascending=False)

Unnamed: 0,support,itemsets,traits
0,0.816927,[0],[manners]
3,0.6969,[3],[responsibility]
4,0.662712,[5],[tolerance]
12,0.562435,"[0, 3]","[manners, responsibility]"
2,0.553505,[2],[hard work]
13,0.533504,"[0, 5]","[manners, tolerance]"
20,0.45195,"[3, 5]","[responsibility, tolerance]"
1,0.446477,[1],[independence]
11,0.442282,"[0, 2]","[manners, hard work]"
7,0.376715,[8],[faith]


In [190]:
# Drop one-hot columns
df.drop(columns=[ col for col in df.columns if col not in ['country', 'traits'] ], inplace=True)

In [191]:
# Drop NaN
df.dropna(inplace=True)

In [192]:
# NaN incorrectly answered questionnaire (more than 5 traits selected)
df['num_traits'] = df['traits'].map(lambda x: len(x))

In [193]:
df['traits'] = np.where(df['num_traits'] > 5, np.nan, df['traits'])
df['traits'] = np.where(df['num_traits'] == 0, np.nan, df['traits'])

In [194]:
# Drop NaN
df.dropna(inplace=True)

In [195]:
df.drop(columns='num_traits', inplace=True)

In [196]:
df['traits']

1        [manners, hard work, responsibility, tolerance...
2        [independence, responsibility, imagination, to...
3        [manners, responsibility, tolerance, determina...
4        [manners, hard work, responsibility, tolerance...
5        [manners, responsibility, tolerance, thrift, u...
                               ...                        
70862             [manners, hard work, imagination, faith]
70863               [manners, hard work, tolerance, faith]
70864    [manners, hard work, determination, unselfishn...
70865    [manners, independence, hard work, tolerance, ...
70866    [manners, hard work, thrift, determination, ob...
Name: traits, Length: 68059, dtype: object

In [None]:
# Function to group responses by country
def qbc(df, question, urbrural=True):
    if urbrural == True:
        q = df.groupby(['B_COUNTRY_ALPHA', 'H_URBRURAL'])[question].mean()
    else:
        q = df.groupby(['B_COUNTRY_ALPHA'])[question].mean()
    return pd.DataFrame(q).unstack()