# World Values Survey Data Notebook - Child Traits

In [152]:
import numpy as np
import pandas as pd

from mlxtend.frequent_patterns import apriori

import pyreadr

import matplotlib.pyplot as plt

# Raise both pandas display limits to 500 so wide/long frames are not truncated
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, 500)

## Acquiring Data
The data was acquired from the World Values Survey at https://www.worldvaluessurvey.org/wvs.jsp. Waves 1 through 6 were in .rds format, while Wave 7 was in .rdata format.

In [2]:
# Read the .rds file of each wave (1 through 6) with pyreadr.
# The frame for each file is looked up under the key None in read_r's result.
wv1_6 = []
for wave in range(1, 7):
    rds_path = f'../rdata/wv{wave}.rds'
    wv1_6.append(pyreadr.read_r(rds_path)[None])

In [3]:
# RData converted from .rdata file to CSV with RStudio
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids columns ending up with mixed types.
# NOTE(review): the truncated warning left in this cell's output looks like a
# pandas DtypeWarning (mixed types) -- confirm it disappears after this change.
wv7 = pd.read_csv('../rdata/wv7.csv', index_col=0, low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


### Traits that should be encouraged in children (Wave 7, Q7-17)
- List of Traits (choose 5): good manners, independence, hard work, feeling of responsibility, imagination, tolerance and respect for other people, thrift/saving money, determination/perseverance, religious faith, not being selfish, obedience

In [224]:
# Keep the country code plus the eleven child-trait questions (Q7 through Q17);
# .copy() gives an independent frame so later edits do not touch wv7.
trait_questions = [f'Q{number}' for number in range(7, 18)]
df = wv7[['B_COUNTRY_ALPHA', *trait_questions]].copy()

In [225]:
# Create dictionary from questionnaire
# Survey column codes (country code, then Q7..Q17) are paired, in order, with
# the short names used for them in the rest of this notebook.
_survey_columns = ['B_COUNTRY_ALPHA'] + [f'Q{number}' for number in range(7, 18)]
_short_names = [
    'country',
    'manners',
    'independence',
    'hard work',
    'responsibility',
    'imagination',
    'tolerance',
    'thrift',
    'determination',
    'faith',
    'unselfishness',
    'obedience',
]
traits_dict = dict(zip(_survey_columns, _short_names))

# Function to boolean
def convert_bool(x):
    """Return True only when `x` compares equal to 1; any other value gives False."""
    return True if x == 1 else False

In [226]:
# Clean column names and convert to boolean
# `rename` is safe to re-run: names that were already translated are left
# alone, whereas reassigning `df.columns` through `map` a second time would
# turn every column name into NaN.
df = df.rename(columns=traits_dict)

# Vectorized equivalent of applying `convert_bool` to every cell: a cell is
# True only where it equals 1. This avoids the element-wise `applymap`
# (deprecated in recent pandas) and assigns by column label so the trait
# columns are replaced with the boolean results rather than written into the
# existing numeric columns.
trait_cols = df.columns[1:]  # every column after the leading country column
df[trait_cols] = df[trait_cols].eq(1)

In [227]:
# Names of the trait columns: every column of df except 'country'
traits = df.columns[df.columns != 'country'].tolist()

In [229]:
# Mark incorrectly answered questionnaires (more than 5 traits selected, or
# none selected) with NaN in a temporary 'drop' column.
selected_count = df[traits].sum(axis=1)
is_invalid = (selected_count > 5) | (selected_count == 0)
df['drop'] = np.where(is_invalid, np.nan, selected_count)

# Remove every row holding a NaN (including the rows marked above), then
# discard the temporary marker column.
df.dropna(inplace=True)
df.drop(columns='drop', inplace=True)

In [236]:
# Feature matrix: the trait columns only (country removed)
X = df.drop('country', axis='columns')

# Frequent itemsets among the traits, keeping those with support >= 0.1
trait_sets = apriori(X, min_support=0.1)

In [245]:
# Turn each itemset into a plain list
trait_sets['itemsets'] = trait_sets['itemsets'].map(list)

In [252]:
# Map each position in `traits` to the trait name at that position.
# Built from `traits` itself rather than a hard-coded count of 11, so it stays
# correct if the list of traits ever changes length.
item2trait = dict(enumerate(traits))

In [254]:
# Translate every item in each itemset into its trait name via item2trait
trait_sets['traits'] = trait_sets['itemsets'].map(
    lambda items: [item2trait[item] for item in items]
)

In [255]:
# Display the itemsets with their support and the matching trait names
trait_sets

Unnamed: 0,support,itemsets,traits
0,0.773685,[0],[manners]
1,0.412915,[1],[independence]
2,0.527035,[2],[hard work]
3,0.651602,[3],[responsibility]
4,0.197252,[4],[imagination]
5,0.61625,[5],[tolerance]
6,0.289406,[6],[thrift]
7,0.310403,[7],[determination]
8,0.354408,[8],[faith]
9,0.247061,[9],[unselfishness]


In [187]:
# Function to map traits onto values
def map_trait(frame, trait):
    """Label the rows of `frame` where column `trait` is True.

    Parameters
    ----------
    frame : table indexable by column name (used as ``frame[trait]``)
    trait : column name; also the label written for matching rows

    Returns
    -------
    Array holding `trait` where the column equals True and the ``np.nan``
    placeholder elsewhere.
    """
    is_selected = frame[trait] == True
    return np.where(is_selected, trait, np.nan)

# Function to concat traits
def list_traits(x):
    """Return the entries of `x` as a list, leaving out any equal to the string 'nan'."""
    return list(filter(lambda value: value != 'nan', x))

In [188]:
# Map traits onto values
# Build the frame on X's own index. The result is later assigned back onto df,
# and pandas aligns that assignment by index label; a fresh 0..n-1 index would
# not match the filtered survey labels, attaching trait lists to the wrong rows
# and leaving NaN for labels that do not exist.
new_df = pd.DataFrame(index=X.index)
for i, trait in enumerate(X.columns):
    # Column i holds the trait name where the respondent selected it
    new_df[i] = map_trait(X, trait)

In [189]:
# Collapse each row of per-trait labels into one list of selected traits
df['traits'] = new_df.apply(list_traits, axis='columns')

In [190]:
# Drop one-hot columns, keeping only 'country' and 'traits'
keep_cols = ('country', 'traits')
one_hot_cols = [name for name in df.columns if name not in keep_cols]
df.drop(columns=one_hot_cols, inplace=True)

In [191]:
# Remove every row that has a NaN in any column
df.dropna(how='any', inplace=True)

In [192]:
# Count how many traits each respondent selected (used to spot incorrectly
# answered questionnaires)
df['num_traits'] = df['traits'].map(len)

In [193]:
# Blank out (NaN) the trait list of any respondent who selected more than 5
# traits or none at all
invalid_count = (df['num_traits'] > 5) | (df['num_traits'] == 0)
df['traits'] = np.where(invalid_count, np.nan, df['traits'])

In [194]:
# Remove the rows whose trait list was blanked out (any row containing NaN)
df.dropna(how='any', inplace=True)

In [195]:
# The helper count column is no longer needed
del df['num_traits']

In [196]:
# Preview the per-respondent lists of selected traits
df['traits']

1        [manners, hard work, responsibility, tolerance...
2        [independence, responsibility, imagination, to...
3        [manners, responsibility, tolerance, determina...
4        [manners, hard work, responsibility, tolerance...
5        [manners, responsibility, tolerance, thrift, u...
                               ...                        
70862             [manners, hard work, imagination, faith]
70863               [manners, hard work, tolerance, faith]
70864    [manners, hard work, determination, unselfishn...
70865    [manners, independence, hard work, tolerance, ...
70866    [manners, hard work, thrift, determination, ob...
Name: traits, Length: 68059, dtype: object

In [None]:
# Function to group responses by country
def qbc(df, question, urbrural=True):
    """Average the answers to `question` per country.

    Parameters
    ----------
    df : frame with a 'B_COUNTRY_ALPHA' column (and 'H_URBRURAL' when
        `urbrural` is True) plus the `question` column(s)
    question : column selection to average
    urbrural : when True, also group by 'H_URBRURAL'

    Returns
    -------
    The group means wrapped in a DataFrame and unstacked. With both grouping
    keys, the innermost key ('H_URBRURAL') moves to the columns. With the
    country key alone, the index has a single level, so `unstack()` returns a
    Series instead of a DataFrame.
    """
    group_keys = ['B_COUNTRY_ALPHA']
    if urbrural == True:
        group_keys.append('H_URBRURAL')
    means = df.groupby(group_keys)[question].mean()
    return pd.DataFrame(means).unstack()