In [None]:
# python libraries
import pandas as pd
import numpy as np
from math import log

import re

import matplotlib.pyplot as plt
import seaborn as sns

# load R into python
% load_ext rpy2.ipython
# show plots inline
% matplotlib inline

In [None]:
# set figure aesthetics

# palette built from named xkcd colours
cat_colours = sns.xkcd_palette(["cerulean", "goldenrod", "red", "grass green"])

# colour cycle that is current at this point, i.e. before sns.set() is called
chain_colours = sns.color_palette()

# sns.set() re-applies the default plotting context, which would undo a
# line-width override made by an earlier sns.set_context() call; passing the
# override through rc= applies it after the context has been set
sns.set(font_scale=1.3, rc={"lines.linewidth": 1.2})

In [None]:
%%R 
# R libraries
library("lme4")
library("lmerTest")

In [None]:
# function to make printing from R output faster

def print_output(output):
    """
    Print each element of `output` on its own line.

    Used in this notebook to display the lines of text returned by R's
    capture.output().
    """

    for line in output:

        # function-call form is valid under both Python 2 and Python 3
        print(line)

In [None]:
# read in data files 
ex1 = pd.read_csv("../data_files/ex1.csv")
ex2 = pd.read_csv("../data_files/ex2.csv")
ex3 = pd.read_csv("../data_files/ex3.csv")

## Contents

1. <a href='#funcs'>Functions for efficiency measures</a>
2. <a href='#gest-len'>Gesture sequence length</a>
    * <a href='#gl-ex1'>Experiment 1</a>
    * <a href='#gl-ex2'>Experiment 2</a>
    * <a href='#gl-ex3'>Experiment 3</a>
3. <a href='#reps'>Frequency of repetitions</a>
    * <a href='#r-ex1'>Experiment 1</a>
    * <a href='#r-ex2'>Experiment 2</a>
    * <a href='#r-ex3'>Experiment 3</a>

## 1. Functions for efficiency measures

<a id='funcs'></a>

In [None]:
# get length of gesture code

# split gesture into list of elements

# count number of elements

def code_length(df):
    """
    Add a 'code_len' column holding the number of elements in each gesture code.

    Each value in df['code_string'] is split on commas and the resulting
    elements are counted. Rows whose code string is missing get NaN.
    The data frame is modified in place; nothing is returned.

    The column is computed in one pass over 'code_string', so the result does
    not depend on the data frame having a default 0..n-1 index.
    """

    def count_elements(code):

        # if there is no value, data missing
        if pd.isnull(code):
            return np.nan

        # split code into list elements and count them
        return len(code.split(','))

    # float dtype so that missing rows can be stored as NaN
    df['code_len'] = df['code_string'].map(count_elements).astype(float)
            

In [None]:
#takes a list of gesture coding, and pulls out the shapes, giving a list of all instances of all shapes
# and a set of the shapes used

def shape_list(alist,reg):

    """
    Extract gesture shapes from a list of gesture codes.

    The codes in `alist` are joined with commas into one string, which is then
    searched with the regular expression `reg`.

    Returns a tuple: the list of every match (in order, duplicates kept) and
    the set of distinct matches.
    """

    # compile the pattern, then scan the comma-joined code with it
    pattern = re.compile(reg)
    shapes = pattern.findall(','.join(alist))

    # all matches, plus the distinct ones
    return shapes, set(shapes)

In [None]:
def remove_rep_tag(df):
    '''
    Remove gesture-internal repetition tags from the code strings.

    Every "repeat[" and every "]" is deleted from df['code_string'], leaving
    the elements that were wrapped in the tag. Missing values are left
    untouched. The data frame is modified in place; nothing is returned.

    The whole column is rewritten at once, so the result does not depend on
    the data frame having a default 0..n-1 index.
    '''

    # raw string: "\[" and "\]" are regex escapes, not string escapes
    tag_pattern = re.compile(r'repeat\[|\]')

    def strip_tags(code):

        # leave missing values as they are
        if pd.isnull(code):
            return code

        return tag_pattern.sub('', code)

    df['code_string'] = df['code_string'].map(strip_tags)

In [None]:

def count_repeats(code):
    '''
    Count the number of repetitions in a gesture sequence.

    Shapes are the substrings of `code` matched by the 1h.../2h... pattern
    below. The result is the number of matches minus the number of distinct
    matches, i.e. how many matches duplicate an earlier one.
    '''

    shape_pattern = r'1h.*?\b|2h.*?\b'

    all_shapes, unique_shapes = shape_list([code], shape_pattern)

    return len(all_shapes) - len(unique_shapes)

In [None]:
def count_repeats_df(df):
    '''
    Count number of repetitions for each row in a data frame.

    For every row with a non-missing 'code_string', count_repeats() is applied
    and the result stored in 'num_reps'. Rows with a missing code string are
    not written to (they hold NaN when the column is newly created).
    The data frame is modified in place; nothing is returned.

    Rows are selected with a positional boolean mask, so the result does not
    depend on the data frame having a default 0..n-1 index.
    '''

    # positional mask of the rows that have a code string
    has_code = df['code_string'].notnull().values

    # make sure the target column exists before filling part of it
    if 'num_reps' not in df.columns:
        df['num_reps'] = np.nan

    # only write when there is at least one row to count
    if has_code.any():

        df.loc[has_code, 'num_reps'] = [
            count_repeats(code_str)
            for code_str in df.loc[has_code, 'code_string']
        ]
            
            

## 2. Gesture sequence length

<a id='gest-len'></a>

In [None]:
# add code length column
code_length(ex1)
code_length(ex2)
code_length(ex3)

In [None]:
# NOTE: these are the same paths the data frames were read from, so the input
# CSV files are overwritten in place, now including the 'code_len' column
# added by code_length(); index=False keeps the row index out of the files
ex1.to_csv("../data_files/ex1.csv", index=False)
ex2.to_csv("../data_files/ex2.csv", index=False)
ex3.to_csv("../data_files/ex3.csv", index=False)

### Experiment 1

<a id='gl-ex1'></a>

Plot showing mean gesture sequence length across generations in experiment 1.

In [None]:
# code_len against generation for experiment 1, one line (and marker) per chain;
# ci=None turns off error bars and legend=False suppresses the automatic legend
code_len_ex1_plot = sns.factorplot(x='generation', y='code_len', hue='chain',
                                  ci=None, data=ex1,
                                 size = 6, aspect = 1, legend=False,
                                     markers=['o','^','s', '*', 'D'],)

# axis labels, explicit tick labels 0-5 and a fixed y range
code_len_ex1_plot.set(xlabel= "Generation", ylabel = "Mean gesture length", xticklabels = [0,1,2,3,4,5],
                     ylim=(0,4))


The models below analyse the effect of generation on the length of gesture sequences. Firstly, we run a generalised mixed effects model with a poisson distribution (for count data). 

**Outcome variable:**
 * gesture sequence length
 
**Fixed effects:**
 * generation
 
**Random effects:**
 * chain (random intercept, random slope of generation)
 * participant (random intercept, nested in chains)
 * target meaning

In [None]:
%R -i ex1

In [None]:
%%R

# run glm poisson model for gesture length

len_model_ex1 = glmer(code_len ~ generation +
                            (1+ generation|chain) + (1|participant:chain)+
                            (1|target), data=ex1, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))


In [None]:
len_model_ex1 = %R capture.output(summary(len_model_ex1))

print_output(len_model_ex1)

The model demonstrates a significant effect of generation on gesture sequence length. Here, gesture sequences become shorter over generations.

In [None]:
%%R 

# remove seed generation (test 1 - 5)
ex1_from0 = subset(ex1, generation!=0)

# reset generations to 0
ex1_from0$generation = ex1_from0$generation - 1

In [None]:
%%R

# gesture length model without seed generation

len_model_ex1_from0 = glmer(code_len ~ generation +
                            (1+ generation|chain) + (1|participant:chain)+
                            (1|target), data=ex1_from0, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))


In [None]:
len_model_ex1_from0 = %R capture.output(summary(len_model_ex1_from0))

print_output(len_model_ex1_from0)

### Experiment 2

<a id='gl-ex2'></a>

Plot showing the mean length of gesture sequences over generations/rounds for each chain/pair in each condition of experiment 2.

In [None]:
# code_len against generation for experiment 2: one facet per condition
# (in col_order), one line per chain; ci=None turns off error bars and
# legend=False leaves the legend to the plt.legend() call below
code_len_ex2_plot = sns.factorplot(x='generation', y='code_len', hue='chain',
                                  ci=None, data=ex2, col='condition',
                                 size = 5, aspect = 0.8, legend=False,
                                     markers=['o','^','s', '*', 'D'],
                                      sharey=False,
                                     col_order = ["Transmission + Interaction",
                                                 "Interaction only", "Transmission only"])

# per-facet y labels are blanked; a single label is drawn with plt.text() below
code_len_ex2_plot.set(xlabel= "Generation", ylabel = "", xticklabels = [0,1,2,3,4,5],
                     ylim=(0,11))

# facet titles show only the condition name
code_len_ex2_plot.set_titles('{col_name}')

# plt.legend() and plt.text() act on the current axes
# (presumably the last facet drawn - TODO confirm)
plt.legend(title='Chain', bbox_to_anchor=(1,1))

# (-16, 8) looks hand-tuned to place the y-axis label left of the first facet
plt.text(-16, 8, 'Mean gesture length', rotation=90)

The models below analyse the effect of generation and condition on the length of gesture sequences. Firstly, we run a generalised mixed effects model with a poisson distribution (for count data). 

**Outcome variable:**
 * gesture sequence length
 
**Fixed effects:**
 * generation
 * condition
 * generation * condition
 
**Random effects:**
 * chain (random intercept, random slope of generation)
 * participant (random intercept, nested in chains)
 * target meaning

In [None]:
%R -i ex2

In [None]:
%%R 

# set transmission + interaction as baseline condition
# relevel() only accepts factors, so convert first in case the column was
# transferred from pandas as a character vector
ex2$condition = relevel(factor(ex2$condition), "Transmission + Interaction")

In [None]:
%%R

# run poisson model for gesture length
len_model_ex2 = glmer(code_len ~ generation * condition +
                            (1+generation|chain) + (1|participant:chain)+
                            (1|target), data=ex2, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))

In [None]:
len_model_ex2 = %R capture.output(summary(len_model_ex2))

print_output(len_model_ex2)

The model here shows a significant effect of generation for the baseline condition (ex1), and significant interactions between generation and the two other conditions. Gestures in the interaction-only condition show a further reduction in length, while gestures in the transmission-only condition show an increase in length.

In [None]:
%%R

# remove seed generation (test 1 - 5)
ex2_from0 = subset(ex2, generation!=0)

# reset generations to 0
ex2_from0$generation = ex2_from0$generation - 1

In [None]:
%%R

# gesture length model without seed generation
len_model_ex2_from0 = glmer(code_len ~ generation * condition +
                            (1+generation|chain) + (1|participant:chain)+
                            (1|target), data=ex2_from0, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))

In [None]:
len_model_ex2_from0 = %R capture.output(summary(len_model_ex2_from0))

print_output(len_model_ex2_from0)

### Experiment 3

<a id='gl-ex3'></a>

Plot showing the mean length of gesture sequences over generations/rounds for each chain/pair in each condition of experiment 3.

In [None]:
# code_len against generation for experiment 3: one facet per condition
# (in col_order), one line per chain; ci=None turns off error bars and
# legend=False leaves the legend to the plt.legend() call below
code_len_ex3_plot = sns.factorplot(x='generation', y='code_len', hue='chain',
                                  ci=None, data=ex3, col='condition',
                                 size = 5, aspect = 0.8, legend=False,
                                     markers=['o','^','s', '*', 'D'],
                                      sharey=False,
                                     col_order = ["Transmission + Interaction",
                                                 "Interaction only", "Transmission only"])

# per-facet y labels are blanked; a single label is drawn with plt.text() below
code_len_ex3_plot.set(xlabel= "Generation", ylabel = "", xticklabels=[0,1,2,3,4,5],
                     ylim=(0,11))

# facet titles show only the condition name
code_len_ex3_plot.set_titles('{col_name}')

# plt.legend() and plt.text() act on the current axes
# (presumably the last facet drawn - TODO confirm)
plt.legend(title='Chain', bbox_to_anchor=(1,1))

# (-16, 8) looks hand-tuned to place the y-axis label left of the first facet
plt.text(-16, 8, 'Mean gesture length', rotation=90)

The models below analyse the effect of generation and condition on the length of gesture sequences. Firstly, we run a generalised mixed effects model with a poisson distribution (for count data). 

The model structure here is the same as experiment 2.

**Outcome variable:**
 * gesture sequence length
 
**Fixed effects:**
 * generation
 * condition
 * generation * condition
 
**Random effects:**
 * chain (random intercept, random slope of generation)
 * participant (random intercept, nested in chains)
 * target meaning

In [None]:
%R -i ex3

In [None]:
%%R

# set transmission + interaction condition as baseline condition
# relevel() only accepts factors, so convert first in case the column was
# transferred from pandas as a character vector
ex3$condition = relevel(factor(ex3$condition), 'Transmission + Interaction')

In [None]:
%%R

# run poisson model for gesture length
# fixed effects: generation, condition and their interaction
# random effects: by-chain intercept and generation slope, by-participant
# intercept, by-target intercept
# NOTE(review): the participant term is (1|participant) here, whereas the
# experiment 1 and 2 models and the experiment 3 repetition model use
# (1|participant:chain) - confirm this difference is intended

len_model_ex3 = glmer(code_len ~ generation * condition +
                            (1+generation|chain) + (1|participant)+
                            (1|target), data=ex3, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))



In [None]:
len_model_ex3 = %R capture.output(summary(len_model_ex3))

print_output(len_model_ex3)

In [None]:
%%R

# subset generations 1-5
ex3_from0 = subset(ex3, generation!=0)

# reset generations to 0
ex3_from0$generation = ex3_from0$generation - 1

In [None]:
%%R

# run gesture length model without seed generation
# same formula as len_model_ex3, fitted to ex3_from0
# NOTE(review): the participant term is (1|participant) here, whereas the
# experiment 1 and 2 models and the experiment 3 repetition model use
# (1|participant:chain) - confirm this difference is intended

len_model_ex3_from0 = glmer(code_len ~ generation * condition +
                            (1+generation|chain) + (1|participant)+
                            (1|target), data=ex3_from0, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))


In [None]:
len_model_ex3_from0 = %R capture.output({summary(len_model_ex3_from0)})

print_output(len_model_ex3_from0)

## 3. Frequency of repetitions

<a id='reps'></a>

In [None]:
# quantify repetitions
for ex in [ex1, ex2, ex3]:
    
    remove_rep_tag(ex)
    count_repeats_df(ex)


### Experiment 1

<a id='r-ex1'></a>

Plot showing frequency of repetitions over generations for each chain in experiment 1.

In [None]:
# num_reps against generation for experiment 1, one line (and marker) per chain;
# ci=None turns off error bars and legend=False suppresses the automatic legend
reps_ex1_plot = sns.factorplot(x='generation', y='num_reps', hue='chain',
                                  ci=None, data=ex1,
                                 size = 6, aspect = 1, legend=False,
                                     markers=['o','^','s', '*', 'D'],
                                     )

# axis labels, explicit tick labels 0-5 and a fixed y range
reps_ex1_plot.set(xlabel= "Generation", ylabel = "Mean repetition frequency", xticklabels = [0,1,2,3,4,5],
                 ylim=(0,1.4))

In [None]:
%R -i ex1

In [None]:
%%R

# run poisson repetition model

reps_model_ex1 = glmer(num_reps ~ generation +
                            (1+ generation|chain) + (1|participant:chain)+
                            (1|target), data=ex1, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))


In [None]:
reps_ex1 = %R capture.output(summary(reps_model_ex1))

print_output(reps_ex1)

### Experiment 2

<a id='r-ex2'></a>

Plots showing the frequency of repetitions for each generation/round in each chain/pair across conditions in experiment 2.

In [None]:
# num_reps against generation for experiment 2: one facet per condition
# (in col_order), one line per chain; ci=None turns off error bars and
# legend=False leaves the legend to the plt.legend() call below
reps_ex2_plot = sns.factorplot(x='generation', y='num_reps', hue='chain',
                                  ci=None, data=ex2, col='condition',
                                 size = 5, aspect = 0.8, legend=False,
                                     markers=['o','^','s', '*', 'D'],
                                      sharey=False,
                                     col_order = ["Transmission + Interaction",
                                                 "Interaction only", "Transmission only"])

# per-facet y labels are blanked; a single label is drawn with plt.text() below
reps_ex2_plot.set(xlabel= "Generation", ylabel = "", xticklabels = [0,1,2,3,4,5],
                 ylim=(0,5))

# facet titles show only the condition name
reps_ex2_plot.set_titles('{col_name}')

# plt.legend() and plt.text() act on the current axes
# (presumably the last facet drawn - TODO confirm)
plt.legend(title='Chain', bbox_to_anchor=(1,1))

# (-16, 4) looks hand-tuned to place the y-axis label left of the first facet
plt.text(-16, 4, 'Mean repetition frequency', rotation=90)

The models below analyse the effect of generation and condition on repetition frequency. We run a generalised mixed effects model with a poisson distribution (for count data). 

**Outcome variable:**
 * frequency of repetitions
 
**Fixed effects:**
 * generation
 * condition
 * generation * condition
 
**Random effects:**
 * chain (random intercept, random slope of generation)
 * participant (random intercept, nested in chains)
 * target meaning

In [None]:
ex2.info()

In [None]:
%R -i ex2

In [None]:
%%R 

# set transmission + interaction as baseline condition
# relevel() only accepts factors, so convert first in case the column was
# transferred from pandas as a character vector

ex2$condition = relevel(factor(ex2$condition), 'Transmission + Interaction')

In [None]:
%%R

# run repetition model for ex2

reps_model_ex2 = glmer(num_reps ~ generation * condition +
                            (1+generation|chain) + (1|participant:chain)+
                            (1|target), data=ex2, family = 'poisson',
                               control=glmerControl(optimizer='bobyqa'))



In [None]:
reps_model_ex2 = %R capture.output(summary(reps_model_ex2))

print_output(reps_model_ex2)

We find no main effect of generation for the baseline condition, but we do find significant interactions with both other conditions. Gestures in the interaction-only condition show reduction in the number of repetitions over generations, compared to the baseline. Gestures in the transmission-only condition show an increase in repetitions over generations, relative to the baseline condition.

### Experiment 3

<a id='r-ex3'></a>

Plots showing the frequency of repetitions for each generation/round in each chain/pair across conditions in experiment 3.

In [None]:
# num_reps against generation for experiment 3: one facet per condition
# (in col_order), one line per chain; ci=None turns off error bars and
# legend=False leaves the legend to the plt.legend() call below
reps_ex3_plot = sns.factorplot(x='generation', y='num_reps', hue='chain',
                                  ci=None, data=ex3, col='condition',
                                 size = 5, aspect = 0.8, legend=False,
                                     markers=['o','^','s', '*', 'D'],
                                      sharey=False,
                                     col_order = ["Transmission + Interaction",
                                                 "Interaction only", "Transmission only"])

# per-facet y labels are blanked; a single label is drawn with plt.text() below
reps_ex3_plot.set(xlabel= "Generation", ylabel = "", xticklabels = [0,1,2,3,4,5],
                 ylim=(0,5))

# facet titles show only the condition name
reps_ex3_plot.set_titles('{col_name}')

# NOTE(review): axs is not used again in the visible part of the notebook
axs = reps_ex3_plot.axes

# plt.legend() and plt.text() act on the current axes
# (presumably the last facet drawn - TODO confirm)
plt.legend(title='Chain', bbox_to_anchor=(1,1))

# (-16, 4) looks hand-tuned to place the y-axis label left of the first facet
plt.text(-16, 4, 'Mean repetition frequency', rotation=90)

The models below analyse the effect of generation and condition on repetition frequency. We run a generalised mixed effects model with a poisson distribution (for count data). 

The model structure here is the same as experiment 2.

**Outcome variable:**
 * frequency of repetitions
 
**Fixed effects:**
 * generation
 * condition
 * generation * condition
 
**Random effects:**
 * chain (random intercept, random slope of generation)
 * participant (random intercept, nested in chains)
 * target meaning

In [None]:
%R -i ex3

In [None]:
%%R 

# set transmission + interaction as baseline condition
# relevel() only accepts factors, so convert first in case the column was
# transferred from pandas as a character vector

ex3$condition = relevel(factor(ex3$condition), 'Transmission + Interaction')

In [None]:
%%R

# run repetition model for ex3

reps_model_ex3 = glmer(num_reps ~ generation * condition +
                            (1+generation|chain) + (1|participant:chain)+
                            (1|target), data=ex3, family='poisson',
                             control=glmerControl(optimizer='bobyqa'))


In [None]:
reps_model_ex3 = %R capture.output(summary(reps_model_ex3))

print_output(reps_model_ex3)

The model here demonstrates an effect of generation for the baseline condition: repetitions increase over generations. We also find significant interactions between generation and condition for both other conditions. In the interaction-only condition, the number of repetitions reduces over generations compared with the baseline. In the transmission-only condition, it increases.