# Simulating Constitutive Processes of semantic change within heterogeneous populations of speakers

In [1]:
# basic imports
import torch
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# project code imports
from mod.one_hot_agent import *
from mod.plot import *
# from mod.network import *

##### Hyper parameters

In [2]:
add_vocab_in = 50
semantic_features = 3
starting_observations = 5
words_per_semantic_feature = 5
enforce_word_feature_mapping = True

## Simple dyadic interaction across repeated turns in a random environment

In [3]:
starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

In [4]:
ag1 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)
ag2 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)

In [5]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(22.8696)

In [6]:
# and set the number of iterations
turns = 300

In [7]:
utt_tracking, vocab_dif = [], []

In [8]:
for _ in tqdm(range(turns)):
    env = starting_env.sample()
    speaker_prob = torch.rand(size=(1,))

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)

    else:
        utt = ag2.speak(env, lam=3)

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]


  0%|          | 0/300 [00:00<?, ?it/s]

In [9]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [10]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(1.3732)

In [11]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Random environment and introduction of new terms

In [12]:
starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

In [13]:
ag1 = agent(
    words_per_semantic_feature,
    semantic_features,
    starting_observations=starting_observations,
    enforcing=enforce_word_feature_mapping
)
ag2 = agent(
    words_per_semantic_feature,
    semantic_features,
    starting_observations=starting_observations,
    enforcing=enforce_word_feature_mapping
)

In [14]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(18.8844)

In [15]:
# and set the number of iterations
turns = 300

In [16]:
vocab_dif = []

In [17]:
for rd in tqdm(range(turns)):
    env = starting_env.sample()
    speaker_prob = torch.rand(size=(1,))

    new_vocab_round = ((rd % add_vocab_in) == 0) * (rd != 0)

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)


    else:
        utt = ag2.speak(env, lam=3)

    if new_vocab_round:

        f = (env / env.sum().unsqueeze(-1)).argmax()

        ag1.add_vocab_item(f)
        ag2.add_vocab_item(f)

        utt = ag1.vocab.shape[0] - 1

        # if speaker 1s turn to talk, update their mental lexicon
        if speaker_prob > .5:
            ag1.vocab[utt][f] = env[0,f]
            ag1.var[utt] = torch.FloatTensor([1e-5]*ag1.var.shape[-1])
            ag1.var[utt][f] = .05

        # if speaker 2s turn to talk, update their mental lexicon
        else:
            ag2.vocab[utt][f] = env[0,f]
            ag2.var[utt] = torch.FloatTensor([1e-5]*ag2.var.shape[-1])
            ag2.var[utt][f] = .05

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]

  0%|          | 0/300 [00:00<?, ?it/s]

In [18]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [19]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(3.2284)

In [20]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment with novel words introduced',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Changing environment

In [21]:
new_environment_prob = .25
starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

In [22]:
ag1 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)
ag2 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)

In [23]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(30.4847)

In [24]:
# and set the number of iterations
turns = 300

In [25]:
utt_tracking, vocab_dif = [], []

In [26]:
for _ in tqdm(range(turns)):
    new_env_prob = torch.rand(size=(1,))
    if new_env_prob > new_environment_prob:
        starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

    env = starting_env.sample()
    speaker_prob = torch.rand(size=(1,))

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)

    else:
        utt = ag2.speak(env, lam=3)

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]


  0%|          | 0/300 [00:00<?, ?it/s]

In [27]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [28]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(1.5954)

In [29]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Changing environment and introduction of new terms

In [30]:
ag1 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)
ag2 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)

In [31]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(15.0381)

In [32]:
# and set the number of iterations
turns = 300
add_vocab_in = 10

In [33]:
vocab_dif = []

In [34]:
for rd in tqdm(range(turns)):
    new_env_prob = torch.rand(size=(1,))
    if new_env_prob > new_environment_prob:
        starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

    env = starting_env.sample()
    # starting_env.loc = env
    speaker_prob = torch.rand(size=(1,))

    new_vocab_round = ((rd % add_vocab_in) == 0) * (rd != 0)

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)

    else:
        utt = ag2.speak(env, lam=3)

    if new_vocab_round:
        f = (env / env.sum().unsqueeze(-1)).argmax()

        ag1.add_vocab_item(f)
        ag2.add_vocab_item(f)

        utt = ag1.vocab.shape[0] - 1

        # if speaker 1s turn to talk, update their mental lexicon
        if speaker_prob > .5:
            ag1.vocab[utt][f] = env[0,f]
            ag1.var[utt] = torch.FloatTensor([1e-5]*ag1.var.shape[-1])
            ag1.var[utt][f] = .05

        # if speaker 2s turn to talk, update their mental lexicon
        else:
            ag2.vocab[utt][f] = env[0,f]
            ag2.var[utt] = torch.FloatTensor([1e-5]*ag2.var.shape[-1])
            ag2.var[utt][f] = .05

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]

  0%|          | 0/300 [00:00<?, ?it/s]

In [35]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [36]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(7.0857)

In [37]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment with novel words introduced',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Stochastic environment

In [38]:
new_environment_prob = .25
starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

In [39]:
ag1 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)
ag2 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)

In [40]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(24.9481)

In [41]:
# and set the number of iterations
turns = 300

In [42]:
utt_tracking, vocab_dif = [], []

In [43]:
for rd in tqdm(range(turns)):
    env = starting_env.sample()
    starting_env.loc = env
    speaker_prob = torch.rand(size=(1,))

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)

    else:
        utt = ag2.speak(env, lam=3)

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]

  0%|          | 0/300 [00:00<?, ?it/s]

In [44]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [45]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(1.1170)

In [46]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Stochastic environment and introduction of new terms

In [47]:
new_environment_prob = .25
starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

In [48]:
ag1 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)
ag2 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)

In [49]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(18.8431)

In [50]:
# and set the number of iterations
turns = 300

In [51]:
vocab_dif = []

In [52]:
for rd in tqdm(range(turns)):
    env = starting_env.sample()
    starting_env.loc = env
    speaker_prob = torch.rand(size=(1,))

    new_vocab_round = ((rd % add_vocab_in) == 0) * (rd != 0)

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)

    else:
        utt = ag2.speak(env, lam=3)

    if new_vocab_round:
        f = (env / env.sum().unsqueeze(-1)).argmax()

        ag1.add_vocab_item(f)
        ag2.add_vocab_item(f)

        utt = ag1.vocab.shape[0] - 1

        # if speaker 1s turn to talk, update their mental lexicon
        if speaker_prob > .5:
            ag1.vocab[utt][f] = env[0,f]
            ag1.var[utt] = torch.FloatTensor([1e-5]*ag1.var.shape[-1])
            ag1.var[utt][f] = .05

        # if speaker 2s turn to talk, update their mental lexicon
        else:
            ag2.vocab[utt][f] = env[0,f]
            ag2.var[utt] = torch.FloatTensor([1e-5]*ag2.var.shape[-1])
            ag2.var[utt][f] = .05

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]

  0%|          | 0/300 [00:00<?, ?it/s]

In [53]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [54]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(59.4465)

In [55]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment with novel words introduced',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Random and stochastic changes to environment

In [56]:
new_environment_prob = .25
starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

In [57]:
ag1 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)
ag2 = agent(
    words_per_semantic_feature, 
    semantic_features, 
    starting_observations=starting_observations, 
    enforcing=enforce_word_feature_mapping 
)

In [58]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(13.9709)

In [59]:
# and set the number of iterations
turns = 300

In [60]:
utt_tracking, vocab_dif = [], []

In [61]:
for rd in tqdm(range(turns)):
    new_env_prob = torch.rand(size=(1,))
    if new_env_prob > new_environment_prob:
        starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

    env = starting_env.sample()
    starting_env.loc = env
    speaker_prob = torch.rand(size=(1,))

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)

    else:
        utt = ag2.speak(env, lam=3)

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]

  0%|          | 0/300 [00:00<?, ?it/s]

In [62]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [63]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(0.5991)

In [64]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Random and stochastic environment, plus introduction of new terms

In [65]:
new_environment_prob = .25
starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

In [66]:
ag1 = agent(
    words_per_semantic_feature,
    semantic_features,
    starting_observations=starting_observations,
    enforcing=enforce_word_feature_mapping
)
ag2 = agent(
    words_per_semantic_feature,
    semantic_features,
    starting_observations=starting_observations,
    enforcing=enforce_word_feature_mapping
)

In [67]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(32.1352)

In [68]:
# and set the number of iterations
turns = 300

In [69]:
vocab_dif = []

In [70]:
for rd in tqdm(range(turns)):
    new_env_prob = torch.rand(size=(1,))
    if new_env_prob > new_environment_prob:
        starting_env = torch.distributions.MultivariateNormal(torch.randn(size=(1,semantic_features)), covariance_matrix=torch.eye(semantic_features) * .2)

    env = starting_env.sample()
    starting_env.loc = env
    speaker_prob = torch.rand(size=(1,))

    new_vocab_round = ((rd % add_vocab_in) == 0) * (rd != 0)

    if speaker_prob > .5:
        utt = ag1.speak(env, lam=3)

    else:
        utt = ag2.speak(env, lam=3)

    if new_vocab_round:
        f = (env / env.sum().unsqueeze(-1)).argmax()

        ag1.add_vocab_item(f)
        ag2.add_vocab_item(f)

        utt = ag1.vocab.shape[0] - 1

        # if speaker 1s turn to talk, update their mental lexicon
        if speaker_prob > .5:
            ag1.vocab[utt][f] = env[0,f]
            ag1.var[utt] = torch.FloatTensor([1e-5]*ag1.var.shape[-1])
            ag1.var[utt][f] = .05

        # if speaker 2s turn to talk, update their mental lexicon
        else:
            ag2.vocab[utt][f] = env[0,f]
            ag2.var[utt] = torch.FloatTensor([1e-5]*ag2.var.shape[-1])
            ag2.var[utt][f] = .05

    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]

  0%|          | 0/300 [00:00<?, ?it/s]

In [71]:
# utt_tracking = torch.FloatTensor(utt_tracking)
vocab_dif = torch.FloatTensor(vocab_dif)

In [72]:
((ag1.vocab - ag2.vocab)**2).sum()

tensor(15.5697)

In [73]:
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(
    # title='Dyadic interaction in a random environment with novel words introduced',
    yaxis_title='Δ P(w|m)',
    xaxis_title='turns'
)
fig.show()

## Within a social network

In [None]:
no_agents = 50
no_connections = 10

## Returning to forced birth vs. pro-life

So this one is trolly and fun. Basically, we want to replicate the changes in frequency for forced birth (FB) versus pro-life (PL) across months prior to and after the Dobbs decision. We can have a set of features representing the relative probability that a word will be associated with a feature. Something like the following table (note: these aren't normalized probabilities in the example below. I'm not sure whether we ought to do that or not.):

| **Date range** | **Antiabortion** | **legality** | **($\neg$) activist** | **morality** |
|------------|--------------| -------- | ----------------- | -------- |
| _2022/1-2022/5_ | .35          | .2       | .45            |  .0001   |
| _2022/6-2023/1_ | .2           | .45      | .35            | .0001    |
| ... | ... | ... | ... | ... |
| _2024/1-2024/5_ | .0001 | .2  | .45 | .35 |

which we can then use as a series of environments that dictate (1) what people say, (2) how people update their beliefs on the constraints around when to use certain words. We can even initialize the network with the same number of "users" as there are on _r/Feminism_!
