# Simulating constitutive processes of semantic change within heterogeneous populations of speakers

In [1]:
# basic imports
import torch
import numpy as np
import pandas as pd
from rpy2.robjects.lib.grid import xaxis
from tqdm.notebook import tqdm

# project code imports
from mod.agent import *
from mod.network import *
from mod.plot import *

In [2]:
# Population settings, bound in one unpacking assignment (presumably the
# number of agents and of connections; neither is used in the cells shown here).
no_agents, no_connections = 50, 10

## A very basic simulation

In [3]:
# vocab_size and semantic_features are the first two positional arguments of
# every agent(...) built below; semantic_features also fixes the dimensionality
# of the sampled environment.
vocab_size, semantic_features = 10, 3

In [4]:
# Environment generator: a multivariate Gaussian whose mean is drawn once at
# random (shape (1, semantic_features)) and whose covariance is the identity
# scaled by 0.2.
env_mean = torch.randn(size=(1, semantic_features))
env_cov = 0.2 * torch.eye(semantic_features)
starting_env = torch.distributions.MultivariateNormal(
    loc=env_mean,
    covariance_matrix=env_cov,
)

#### Simple dyadic interaction across repeated turns in a random environment

In [5]:
# Two separately constructed agents with identical settings; ag1 is built
# first, then ag2.
ag1, ag2 = (
    agent(vocab_size, semantic_features, starting_observations=5)
    for _ in range(2)
)

In [6]:
# summed squared difference between the two agents' vocab tensors (before interaction)
(ag1.vocab - ag2.vocab).pow(2).sum()

tensor(43.4399)

In [7]:
# number of interaction turns the dyadic loop below runs for
turns = 300

In [8]:
# per-turn trackers, each starting as its own empty list
utt_tracking = []
vocab_dif = []

In [9]:
# Each turn: sample an environment, choose the speaker from a uniform draw,
# let that agent produce an utterance, have both agents (speaker included)
# listen to it, and record the summed squared vocabulary difference.
for _ in tqdm(range(turns)):
    env = starting_env.sample()
    speaker_prob = torch.rand(size=(1,))

    # ag1 speaks when the draw exceeds .5, otherwise ag2 does
    speaker = ag1 if speaker_prob > .5 else ag2
    utt = speaker.speak(env, lam=3)

    # listening order is ag2 first, then ag1
    for listener in (ag2, ag1):
        listener.listen(utt, env)

    vocab_dif.append(((ag1.vocab - ag2.vocab) ** 2).sum())


  0%|          | 0/300 [00:00<?, ?it/s]

In [10]:
# utterance tracking is disabled: the loop above never appends to utt_tracking
# utt_tracking = torch.FloatTensor(utt_tracking)
# collapse the list of per-turn differences into a single float tensor
vocab_dif = torch.FloatTensor(vocab_dif)

In [11]:
# summed squared difference between the two agents' vocab tensors (after interaction)
(ag1.vocab - ag2.vocab).pow(2).sum()

tensor(1.8191)

In [12]:
# plot the per-turn vocabulary difference, then title the figure and its axes
layout = dict(
    title='Dyadic interaction in a random environment',
    yaxis_title='Δ vocab',
    xaxis_title='turns',
)
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(**layout)
fig.show()

#### Simple dyadic interaction in a random environment, but with one of the speakers introducing a new term tailor-made to a particular feature

In [13]:
# Fresh pair of agents for the second experiment; ag1 is built first, then ag2.
ag1, ag2 = (
    agent(vocab_size, semantic_features, starting_observations=5)
    for _ in range(2)
)

In [14]:
# summed squared difference between the two agents' vocab tensors (before interaction)
(ag1.vocab - ag2.vocab).pow(2).sum()

tensor(88.4510)

In [15]:
# number of interaction turns for this run
turns = 300
# a new vocabulary item is introduced every `add_vocab_in` turns (never on turn 0)
add_vocab_in = 10

In [16]:
# rebind vocab_dif to an empty list so this run's per-turn differences are collected afresh
vocab_dif = []

In [17]:
# Dyadic interaction in which, every `add_vocab_in` turns (turn 0 excluded),
# both agents gain a new vocabulary item and the current speaker tailors its
# own entry for that item to the environment's strongest feature.
for rd in tqdm(range(turns)):
    env = starting_env.sample()
    speaker_prob = torch.rand(size=(1,))

    # plain boolean rather than the product of two bools
    new_vocab_round = rd != 0 and rd % add_vocab_in == 0

    # ag1 speaks when the draw exceeds .5, otherwise ag2 does
    speaker = ag1 if speaker_prob > .5 else ag2

    # speak() is still called on new-vocab rounds, even though its result is
    # overwritten below, so that whatever state it touches evolves as before
    utt = speaker.speak(env, lam=3)

    if new_vocab_round:
        ag1.add_vocab_item()
        ag2.add_vocab_item()

        # the utterance becomes the last vocabulary index (presumably the item
        # add_vocab_item() just appended -- confirm against mod.agent)
        utt = ag1.vocab.shape[0] - 1

        # Index of the largest environment feature. The previous form,
        # (env / env.sum()).argmax(), divided by a sum that can be negative
        # (env is Gaussian around a random mean): a negative divisor reverses
        # the ordering, so the *smallest* feature was selected, and a sum near
        # zero made the ratio blow up. argmax is unaffected by positive
        # rescaling, so no normalisation is needed.
        f = env.argmax()

        # only the current speaker's mental lexicon is updated: the new item's
        # mean on feature f is set to the observed value, its variance is made
        # tiny on every feature and then widened to .05 on feature f
        speaker.vocab[utt][f] = env[0, f]
        speaker.var[utt] = torch.FloatTensor([1e-5] * speaker.var.shape[-1])
        speaker.var[utt][f] = .05

    # listening order is ag2 first, then ag1
    ag2.listen(utt, env)
    ag1.listen(utt, env)
    vocab_dif += [((ag1.vocab - ag2.vocab)**2).sum()]

  0%|          | 0/300 [00:00<?, ?it/s]

In [18]:
# utterance tracking is disabled: the loop above never appends to utt_tracking
# utt_tracking = torch.FloatTensor(utt_tracking)
# collapse the list of per-turn differences into a single float tensor
vocab_dif = torch.FloatTensor(vocab_dif)

In [19]:
# summed squared difference between the two agents' vocab tensors (after interaction)
(ag1.vocab - ag2.vocab).pow(2).sum()

tensor(31.2713)

In [20]:
# plot the per-turn vocabulary difference, then title the figure and its axes
layout = dict(
    title='Dyadic interaction in a random environment with novel words introduced',
    yaxis_title='Δ vocab',
    xaxis_title='turns',
)
fig = plot(vocab_dif.numpy(), 'vocabulary difference')
fig.update_layout(**layout)
fig.show()

## Returning to forced birth vs. pro-life

So this one is trolly and fun. Basically, we want to replicate the changes in frequency of "forced birth" (FB) versus "pro-life" (PL) across the months before and after the Dobbs decision. We can use a set of features, each giving the relative probability that a word will be associated with that feature. Something like the following table (note: the values in the example below aren't normalized probabilities; I'm not sure whether we ought to normalize them or not):

| **Date range** | **Antiabortion** | **legality** | **($\neg$) activist** | **morality** |
|------------|--------------| -------- | ----------------- | -------- |
| _2022/1-2022/5_ | .35          | .2       | .45            |  .0001   |
| _2022/6-2023/1_ | .2           | .45      | .35            | .0001    |
| ... | ... | ... | ... | ... |
| _2024/1-2024/5_ | .0001 | .2  | .45 | .35 |

We can then use these rows as a series of environments that dictate (1) what people say and (2) how people update their beliefs about the constraints on when to use certain words. We can even initialize the network with the same number of "users" as there are on _r/Feminism_!
