In [1]:
import math
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Stimulus words that all come from the 'mind perception' literature, keyed by
# word; each entry records the literature it comes from and a generic category.
dim_dict = {
    word: {'literature': 'mind perception', 'category': category}
    for word, category in (
        ('chimpanzee', 'animal'),
        ('rabbit', 'animal'),
        ('jelly fish', 'animal'),
        ('3-year old', 'human'),
        ('adult', 'human'),
        ('in coma', 'human'),
        ('zombie', 'human'),
        ('newborn', 'human'),
        ('computer', 'tool'),
        ('god', 'entity'),
        ('alien', 'entity'),
        ('robot', 'entity'),
        ('robot 2050', 'entity'),
        ('belgium', 'organisation'),
        ('coca cola', 'organisation'),
        ('city council', 'organisation'),
    )
}

# Additional candidate words (plain strings, no attributes attached).
# NOTE(review): not referenced by any of the cells visible here — confirm it is still needed.
word_list = [
    'laptop', 'desktop',
    'car', 'autonomous car',
    'toaster', 'hammer', 'rock',
    'single cell', 'virus',
    'tree', 'carnivorous plant',
    'mosquito',
    'human',
]

# Proposed stimulus set, keyed by word; each entry records the literature the
# word is drawn from and its generic category.
proposed_dict = {
    word: {'literature': literature, 'category': category}
    for word, literature, category in (
        ('chimpanzee', 'mind perception', 'animal'),
        ('jelly fish', 'mind perception', 'animal'),
        ('rabbit', 'mind perception', 'animal'),
        ('virus', 'features of agency', 'organism'),
        ('tree', 'features of agency', 'organism'),
        ('robot', 'mind perception', 'entity'),
        ('artificial intelligence', 'features of agency', 'entity'),
        # All versions of "human" are collapsed into this single entry.
        ('human', 'mind perception', 'human'),
        ('god', 'mind perception', 'entity'),
        ('rock', 'features of agency', 'tool'),
        ('car', 'features of agency', 'tool'),
        ('autonomous car', 'features of agency', 'tool'),
        ('toaster', 'features of agency', 'tool'),
        ('computer', 'mind perception', 'tool'),
    )
}
# Dimensions: literature (mind perception, features of agency), generic category (thing, tool, plant, animal, human, organism, organisation, entity), plus any other dimension needed for the feature tree.


In [3]:
class Item_list():
    """Table of words with attributes, plus all pairs of the selected words.

    The words and their attributes are held in a DataFrame indexed by word.
    A "selection" (list of words) determines which rows ``df`` exposes and
    which unordered pairs are available through ``pairs`` / ``sample_pairs``.
    """

    def __init__(self, word_dict, word_selection=None):
        """Build the item list.

        Parameters
        ----------
        word_dict : dict
            Maps each word to a dict of attributes (attribute name -> value).
        word_selection : iterable of str, optional
            Words to pair up. When omitted or empty, every word in
            ``word_dict`` is selected.
        """
        self._df = pd.DataFrame(data=word_dict).T

        # Copy the caller's selection so later in-place edits on either side
        # cannot silently change the other (and so arrays/indexes are accepted).
        chosen = list(word_selection) if word_selection is not None else []
        self._word_selection = chosen or list(self.words)

        self._update_pairs()

    @property
    def df(self):
        """Rows of the word table restricted to the current selection."""
        return self._df.loc[self.selection]

    @property
    def pairs(self):
        """Array of shape (n_pairs, 2) holding every pair of selected words."""
        return self._pairs

    @property
    def words(self):
        """Index of all known words, selected or not."""
        return self._df.index

    @property
    def selection(self):
        """List of the currently selected words."""
        return self._word_selection

    @selection.setter
    def selection(self, new_selection):
        """Replace the selection and rebuild the pairs.

        If any word is unknown, the selection is left unchanged and a message
        is printed instead of raising.
        """
        new_selection = list(new_selection)  # private copy, see __init__
        if set(new_selection).issubset(self.words):
            self._word_selection = new_selection
            self._update_pairs()
        else:
            print('Selection uses words not in current item list, add them as dictionaries using the add_words method.')

    def add_words(self, word_dict, add_selection=True):
        """Add new words, or update attributes of existing ones.

        Parameters
        ----------
        word_dict : dict
            Maps each word to a dict of attributes, same layout as in
            ``__init__``.
        add_selection : bool, default True
            When true, words not already selected are appended to the
            selection and the pairs are rebuilt.
        """
        for word, attributes in word_dict.items():
            for column, value in attributes.items():
                self._df.loc[word, column] = value

        if add_selection:
            # Skip words that are already selected: appending them again would
            # create duplicate rows in ``df`` and pairs of a word with itself.
            additions = [word for word in word_dict if word not in self._word_selection]
            # Build a new list rather than using ``+=`` so no list shared with
            # a caller is mutated.
            self._word_selection = self._word_selection + additions
            self._update_pairs()

    def sample_pairs(self, size=1, prob=None):
        """Draw pairs at random without replacement.

        Parameters
        ----------
        size : int, default 1
            Number of pairs to draw.
        prob : sequence of float, optional
            Probability of each pair, in the order of ``pairs``. When omitted
            or empty, pairs are drawn uniformly.

        Returns
        -------
        numpy.ndarray
            The sampled pairs after ``squeeze()``: several pairs come back as
            a (size, 2) array with rows in random order; a single pair comes
            back as a 1-D array of two words in random order.
        """
        # ``if prob:`` raises on numpy arrays, so test for None / emptiness explicitly.
        weights = None if prob is None or len(prob) == 0 else prob
        choices = np.random.choice(self._pairs_idx, size=size, replace=False, p=weights)

        # Fancy indexing returns a copy, so shuffling does not disturb ``_pairs``.
        selection = self._pairs[choices, :].squeeze()
        np.random.shuffle(selection)
        return selection

    def _update_pairs(self):
        """Recompute the pair list, pair array and pair indices from the selection."""
        self._pairs_list = list(combinations(self._word_selection, 2))
        pairs = np.array(self._pairs_list)
        if pairs.size == 0:
            # With fewer than two words numpy yields shape (0,); keep two columns.
            pairs = pairs.reshape(0, 2)
        self._pairs = pairs
        self._pairs_idx = np.arange(self._pairs.shape[0])


# Build the item list from the proposed words; with no explicit selection,
# every word is selected.
item_list = Item_list(proposed_dict)

# Shape of the pair array: (number of word pairs, 2).
print(item_list.pairs.shape)

# Draw a random subset of the pairs, without replacement.
sample_size = 40
sampled_comp = item_list.sample_pairs(size=sample_size)

(91, 2)


In [4]:
# Alternative, minimal selection kept for reference:
#   item_list.selection = ['chimpanzee', 'rabbit']
# Keep every known word except four; assigning to `selection` also rebuilds the pairs.
new_selection = [
    candidate
    for candidate in item_list.words
    if candidate not in ('rabbit', 'car', 'toaster', 'tree')
]
item_list.selection = new_selection
item_list.df

Unnamed: 0,literature,category
chimpanzee,mind perception,animal
jelly fish,mind perception,animal
virus,features of agency,organism
robot,mind perception,entity
artificial intelligence,features of agency,entity
human,mind perception,human
god,mind perception,entity
rock,features of agency,tool
autonomous car,features of agency,tool
computer,mind perception,tool


In [5]:
# Display the words currently selected for pairing.
item_list.selection

['chimpanzee',
 'jelly fish',
 'virus',
 'robot',
 'artificial intelligence',
 'human',
 'god',
 'rock',
 'autonomous car',
 'computer']

In [6]:


# Write every word pair of the current selection to CSV, one pair per row.
df = pd.DataFrame(data=item_list.pairs)

# Build the path with pathlib: hard-coded backslashes only act as separators on
# Windows, elsewhere they become part of a single file name.
pairs_path = Path('data') / 'word_combinations' / 'pairs_words.csv'
# to_csv does not create missing directories, so make sure the target exists.
pairs_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(pairs_path)

In [7]:
# Number of pairs times 5, divided by 60.
# NOTE(review): presumably 5 seconds per comparison, giving minutes — confirm the units.
len(item_list.pairs) * 5 / 60

3.75