# <center> IEOR 169 Final Project: Data Generation and Exploration </center>
# <center> Team: Chris Landgrebe, Calvin Suster, Wyatt Walsh </center>

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Data-Generation" data-toc-modified-id="Data-Generation-1">Data Generation</a></span><ul class="toc-item"><li><span><a href="#Step-1:-Imports-and-Environment-Considerations" data-toc-modified-id="Step-1:-Imports-and-Environment-Considerations-1.1">Step 1: Imports and Environment Considerations</a></span></li><li><span><a href="#Step-2:-Define-Generator-Function" data-toc-modified-id="Step-2:-Define-Generator-Function-1.2">Step 2: Define Generator Function</a></span></li><li><span><a href="#Step-3:-Define-Set-of-Person/Object-Combinations-and-Apply-Generator" data-toc-modified-id="Step-3:-Define-Set-of-Person/Object-Combinations-and-Apply-Generator-1.3">Step 3: Define Set of Person/Object Combinations and Apply Generator</a></span></li><li><span><a href="#Step-1:-Load-in-Data-and-View" data-toc-modified-id="Step-1:-Load-in-Data-and-View-1.4">Step 1: Load in Data and View</a></span></li></ul></li></ul></div>

## Data Generation

### Step 1: Imports and Environment Considerations

In [1]:
# import necessary libraries
import numpy as np
import pandas as pd
import os
from IPython.display import display
from math import *

### Step 2: Define Generator Function

In [None]:
# NOTE(review): the two lines below are AMPL syntax (a set expression and a param
# declaration), not Python; run in a Python kernel, this cell raises SyntaxError.
# NOTE(review): the second range steps by 0.3 here, while the NumPy expression in the
# following cell steps by 0.2 — TODO confirm which spacing is intended.
(0 .. 0.25 by 0.00625) union (0.4 .. 1 by 0.3);
param cash;

In [9]:
# Build a grid of 45 values: 0 to 0.25 in steps of 0.00625, followed by 0.4, 0.6, 0.8 and 1.0.
# np.arange excludes its stop value, so each stop is padded just past the last wanted
# point (0.25 + 0.00624 and 1.1) to keep 0.25 and 1.0 in the result.
# NOTE(review): the result is only displayed here; it is not assigned to a name.
np.append(np.arange(0,0.25+0.00624,0.00625), np.arange(0.4,1.1,0.2))

array([0.     , 0.00625, 0.0125 , 0.01875, 0.025  , 0.03125, 0.0375 ,
       0.04375, 0.05   , 0.05625, 0.0625 , 0.06875, 0.075  , 0.08125,
       0.0875 , 0.09375, 0.1    , 0.10625, 0.1125 , 0.11875, 0.125  ,
       0.13125, 0.1375 , 0.14375, 0.15   , 0.15625, 0.1625 , 0.16875,
       0.175  , 0.18125, 0.1875 , 0.19375, 0.2    , 0.20625, 0.2125 ,
       0.21875, 0.225  , 0.23125, 0.2375 , 0.24375, 0.25   , 0.4    ,
       0.6    , 0.8    , 1.     ])

In [None]:
# Scratch example: only the last two entries (4 and 5) are used, as the matrix shape.
sizes = [1,4,5]
# 4 x 5 matrix of Uniform[0, 1) draws; no seed is set, so the values differ on every run
grid = np.random.uniform(0,1,(sizes[1:]))
# divide each row by its own sum, then round to 4 decimals
normed = np.round(grid/np.sum(grid,1)[:,None],4)
df = pd.DataFrame(normed)
# NOTE(review): a bare expression in the middle of a cell is not rendered; only a cell's last expression is
df
# write the values only (no header row, no index column)
df.to_csv('./data/generated/prologtest.csv', header = False, index = False)

In [None]:
def synthesize(sizes):
    '''Generate one random, row-normalized matrix and write it as an AMPL .dat file.

    Parameters
    ----------
    sizes : sequence of three integers (list, tuple, array or DataFrame row)
        Read by position, never by label:
        0. the file number, used as the output file name
        1. the number of people (rows of the matrix, AMPL set P)
        2. the number of items (columns of the matrix, AMPL set I)

    Returns
    -------
    None

    Side effects
    ------------
    Draws from NumPy's global random state (no seed is set here) and writes
    ./data/generated/<file number>.dat, creating the directory if it is missing
    and overwriting any existing file of that name.
    '''
    # Read the three fields positionally via a plain array: integer indexing on a
    # label-indexed pandas row (as produced by DataFrame.apply(axis=1)) relies on a
    # deprecated positional fallback.
    file_num, num_people, num_items = (int(v) for v in np.asarray(sizes)[:3])

    # Uniform[0, 1) draws, one row per person; each row is divided by its own sum
    grid = np.random.uniform(0, 1, (num_people, num_items))
    normed = grid / grid.sum(axis=1, keepdims=True)

    # first column holds the 1-based person id, as AMPL's tabular param format expects
    person_ids = np.arange(1, num_people + 1)
    table = np.column_stack([person_ids, normed])

    # the file is named after the file number
    name = './data/generated/' + str(file_num) + '.dat'
    os.makedirs(os.path.dirname(name), exist_ok=True)

    # members of the AMPL sets, numbered from 1
    people = ' '.join(str(i) for i in range(1, num_people + 1))
    items = ' '.join(str(i) for i in range(1, num_items + 1))

    # np.savetxt writes the table, so everything that precedes it goes in the header
    header = 'data; \nset P := %s; \n' % people
    header += 'set I := %s;' % items
    header += '\nparam v : '
    header += items + ':='

    # integer id column followed by 4-decimal values
    types = ['%i'] + (['%.4f'] * num_items)
    # comments='' stops savetxt from prefixing the header lines with '# '
    np.savetxt(name, table, fmt = types, header = header, comments = '')

    # terminate the param table; the context manager closes the file even on error
    with open(name, 'a') as file:
        file.write('\n ;')

In [None]:
def detCash(cash, numItem):
    '''Display a deterministic triangular matrix before and after adding a cash column.

    Row i (0-based) starts with 0.5 in its first i + 1 columns and 0 elsewhere.
    Each row is divided by its own sum and the result is displayed. A column
    holding `cash` is then appended, every row is divided by its previous row
    sum plus `cash` so the widened rows sum to 1 again, and that matrix is
    displayed too.

    Parameters
    ----------
    cash : number
        Value placed in the extra column before the second normalization.
    numItem : int
        Number of rows and of non-cash columns.

    Returns
    -------
    numpy.ndarray of shape (numItem, numItem + 1)
        The second (cash-augmented, re-normalized) matrix.
    '''
    # lower triangle (diagonal included) filled with 0.5, built directly as an
    # ndarray instead of appending np.matrix rows one at a time
    holdings = 0.5 * np.tril(np.ones((numItem, numItem)))

    # divide each row by its own sum
    normed = holdings / holdings.sum(axis=1, keepdims=True)
    display(pd.DataFrame(normed))

    # append the cash column, then rescale each row by (old row sum + cash)
    cash_column = np.full((numItem, 1), cash, dtype=float)
    row_totals = normed.sum(axis=1, keepdims=True) + cash
    with_cash = np.hstack([normed, cash_column]) / row_totals
    display(pd.DataFrame(with_cash))

    return with_cash

# Run detCash once per cash value, always with 6 items.
# NOTE(review): `cash` is not assigned in any Python cell visible in this notebook (it
# only appears as an AMPL `param` earlier), so on a fresh kernel this loop raises
# NameError — TODO define the list of cash values first.
for c in cash:
    detCash(c,6)

In [None]:
def synthesize_cash(sizes):
    '''Generate one random matrix and write it as an AMPL .dat file with a _cash suffix.

    Unlike a row-normalized generator, the Uniform[0, 1) draws are written as they
    are: this function does not divide rows by their sums.
    NOTE(review): earlier comments in this function mentioned normalization and a
    random seed, but the code did neither — TODO confirm that raw values are intended.

    Parameters
    ----------
    sizes : sequence of three integers (list, tuple, array or DataFrame row)
        Read by position, never by label:
        0. the file number, used in the output file name
        1. the number of people (rows of the matrix, AMPL set P)
        2. the number of items (columns of the matrix, AMPL set I)

    Returns
    -------
    None

    Side effects
    ------------
    Draws from NumPy's global random state (no seed is set here) and writes
    ./data/generated/<file number>_cash.dat, creating the directory if it is
    missing and overwriting any existing file of that name.
    '''
    # Read the three fields positionally via a plain array: integer indexing on a
    # label-indexed pandas row (as produced by DataFrame.apply(axis=1)) relies on a
    # deprecated positional fallback.
    file_num, num_people, num_items = (int(v) for v in np.asarray(sizes)[:3])

    # Uniform[0, 1) draws, one row per person, left un-normalized
    grid = np.random.uniform(0, 1, (num_people, num_items))

    # first column holds the 1-based person id, as AMPL's tabular param format expects
    person_ids = np.arange(1, num_people + 1)
    table = np.column_stack([person_ids, grid])

    # the original line here was a syntax error ('_' + ;cash.dat'); the intended
    # name is "<file number>_cash.dat"
    name = './data/generated/' + str(file_num) + '_cash.dat'
    os.makedirs(os.path.dirname(name), exist_ok=True)

    # members of the AMPL sets, numbered from 1
    people = ' '.join(str(i) for i in range(1, num_people + 1))
    items = ' '.join(str(i) for i in range(1, num_items + 1))

    # np.savetxt writes the table, so everything that precedes it goes in the header
    header = 'data; \nset P := %s; \n' % people
    header += 'set I := %s;' % items
    header += '\nparam v : '
    header += items + ':='

    # integer id column followed by 4-decimal values
    types = ['%i'] + (['%.4f'] * num_items)
    # comments='' stops savetxt from prefixing the header lines with '# '
    np.savetxt(name, table, fmt = types, header = header, comments = '')

    # terminate the param table; the context manager closes the file even on error
    with open(name, 'a') as file:
        file.write('\n ;')

### Step 3: Define Set of Person/Object Combinations and Apply Generator

In [None]:
# define range of values to combine
## the commented-out line below is an earlier grid (fixed values up to 160, then geometric spacing);
## the active list uses every integer from 5 to 14, then progressively coarser steps up to 10000
# values = np.append(np.array([5,10,20,40,80,160]), np.geomspace(320, 5120, 14, dtype = int))
values = [*range(5,15), *range(15,25,5), *range(25,150,25), *range(150,250,100), *range(250, 1001, 250), *range(1000, 5001, 2000), 10000]
# NOTE(review): 1000 appears twice in `values` (it ends range(250, 1001, 250) and starts
# range(1000, 5001, 2000)), so every combination involving 1000 is generated more than once —
# TODO confirm whether that is intended before changing it, since it shifts the file numbering.

# create df from meshed value ranges.
# ## must take transpose of meshgrid and reshape so that each row is one (column 0, column 1) pair
sizes = pd.DataFrame(np.array(np.meshgrid(values,values)).T.reshape(-1,2))

# # keep only rows where column 1 (named numItems below) is at most 4 times column 0 (numPeople);
# # the first reset_index drops the old index, the second turns the new 0-based index into a column
sizes = sizes.loc[sizes[1] <= sizes[0] * 4].reset_index(drop=True).reset_index()
columns = ['fileNum', 'numPeople', 'numItems']
sizes.columns = columns

# # add 1 to index so that file numbers start at 1 and display resultant df
sizes['fileNum'] = sizes['fileNum'] + 1
display(sizes)

# # apply generator function to every remaining row (one output file per row)
sizes.apply(synthesize, axis = 1) # NOTE(review): this call is active and overwrites each .dat file with fresh random values; comment it out if the data already exists

In [None]:
400*16

In [None]:
len([*np.arange(0,0.26,0.0125), *np.arange(0.4,1,0.3)])

### Step 1: Load in Data and View

In [None]:
df = pd.read_csv('./data/output/full.txt')
df

In [None]:
0.0125/2

In [217]:
import numpy as np
import pandas as pd
import os
from IPython.display import display
from math import *

%load_ext autoreload
%autoreload 2

import importlib

import agent_based_simulation
from agent_based_simulation import agent, functions
agent_based_simulation = importlib.reload(agent_based_simulation)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [218]:
# 2 x 2 example matrix; its shape is unpacked below as (num_agents, num_items),
# i.e. one row per agent and one column per item
simple = np.array([[0.4,0.1],[0.6,0.2]])
display(pd.DataFrame(simple))
(num_agents, num_items) = simple.shape

Unnamed: 0,0,1
0,0.4,0.1
1,0.6,0.2


In [219]:
# build one Agent per row of `simple`, passing the row index and that row's values
agents = []
for i in np.arange(0,num_agents):
    agents.append(agent.Agent(i,simple[i]))
    
# pass the agents (and the item indices 0..num_items-1) to the simulation helpers in turn
# NOTE(review): agent.Agent and the functions.* helpers come from the agent_based_simulation
# package, which is not shown in this notebook — what each call does must be checked there
functions.first_allocation(agents,np.arange(num_items))
functions.share(agents)
functions.calculate_p_envy_free(agents)

1

In [185]:
[len(agent.portfolio) for agent in agents]

[1, 1]

In [115]:
prefs

array([[0.8 , 0.2 ],
       [0.75, 0.25]])

In [131]:
# list.append mutates the list in place and returns None, so its result must not be
# assigned back to the name (doing so replaces the list with None)
portfolio = []
portfolio.append(5)
portfolio