# <center> IEOR 169 Final Project: Data Generation and Exploration </center>
# <center> Team: Chris Landgrebe, Calvin Suster, Wyatt Walsh </center>

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Data-Generation" data-toc-modified-id="Data-Generation-1">Data Generation</a></span><ul class="toc-item"><li><span><a href="#Step-1:-Imports-and-Environment-Considerations" data-toc-modified-id="Step-1:-Imports-and-Environment-Considerations-1.1">Step 1: Imports and Environment Considerations</a></span></li><li><span><a href="#Step-2:-Define-Generator-Function" data-toc-modified-id="Step-2:-Define-Generator-Function-1.2">Step 2: Define Generator Function</a></span></li><li><span><a href="#Step-3:-Define-Set-of-Person/Object-Combinations-and-Apply-Generator" data-toc-modified-id="Step-3:-Define-Set-of-Person/Object-Combinations-and-Apply-Generator-1.3">Step 3: Define Set of Person/Object Combinations and Apply Generator</a></span></li></ul></li><li><span><a href="#Model-Output-Exploration-and-Analysis" data-toc-modified-id="Model-Output-Exploration-and-Analysis-2">Model Output Exploration and Analysis</a></span><ul class="toc-item"><li><span><a href="#Step-1:-Load-in-Data-and-View" data-toc-modified-id="Step-1:-Load-in-Data-and-View-2.1">Step 1: Load in Data and View</a></span></li></ul></li></ul></div>

## Data Generation

### Step 1: Imports and Environment Considerations

In [52]:
# import necessary libraries
import numpy as np
import pandas as pd
import os
from IPython.display import display
# set random seed for reproducibility
## this seeds numpy's global random state, which synthesize() draws from via np.random.uniform,
## so running the cells in the same order from a fresh kernel repeats the same draws
np.random.seed(18)

### Step 2: Define Generator Function

In [53]:
def synthesize(sizes):
    '''Generate one random value matrix and write it to disk as an AMPL .dat file.

    Parameters
    ----------
    sizes : pandas.Series
        One row of the sizes dataframe, holding three labelled entries:
        'index' : the number associated with that row (used to name the output file)
        0       : the number of people for this particular dataset
        1       : the number of objects for this particular dataset

    Returns
    -------
    None
        The only result is the file './data/generated/<index>.dat'.

    Notes
    -----
    The values are Uniform[0,1] r.v.s drawn from numpy's global random state, so the
    output depends on the seed and on how many draws were made before this call.
    '''
    # read the three entries by label rather than relying on positional slicing
    num_people = int(sizes[0])
    num_objects = int(sizes[1])
    index = sizes['index']

    # create r.v. matrix of size len(people) by len(objects) then normalize
    ## np.sum(..., axis=0) yields one total per column, so every column (object) sums to 1
    ## NOTE(review): an earlier comment described dividing rows by their sums instead;
    ## confirm which normalization the AMPL model expects before changing the axis
    grid = np.random.uniform(0, 1, (num_people, num_objects))
    normed = grid / np.sum(grid, axis=0)

    # create df with one row per matrix entry: its column position, its row position, its value
    df = pd.DataFrame(normed).unstack().reset_index()

    # add 1 to both position columns so that the ranges start from 1
    df.iloc[:, 0:2] = df.iloc[:, 0:2] + 1

    # move columns to (row position, column position, value) for correct AMPL reading
    df = df.reindex(columns=['level_1', 'level_0', 0])

    # build the output path from the row's index, creating the folder if it is missing
    out_dir = './data/generated/'
    os.makedirs(out_dir, exist_ok=True)
    name = out_dir + str(index) + '.dat'

    # generate the sets for people and objects given their size
    people = ' '.join(str(i) for i in range(1, num_people + 1))
    objects = ' '.join(str(i) for i in range(1, num_objects + 1))

    # since np.savetxt will be used, bundle all other information in the header
    header = 'data; \nset people := %s; \n' % people
    header += 'set objects := %s; \n' % objects
    header += 'param numObjects := %d; \nparam numPeople := %d; \nparam v := ' % (num_objects, num_people)

    # save df values space separated under header
    ## comments='' stops np.savetxt from prefixing the header lines with '# '
    np.savetxt(name, df.values, fmt=['%i', '%i', '%f'], header=header, comments='')

    # add final semicolon; the context manager closes the file even if the write fails
    with open(name, 'a') as file:
        file.write('\n ;')

### Step 3: Define Set of Person/Object Combinations and Apply Generator

In [54]:
# candidate counts to combine
## 5 through 160 are successive doublings; 320 through 5120 are 14 integer points from np.geomspace
doubling_sizes = np.array([5,10,20,40,80,160])
geometric_sizes = np.geomspace(320, 5120, 14, dtype = int)
values = np.append(doubling_sizes, geometric_sizes)

# pair every candidate count with every candidate count, one pairing per row
## the stacked meshgrid is transposed and flattened to two columns so rows are .dat file ready
mesh = np.array(np.meshgrid(values, values))
pairs = mesh.T.reshape(-1, 2)
sizes = pd.DataFrame(pairs)

# keep only the rows without more people (column 0) than objects (column 1),
# since min p will always be 1 when people outnumber objects
no_excess_people = sizes[0] <= sizes[1]
sizes = sizes[no_excess_people].reset_index(drop=True).reset_index()

# shift the new 'index' column so that row numbering starts at 1, then display resultant df
sizes['index'] = sizes['index'] + 1
display(sizes)

# apply generator function to remaining rows
# sizes.apply(synthesize, axis = 1) # !!! this call is left commented out since the data already exists !!!

Unnamed: 0,index,0,1
0,1,5,5
1,2,5,10
2,3,5,20
3,4,5,40
4,5,5,80
...,...,...,...
205,206,3342,4136
206,207,3342,5120
207,208,4136,4136
208,209,4136,5120


0      None
1      None
2      None
3      None
4      None
       ... 
205    None
206    None
207    None
208    None
209    None
Length: 210, dtype: object

## Model Output Exploration and Analysis

### Step 1: Load in Data and View

In [55]:
# read the model output file (parsed as comma-separated text by pd.read_csv) into a dataframe
df = pd.read_csv('./data/output/full.txt')
# bare last expression so the notebook renders the frame as this cell's output
df

Unnamed: 0,p,solveTime,numObjects,numPeople,fileNumber
0,0.571220,0.358290,5,5,1
1,0.000000,0.444294,10,5,2
2,0.000000,0.505016,20,5,3
3,0.000000,0.529354,40,5,4
4,0.000000,1.712887,80,5,5
...,...,...,...,...,...
60,0.231558,4715.530548,319,40,61
61,0.068670,4721.144692,396,40,62
62,0.097490,4722.984810,490,40,63
63,1.000000,4724.100935,606,40,64
