## Inits

+ Modules for LP/IP:

In [1]:
import cplex
from cplex import Cplex
from cplex.exceptions import CplexError

+ Global vars:

In [14]:
# Data locations. Paths start with '~' and are stored unexpanded.
CLEAN_DIR = '~/data/venture=Zalora/sg/clean/'
GROUP_DIR = '{}groups/'.format(CLEAN_DIR)  # sub-folder of CLEAN_DIR
FEAT_DIR = '{}feats/'.format(GROUP_DIR)    # sub-folder of GROUP_DIR

# Separate location under the daas-markdown project.
RES_DIR = '~/projects/daas-markdown/res/'
# TODAY = pd.datetime.today().date()

## IP formulation

### Input, Output

Given a date $d$, the input and output of the `price optimizer` module are as follows:

__Input__:

+ the set of SKU configs whose prices on $d$ need to be optimized
+ the set of possible prices for the configs
+ inventory constraints on $d$, i.e. the number of remaining units in inventory of each config on $d$

__Output__: the list of optimal prices for the configs

### Math formulation of the problem

\begin{align}
	\label{IP_s}
	\max & \sum_{i \in I_o} \sum_{j \in J} p_j D_{i, j, s} x_{i, j} \\
	s.t. & \sum_{j \in J} x_{i, j} = 1, ~\forall i \in I_o  \\
	 	 & \sum_{i \in I_o} \sum_{j \in J} p_j x_{i, j} = s  \\
		 & x_{i, j} \in \{0, 1 \} \\
		 & D_{i, j, s} \le n_i 
\end{align}

### Components of the IP

+ objective function
+ single-price constraints
+ sum constraints
+ binary type constraints
+ inventory constraints

# Helpers to populate LP

## Variable setters

In [2]:
def set_var_type(prob, binary=True):
    '''
    Give every variable of `prob` the same type: binary or continuous.

    prob: problem object exposing `variables`; its variables are re-typed in place.
    binary: when truthy all variables become binary, otherwise continuous.

    Returns the same `prob` object.
    '''
    names = prob.variables.get_names()
    type_codes = prob.variables.type
    chosen = type_codes.binary if binary else type_codes.continuous

    # set_types receives one (name, type) pair per variable
    prob.variables.set_types([(name, chosen) for name in names])
    print('+ set variable types as {}'.format(prob.variables.get_types()))
    return prob

## Constraint setters

In [21]:
def mk_var_names(configs, prices):
    '''
    Build the names of the decision variables, one per (config, price) pair.

    configs: sized collection of configs; only its length is used.
    prices: sized collection of candidate prices; only its length is used.

    Returns a numpy array of strings with one row per config and one column
    per price. Entry [i, j] is 'x_{i+1}_{j+1}' (indices are 1-based).

    The two indices are separated by an underscore so that names stay unique:
    without a separator, (config 1, price 11) and (config 11, price 1) would
    both be named 'x_111' as soon as either dimension exceeds 10.
    '''
    n_config, n_price = len(configs), len(prices)

    var_names = [
        ['x_{}_{}'.format(i, j) for j in range(1, n_price + 1)]
        for i in range(1, n_config + 1)
    ]
    return np.array(var_names)

def mk_opc_lhs(configs, prices, var_names):
    '''
    Build the left-hand sides of the one-price constraints, one per config.

    configs: iterable of configs; only the position of each config is used.
    prices: sized collection of candidate prices; fixes the number of coefficients.
    var_names: indexable by config position; var_names[i] holds the variables of config i.

    Returns a list of `cplex.SparsePair`, each pairing the variables of one
    config with coefficients that are all 1.
    '''
    unit_coefs = np.ones(len(prices))
    rows = []

    for count, _ in enumerate(configs, start=1):
        # progress message every 100 configs
        if not count % 100:
            print('\t created one-price lhs for {} config ...'.format(count))
        rows.append(cplex.SparsePair(ind=var_names[count - 1], val=unit_coefs))

    return rows

In [4]:
def add_one_price_constraints(prob, configs, prices, var_names):  # one per config
    '''
    Add one equality constraint per config: the variables of that config sum to 1.

    prob: problem object exposing `linear_constraints`; modified in place.
    configs, prices, var_names: forwarded to `mk_opc_lhs` to build the left-hand sides.

    Rows are named 'opc_config_0', 'opc_config_1', ... (0-based).
    Returns the same `prob` object.
    '''
    n_config = len(configs)

    prob.linear_constraints.add(
        lin_expr=mk_opc_lhs(configs, prices, var_names),
        senses=['E'] * n_config,
        rhs=np.ones(n_config),
        names=list(map('opc_config_{}'.format, range(n_config))),
    )

    print('+ added {} one-price constraints for {} configs'.format(n_config, n_config))
    return prob

In [5]:
def add_sum_constraints(prob, s, prices, configs, var_names):
    '''
    Add the single equality constraint named 'sum_constraint', with right-hand side `s`.

    prob: problem object exposing `linear_constraints`; modified in place.
    s: right-hand side of the constraint.
    prices: candidate prices, used as the coefficients of each config's variables.
    configs: sized collection of configs; only its length is used.
    var_names: array of variable names; flattened with `ravel()`.

    Returns the same `prob` object.
    '''
    # one copy of the price vector per config, flattened in the same order as var_names
    price_grid = np.array([prices] * len(configs))
    total_price_row = cplex.SparsePair(ind=var_names.ravel(), val=price_grid.ravel())

    prob.linear_constraints.add(
        lin_expr=[total_price_row],
        senses=['E'],
        rhs=[s],
        names=['sum_constraint'],
    )
    print('+ added sum constraint')
    return prob

__Note:__ the inventory constraints are non-linear and stochastic, so they will be added in a later version.

In [7]:
def add_inventory_constraints(prob, predicted_demands, inv_amount):
    '''
    Placeholder for the inventory constraints: nothing is added to `prob` yet.

    prob: problem object; returned untouched.
    predicted_demands: currently unused.
    inv_amount: currently unused.

    Returns the same `prob` object, like the other setters, so that
    `prob = add_inventory_constraints(prob, ...)` does not replace the
    problem with None.
    '''
    # Say what actually happened instead of claiming constraints were added.
    print('+ skipped inventory constraints (not implemented yet)')
    return prob
In [8]:
def add_bounds(prob, lb, ub):
    '''
    Give every variable of `prob` the same lower bound `lb` and upper bound `ub`.

    prob: problem object exposing `variables`; modified in place.
    lb: lower bound applied to all variables.
    ub: upper bound applied to all variables.

    Returns the same `prob` object.
    '''
    names = prob.variables.get_names()

    # both setters receive one (name, bound) pair per variable
    prob.variables.set_lower_bounds([(name, lb) for name in names])
    prob.variables.set_upper_bounds([(name, ub) for name in names])
    print('+ added bounds')
    return prob

## Objective function setters

### Train a sales predictor
The training should be done offline.

In [None]:
# Create the object whose fit_xgb method is used below to train the model.
# NOTE(review): DemandPredictor is not defined or imported in this section — confirm where it comes from.
demand_predictor = DemandPredictor()

In [None]:
# Prepare the train/test data in three steps.
# NOTE(review): input_loader, gname, global_X, my_trigger and data_splitter are not
# defined in this section; they must already exist in the session.
# 1. feature matrix of the group `gname`
feat_mat = input_loader.form_feat_mat(group=gname, global_X=global_X)
# 2. attach the response using the sale history (presumably adds the target column — confirm)
ready_df = input_loader.attach_response(feat_mat, my_trigger.sale_hist)
# 3. split into a training part and a test part
train_df, test_df = data_splitter.split_train_test(ready_df)

In [None]:
# Fit on the training data; the result exposes `best_estimator_`
# (presumably a scikit-learn style search object — confirm).
best_xgb = demand_predictor.fit_xgb(train_df)
# Keep the selected estimator under the name `predictor`.
predictor = best_xgb.best_estimator_

### Predict sales associated with a value of total price

Given the trained predictor, we now can perform the sales prediction needed by PO.

__Context__: Consider date $dd$. Let `group_size` be the number of configs in the group on $dd$, and let $s_{out}$ be the total price of the configs that do not need to be optimized (this total is therefore fixed).

Given a config, possible prices and the fact that the prices sum to $s$, predict sales of the config at the given prices.

In [None]:
# context
group_size =  len(all_configs)
s_out = 1000 # hypothetic, replace by actual value later
mean_price = (s + s_out)/group_size
cols = ['sku_config', 'price', 'total_price', 'predicted_sale']

In [19]:
def predict_sales(cf, prices, predictor):
    '''
    Predict the sales of config `cf` at each price in `prices`.

    cf: value matched against the 'sku_config' column of `feat_mat`.
    prices: iterable of candidate prices.
    predictor: object with a `predict` method, called on the columns `feats`.

    Reads names defined outside this function: `feats`, `feat_mat`, `dd`,
    `mean_price`, `s` and `cols`.

    Returns a DataFrame with the columns listed in `cols`, one row per price.
    '''
    print('config: {}'.format(cf))
    data_cols = feats + ['black_price']

    # The base record of the config does not depend on the price, so it is
    # looked up once here instead of once per price.
    # NOTE(review): if `cf` is absent from feat_mat this record is empty and the
    # prediction below fails — confirm callers only pass known configs.
    base_rec = feat_mat.query('sku_config == "{}"'.format(cf))[data_cols].head(1)

    def predict_sale_for_price(p):
        # Work on an independent copy: assigning columns on the slice returned
        # by head(1) is a chained assignment pandas warns about.
        rec = base_rec.copy()
        rec['date'] = dd

        # update price-relevant feats
        rec['current_price'] = p
        rec['discount_from_rrp'] = 100*(rec['black_price'] - rec['current_price'])/rec['black_price']
        rec['rel_price'] = rec['current_price']/mean_price
        return predictor.predict(rec[feats])[0]

    sales = [predict_sale_for_price(p) for p in prices]
    res = pd.DataFrame({'sku_config': cf, 'price': prices, 'total_price': s, 'predicted_sale': sales})
    return res[cols]

In [10]:
def cal_obj_coefs(df):  # pred_sale_df
    '''
    Compute the objective coefficients: price times predicted sale, row by row.

    df: frame with the columns 'price' and 'predicted_sale'.

    Returns a numpy array with one value per row of `df`.
    '''
    revenue_per_row = df['price'].mul(df['predicted_sale'])
    return np.array(revenue_per_row)

In [11]:
def add_obj(prob, configs, prices, s, dd):
    '''
    Set the linear objective of `prob` from the predicted sales.

    prob: problem object exposing `variables` and `objective`; modified in place.
    configs: iterable of configs; sales are predicted for each one in turn.
    prices: candidate prices, forwarded to `predict_sales`.
    s, dd: accepted but not used in this body.

    The model is taken from the global name `predictor`. Coefficients are
    paired with the variable names of `prob` in order, so the variables are
    expected to be ordered config by config, price by price.

    Returns the same `prob` object.
    '''
    per_config = []
    for cf in configs:
        per_config.append(predict_sales(cf, prices, predictor))

    coefs = cal_obj_coefs(pd.concat(per_config))

    # set_linear receives one (variable name, coefficient) pair per variable
    name_coef_pairs = list(zip(prob.variables.get_names(), coefs))
    prob.objective.set_linear(name_coef_pairs)
    print('+ added objective function')
    return prob

## To get components

In [12]:
def get_constraints(prob):
    '''
    Collect the linear constraints of `prob` in a DataFrame.

    prob: problem object exposing `linear_constraints` and `variables`.

    Returns a DataFrame with the columns 'name', 'lhs', 'sense' and 'rhs',
    one row per linear constraint; 'lhs' is a human readable string.
    '''
    rows = prob.linear_constraints

    def render(pair):
        '''
        Turn the lhs of a constraint, given as `cplex.SparsePair`, into text
        of the form "<coef><var> + <coef><var> + ...".
        '''
        labels = prob.variables.get_names(pair.ind)
        return ' + '.join(str(coef) + label for coef, label in zip(pair.val, labels))

    table = {
        'name': rows.get_names(),
        'lhs': [render(pair) for pair in rows.get_rows()],
        'sense': rows.get_senses(),
        'rhs': rows.get_rhs(),
    }
    return pd.DataFrame(table, columns=['name', 'lhs', 'sense', 'rhs'])

In [20]:
def get_type(prob):
    '''
    Look up the problem type of `prob`.

    Returns the entry of `prob.problem_type` indexed by the code that
    `prob.get_problem_type()` reports.
    '''
    return prob.problem_type[prob.get_problem_type()]