In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error
from statsmodels.tsa.ar_model import AR
from xgboost import XGBRegressor
from datetime import datetime
from datetime import timedelta
import common
%matplotlib inline

In [7]:
class ARWrapper():
    """Small fit/predict wrapper around the statsmodels ``AR`` model.

    The wrapper remembers the length of the training series so that
    ``predict`` can ask for the observations that immediately follow it.

    NOTE(review): ``statsmodels.tsa.ar_model.AR`` appears to have been
    superseded by ``AutoReg`` in newer statsmodels releases -- confirm the
    version this notebook is pinned to before upgrading.

    Parameters
    ----------
    silent : bool, default False
        When falsy, ``fit`` prints the selected lag order and the fitted
        coefficients.
    """

    def __init__(self,silent=False):
        self.model = None
        self.model_fit = None
        # Defined up front so the attribute exists even before fit() is called.
        self.train_len = 0
        self.silent = silent

    def fit(self,train):
        """Fit the autoregressive model on ``train``.

        Parameters
        ----------
        train : sequence of numbers
            The training series (a list or 1-D array, as used in this file).
        """
        self.train_len = len(train)
        self.model = AR(train)
        # ic='bic' asks statsmodels to choose the lag order by the BIC criterion.
        self.model_fit = self.model.fit(ic='bic',maxiter=100)
        if not self.silent:
            print('Lag: %s' % self.model_fit.k_ar)
            print('Coefficients: %s' % self.model_fit.params)

    def predict(self,test_len):
        """Forecast the ``test_len`` values that follow the training series.

        Parameters
        ----------
        test_len : int
            Number of steps to forecast.

        Returns
        -------
        Whatever the fitted model's ``predict`` returns for positions
        ``train_len`` .. ``train_len + test_len - 1``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.model_fit is None:
            # Previously this surfaced as "'NoneType' object has no attribute 'predict'".
            raise RuntimeError('ARWrapper.predict() called before fit()')
        first = self.train_len
        last = first + test_len - 1
        return self.model_fit.predict(start=first,end=last,dynamic=False)

# Smoke test: a series that repeats 1,2,3,4 four times; the forecast is
# printed so the continuation of the pattern can be checked by eye.
demo_series = [1,2,3,4] * 4
my_ar = ARWrapper(silent=True)
my_ar.fit(demo_series)
demo_forecast = my_ar.predict(10)
print(demo_forecast)

[ 1.  2.  3.  4.  1.  2.  3.  4.  1.  2.]


In [10]:
def generate_demand_submission(n_products=81, n_regions=18, horizon=3, first_month=37,
                               history_path='historical_demand.csv',
                               output_path='demand_forecast.csv'):
    """Forecast demand for every product/region pair and write it to a CSV.

    For each pair ``P<i>`` / ``R<j>`` the historical ``Demand`` values are
    fed to an ``ARWrapper`` and ``horizon`` future values are predicted.
    The result is written to ``output_path`` with the columns
    ``Product_ID``, ``Region``, ``Month`` and ``Demand``, sorted by those
    first three columns (as strings, so 'P10' sorts before 'P2').

    The history file must provide ``Product_ID``, ``Region`` and ``Demand``
    columns.  NOTE(review): the rows of each pair are used in file order and
    are assumed to already be chronological -- confirm against the data.

    Parameters
    ----------
    n_products : int, default 81
        Products are named 'P1' .. 'P<n_products>'.
    n_regions : int, default 18
        Regions are named 'R1' .. 'R<n_regions>'.
    horizon : int, default 3
        Number of months to forecast per pair.
    first_month : int, default 37
        Month number given to the first forecast step; later steps count up.
    history_path : str, default 'historical_demand.csv'
        CSV file to read the history from.
    output_path : str, default 'demand_forecast.csv'
        CSV file to write the forecast to.

    Raises
    ------
    ValueError
        If a product/region pair has no rows in the history.
    """
    hd = pd.read_csv(history_path)

    # Split the history once instead of scanning the whole frame for every
    # pair.  Row order inside each group is the order in the file.
    demand_by_pair = {
        pair: series.values
        for pair, series in hd.groupby(['Product_ID', 'Region'], sort=False).Demand
    }

    months = list(range(first_month, first_month + horizon))
    tproduct = []
    tregion = []
    tmonth = []
    tdemand = []
    #Predict for every product region pair
    for pidx in range(1, n_products + 1):
        product = 'P' + str(pidx)
        for ridx in range(1, n_regions + 1):
            region = 'R' + str(ridx)
            train_demand = demand_by_pair.get((product, region))
            if train_demand is None or len(train_demand) == 0:
                # Fail with the offending pair instead of deep inside the model fit.
                raise ValueError('no historical demand for %s / %s' % (product, region))
            ar = ARWrapper(silent=True)
            ar.fit(train_demand)
            pred = ar.predict(horizon)
            tdemand.extend(list(pred))
            tproduct.extend([product] * horizon)
            tregion.extend([region] * horizon)
            tmonth.extend(months)
        # One progress dot per product.
        print('.', end='')

    df = pd.DataFrame(
        {'Product_ID': tproduct, 'Region': tregion, 'Month': tmonth, 'Demand': tdemand},
        columns=['Product_ID', 'Region', 'Month', 'Demand'],
    )
    df = df.sort_values(['Product_ID', 'Region', 'Month'])
    df.to_csv(output_path, index=False)
    print("Done creating " + output_path)
# Fit one AR model per product/region pair and write demand_forecast.csv.
generate_demand_submission()

  return np.log(self.sigma2) + (1 + self.df_model) * np.log(nobs)/nobs


.................................................................................Done creating demand_forecast.csv


In [87]:
def prune_low_quantities(minimum, path='demand_forecast.csv'):
    """Zero out product-months whose total forecast demand is too small.

    The forecast in ``path`` is read, and for every (Product_ID, Month)
    whose demand summed over all rows is greater than 0 but below
    ``minimum``, the ``Demand`` of all of its rows is set to 0.  The file
    is then overwritten in place and a one-line summary is printed.

    Parameters
    ----------
    minimum : number
        Product-month totals strictly between 0 and this value are pruned.
    path : str, default 'demand_forecast.csv'
        CSV file to read and overwrite; needs ``Product_ID``, ``Month`` and
        ``Demand`` columns.
    """
    dem_for = pd.read_csv(path)
    old_demand = dem_for.Demand.sum()

    # Broadcast each product-month total back onto its rows so the rows to
    # prune can be selected with a single mask instead of a per-group loop.
    group_total = dem_for.groupby(['Product_ID', 'Month']).Demand.transform('sum')
    too_small = (group_total > 0) & (group_total < minimum)
    dem_for.loc[too_small, 'Demand'] = 0

    dem_for.to_csv(path, index=False)
    new_demand = dem_for.Demand.sum()

    # Avoid 0/0 (nan plus a RuntimeWarning) when there was no demand at all.
    if old_demand:
        reduction = (old_demand - new_demand) * 100 / old_demand
    else:
        reduction = 0.0
    print("Reduced demand from ", old_demand, "to", new_demand, reduction, "%")
# Zero every product-month whose total forecast is above 0 but below 150.
prune_low_quantities(minimum=150)

Reduced demand from  65800.69799033992 to 60703.79053142524 7.745977800513521 %


In [89]:
def aggregate_low_demands(upper_limit):
    """Write per-product monthly demand to 'modified_demand_forecast.csv'.

    Reads 'demand_forecast.csv', 'production_capacity.csv',
    'production_cost.csv' and 'demand_price.csv' from the working directory,
    sums the forecast demand per (Product_ID, Month) and writes the columns
    ``Product_ID``, ``Month``, ``Demand``, ``LowProfit`` and ``Aggregated``.

    NOTE: in the current state of the code ``LowProfit`` and ``Aggregated``
    are always 0.  The low-demand selection (driven by ``upper_limit``) and
    the per-product profit figure are still computed, but the steps that
    would consume them are disabled, so they do not change the output file.

    Parameters
    ----------
    upper_limit : number
        A product-month counts as "low demand" when its demand divided by
        the product's largest capacity is above 0 and below this value.
    """
    dem_for = pd.read_csv('demand_forecast.csv')
    pro_cap = pd.read_csv('production_capacity.csv')
    pro_cos = pd.read_csv('production_cost.csv')
    dem_pri = pd.read_csv('demand_price.csv')

    monthly_demand = dem_for.groupby(['Product_ID','Month']).Demand.sum()
    monthly_demand = monthly_demand.reset_index()

    # Largest capacity listed for each product.
    max_cap = pro_cap.groupby('Product').Capacity.max()
    # Series.map replaces the former loop over Series.iteritems(), which no
    # longer exists in pandas 2.x; products without a capacity entry get NaN,
    # and the column exists even when the capacity table is empty.
    monthly_demand['Capacity'] = monthly_demand.Product_ID.map(max_cap)
    monthly_demand['percent_demand'] = monthly_demand.Demand/monthly_demand.Capacity
    # Only needed by the disabled aggregation step described further down.
    low_demand_products = monthly_demand[(monthly_demand.percent_demand > 0) &
                                         (monthly_demand.percent_demand < upper_limit)]

    monthly_demand['LowProfit']=0
    demands = dem_for.groupby('Product_ID').Demand.sum()
    prod_costs = pro_cos.groupby('Product_ID').Production_cost.max()
    prices = dem_pri.groupby('Product_ID').Demand_price.max()

    # PPD stays 0 for products with no demand, no capacity, or whose total
    # demand is less than twice their capacity.
    ppd_by_product = {}
    for product in monthly_demand.Product_ID.unique():
        ppd_by_product[product] = 0
        demand = demands[product]
        if demand == 0:
            continue
        capacity = max_cap[product]
        if capacity == 0 or demand/capacity < 2:
            continue
        prod_cost = prod_costs[product]
        price = prices[product]
        # Total margin divided by demand/capacity; algebraically this is
        # (price - prod_cost) * capacity.
        profit_per_day = ((price-prod_cost)*demand)/(demand/capacity)
        ppd_by_product[product] = round(profit_per_day)
    monthly_demand['PPD'] = monthly_demand.Product_ID.map(ppd_by_product)
    # NOTE: a disabled experiment set LowProfit=1 for products whose
    # profit_per_day was below 80000; LowProfit is currently left at 0.

    monthly_demand['Aggregated']=0
    # NOTE: a second disabled experiment took products appearing in
    # low_demand_products (apparently those low in all three months), moved
    # the summed demand of months 37-39 into month 37, set Aggregated=1 on
    # that row and zeroed the demand of months 38 and 39.

    # The helper columns are working data only and are not written out.
    monthly_demand = monthly_demand.drop(['Capacity','percent_demand','PPD'],axis=1)
    monthly_demand.to_csv('modified_demand_forecast.csv',index=False)


# Write modified_demand_forecast.csv, using 0.75 as the low-demand threshold.
aggregate_low_demands(upper_limit=0.75)