In [3]:
import re
import sys
import random
import pandas as pd
import numpy as np
import warnings 
warnings.filterwarnings('ignore')
import statsmodels.api as sm
from statsmodels.formula.api import ols
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from scipy import optimize
from gekko import GEKKO

def input_genre():
    """Ask for the performance genre until a valid menu number is entered.

    The prompt lists five numbered genres. Any answer that is not an
    integer from 1 to 5 prints an error message and asks again.

    Returns:
        str: 'western', 'tradition', 'acting', 'musical' or 'dancing',
        corresponding to menu entries 1-5.
    """
    # Menu number -> genre key returned to the caller.
    genres = {
        1: 'western',
        2: 'tradition',
        3: 'acting',
        4: 'musical',
        5: 'dancing',
    }
    prompt = '''공연이 해당하는 장르의 번호를 입력해주세요: 
                    1: 서양음악(오페라, 클래식 등)
                    2: 전통공연
                    3: 연극
                    4: 뮤지컬
                    5: 무용(발레, 현대무용 등)'''

    # Loop rather than recurse: a recursive retry whose result is not
    # returned makes the function hand back None after any invalid answer.
    while True:
        answer = input(prompt)
        try:
            # ValueError: not an integer; KeyError: integer outside 1-5.
            return genres[int(answer)]
        except (ValueError, KeyError):
            print('유효하지 않은 값입니다')
        
def input_seat():
    """Ask for the minimum and maximum seat counts until both are valid.

    Both prompts are shown again whenever either answer is not an integer
    or the minimum exceeds the maximum.

    Returns:
        tuple[int, int]: ``(min_seat, max_seat)`` with ``min_seat <= max_seat``.
    """
    # Loop rather than recurse: a recursive retry whose result is not
    # returned makes the function hand back None, which breaks the
    # caller's ``min_seat, max_seat = input_seat()`` unpacking.
    while True:
        min_answer = input('공연장에 필요한 **최소** 좌석수를 입력해주세요:')
        max_answer = input('공연장에 필요한 **최대** 좌석수를 입력해주세요:')

        try:
            min_seat = int(min_answer)
            max_seat = int(max_answer)
        except ValueError:
            print('유효하지 않은 값입니다')
            continue

        # An inverted range can never match any hall, so treat it as invalid.
        if min_seat > max_seat:
            print('유효하지 않은 값입니다')
            continue

        return min_seat, max_seat

# Concatenate the 2016, 2018 and 2019 survey data, one output column per name.
def combine(lst):
    """Stack the three survey years into one DataFrame, column by column.

    Relies on the module-level frames ``df_2016``, ``df_2018`` and
    ``df_2019`` and on a module-level object for every name in ``lst``;
    none of them are defined in the visible part of this file.

    Args:
        lst: iterable of variable names (strings). Each name is resolved
            with ``eval`` to an object indexed by the keys 2016, 2018 and
            2019; each value is used to select from that year's frame
            (presumably a single column label -- TODO confirm).

    Returns:
        DataFrame with one column per name in ``lst``. Rows are the 2016,
        2018 and 2019 selections stacked in that order with a fresh
        0..n-1 index.

    Raises:
        UnboundLocalError: if ``lst`` is empty (``df`` is never assigned).
    """
    first = True
    for col in lst:
        col_name = col
        # NOTE: eval executes whatever expression the string contains;
        # never pass untrusted input here.
        col = eval(col)
        df1 = df_2016[col[2016]]
        df2 = df_2018[col[2018]]
        # NOTE(review): rename() with a positional dict relabels the index
        # by default, not the column / Series name -- confirm this really
        # aligns the 2018/2019 labels with the 2016 one.
        df2 = df2.rename({col[2018]:col[2016]})
        df3 = df_2019[col[2019]]
        df3 = df3.rename({col[2019]:col[2016]})

        # Disabled dtype alignment, kept for reference:
        #typ = df1[col[2016]].dtype
        #df2[col[2016]] = df2[col[2016]].astype(typ)
        #df3[col[2016]] = df2[col[2016]].astype(typ)

        # Stack the three years and discard the per-year indexes.
        temp = pd.concat([df1, df2, df3]).reset_index(drop=True)
        # The first name creates the result frame; later names are added as
        # extra columns by position (``.values`` bypasses index alignment).
        # NOTE(review): DataFrame(temp, columns=[col_name]) selects by label;
        # if temp is not labelled col_name the first column may come out
        # empty -- verify against real data.
        if first == True: df = pd.DataFrame(temp, columns=[col_name])
        else: df[col_name] = temp.values
        first = False
    return df


def get_coef(genre: str):
    """Fit a logistic regression of satisfaction on time/location dummies.

    Reads the module-level ``df``. Rows with a missing ``satisfy_<genre>``
    score or with ``time == '기타'`` are excluded. The target is 1 when
    the score is at least 5 and 0 otherwise; the features are the one-hot
    encoded ``time`` and ``location`` columns.

    Prints the mean test score of a 5-fold cross-validation.

    Args:
        genre: genre key appended to ``'satisfy_'`` to find the score column.

    Returns:
        tuple: ``(coef, X)`` -- the fitted ``coef_`` array and the feature
        DataFrame whose column order matches the coefficients.
    """
    score_col = 'satisfy_' + genre

    # Keep only respondents who rated this genre and gave a concrete time.
    rows = df[df[score_col].notnull()].copy()
    rows = rows[rows['time'] != '기타']

    # Binary target: 1 = satisfied (score >= 5), 0 = otherwise.
    rows['satisfaction'] = rows[score_col].apply(
        lambda score: 1 if score >= 5 else 0
    )
    rows = pd.get_dummies(rows, columns=['time', 'location'])

    # Drop every raw survey column; 'time' and 'location' are already gone
    # because get_dummies replaced them with indicator columns.
    raw_columns = [
        name for name in df.columns if name not in ('time', 'location')
    ]
    rows = rows.drop(columns=raw_columns)

    y = rows['satisfaction']
    X = rows.drop(columns=['satisfaction'])

    reg = LogisticRegression()
    reg.fit(X, y)

    fold_scores = cross_validate(reg, X, y, cv=5)['test_score']
    print('logistic regression accuracy -', genre, ':')
    print(fold_scores.mean())

    return reg.coef_, X


# Optimization model of location and time with logistic regression model
def optimize(genre: str, coef, X, k=-1):
    """Pick the time slot and location with the highest logistic score.

    Builds a small mixed-integer model with GEKKO (APOPT solver): one
    binary variable per column of ``X``, exactly one ``time_*`` column and
    exactly one ``location_*`` column selected, maximising
    ``exp(z) / (1 + exp(z))`` where ``z`` is the dot product of the
    selection with ``coef[0]``. Only the coefficients are used; no
    intercept term enters ``z``.

    NOTE: this definition shadows the ``optimize`` module imported from
    scipy at the top of the file.

    Args:
        genre: not used by the model; kept for interface compatibility.
        coef: 2-D coefficient array (e.g. ``LogisticRegression.coef_``);
            row 0 must hold one weight per column of ``X``.
        X: DataFrame whose column labels name the dummy variables, with
            ``time_`` / ``location_`` prefixes as produced by
            ``pd.get_dummies``.
        k: bound on the negated score (``-score > k``). The default -1 only
            requires ``score < 1``, which a logistic value always satisfies.
            Passing ``-previous_score + 0.001`` forces a solution scoring
            below the previous one.

    Returns:
        tuple: ``(time_column, location_column, score)`` -- the selected
        column labels (prefix included) and the maximised score.

    Raises:
        ValueError: if ``coef[0]`` and ``X`` disagree in length, or the
            columns are not all ``time_*`` / ``location_*`` with at least
            one of each.
    """
    weights = coef[0]
    columns = list(X.columns)
    if len(columns) != len(weights):
        raise ValueError(
            f'coef has {len(weights)} weights but X has {len(columns)} columns'
        )

    # Group variables by dummy prefix instead of assuming "first 4 are time,
    # next 17 are location", so any number of categories works.
    time_idx = [i for i, name in enumerate(columns)
                if str(name).startswith('time_')]
    loc_idx = [i for i, name in enumerate(columns)
               if str(name).startswith('location_')]
    if not time_idx or not loc_idx:
        raise ValueError("X needs at least one 'time_' and one 'location_' column")
    if len(time_idx) + len(loc_idx) != len(columns):
        raise ValueError("X may only contain 'time_' and 'location_' columns")

    m = GEKKO(remote=False)
    m.options.SOLVER = 1  # APOPT is an MINLP solver

    # Binary decision variables live in a local list. Publishing them as
    # module globals (x0, x1, ...) would leak state between calls and break
    # as soon as the number of columns changes.
    choices = [m.Var(value=0, lb=0, ub=1, integer=True) for _ in columns]
    v1 = np.array(choices)

    # Exactly one time slot and exactly one location. The sums are seeded
    # with their first term so only GEKKO's own ``+`` operator is involved.
    time_vars = [choices[i] for i in time_idx]
    loc_vars = [choices[i] for i in loc_idx]
    m.Equation(sum(time_vars[1:], time_vars[0]) == 1)
    m.Equation(sum(loc_vars[1:], loc_vars[0]) == 1)

    # Negated logistic score: GEKKO minimises, so minimising -score
    # maximises the score.
    z = np.dot(v1, weights)
    neg_score = -(m.exp(z)) / (1 + m.exp(z))
    # NOTE(review): strict and non-strict inequalities are presumably
    # treated alike by the solver; the +0.001 margin added by the caller is
    # what actually excludes the previous solution -- confirm.
    m.Equation(neg_score > k)
    m.Obj(neg_score)
    m.solve(disp=False)

    obj = -m.options.objfcnval

    # Round before comparing: the solver reports floats.
    picked_time = [columns[i] for i in time_idx
                   if round(choices[i].value[0]) == 1]
    picked_loc = [columns[i] for i in loc_idx
                  if round(choices[i].value[0]) == 1]

    return picked_time[0], picked_loc[0], obj

# select the optimal stage under the conditions
def stage_select(genre, loc, pos, min_seat, max_seat, time):
    """Pick one random hall matching the location, seat range and genre.

    Reads the module-level ``stage`` DataFrame.

    Args:
        genre: genre key ('western', 'tradition', 'acting', 'musical',
            'dancing'); any other value applies no genre filter.
        loc: location dummy-column label; its first 9 characters (the
            ``'location_'`` prefix) are stripped before matching '지역'.
        pos: predicted satisfaction score, copied into the result.
        min_seat: inclusive lower bound on '좌석수'.
        max_seat: inclusive upper bound on '좌석수'.
        time: time dummy-column label; its first 5 characters (the
            ``'time_'`` prefix) are stripped before it is copied into the
            result.

    Returns:
        DataFrame with the columns '시설명', '공연홀', '좌석수', '시설특성',
        '주소', '시간', '예상 만족도 점수'. It holds one randomly chosen
        matching hall, or no rows when nothing matches.
    """
    time = time[5:]
    loc = loc[9:]

    # Genre key -> flag columns; a hall qualifies when any flag equals 1.
    genre_flags = {
        'western': ['오페라', '클래식'],
        'tradition': ['국악'],
        'acting': ['연극'],
        'musical': ['뮤지컬'],
        'dancing': ['무용'],
    }

    # location and seat conditions
    candidates = stage[
        (stage['지역'] == loc)
        & (stage['좌석수'] >= min_seat)
        & (stage['좌석수'] <= max_seat)
    ]

    # genre condition
    flags = genre_flags.get(genre)
    if flags is not None:
        candidates = candidates[(candidates[flags] == 1).any(axis=1)]

    # Copy before adding columns so the assignment never targets a slice
    # of ``stage``.
    output = candidates[['시설명', '공연홀', '좌석수', '시설특성', '주소']].copy()
    output['시간'] = time
    output['예상 만족도 점수'] = pos

    # No hall matches: return the empty frame instead of letting
    # random.randrange(0) raise and abort the caller's loop.
    if output.empty:
        return output

    pick = random.randrange(len(output))
    return output.iloc[[pick]]


# Survey responses used to fit the model, and the catalogue of halls.
df = pd.read_csv('data/data_preprocessed.csv', index_col=0)
stage = pd.read_csv('data/stage_info.csv', index_col=0)

# User input, then one logistic model for the chosen genre.
genre = input_genre()
min_seat, max_seat = input_seat()
coef, X = get_coef(genre)

# Produce five recommendations in decreasing order of predicted score:
# after each solve, k is tightened so the next solve must score at least
# 0.001 below the one just found.
k = -1
picks = []
for i in range(5):
    time, loc, pos = optimize(genre, coef, X, k)
    picks.append(stage_select(genre, loc, pos, min_seat, max_seat, time))
    k = -pos + 0.001
output = pd.concat(picks)
output

공연이 해당하는 장르의 번호를 입력해주세요: 
                    1: 서양음악(오페라, 클래식 등)
                    2: 전통공연
                    3: 연극
                    4: 뮤지컬
                    5: 무용(발레, 현대무용 등) 3
공연장에 필요한 **최소** 좌석수를 입력해주세요: 300
공연장에 필요한 **최대** 좌석수를 입력해주세요: 500


logistic regression accuracy - acting :
0.8932366954967306


Unnamed: 0,시설명,공연홀,좌석수,시설특성,주소,시간,예상 만족도 점수
332,해운대문화회관,해운홀,458,공공(문예회관),부산광역시 해운대구 양운로 97 (좌동),주말 낮 시간,0.720978
332,해운대문화회관,해운홀,458,공공(문예회관),부산광역시 해운대구 양운로 97 (좌동),주중 저녁 시간,0.705347
329,금정문화회관,소공연장,330,공공(문예회관),부산광역시 금정구 체육공원로 7 (구서동),주말 저녁 시간,0.691478
263,목포시민문화체육센터,소공연장,400,공공(문예회관),전라남도 목포시 부주로 312 (옥암동),주말 낮 시간,0.683802
502,예울마루,소극장,302,민간(대학로 외),전라남도 여수시 예울마루로 100 (시전동),주중 저녁 시간,0.66705


In [28]:
import matplotlib.pyplot as plt
import seaborn as sns
# Set matplotlib's default font family to NanumGothic
# (presumably so Korean text in plot labels renders -- confirm the font is installed).
plt.rcParams['font.family'] = 'NanumGothic'