In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow
import torch

cuda = torch.device('cuda:1')
cuda

deathWoman = np.array(pd.read_excel('../../data/여성사망률추이.xlsx').set_index(['연도']).iloc[:43,:100])
deathMan = np.array(pd.read_excel('../../data/남성사망률추이.xlsx').set_index(['연도']).iloc[:43,:100])
deathRate = np.array([deathWoman, deathMan])
dataOriDf = pd.read_excel('../../data/데이터정리3.xlsx')
ageDataOriDf = pd.read_excel('../../data/의사연령별분포.xlsx')
npAgeData = np.array(pd.read_excel('../../data/의사연령별분포.xlsx'))

#1977년 합격자부터 2019년 합격자
dataDf = dataOriDf[['년도','의대졸합격/남', '의대졸합격/여', '의전졸합격/남', '의전졸합격/여', '불합합격/남', '불합합격/여']]
dataDf = dataDf[27:70]

npData = np.around(np.array(dataDf)[:,1:7])
npData

tcDeathWoman = torch.from_numpy(deathWoman).to(device=cuda)
tcDeathMan = torch.from_numpy(deathMan).to(device=cuda)
tcNpData = torch.from_numpy(npData).to(device=cuda)
tcNpAgeData = torch.from_numpy(npAgeData).to(device=cuda)

In [31]:
pd.set_option('display.max_row', 1200)
pd.set_option('display.max_columns', 100)

# 연령 분류 모델

In [32]:
# 분류모델함수 생성
# x - x값, sp - 시작점, xl - x축 조정, yl - y축 조정
dataArray = np.zeros(100)


In [33]:
def clusterAgeModel(rate,st,end):
    for i in range(st,end):
        dataArray[i] = (rate**(i-end))-1         
    sumValue = np.sum(dataArray)
    rateData = dataArray / sumValue
    return rateData

In [34]:
%time clusterAgeModel(0.7,10,40)

CPU times: user 128 µs, sys: 0 ns, total: 128 µs
Wall time: 136 µs


array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 3.00060870e-01, 2.10040580e-01,
       1.47026377e-01, 1.02916435e-01, 7.20394754e-02, 5.04256038e-02,
       3.52958937e-02, 2.47050966e-02, 1.72915386e-02, 1.21020480e-02,
       8.46940462e-03, 5.92655423e-03, 4.14655897e-03, 2.90056228e-03,
       2.02836460e-03, 1.41782622e-03, 9.90449356e-04, 6.91285551e-04,
       4.81870888e-04, 3.35280624e-04, 2.32667439e-04, 1.60838209e-04,
       1.10557748e-04, 7.53614259e-05, 5.07240002e-05, 3.34778022e-05,
       2.14054636e-05, 1.29548266e-05, 7.03938064e-06, 2.89856850e-06,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
      

# 사망률 적용

In [35]:
def makeAlivePerson(data, deathRate):

    resultData = data - torch.round(data*deathRate)
    
    return resultData

# 은퇴율 적용

In [36]:
def makeWorkPerson(data,tuningSet):
    valueList = torch.zeros([2,100]).to(device=cuda)
    c1 = tuningSet[2][0]/((tuningSet[0][0]**(100-tuningSet[1][0]))-1)
    c2 = tuningSet[2][1]/((tuningSet[0][1]**(100-tuningSet[1][1]))-1)
    for i in range(100):
        result1 = ((tuningSet[0][0]**(i-tuningSet[1][0]))-1)*c1
        result2 = ((tuningSet[0][1]**(i-tuningSet[1][1]))-1)*c2
        valueList[0][i] = 0 if result1<0 else 1 if result1>1 else  result1
        valueList[1][i] = 0 if result2<0 else 1 if result2>1 else  result2

    result = data - torch.round(data*valueList)
    
    return result

# 출신 연령별 분류모델

In [37]:
def makeArrayUseModel(tuningList):
    resultData = torch.zeros([6,100]).to(device=cuda)
    resultData[0] = clusterAgeModel(tuningList[0][0], tuningList[1][0], tuningList[2][0]) # 의대 남
    resultData[1] = clusterAgeModel(tuningList[0][1], tuningList[1][1], tuningList[2][1]) # 의대 여
    resultData[2] = clusterAgeModel(tuningList[0][2], tuningList[1][2], tuningList[2][2]) # 의전원 남
    resultData[3] = clusterAgeModel(tuningList[0][3], tuningList[1][3], tuningList[2][3]) # 의전원 여
    resultData[4] = clusterAgeModel(tuningList[0][4], tuningList[1][4], tuningList[2][4]) # 재시험 남
    resultData[5] = clusterAgeModel(tuningList[0][5], tuningList[1][5], tuningList[2][5]) # 재시험 여

    return resultData

# 신규인원 생성

In [38]:
def makeNewPerson(data, tuningSet):
    oldSize = 100
    yearSize = 43
    modelSize = 6
    
    #모델 적용 배열
    applyModelArray = torch.zeros([yearSize,modelSize,oldSize]).to(device=cuda)

    #신규인원 배열
    newPersonArray = torch.zeros([yearSize,2,oldSize]).to(device=cuda)

    
    modelAry = makeArrayUseModel(tuningSet)
    
    for i in range(yearSize):
        for j in range(modelSize):
            applyModelArray[i][j] = torch.round(modelAry[j]*data[i][j])

            if j%2==0:
                newPersonArray[i][0] += applyModelArray[i][j]
            else:
                newPersonArray[i][1] += applyModelArray[i][j]
    
    resultData = newPersonArray
    
    return resultData

In [39]:
tuningSetAgeRate = [[0.9,0.6,0.1,0.2,0.9,0.6],[26,26,28,28,27,27],[39, 39, 39, 39, 39, 39]]
newPerson = makeNewPerson(tcNpData, tuningSetAgeRate)
%time newPerson[0]

TypeError: can't assign a numpy.ndarray to a torch.cuda.FloatTensor

# 최종인원 도출

In [40]:
def shiftOld(data):
    dataArray = torch.zeros([2,100]).to(device=cuda)
    dataArray[0] = torch.roll(data[0], 1)  
    dataArray[1] = torch.roll(data[1], 1)
    dataArray[0][0] = 0
    dataArray[1][0] = 0
    
    return dataArray

def makeResultPersonArray(newPersonArray, tuningSet):
    #누적인원 배열
    yearSize = 43
    resultPersonArray = torch.zeros([43,2,100]).to(device=cuda)

    for i in range(yearSize):
        if i>0:
            # 1살 올리기 / shiftData -> 2 x 100
            shiftData = shiftOld(resultPersonArray[i-1])
            # 사망률 적용 / aliveData -> 2 x 100
            aliveData = makeAlivePerson(shiftData, i)
            #은퇴율 적용 / workData -> 2 x 100
            workData = makeWorkPerson(aliveData, tuningSet)
            # 최종 계산
            resultPersonArray[i] =  workData + newPersonArray[i]
        else:
            resultPersonArray[i] =  newPersonArray[i]
            
            
    return resultPersonArray

# 연령별 합산

In [41]:
def sumPeopleUseAge(data):
    yearSize = 43
    
    resultData = torch.zeros([yearSize, 3, 12]).to(device=cuda)
    
    for i in range(yearSize):
        for j in range(2):
            # 남/여 소계 계산
            resultData[i][j][0] = torch.sum(data[i][j])
            resultData[i][j][1] = torch.sum(data[i][j][0:30])
            resultData[i][j][2] = torch.sum(data[i][j][30:40])
            resultData[i][j][3] = torch.sum(data[i][j][40:50])
            resultData[i][j][4] = torch.sum(data[i][j][50:60])
            resultData[i][j][5] = torch.sum(data[i][j][60:])
            
            # 남/여 비율 계산
            for n in range(6):
                resultData[i][j][6+n] = resultData[i][j][n]/resultData[i][2][0] if resultData[i][2][0] > 0 else 0
        
        for j in range(6):
            # 합계 계산
            resultData[i][2][j] = resultData[i][0][j] + resultData[i][1][j]  
            # 합계 비율 계산
            resultData[i][2][6+j] = resultData[i][2][j]/resultData[i][2][0] if resultData[i][2][0] > 0 else 0
            # 남/여 비율 계산
            resultData[i][0][6+j] = resultData[i][0][j]/resultData[i][2][0] if resultData[i][2][0] > 0 else 0
            resultData[i][1][6+j] = resultData[i][1][j]/resultData[i][2][0] if resultData[i][2][0] > 0 else 0


    return resultData

# 코스트 계산

In [42]:
def calculateCost(data):   
    resultData = torch.zeros([6,5,5]).to(device=cuda)

    for i in range(6):
        resultData[i][0] = data[39-i][0][1:6] - tcNpAgeData[5-i][8:13] # manNumCost
        resultData[i][1] = data[39-i][1][1:6] - tcNpAgeData[5-i][14:19] # womanNumCost
        resultData[i][2] = data[39-i][2][1:6] - tcNpAgeData[5-i][2:7] # totalNumCost
        
        resultData[i][3] = data[39-i][0][7:12] - tcNpAgeData[5-i][26:31] # manRateCost
        resultData[i][4] = data[39-i][1][7:12] - tcNpAgeData[5-i][32:37] # womanRateCost

    return resultData

In [43]:
def getCost(npData, tuningSet):
    newPerson = makeNewPerson(npData, tuningSet[0])
    resultPerson = makeResultPersonArray(newPerson,tuningSet[1])
    # sumData -> 43 x 3 x 12
    sumData = sumPeopleUseAge(resultPerson)
    costData = calculateCost(sumData)
    
    return [sumData, costData]

In [45]:
tuningSetAgeRate = [[0.9,0.6,0.1,0.2,0.9,0.6],[25,25,27,27,26,26],[39, 39, 39, 39, 39, 39]]
tuningSetRetireRate = [[1.1, 1.1],[30, 30],[0.8, 0.8]]
tuningSet = [tuningSetAgeRate, tuningSetRetireRate]

%time data = getCost(tcNpData,tuningSet)

TypeError: can't assign a numpy.ndarray to a torch.cuda.FloatTensor

In [51]:
np.shape(data)
data[1][0]

array([[-4.67000000e+02,  2.37800000e+03,  5.30000000e+01,
         3.89200000e+03,  2.40000000e+01],
       [-8.04000000e+02,  1.43000000e+03,  1.59900000e+03,
         1.77800000e+03,  3.79000000e+02],
       [-1.27100000e+03,  3.80800000e+03,  1.65200000e+03,
         5.67000000e+03,  4.03000000e+02],
       [-9.67193538e-03,  2.44147423e-03, -2.19755300e-02,
         1.98828362e-02, -8.42710549e-03],
       [-1.10848089e-02,  2.53161360e-03,  9.49110965e-03,
         1.41859436e-02,  2.62640249e-03]])

In [119]:
dfResultCost = pd.DataFrame(data[1][0])
# dfResultCost.columns = ["남성오차", "여성오차", "남여총계오차","남비율오차", "여비율오차", "비율총계오차"]
# dfResultCost.index = ["2011", "2012", "2013","2014", "2015", "2016", "합계"]
dfResultCost

Unnamed: 0,0,1,2,3,4
0,-670.0,-868.0,-530.0,3033.0,479.0
1,399.0,986.0,1131.0,1463.0,172.0
2,-271.0,118.0,601.0,4496.0,651.0
3,-0.009596,-0.019876,-0.018022,0.020289,-0.000227
4,0.001907,0.003471,0.007959,0.012939,0.001157


In [120]:
dfResult = pd.DataFrame(data[0][0])
indexList = []
indexList.append('1977 남성')
indexList.append('1977 여성')
indexList.append('1977 합계')
    
for i in range(42):
    dfResult = pd.concat([dfResult,pd.DataFrame(data[0][i+1])])
    indexList.append(str(i+1978)+' 남성')
    indexList.append(str(i+1978)+' 여성')
    indexList.append(str(i+1978)+' 합계')

dfResult.columns = ['합계', '0~29', '30~39', '40~49', '50~59', '60~', '합계/비율', '0~29/비율','30~39/비율', '40~49/비율', '50~59/비율', '60~']
dfResult.index = indexList
dfResult

Unnamed: 0,합계,0~29,30~39,40~49,50~59,60~,합계/비율,0~29/비율,30~39/비율,40~49/비율,50~59/비율,60~.1
1977 남성,1028.0,665.0,363.0,0.0,0.0,0.0,0.858097,0.555092,0.303005,0.0,0.0,0.0
1977 여성,170.0,156.0,14.0,0.0,0.0,0.0,0.141903,0.130217,0.011686,0.0,0.0,0.0
1977 합계,1198.0,821.0,377.0,0.0,0.0,0.0,1.0,0.685309,0.314691,0.0,0.0,0.0
1978 남성,2061.0,1237.0,824.0,0.0,0.0,0.0,0.859825,0.516062,0.343763,0.0,0.0,0.0
1978 여성,336.0,300.0,36.0,0.0,0.0,0.0,0.140175,0.125156,0.015019,0.0,0.0,0.0
1978 합계,2397.0,1537.0,860.0,0.0,0.0,0.0,1.0,0.641218,0.358782,0.0,0.0,0.0
1979 남성,3104.0,1699.0,1399.0,6.0,0.0,0.0,0.859596,0.470507,0.387427,0.001662,0.0,0.0
1979 여성,507.0,434.0,73.0,0.0,0.0,0.0,0.140404,0.120188,0.020216,0.0,0.0,0.0
1979 합계,3611.0,2133.0,1472.0,6.0,0.0,0.0,1.0,0.590695,0.407643,0.001662,0.0,0.0
1980 남성,4221.0,2076.0,2122.0,23.0,0.0,0.0,0.857927,0.421951,0.431301,0.004675,0.0,0.0


In [220]:
len(dfResult)

129

In [221]:
npAgeData = np.array(pd.read_excel('../../data/의사연령별분포.xlsx'))
pd.DataFrame(npAgeData).set_index(0)

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
2011.0,82685.0,839.0,23488.0,27826.0,19466.0,11066.0,64371.0,561.0,14723.0,22244.0,16881.0,9962.0,18314.0,278.0,8765.0,5582.0,2585.0,1104.0,1.0,0.010147,0.284066,0.33653,0.235424,0.133833,0.778509,0.006785,0.178061,0.269021,0.20416,0.120481,0.221491,0.003362,0.106005,0.067509,0.031263,0.013352
2012.0,85073.0,1963.0,25100.0,27820.0,19381.0,10809.0,65989.0,1356.0,15836.0,22270.0,16796.0,9731.0,19084.0,607.0,9264.0,5550.0,2585.0,1078.0,1.0,0.023074,0.295041,0.327013,0.227816,0.127056,0.775675,0.015939,0.186146,0.261775,0.19743,0.114384,0.224325,0.007135,0.108895,0.065238,0.030386,0.012671
2013.0,88945.0,3459.0,27682.0,27933.0,19357.0,10514.0,68330.0,2297.0,17412.0,22366.0,16775.0,9480.0,20615.0,1162.0,10270.0,5567.0,2582.0,1034.0,1.0,0.038889,0.311226,0.314048,0.217629,0.118208,0.768228,0.025825,0.195761,0.251459,0.1886,0.106583,0.231772,0.013064,0.115465,0.062589,0.029029,0.011625
2014.0,91197.0,5078.0,28738.0,27892.0,19258.0,10231.0,69677.0,3258.0,18114.0,22374.0,16695.0,9236.0,21520.0,1820.0,10624.0,5518.0,2563.0,995.0,1.0,0.055682,0.31512,0.305843,0.211169,0.112186,0.764027,0.035725,0.198625,0.245337,0.183065,0.101275,0.235973,0.019957,0.116495,0.060506,0.028104,0.01091
2015.0,93484.0,6955.0,29528.0,27928.0,19203.0,9870.0,71066.0,4328.0,18808.0,22359.0,16656.0,8915.0,22418.0,2627.0,10720.0,5569.0,2547.0,955.0,1.0,0.074398,0.315862,0.298746,0.205415,0.10558,0.760194,0.046297,0.20119,0.239175,0.17817,0.095364,0.239806,0.028101,0.114672,0.059572,0.027245,0.010216
2016.0,96180.0,8795.0,30729.0,27938.0,19154.0,9564.0,72818.0,5272.0,19852.0,22420.0,16642.0,8632.0,23362.0,3523.0,10877.0,5518.0,2512.0,932.0,1.0,0.091443,0.319495,0.290476,0.199147,0.099439,0.757101,0.054814,0.206405,0.233105,0.17303,0.089748,0.242899,0.036629,0.11309,0.057372,0.026118,0.00969


In [222]:
npData = np.around(np.array(dataDf)[:,1:7])
npData

tuningSetAgeRate = [[0.7,0.7,0.9,0.9,0.9,0.9],[26,26,28,28,27,27],[40, 40, 40, 40, 40, 40]]
tuningSetRetireRate = [[1.1, 1.1],[25, 25],[1, 1]]
tuningSet = [tuningSetAgeRate, tuningSetRetireRate]
print(tuningSet)
data = getCost(npData,tuningSet)



tuningList = range(0.1, 1.0, 0.1)



def makeTuningSet(tuningSetAgeRate):
    
    
    return 





[[[0.7, 0.7, 0.9, 0.9, 0.9, 0.9], [26, 26, 28, 28, 27, 27], [40, 40, 40, 40, 40, 40]], [[1.1, 1.1], [25, 25], [1, 1]]]


TypeError: 'float' object cannot be interpreted as an integer

In [86]:
def test():
    npData = np.around(np.array(dataDf)[:,1:7])
    npData

    meanCost = np.array([])
    meanParameter = np.array([])
    resultData = []

    for h in np.arange(0.1,1,0.1):
        for i in np.arange(0.1,1,0.1):
            for j in np.arange(0.1,1,0.1):
                for k in np.arange(0.1,1,0.1):
                    for m in np.arange(30,40,1):
                        tuningSetAgeRate = [[h,i,j,k,h,i],[26,26,28,28,27,27],[m, m, m, m, m, m]]
                        tuningSetRetireRate = [[0,0],[0, 0],[0, 0]]
                        tuningSet = [tuningSetAgeRate, tuningSetRetireRate]
                        data = getCost(npData,tuningSet)
                        resultData.append([[h,i,j,k,m ] ,data[1]])
    return resultData
                
%time resultData = np.array(test())

CPU times: user 22min 9s, sys: 2.56 s, total: 22min 12s
Wall time: 22min 13s


TypeError: Mismatch between array dtype ('object') and format specifier ('%f,%f')

In [89]:
saveData = resultData

In [104]:
dfResultCost = pd.DataFrame(saveData[0][1][0])
dfResultCost.columns = ["20대", "30대", "40대","50대", "60~"]
dfResultCost.index = ["남", "여", "남여","남비율", "여비율"]
dfResultCost

Unnamed: 0,20대,30대,40대,50대,60~
남,574.0,3746.0,223.0,3087.0,-1725.0
여,-174.0,2025.0,1030.0,1463.0,70.0
남여,400.0,5771.0,1253.0,4550.0,-1655.0
남비율,7.9e-05,0.015175,-0.020492,0.012221,-0.024893
여비율,-0.005183,0.008057,0.004113,0.011207,-0.000282


In [111]:
cost = 0
cost += np.sum(np.abs(saveData[0][1][0][3:5,:]))
cost += np.sum(np.abs(saveData[0][1][1][3:5,:]))
cost += np.sum(np.abs(saveData[0][1][2][3:5,:]))
cost += np.sum(np.abs(saveData[0][1][3][3:5,:]))
cost += np.sum(np.abs(saveData[0][1][4][3:5,:]))
cost += np.sum(np.abs(saveData[0][1][5][3:5,:]))
cost

1.3705501773291702

array([[[ 4.84600000e+03,  5.93200000e+03,  4.05000000e+02,
         -2.64000000e+02, -6.14600000e+03],
        [ 2.53700000e+03,  9.33000000e+02,  2.20000000e+02,
          1.51000000e+02, -4.90000000e+02],
        [ 7.38300000e+03,  6.86500000e+03,  6.25000000e+02,
         -1.13000000e+02, -6.63600000e+03],
        [ 4.77618820e-02,  4.47272989e-02, -1.68730596e-02,
         -1.85041974e-02, -7.22360178e-02],
        [ 2.50426604e-02, -5.18653299e-05, -2.97185497e-03,
         -8.73945548e-04, -6.02090073e-03]],

       [[ 6.37600000e+03,  7.28800000e+03,  7.54000000e+02,
         -1.65200000e+03, -7.45900000e+03],
        [ 3.35900000e+03,  1.12000000e+03,  1.46000000e+02,
         -2.62000000e+02, -7.07000000e+02],
        [ 9.73500000e+03,  8.40800000e+03,  9.00000000e+02,
         -1.91400000e+03, -8.16600000e+03],
        [ 6.62845770e-02,  5.97598075e-02, -1.69327805e-02,
         -3.63857358e-02, -9.02231575e-02],
        [ 3.50402884e-02,  1.53108043e-03, -4.66554296e-03,
  

In [236]:
npData = np.around(np.array(dataDf)[:,1:7])
npData

meanCost = np.array([])
meanParameter = np.array([])

In [94]:
range(len(resultData))

range(0, 65610)

In [113]:
minCost = [1000000,[]]
for i in range(65610):
    cost = 0
    cost += np.sum(np.abs(saveData[i][1][0][3:5,:]))
    cost += np.sum(np.abs(saveData[i][1][1][3:5,:]))
    cost += np.sum(np.abs(saveData[i][1][2][3:5,:]))
    cost += np.sum(np.abs(saveData[i][1][3][3:5,:]))
    cost += np.sum(np.abs(saveData[i][1][4][3:5,:]))
    cost += np.sum(np.abs(saveData[i][1][5][3:5,:]))
    cost

    if cost<minCost[0]:
        minCost = [cost,resultData[i][0]]
        
minCost

[0.839513195479443, [0.9, 0.6, 0.1, 0.2, 39]]

[66763.0, [0.9, 0.30000000000000004, 1.1, 0.7000000000000001]]
[66763.0, [0.9, 0.30000000000000004, 1.1, 1.1]]
[70727.0, [0.9, 0.9, 1.1, 0.9]]

[65443.0, [0.9, 0.9, 1.1, 0.30000000000000004]]
[65211.0, [0.9, 0.1, 1.1, 0.7999999999999999]]