# Import Libraries
#### Standard

In [1]:
import os
import numpy as np
import pandas as pd

#### Visualization

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

#### Visualization Setting

In [3]:
color_palette = 'Set3'
title_size = 20
sns.set_style("whitegrid")

In [4]:
pd.set_option('display.max_rows', 5, 'display.max_columns', None)

# Import Data
Data Preprocessing 단계에서 생성된 통합 데이터 테이블을 불러온다

In [101]:
root = os.path.join(os.getcwd(), 'DATA')

df_raw = pd.read_csv(os.path.join(root, 'raw_data.csv'), index_col = 0)
df_project = pd.read_csv(os.path.join(root, 'project_data.csv'), index_col = 0)

code_jeom = pd.read_csv(os.path.join(root, 'jeom_code.csv'), index_col = 0)
code_jikwhi = pd.read_csv(os.path.join(root, 'jikwhi_code.csv'), index_col = 0)
code_jkmu = pd.read_csv(os.path.join(root, 'jkmu_code.csv'), index_col = 0)

직원 데이터는 지속적으로 참고해야 한다

In [17]:
df_jikwon = pd.read_csv(os.path.join(root, 'jikwon.csv'), encoding='cp949')

----

# Drop Data
2020년 이후로 프로그램을 신규/수정한 적 없는 직원들이 존재한다

In [102]:
df_raw[df_raw['신청번호'].isnull()].sort_values(['HOBONG'])

Unnamed: 0,JIKWON_NO,JEOM_NO,JIKGUN,JIKGEUB,JIKWHI,JIKWHI2,JUJKMU_C,JUJKMU_RATE,BUJKMU_C,BUJKMU_RATE,JUMJANG_G,HOBONG,BUIM_ILJA,시스템구분,COL02,COL03,프로그램종류,프로그램명,프로젝트번호,변경일자,변경구분,신청번호,경로
83715,21501238,507,20040,99999,20019,9,,,,,1,30,20210101,,,,,,,,,,
291153,6150020,69,20004,1,20023,1,10895.0,100.0,,0.0,1,980,20200203,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72930,21502323,61,30000,0,20425,9,,,,,0,99998,20210318,,,,,,,,,,
369698,10618082,61,40002,99999,20456,9,10961.0,100.0,,0.0,0,99998,20200203,,,,,,,,,,


이들의 직위를 확인해보면..

In [103]:
with pd.option_context('display.max_rows', df_raw.shape[0]+1):
    display(pd.merge(left=df_raw[df_raw['프로그램명'].isnull()][['JIKWON_NO', 'JIKWHI']], right=code_jikwhi, left_on='JIKWHI', right_on='JIKWHI').groupby(['JIKWHI', 'JIKWHI_NAME']).count())

Unnamed: 0_level_0,Unnamed: 1_level_0,JIKWON_NO
JIKWHI,JIKWHI_NAME,Unnamed: 2_level_1
20019,본부장,1
20023,부장,6
20112,차장,18
20133,부부장,39
20161,과장,7
20164,대리,4
20258,행원,26
20399,팀장(부서장대우),4
20425,전문계약직원,8
20456,사무직원,7


프로그램 변경을 안한 직원 제외

In [107]:
print(df.shape, end="->")
df = df_raw.dropna(subset=['신청번호'])
print(df.shape)

(369919, 23)->(369919, 23)


# ㄱ
시스템구분에 따라 분류해보기

In [108]:
df[['시스템구분']].drop_duplicates().reset_index(drop=True)

Unnamed: 0,시스템구분
0,코어
1,단위
2,웹컨텐츠
3,프로프레임4.0


In [110]:
with pd.option_context('display.max_rows', df.shape[0]+1):
    display(df[['JIKWON_NO', '시스템구분', '신청번호']].groupby(['JIKWON_NO', '시스템구분']).count())

Unnamed: 0_level_0,Unnamed: 1_level_0,신청번호
JIKWON_NO,시스템구분,Unnamed: 2_level_1
6126995,단위,19
6126995,프로프레임4.0,10
6127258,프로프레임4.0,20
6128040,단위,91
6128040,웹컨텐츠,25
6129307,단위,35
6130399,코어,69
6130860,코어,175
6132790,프로프레임4.0,1
6133819,코어,946


In [118]:
df[['프로그램종류']].drop_duplicates().reset_index(drop=True)

Unnamed: 0,프로그램종류
0,온라인서비스
1,채널화면_txt
...,...
72,웹_C관련
73,웹_xadl(소스)


In [119]:
with pd.option_context('display.max_rows', df.shape[0]+1):
    display(df[['JIKWON_NO', '프로그램종류', '신청번호']].groupby(['JIKWON_NO', '프로그램종류']).count())

Unnamed: 0_level_0,Unnamed: 1_level_0,신청번호
JIKWON_NO,프로그램종류,Unnamed: 2_level_1
6126995,.net(dll),1
6126995,DBIO,4
6126995,HTML5_xml,6
6126995,Header FILE,1
6126995,SHELL,6
6126995,실행모듈(서버),2
6126995,온라인서비스,2
6126995,프로프레임소스(.c),6
6126995,화면_VB,1
6127258,DBIO,2


In [117]:
df.head(1)

Unnamed: 0,JIKWON_NO,JEOM_NO,JIKGUN,JIKGEUB,JIKWHI,JIKWHI2,JUJKMU_C,JUJKMU_RATE,BUJKMU_C,BUJKMU_RATE,JUMJANG_G,HOBONG,BUIM_ILJA,시스템구분,COL02,COL03,프로그램종류,프로그램명,프로젝트번호,변경일자,변경구분,신청번호,경로
0,20101491,63,20004,5,20164,3,11609.0,100.0,,0.0,0,43440,20210201,코어,External Channel,펌뱅킹,온라인서비스,sfis2100d.c,P2021-00342-01,2021-01-20,수정,2021-04-002421,/nbsdev/fib/fis/src


In [115]:
df_jikwon[df_jikwon['JIKWON_NO'] == 21100179]

Unnamed: 0,JIKWON_NO,NAME,JEOM_NO,JEOM_NAME,JIKGUN,JIKGEUB,JIKWHI,JIKWHI_NAME,JUMINNO,JIKWHI2,JUJKMU_C,JUJKMU_NM,JUJKMU_RATE,BUJKMU_C,BUJKMU_NM,BUJKMU_RATE,JUMJANG_G,HOBONG,TOIJIKGBN,BUIM_ILJA,LST_CHG_ILSI,SILNO_CID,CUSNO,SAME_MN_NO
10236,21100179,박현묵,62,정보개발부,20004,5,20258,행원,,3,11227.0,BDA (Big Data Architect),100.0,,,0.0,0,48315,0,20210111,20210120000000.0,bfEp2ytjPemjI,638584070,638584070
