In [1]:
import pandas as pd

In [11]:
# Load the CT protocol table from a CSV file in the current working directory.
df_protocol = pd.read_csv('CT_protocol_data.csv')

In [14]:
# Inspect the column labels of the protocol table.
df_protocol.columns

Index(['scan_protocol_no', 'scan_protocol_name', 'scan_series', 'Speed',
       'Type', 'Rows', 'HiRes', 'Shuttle', 'Tilt', 'SFOV', 'kV', 'SmartmA',
       'NoiseIndex', 'MinmA', 'MaxmA', 'Message', 'Timer', 'Light', 'CTDI',
       'CTDI NV', 'DLP', 'DLP NV', 'mA', ' pitch'],
      dtype='object')

In [15]:
# Protocol-table columns earmarked for removal (judging by the variable name).
# NOTE(review): no visible cell applies this list to df_protocol, yet the
# df_protocol displayed further down no longer contains these columns —
# confirm that the drop step still exists so Restart & Run All reproduces it.
drop_columns = [
    'Type',
    'HiRes',
    'Shuttle',
    'Tilt',
    'Message',
    'Timer',
    'Light',
    'CTDI NV',
    'DLP',
    'DLP NV',
]

In [31]:
# Build the merge key 'scan_protocol' by concatenating the protocol number and
# the protocol name. Vectorized Series addition replaces the row-wise
# apply(axis=1): same result for string columns, no Python-level loop, and it
# also works when the frame is empty.
df_protocol['scan_protocol'] = df_protocol['scan_protocol_no'] + df_protocol['scan_protocol_name']

In [32]:
# Display the protocol table, now including the derived 'scan_protocol' key.
df_protocol

Unnamed: 0,scan_protocol_no,scan_protocol_name,scan_series,Speed,Rows,SFOV,kV,SmartmA,NoiseIndex,MinmA,MaxmA,CTDI,mA,pitch,scan_protocol
0,1.5,QQ Brain-Head Routine TFI-H,Series 1,0.8,64,Adult Head,100,true,4.0,150.0,515.0,55.19,,,1.5 QQ Brain-Head Routine TFI-H
1,1.3,Dental non-Helical,,1.0,SmartCollimation,Adult Head,140,true,18.0,80.0,150.0,29.72,,,1.3 Dental non-Helical
2,1.4,QQ Brain non-Helical Routine,,1.0,256,Adult Head,100,true,4.0,200.0,720.0,66.38,,,1.4 QQ Brain non-Helical Routine
3,1.11,Head CTA 1Volume SmartPrep Routine,Series 2 Group 1 Scan Settings,1.0,SmartCollimation,Adult Head,100,true,3.2,200.0,720.0,55.26,,,1.11 Head CTA 1Volume SmartPrep Routine
4,1.11,Head CTA 1Volume SmartPrep Routine,Series 3 Group 1 Scan Settings,1.0,SmartCollimation,Adult Head,100,true,3.2,200.0,570.0,52.50,,,1.11 Head CTA 1Volume SmartPrep Routine
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
516,17.1,Child L-Spine Routine,Series 2 Group 1 Scan Settings,0.6,128,Small Body,100,true,11.6,150.0,700.0,8.46,,,17.1 Child L-Spine Routine
517,18.1,Child Pelvis Rotine,Series 2 Group 1 Scan Settings,0.6,128,Small Body,100,false,11.6,150.0,720.0,16.99,,,18.1 Child Pelvis Rotine
518,18.1,Child Pelvis Rotine,Series 3 Group 1 Scan Settings,0.6,128,Medium,100,true,11.6,150.0,720.0,20.04,,,18.1 Child Pelvis Rotine
519,19.1,Child Lower Extremity Heiical Routine,Series 2 Group 1 Scan Settings,0.6,128,Small Body,100,true,4.1,100.0,570.0,8.58,,,19.1 Child Lower Extremity Heiical Routine


In [42]:
def preprocess_import_data(df):
    """Clean the imported scan data and rename its columns to English.

    The frame is modified in place (existing callers rely on this) and is
    also returned so the call can be used in an expression.

    params:
        df: DataFrame as read from the scan-data files. It must contain the
            'Unnamed: 0' column, the Japanese-named source columns that are
            dropped/renamed below, and 'kV', 'rotation_time', 'scan_series'.

    Return:
        df: the same DataFrame object after cleaning.

    Raises:
        KeyError: if a column that is to be dropped is missing.
    """
    df.drop(columns='Unnamed: 0', inplace=True)

    # Remove features that are not needed for prediction: exam date,
    # study_date, accession number, patient ID, preset name and DLP.
    drop_list = ['実施検査日(YYYYMMDD)', 'study_date', 'ACCESSIONNO', '患者ID', 'プリセット名称', 'DLP']
    df.drop(columns=drop_list, inplace=True)

    # Rename the remaining Japanese column names.
    df.rename(columns={'検査時年齢': 'age', '性別': 'gender', '身長（ｃｍ）': 'height_cm', '体重（ｋｇ）': 'weight_kg',
                       '依頼科名称': 'department', '入院病棟名称': 'hospital_ward', '実施検査室名称': 'room', '撮影機種': 'modality',
                       '部位名称': 'scan_area', '検査方法': 'scan_method'}, inplace=True)

    # Keep only the scanner used for prediction.
    df.query('modality == "Revolution"', inplace=True)

    # Only a single room / modality is assumed for now, so these columns
    # carry no information any more.
    df.drop(columns=['room', 'modality'], inplace=True)

    # A missing hospital_ward means the patient was an outpatient ('外来').
    df['hospital_ward'] = df['hospital_ward'].fillna('外来')

    # Renumber rows 0..n-1; drop=True discards the old index directly instead
    # of materialising it as an 'index' column that must be dropped afterwards.
    df.reset_index(drop=True, inplace=True)
    df.drop(columns=['kV', 'rotation_time', 'scan_series'], inplace=True)

    return df
    

In [43]:
# Read the four monthly scan exports (2021-09 .. 2021-12) and stack them
# into one frame, in chronological order.
df = pd.concat([
    pd.read_excel(xlsx_path)
    for xlsx_path in (
        '../scan_data/202109_all_scan_data.xlsx',
        '../scan_data/202110_all_scan_data.xlsx',
        '../scan_data/202111_all_scan_data.xlsx',
        '../scan_data/202112_all_scan_data.xlsx',
    )
])

# The frame is cleaned in place; the call's return value is not used here.
preprocess_import_data(df)
df.head(3)

Unnamed: 0,age,gender,height_cm,weight_kg,adult_child,department,hospital_ward,scan_area,scan_method,scan_protocol,mA,CTDI
0,72,M,170.0,83.0,成人,救急科,外来,胸部〜骨盤CT,造影,5.7 P+CE Chest-Pelvis Routine,366.41,16.64
1,72,M,170.0,83.0,成人,救急科,外来,胸部〜骨盤CT,造影,5.7 P+CE Chest-Pelvis Routine,366.41,16.61
2,85,M,171.0,58.9,成人,循環器内科,外来,胸部〜骨盤CT,単純,5.7 P+CE Chest-Pelvis Routine,234.59,16.66


In [44]:
# Inner join: keep only the scans whose 'scan_protocol' key is present in the
# protocol table.
df.merge(df_protocol, how='inner', on='scan_protocol')

Unnamed: 0,age,gender,height_cm,weight_kg,adult_child,department,hospital_ward,scan_area,scan_method,scan_protocol,...,Rows,SFOV,kV,SmartmA,NoiseIndex,MinmA,MaxmA,CTDI_y,mA_y,pitch
0,91,F,150.0,40.00,成人,脳神経外科,外来,脳CT,単純,1.7 Brain Head Routine TFI-H,...,64,Adult Head,100,true,4.0,150.0,515.0,55.19,,
1,91,F,150.0,40.00,成人,脳神経外科,外来,脳CT,単純,1.7 Brain Head Routine TFI-H,...,64,Adult Head,100,true,4.0,150.0,515.0,55.19,,
2,81,F,160.0,49.10,成人,救急科,外来,脳CT,単純,1.7 Brain Head Routine TFI-H,...,64,Adult Head,100,true,4.0,150.0,515.0,55.19,,
3,33,M,168.0,67.20,成人,脳神経外科,外来,脳CT,単純,1.7 Brain Head Routine TFI-H,...,64,Adult Head,100,true,4.0,150.0,515.0,55.19,,
4,49,M,176.0,73.00,成人,精神神経科,外来,脳CT,単純,1.7 Brain Head Routine TFI-H,...,64,Adult Head,100,true,4.0,150.0,515.0,55.19,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2748,81,M,160.0,62.85,成人,救急科,外来,胸椎・胸髄CT,Dual Energy,7.7 GSIX Th-Spine Routine,...,64,Medium,100,,,,,14.49,270,63
2749,92,M,165.0,54.80,成人,脳神経外科,外来,脳CTA,CTA,1.14 Head-Neck CTA Helical SmartPrep Routine,...,64,Adult Head,100,false,11.6,200.0,700.0,6.63,,
2750,92,M,165.0,54.80,成人,脳神経外科,外来,脳CTA,CTA,1.14 Head-Neck CTA Helical SmartPrep Routine,...,64,Adult Head,100,false,8.0,200.0,570.0,34.33,,
2751,92,M,165.0,54.80,成人,脳神経外科,外来,脳CTA,CTA,1.14 Head-Neck CTA Helical SmartPrep Routine,...,64,Adult Head,100,false,11.6,200.0,700.0,6.63,,


In [45]:
# Number of scan rows after preprocessing (baseline for the merge row counts).
df.shape[0]

2845

In [46]:
# Left join: keep every scan row; protocol columns are NaN where the
# 'scan_protocol' key has no match (e.g. '5.7 P+CE Chest-Pelvis Routine' in
# the output below).
# NOTE(review): the result has more rows than len(df) — the protocol table
# appears to hold several rows (one per scan series) for the same
# 'scan_protocol', so matching scans are duplicated. Confirm this is intended.
df.merge(df_protocol, how='left', on='scan_protocol')

Unnamed: 0,age,gender,height_cm,weight_kg,adult_child,department,hospital_ward,scan_area,scan_method,scan_protocol,...,Rows,SFOV,kV,SmartmA,NoiseIndex,MinmA,MaxmA,CTDI_y,mA_y,pitch
0,72,M,170.0,83.0,成人,救急科,外来,胸部〜骨盤CT,造影,5.7 P+CE Chest-Pelvis Routine,...,,,,,,,,,,
1,72,M,170.0,83.0,成人,救急科,外来,胸部〜骨盤CT,造影,5.7 P+CE Chest-Pelvis Routine,...,,,,,,,,,,
2,85,M,171.0,58.9,成人,循環器内科,外来,胸部〜骨盤CT,単純,5.7 P+CE Chest-Pelvis Routine,...,,,,,,,,,,
3,91,F,150.0,40.0,成人,脳神経外科,外来,脳CT,単純,1.7 Brain Head Routine TFI-H,...,64,Adult Head,100.0,true,4.0,150.0,515.0,55.19,,
4,91,F,150.0,40.0,成人,脳神経外科,外来,脳CT,単純,1.7 Brain Head Routine TFI-H,...,64,Adult Head,100.0,true,4.0,150.0,515.0,55.19,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3765,74,M,172.0,66.6,成人,総合診療科,５西,胸部〜骨盤CT,造影,5.7 P+CE Chest-Pelvis Routine,...,,,,,,,,,,
3766,74,M,172.0,66.6,成人,総合診療科,５西,胸部〜骨盤CT,造影,5.7 P+CE Chest-Pelvis Routine,...,,,,,,,,,,
3767,67,M,167.0,64.0,成人,消化器内科,外来,胸部〜骨盤CT,造影,5.13 Aorta CTA Routine SmartPrep,...,,,,,,,,,,
3768,67,M,167.0,64.0,成人,消化器内科,外来,胸部〜骨盤CT,造影,5.13 Aorta CTA Routine SmartPrep,...,,,,,,,,,,
