### ライブラリのインポート

In [1]:
# ライブラリのインポート
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor


# Silence every warning for the rest of the session.
import warnings
warnings.filterwarnings(action='ignore')

# pandas display limits, set through the options namespace.
# Characters shown per cell (the default was 50).
pd.options.display.max_colwidth = 100
# Maximum number of rows printed.
pd.options.display.max_rows = 500
# Maximum number of columns printed.
pd.options.display.max_columns = 500

### データのインポート

In [2]:
# Import the dataset.
# NOTE: `read_excel` has no `index` parameter (that keyword belongs to the
# `to_excel` writer). Older pandas swallowed it silently; current releases
# raise TypeError on the unknown keyword, so it is not passed here.
Data2019 = pd.read_excel("kouhosha_senkyoku_2019.xlsx")

### データの確認

In [3]:
# Show the column labels of the loaded frame.
Data2019.columns

Index(['prefecture', 'num', 'candidate_J', 'age', 'party', 'status',
       'elected_count'],
      dtype='object')

In [4]:
# Print per-column dtypes and non-null counts.
Data2019.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215 entries, 0 to 214
Data columns (total 7 columns):
prefecture       215 non-null object
num              215 non-null int64
candidate_J      215 non-null object
age              215 non-null int64
party            215 non-null object
status           215 non-null object
elected_count    215 non-null int64
dtypes: int64(3), object(4)
memory usage: 11.8+ KB


### 前処理

In [5]:
# Rename a column: "prefecture" becomes "district".
Data2019 = Data2019.rename(columns={"prefecture": "district"})

In [6]:
# One-hot encode the party and status columns.
# The dtype is pinned to uint8 so the indicators are 0/1 integers regardless of
# the installed pandas version (pandas 2.0 changed the get_dummies default from
# uint8 to bool, which would turn the exported columns into True/False).
party_dummy = pd.get_dummies(Data2019["party"], dtype="uint8")
incumbent_dummy = pd.get_dummies(Data2019["status"], dtype="uint8")

In [7]:
# Preview the first rows only; with display.max_rows raised to 500 a bare
# `party_dummy` would render every row of the frame.
party_dummy.head(10)

Unnamed: 0,れい,公明,共産,国民,無所属,社民,立憲,維新,自民,諸派
0,0,0,0,0,0,0,0,0,0,1
1,0,0,0,0,0,0,0,0,1,0
2,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,1,0
5,0,0,0,0,0,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,1
7,0,0,0,1,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,1
9,0,0,0,0,0,0,0,0,0,1


In [8]:
# Rename the Japanese dummy labels to short ASCII column names.
party_dummy = party_dummy.rename(
    columns={
        "自民": "ldp",
        "公明": "kom",
        "共産": "jcp",
        "維新": "ishin",
        "立憲": "dpj",
        "社民": "syamin",
        "国民": "kokumin",
    }
)
incumbent_dummy = incumbent_dummy.rename(
    columns={"現": "incumbent", "新": "newcomer"}
)

# Keep only the columns that are used; every other dummy column is dropped.
party_dummy = party_dummy.loc[
    :, ["ldp", "kom", "jcp", "ishin", "dpj", "syamin", "kokumin"]
]
incumbent_dummy = incumbent_dummy.loc[:, ["incumbent", "newcomer"]]

In [9]:
# Preview the first rows only; with display.max_rows raised to 500 a bare
# `Data2019` would render every row of the frame.
Data2019.head(10)

Unnamed: 0,district,num,candidate_J,age,party,status,elected_count
0,北海道,0,森山佳則もりやまよしのり,52,諸派,新,0
1,北海道,1,高橋はるみたかはしはるみ,65,自民,新,0
2,北海道,2,畠山和也はたやまかずや,47,共産,新,0
3,北海道,3,岩瀬清次いわせせいじ,66,諸派,新,0
4,北海道,4,岩本剛人いわもとつよひと,54,自民,新,0
5,北海道,5,勝部賢志かつべけんじ,59,立憲,新,0
6,北海道,6,中村治なかむらおさむ,66,諸派,新,0
7,北海道,7,原谷那美はらやなみ,35,国民,新,0
8,北海道,8,山本貴平やまもとたかひら,44,諸派,新,0
9,青森,9,小山日奈子こやまひなこ,53,諸派,新,0


In [10]:
# Concatenate the dummy columns side by side with the main frame.
# Any dummy columns left over from a previous run of this cell are dropped
# first, so re-running the cell does not append duplicate columns.
Data2019 = pd.concat(
    [
        Data2019.drop(
            columns=[*party_dummy.columns, *incumbent_dummy.columns],
            errors="ignore",
        ),
        party_dummy,
        incumbent_dummy,
    ],
    axis=1,
)

In [11]:
# Re-check dtypes and non-null counts now that the dummy columns are attached.
Data2019.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215 entries, 0 to 214
Data columns (total 16 columns):
district         215 non-null object
num              215 non-null int64
candidate_J      215 non-null object
age              215 non-null int64
party            215 non-null object
status           215 non-null object
elected_count    215 non-null int64
ldp              215 non-null uint8
kom              215 non-null uint8
jcp              215 non-null uint8
ishin            215 non-null uint8
dpj              215 non-null uint8
syamin           215 non-null uint8
kokumin          215 non-null uint8
incumbent        215 non-null uint8
newcomer         215 non-null uint8
dtypes: int64(3), object(4), uint8(9)
memory usage: 13.7+ KB


In [12]:
# Narrow the definition of "incumbent": attach the councillors_senkyoku column
# read from a separate workbook.
councillors_senkyoku2019 = pd.read_excel("councillors_senkyoku2019.xlsx")

# Column assignment aligns on the row index, so any Data2019 row whose index
# label is absent from the workbook would silently become NaN. Fail loudly.
assert Data2019.index.isin(councillors_senkyoku2019.index).all(), (
    "councillors_senkyoku2019.xlsx does not cover every row of Data2019"
)
Data2019["councillors_senkyoku"] = councillors_senkyoku2019["councillors_senkyoku"]

In [13]:
# Write the combined frame to an Excel workbook, leaving out the row index.
Data2019.to_excel(excel_writer="Data2019.xlsx", index=False)

In [14]:
# Preview the first rows of the final frame; with display.max_rows raised to
# 500 a bare `Data2019` would render every row.
Data2019.head(10)

Unnamed: 0,district,num,candidate_J,age,party,status,elected_count,ldp,kom,jcp,ishin,dpj,syamin,kokumin,incumbent,newcomer,councillors_senkyoku
0,北海道,0,森山佳則もりやまよしのり,52,諸派,新,0,0,0,0,0,0,0,0,0,1,0
1,北海道,1,高橋はるみたかはしはるみ,65,自民,新,0,1,0,0,0,0,0,0,0,1,0
2,北海道,2,畠山和也はたやまかずや,47,共産,新,0,0,0,1,0,0,0,0,0,1,0
3,北海道,3,岩瀬清次いわせせいじ,66,諸派,新,0,0,0,0,0,0,0,0,0,1,0
4,北海道,4,岩本剛人いわもとつよひと,54,自民,新,0,1,0,0,0,0,0,0,0,1,0
5,北海道,5,勝部賢志かつべけんじ,59,立憲,新,0,0,0,0,0,1,0,0,0,1,0
6,北海道,6,中村治なかむらおさむ,66,諸派,新,0,0,0,0,0,0,0,0,0,1,0
7,北海道,7,原谷那美はらやなみ,35,国民,新,0,0,0,0,0,0,0,1,0,1,0
8,北海道,8,山本貴平やまもとたかひら,44,諸派,新,0,0,0,0,0,0,0,0,0,1,0
9,青森,9,小山日奈子こやまひなこ,53,諸派,新,0,0,0,0,0,0,0,0,0,1,0
