<a href="https://colab.research.google.com/github/ymuto0302/RW2025/blob/main/mushroom.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import fetch_openml

# Load the Mushroom dataset: fetch version 1 of 'mushroom' from OpenML,
# returned as pandas objects (as_frame=True).
mushroom = fetch_openml(
    name='mushroom',
    version=1,
    as_frame=True,
    parser='auto',
)

In [2]:
# Show the container types returned for the feature table and the target.
print(
    f"Type of mushroom.data: {type(mushroom.data)}",
    f"Type of mushroom.target: {type(mushroom.target)}",
    sep="\n",
)

Type of mushroom.data: <class 'pandas.core.frame.DataFrame'>
Type of mushroom.target: <class 'pandas.core.series.Series'>


In [3]:
# Preview the first five rows of the feature table.
mushroom.data.head(n=5)

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises%3F,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,x,s,n,t,p,f,c,n,k,e,...,s,w,w,p,w,o,p,k,s,u
1,x,s,y,t,a,f,c,b,k,e,...,s,w,w,p,w,o,p,n,n,g
2,b,s,w,t,l,f,c,b,n,e,...,s,w,w,p,w,o,p,n,n,m
3,x,y,w,t,p,f,c,n,n,e,...,s,w,w,p,w,o,p,k,s,u
4,x,s,g,f,n,f,w,b,k,t,...,s,w,w,p,w,o,e,n,a,g


In [5]:
# Bind the feature table and the target labels to the conventional names.
X = mushroom.data
y = mushroom.target

In [6]:
# Check for missing values: count the missing entries in each column.
X.isna().sum()

Unnamed: 0,0
cap-shape,0
cap-surface,0
cap-color,0
bruises%3F,0
odor,0
gill-attachment,0
gill-spacing,0
gill-size,0
gill-color,0
stalk-shape,0


In [7]:
# Remove the column that contains missing values ('stalk-root').
# X is rebuilt from mushroom.data (the frame X was bound to) instead of from
# X itself, so this cell is idempotent: re-running it no longer raises
# KeyError because the column was already dropped by a previous execution.
X = mushroom.data.drop(columns='stalk-root')

In [8]:
# One-hot encoding: expand every column of X into integer (0/1) indicator
# columns, one per distinct value.
import pandas as pd

X_encoded = pd.get_dummies(
    data=X,
    columns=X.columns.tolist(),
    dtype=int,
)

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Train/test split: hold out 20% of the rows for testing, stratified on the
# target, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Feature scaling: learn the scaling statistics from the training split only,
# then apply that same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train a linear-kernel SVM on the scaled training data (fit returns the
# estimator itself, so construction and fitting can be chained).
svm_linear = SVC(kernel='linear', random_state=42).fit(X_train_scaled, y_train)

# Score the model on the held-out test split.
y_pred_linear = svm_linear.predict(X_test_scaled)
accuracy_linear = accuracy_score(y_true=y_test, y_pred=y_pred_linear)
print(f"linear SVMの正解率: {accuracy_linear:.4f}")

linear SVMの正解率: 1.0000


In [11]:
# Train an RBF-kernel SVM on the scaled training data (fit returns the
# estimator itself, so construction and fitting can be chained).
svm_rbf = SVC(kernel='rbf', random_state=42).fit(X_train_scaled, y_train)

# Score the model on the held-out test split.
y_pred_rbf = svm_rbf.predict(X_test_scaled)
accuracy_rbf = accuracy_score(y_true=y_test, y_pred=y_pred_rbf)
print(f"rbf SVMの正解率: {accuracy_rbf:.4f}")

rbf SVMの正解率: 0.9988


In [12]:
# Hyperparameter tuning

from sklearn.model_selection import GridSearchCV

# Candidate hyperparameter values.
# The grid is a list of two sub-grids so that 'gamma' is only searched for the
# kernels that use it ('rbf', 'poly'). The linear kernel ignores 'gamma', so
# crossing it with every gamma value would just refit the same model
# repeatedly.
param_grid = [
    {
        'kernel': ['rbf', 'poly'],
        'C': [0.1, 1, 10, 100],
        'gamma': [0.001, 0.01, 0.1, 1],
    },
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100],
    },
]

# Grid-search setup: 5-fold cross-validation, candidates ranked by accuracy.
grid_search = GridSearchCV(
    SVC(random_state=42), param_grid, cv=5, scoring='accuracy')

# Run the grid search on the scaled training data.
# NOTE(review): X_train_scaled was standardized with statistics from the whole
# training split, so each CV validation fold has influenced the scaling.
grid_search.fit(X_train_scaled, y_train)

# best_score_ is the mean cross-validated accuracy of the best candidate,
# not an accuracy measured on the held-out test split.
print(f"パラメータの最適値: {grid_search.best_params_}")
print(f"最適モデルによる正解率: {grid_search.best_score_:.4f}")

パラメータの最適値: {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
最適モデルによる正解率: 0.9998
