# ・Download CSV file.


In [27]:
import urllib.request as req
import pandas as pd

# Download file.
# The original link targeted the moving `master` branch and the
# pandas/tests/data/ directory. This one pins a release tag and uses the
# pandas/tests/io/data/csv/ location so the link does not break when the
# repository is reorganized.
# NOTE(review): confirm the tag and path resolve before merging.
url = "https://raw.githubusercontent.com/pandas-dev/pandas/v1.5.3/pandas/tests/io/data/csv/iris.csv"
savefile = "iris.csv"
# urlretrieve copies the response body into `savefile` (relative to the
# current working directory), overwriting any existing file of that name.
req.urlretrieve(url, savefile)
print("Saved.")

# Display the contents of the downloaded file.
# The variable name `csv` is kept unchanged in case later cells refer to it.
csv = pd.read_csv(savefile, encoding="utf-8")
# A bare last expression is rendered by the notebook as a table.
csv

Saved.


Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


# ・Machine learning using Iris data.

In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

#!
#! Read iris data.
#!
# Reads "iris.csv" from the current working directory (the file written by the
# download cell).
iris_data = pd.read_csv("iris.csv", encoding="utf-8")

#!
#! Separate iris data into labels and input data.
#!
# Select columns by CSV header name: "Name" is the label column, and the four
# measurement columns are the model inputs.
y = iris_data.loc[:, "Name"]
x = iris_data.loc[:, ["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"]]

#!
#! Divide into learning and testing.
#!
# Keep 80% of the rows for learning and hold out 20% for testing.
# shuffle=True reorders the rows randomly before splitting; random_state pins
# that shuffle so the split, and therefore the reported accuracy, is identical
# on every run of the notebook.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, train_size=0.8, shuffle=True, random_state=42
)

#!
#! Learn.
#!
# Create a classifier based on support vector machines.
clf = SVC(gamma="scale")
# fit() trains the classifier on the training inputs and their labels.
clf.fit(x_train, y_train)

#!
#! Evaluation.(accuracy_score()=Calculation of accuracy rate.)
#!
# Predict labels for the held-out rows and compare them with the true labels.
y_pred = clf.predict(x_test)
# The printed label "正解率" means "accuracy rate".
print("正解率 = ", accuracy_score(y_test, y_pred))

正解率 =  0.9666666666666667


| | 分類 | ライブラリ | メソッド | 説明 |
|---- | ---- | ---- | ---- | :---- |
| 1 | データ読込 | Pandas | read_csv() | CSVファイルを指定すると、PandasのDataFrameオブジェクトを返す。|
| 2 | データ分離(列) | Pandas | loc[] | データをラベルと入力データに分離(列による分離)するのに利用。|
| 3 | データ分離(行) | scikit-learn | train_test_split() | データを学習用とテスト用に分離(行による分離)するのに利用。<br>[test_size][train_size]パラメータにより学習用、テスト用の比率を指定できる。|
| 4 | 学習 | scikit-learn | fit() | 学習用の入力データとラベルの配列を指定すると、学習できる。|
| 5 | 予測 | scikit-learn | predict() | テストデータの配列を指定すると、予測結果を返す。|
| 6 | 正解率の計算 | scikit-learn | accuracy_score() | 正しい結果と予測結果を指定すると、正解率を返す。|

# ・Use of scikit-learn sample data 

In [26]:
from sklearn import datasets, svm
# Load the iris sample dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Headers and divider used for the two printed sections.
label_header = "<<Label data>> \n"
feature_header = "<<Predict data>> \n"
divider = "-------------------------------"

# Print the target array first, then the data array, with a rule in between.
print(label_header, iris.target)
print(divider)
print(feature_header, iris.data)


<<Label data>> 
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
-------------------------------
<<Predict data>> 
 [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]