In [120]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets 
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.model_selection import learning_curve 
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import accuracy_score

In [121]:
# Load the iris dataset that ships with scikit-learn.
#   Class labels:    0 = Setosa, 1 = Versicolor, 2 = Virginica
#   Feature columns: sepal length, sepal width, petal length, petal width
iris = datasets.load_iris()

# Feature matrix (X) and class-label vector (Y).
X, Y = iris.data, iris.target

# Standardized copy of the features. Note that preprocessing.scale is applied
# to the whole of X in a single call, i.e. before any train/test splitting.
standardizedX = preprocessing.scale(X, axis=0, with_mean=True, with_std=True)

In [None]:
# Learning curve for a square-loss linear classifier trained with SGD.
# Reads `standardizedX` and `Y`, which must already have been defined by the
# data-loading cell.

# One seed shared by the classifier's per-epoch shuffling and by the
# cross-validation splitter, so re-running this cell reproduces the same curve.
RANDOM_STATE = 0

# Square-loss linear classifier fitted by stochastic gradient descent.
#  * 'squared_error' is the current scikit-learn name for the loss that used
#    to be spelled 'squared_loss'.
#  * penalty=None (rather than the string 'none') disables regularization.
#  * tol=None switches off the tolerance-based stopping criterion, so every
#    fit runs for the full max_iter epochs.
clf = linear_model.SGDClassifier(
    loss='squared_error', penalty=None,
    max_iter=10000, tol=None, shuffle=True,
    random_state=RANDOM_STATE,
)

# Cross validator: 10 random splits, each holding out 25% of the samples.
cv = ShuffleSplit(n_splits=10, test_size=0.25, random_state=RANDOM_STATE)

# Generate the learning curve at 10 training-set fractions from 10% to 100%
# of each split's training portion.
trainSize, trainScore, testScore = learning_curve(
    clf, standardizedX, Y,
    train_sizes=np.linspace(0.1, 1.0, 10),
    cv=cv,
)

# Score metrics: the score arrays hold one row per training size and one
# column per CV split, so aggregate across the splits (axis=1).
trainScoreMean = np.mean(trainScore, axis=1)
trainScoreStd = np.std(trainScore, axis=1)
testScoreMean = np.mean(testScore, axis=1)
testScoreStd = np.std(testScore, axis=1)

# Draw on an explicit Axes instead of the implicit pyplot "current" figure.
fig, ax = plt.subplots()

# Shaded bands: mean +/- one standard deviation across the CV splits.
ax.fill_between(trainSize, trainScoreMean - trainScoreStd,
                trainScoreMean + trainScoreStd, alpha=0.1, color='r')
ax.fill_between(trainSize, testScoreMean - testScoreStd,
                testScoreMean + testScoreStd, alpha=0.1, color='b')

# Mean score per training-set size.
ax.plot(trainSize, trainScoreMean, 'o-', color='r',
        label="Training Score")
ax.plot(trainSize, testScoreMean, 'o-', color='b',
        label="Cross-Validation Score")

ax.set_xlabel("Training Examples")
ax.set_ylabel("Classification Accuracy")
ax.set_title("Learning Curve (Square Loss)")
# Trailing semicolon keeps the notebook from echoing the Legend repr.
ax.legend(loc='lower right');