<a href="https://colab.research.google.com/github/udupa-varun/pyimagesearch_uni/blob/main/deep_learning/102/understanding_regularization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Fetch the tutorial archive from the PyImageSearch S3 bucket into the
# current working directory.
!wget https://pyimagesearch-code-downloads.s3.us-west-2.amazonaws.com/understanding-regularization/understanding-regularization.zip
# Extract it; -qq suppresses unzip's per-file listing.
!unzip -qq understanding-regularization.zip
# Move into the extracted folder so that relative paths and local package
# imports resolve against it.
# NOTE(review): presumably the archive ships the `pyimagesearch` package and
# the `dataset/` directory -- confirm against the archive contents.
%cd understanding-regularization

--2023-01-31 13:43:06--  https://pyimagesearch-code-downloads.s3.us-west-2.amazonaws.com/understanding-regularization/understanding-regularization.zip
Resolving pyimagesearch-code-downloads.s3.us-west-2.amazonaws.com (pyimagesearch-code-downloads.s3.us-west-2.amazonaws.com)... 3.5.80.127, 52.92.181.146, 52.92.248.138, ...
Connecting to pyimagesearch-code-downloads.s3.us-west-2.amazonaws.com (pyimagesearch-code-downloads.s3.us-west-2.amazonaws.com)|3.5.80.127|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 197007501 (188M) [application/zip]
Saving to: ‘understanding-regularization.zip’


2023-01-31 13:43:15 (21.5 MB/s) - ‘understanding-regularization.zip’ saved [197007501/197007501]

/content/understanding-regularization


In [2]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from pyimagesearch.preprocessing import SimplePreprocessor
from pyimagesearch.datasets import SimpleDatasetLoader
from imutils import paths

In [3]:
# Notebook stand-in for command-line arguments: path to the image dataset,
# relative to the current working directory.
args = dict(dataset="dataset/animals")

In [5]:
print("[INFO] loading images...")
image_paths = list(paths.list_images(args["dataset"]))

# Build the preprocessor with a 32x32 target size and load every image
# through it, reporting progress every 500 images.
# NOTE(review): SimplePreprocessor presumably resizes each image to the
# given width/height -- confirm against pyimagesearch.preprocessing.
sp = SimplePreprocessor(32, 32)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(image_paths, verbose=500)

# Flatten each image into a single feature vector (one row per image).
# -1 lets numpy infer the row width from the array itself instead of
# hard-coding 3072 (= 32 * 32 * 3), so the reshape stays correct if the
# preprocessor size above is ever changed.
data = data.reshape((data.shape[0], -1))


[INFO] loading images...
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000


In [6]:
# Map the class labels to integer ids; `le` keeps the fitted mapping.
le = LabelEncoder()
labels = le.fit_transform(labels)

# Hold out 25% of the samples for testing; the fixed seed makes the
# split reproducible.
split = train_test_split(data, labels, test_size=0.25, random_state=42)
(train_x, test_x, train_y, test_y) = split


In [8]:
# loop over set of regularizers
for r in (None, "l1", "l2"):
    # train an SGD classifier using a softmax loss function
    # and the specified regularization function, for 10 epochs
    print(f"[INFO] training model with {r} penalty...")
    model = SGDClassifier(
        loss="log", 
        penalty=r, 
        max_iter=10,
        learning_rate="constant",
        tol=1e-3,
        eta0=0.01,
        random_state=12
        )
    model.fit(train_x, train_y)

    # evaluate classifier
    acc = model.score(test_x, test_y)
    print(f"[INFO] {r} penalty accuracy: {acc * 100:.2f}%")

[INFO] training model with None penalty...




[INFO] None penalty accuracy: 55.47%
[INFO] training model with l1 penalty...




[INFO] l1 penalty accuracy: 54.80%
[INFO] training model with l2 penalty...
[INFO] l2 penalty accuracy: 55.33%


