In [15]:
import numpy as np
from azureml.core import Workspace
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from azureml.core import Experiment
import time

In [4]:
# Create the Azure ML workspace used by the rest of the notebook.
# exist_ok=True makes the cell safe to re-run: when the workspace already
# exists it is returned instead of the call failing.
# NOTE(review): the subscription ID is hardcoded; consider reading it from an
# environment variable or a workspace config file before sharing this notebook.
ws = Workspace.create(
    "breadCancerPca3",
    subscription_id="030117c9-e67b-4755-af85-4683fe8eddf2",
    resource_group="dp100_cc",
    location='francecentral',
    exist_ok=True,
)

Deploying KeyVault with name breadcankeyvault7f384f64.
Deploying StorageAccount with name breadcanstorage0a8d1e1db.
Deploying AppInsights with name breadcaninsights39325944.
Deployed AppInsights with name breadcaninsights39325944. Took 5.49 seconds.
Deployed KeyVault with name breadcankeyvault7f384f64. Took 18.13 seconds.
Deploying Workspace with name breadCancerPca3.
Deployed StorageAccount with name breadcanstorage0a8d1e1db. Took 20.44 seconds.
Deployed Workspace with name breadCancerPca3. Took 54.32 seconds.


In [5]:
# Load the scikit-learn breast cancer dataset; `data` is reused by later cells
# (features via data.data, labels via data.target).
data = load_breast_cancer()
# Show the container type returned by the loader.
type(data)

sklearn.utils.Bunch

In [6]:
# Display the feature matrix (numpy truncates the repr of large arrays).
data.data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [7]:
# Display the names of the feature columns.
data.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [9]:
# Report the dimensions of the feature matrix and of the feature-name vector.
print(data.data.shape)
print(data.feature_names.shape)

(569, 30)
(30,)


In [10]:
# Get (or create) the experiment named "experiment" in the workspace.
experiment = Experiment(workspace=ws, name="experiment")

In [11]:
# List the experiments registered in the workspace.
Experiment.list(workspace=ws)

[Experiment(Name: experiment,
 Workspace: breadCancerPca3)]

In [12]:
# Attach descriptive tags to the experiment, one set_tags call per tag.
experiment_tags = {
    "tag1": "classification",
    "tag2": "Breast Cancer",
    "tag3": "PCA",
}
for tag_key, tag_value in experiment_tags.items():
    experiment.set_tags({tag_key: tag_value})

In [29]:
def train_model(X, y, run, component, complete_run=True):
    """Train a logistic regression on PCA-reduced features and log metrics.

    The data is split into train/test sets first; PCA is then fitted on the
    training rows only and applied to both sets, so no information from the
    test rows leaks into the projection used for evaluation.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Target labels aligned with the rows of ``X``.
    run : azureml.core.Run
        Run that receives the logged metrics.
    component : int
        Value passed to ``PCA(n_components=...)``.
    complete_run : bool, default True
        When True (the original behaviour) the run is marked completed before
        returning. Pass False to keep logging further results to the same run.

    Returns
    -------
    LogisticRegression
        The fitted classifier. It expects PCA-transformed inputs.
    """
    # Split before fitting PCA so the test rows stay unseen during fitting.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    pca = PCA(n_components=component)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)

    model = LogisticRegression()
    print(f'training: component number {component}')
    start_time = time.time()
    model.fit(X_train, y_train)
    duration = time.time() - start_time
    print('done')

    # explained_variance_ is an array (one value per component), so it is
    # logged with log_list; run.log is meant for single scalar values.
    # The metric name (including its spelling) is kept unchanged so existing
    # metric consumers keep working.
    run.log_list('Explanied variance', pca.explained_variance_.tolist())
    run.log("Number of component", component)
    run.log("Run time Duration", duration)
    run.log("Accuracy", model.score(X_test, y_test))
    if complete_run:
        run.complete()

    return model

In [30]:
X, y = data.data, data.target

# train_model() marks the run it receives as completed, so every component
# count gets its own run; reusing a single run would log iterations 2..7 into
# a run that has already been completed.
for component_number in range(1, 8):
    run = experiment.start_logging(outputs=None, snapshot_directory=False)
    model = train_model(X, y, run, component_number)

training: component number 1
done
training: component number 2
done
training: component number 3
done
training: component number 4
done
training: component number 5
done
training: component number 6
done
training: component number 7
done
