## ==**This notebook is under active development**==

# Azure SDK Notebook

In [1]:
#r "nuget: Azure.ResourceManager.MachineLearning, 1.0.0-beta.1"
#r "nuget: Azure.Identity, 1.6.0"
#r "nuget: Azure.Security.KeyVault.Secrets, 4.3.0"
#r "nuget:Azure.Storage.Blobs, 12.12.0"
#r "Microsoft.VisualBasic"

In [1]:
// Import common usings.
using Azure.Storage.Blobs;
using Azure.Storage.Blobs.Models;
using System.IO;
using Microsoft.VisualBasic;
using Microsoft.VisualBasic.FileIO;



### The following section defines the file paths and Azure resources needed to complete the machine learning operations in this notebook.

In [1]:
// File paths.
// Generated artifacts are written to the temp directory; the GUID prefix keeps the
// .tsv/.jsonl files of repeated runs from overwriting each other.
var imageFolderPath = @"C:\Users\zehailem\Desktop\SDKTestWeatherImage";
var tempPath = Path.GetTempPath();
var guid = Guid.NewGuid().ToString();
var modelName = "MlModel";
var tsvFilePath = Path.Combine(tempPath, $"{guid}-{modelName}.ImageTrainData.tsv");
var jsonlFilePath = Path.Combine(tempPath, $"{guid}-{modelName}.AzureImageTrainData.jsonl");
var MlTableFolder = Directory.CreateDirectory(Path.Combine(tempPath, "TrainMlTable"));

// The file name is later reused as the blob name, so it must be exactly "MLTable".
// A trailing dot ("MLTable.") would be kept by Path.GetFileName and end up in the blob name.
var mLTableFilePath = Path.Combine(MlTableFolder.FullName, "MLTable");

// Azure resources Names.
// Assign these before running the cells that read them.
string subscriptionName;
string resourceGroupName;
string workspaceName;
string computeName;
// NOTE(review): identifier is misspelled ("Emvironment"); kept as-is because other cells may reference it.
string EmvironmentName;
// Experiment under which the AutoML job is submitted.
string experimentName;

# Generate data schema for training 
The following cells generate the data schemas required for image classification. For further details, refer to https://docs.microsoft.com/en-us/azure/machine-learning/reference-automl-images-schema

# Generate tsv file

In [1]:
/// <summary>
/// Writes a tab-separated index of the images found under <paramref name="selectedFolderName"/>.
/// </summary>
/// <remarks>
/// The first line is the header "Label&lt;TAB&gt;ImageSource". Every immediate sub-directory is treated
/// as a label; each .png/.jpg/.jpeg/.gif file directly inside it yields one
/// "directory-name&lt;TAB&gt;full-file-path" row. Files in the root folder itself and in deeper
/// levels are not listed.
/// I/O failures propagate with their original exception type and stack trace.
/// </remarks>
/// <param name="selectedFolderName">Root folder whose sub-directories hold the labelled images.</param>
/// <param name="tsvFilePath">Path of the TSV file to create (overwritten if it exists).</param>
public async Task WriteAllDataToTSVAsync(string selectedFolderName, string tsvFilePath)
{
    DirectoryInfo rootDirectoryInfo = new DirectoryInfo(selectedFolderName);
    DirectoryInfo[] subDirectories = rootDirectoryInfo.GetDirectories();

    // Ordinal, case-insensitive matching: lower-casing with the current culture can
    // mis-handle extensions such as ".GIF" under cultures with special casing rules.
    var allowedImageFileExtensions = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        ".png", ".jpg", ".jpeg", ".gif"
    };

    using (var outFile = File.CreateText(tsvFilePath))
    {
        // Header row.
        await outFile.WriteLineAsync(string.Join("\t", "Label", "ImageSource"));

        foreach (DirectoryInfo directory in subDirectories)
        {
            var imageFiles = directory
                .EnumerateFiles()
                .Where(f => allowedImageFileExtensions.Contains(f.Extension));

            foreach (FileInfo file in imageFiles)
            {
                await outFile.WriteLineAsync(string.Join("\t", directory.Name, file.FullName));
            }
        }
    }
}

# Generate jsonl  file


In [1]:
 /// <summary>
 /// Converts the label/image TSV into a JSON Lines file.
 /// </summary>
 /// <remarks>
 /// The header line of the TSV is skipped. For every remaining row (label, full image path) one line
 /// of the form {"image_url":"AmlDatastore://workspaceblobstore/&lt;grandparent&gt;/&lt;parent&gt;/&lt;file&gt;", "label":"&lt;label&gt;"}
 /// is written, where the path segments are the names of the image's grandparent folder, parent folder
 /// and file. Values are inserted verbatim (no JSON escaping).
 /// Failures propagate with their original exception type and stack trace.
 /// </remarks>
 /// <param name="imageTsvPath">TSV file with a header line and tab-separated label/path rows.</param>
 /// <param name="generatedTsvPath">Path of the .jsonl file to create (overwritten if it exists).</param>
 /// <exception cref="InvalidDataException">A data row has fewer than two fields.</exception>
 public async Task CreateJsonlFileForRemoteAsync(string imageTsvPath, string generatedTsvPath)
 {
     // Open the input first so a missing TSV does not leave an empty output file behind.
     using (TextFieldParser parser = new TextFieldParser(imageTsvPath))
     using (var outFile = File.CreateText(generatedTsvPath))
     {
         parser.TextFieldType = FieldType.Delimited;
         parser.SetDelimiters("\t");

         // Skip the header line
         parser.ReadLine();
         while (!parser.EndOfData)
         {
             var row = parser.ReadFields();
             if (row == null || row.Length < 2)
             {
                 throw new InvalidDataException($"Expected a label and an image path in every row of '{imageTsvPath}'.");
             }

             var label = row[0];
             var file = new FileInfo(row[1]);
             var imageRelativePath = Path.Combine(file.Directory.Parent.Name, file.Directory.Name, file.Name).Replace('\\', '/');

             var jsonLine = $@"{{""image_url"":""AmlDatastore://workspaceblobstore/{imageRelativePath}"", ""label"":""{label}""}}";

             await outFile.WriteLineAsync(jsonLine);
         }
     }
 }

# Generate MLTable File

In [1]:

        /// <summary>
        /// Writes an MLTable definition that points at the uploaded JSON Lines training file.
        /// </summary>
        /// <remarks>
        /// The referenced path is
        /// azureml://datastores/workspaceblobstore/paths/&lt;folder name&gt;/&lt;jsonl file name&gt;, where the folder
        /// name is the last segment of <paramref name="selectedFolderName"/> and the file name is taken from
        /// <paramref name="remoteAzureTrainingFile"/>.
        /// Failures propagate with their original exception type and stack trace.
        /// </remarks>
        /// <param name="selectedFolderName">Local image root folder; only its name is used.</param>
        /// <param name="mlTableFilePath">Path of the MLTable file to create (overwritten if it exists).</param>
        /// <param name="remoteAzureTrainingFile">Path of the .jsonl file; only its file name is used.</param>
        public void CreateMlTableFile(string selectedFolderName, string mlTableFilePath, string remoteAzureTrainingFile)
        {
            // Resolve both names before touching the output file so invalid input leaves nothing behind.
            var jsonlFileName = Path.GetFileName(remoteAzureTrainingFile);
            var selectedFolder = new DirectoryInfo(selectedFolderName).Name;

            // Verbatim YAML: the indentation inside the literal is significant and must not be re-flowed.
            var mlTableContent = $@"paths:
  - file: azureml://datastores/workspaceblobstore/paths/{selectedFolder}/{jsonlFileName}
transformations:
  - read_json_lines:
        encoding: utf8
        invalid_lines: error
        include_path_column: false
  - convert_column_types:
      - columns: image_url
        column_type: stream_info";

            File.WriteAllText(mlTableFilePath, mlTableContent);
        }
    

## Generate  remote  files  locally

In [1]:
// 1. Index the labelled images into a TSV file.
await WriteAllDataToTSVAsync(
    imageFolderPath,
    tsvFilePath);

// 2. Turn the TSV into the JSON Lines training file.
await CreateJsonlFileForRemoteAsync(
    tsvFilePath,
    jsonlFilePath);

// 3. Write the MLTable definition that references the JSON Lines file.
CreateMlTableFile(
    imageFolderPath,
    mLTableFilePath,
    jsonlFilePath);


# Upload to Azure storage setup

## Configure your storage connection string
 https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-dotnet?tabs=environment-variable-windows#configure-your-storage-connection-string

In [1]:
  // The connection string is read from the environment so that it never appears in the notebook.
  string connectionString = Environment.GetEnvironmentVariable("AZURE_STORAGE_CONNECTION_STRING");
  if (string.IsNullOrWhiteSpace(connectionString))
  {
      throw new InvalidOperationException(
          "The AZURE_STORAGE_CONNECTION_STRING environment variable is not set. Configure it before running this cell.");
  }

  //Create a BlobServiceClient object which will be used to create a container client
  BlobServiceClient blobServiceClient = new BlobServiceClient(connectionString);
  var allblobs = blobServiceClient.GetBlobContainers(BlobContainerTraits.Metadata, BlobContainerStates.System);

  // Get workspaceblobstore: the first container whose name starts with "azureml-blobstore".
  var defaultBlob = allblobs
      .Select(b => b.Name)
      .FirstOrDefault(name => name.StartsWith("azureml-blobstore", StringComparison.Ordinal))
      ?? throw new InvalidOperationException(
          "No container whose name starts with 'azureml-blobstore' was found in the storage account.");
  var defaultContainer = blobServiceClient.GetBlobContainerClient(defaultBlob);

  // Name of the local image folder; used as the top-level folder of the uploaded blobs.
  var selectedFolder = new DirectoryInfo(imageFolderPath).Name;

In [1]:
	  
    // Upload the jsonl file to <selectedFolder>/<jsonl file name> in the default container.
    // NOTE(review): the "./" prefix is kept from the original code; confirm how the service normalises it.
    BlobClient blobClient = defaultContainer.GetBlobClient("./" + selectedFolder + "/" + Path.GetFileName(jsonlFilePath));
    await blobClient.UploadAsync(jsonlFilePath, true);

    // Upload the MLTable file to <selectedFolder>/TrainMlTable/<MLTable file name>.
    BlobClient blobClientMlTable = defaultContainer.GetBlobClient("./" + selectedFolder + "/" + "TrainMlTable/" + Path.GetFileName(mLTableFilePath));
    await blobClientMlTable.UploadAsync(mLTableFilePath, true);



# Upload images 

In [1]:
/// <summary>
/// Uploads every image referenced by the JSON Lines training file to the default workspace container.
/// </summary>
/// <remarks>
/// Each line is expected to look like
/// {"image_url":"AmlDatastore://workspaceblobstore/&lt;folder&gt;/&lt;relative path&gt;", "label":"..."}
/// where &lt;folder&gt; is the name of <paramref name="imageSourceFolder"/>. The relative path is resolved
/// against <paramref name="imageSourceFolder"/> to find the local file, and the blob is stored under
/// "&lt;folder&gt;/&lt;relative path&gt;" using forward slashes so that it matches the URL in the file.
/// Uses the notebook-level <c>defaultContainer</c> client.
/// </remarks>
/// <param name="imageSourceFolder">Local root folder that contains the labelled image folders.</param>
/// <param name="remoteInputFile">Path of the .jsonl file that lists the images.</param>
/// <exception cref="InvalidDataException">A line does not have the expected shape.</exception>
public async Task UploadImageToAzureAsync(string imageSourceFolder, string remoteInputFile)
{
    // Text that separates the image URL from the label in every generated line.
    const string labelMarker = "\", \"label\":\"";

    var localFolderName = new DirectoryInfo(imageSourceFolder).Name;
    var urlText = $@"{{""image_url"":""AmlDatastore://workspaceblobstore/{localFolderName}/";

    foreach (var row in File.ReadLines(remoteInputFile))
    {
        if (string.IsNullOrWhiteSpace(row))
        {
            continue;
        }

        // Locate the end of the URL via the label marker rather than the first comma,
        // so file names that contain commas are handled.
        var urlEnd = row.StartsWith(urlText, StringComparison.Ordinal)
            ? row.IndexOf(labelMarker, urlText.Length, StringComparison.Ordinal)
            : -1;
        if (urlEnd < 0)
        {
            throw new InvalidDataException($"Unexpected line in '{remoteInputFile}': {row}");
        }

        // Path relative to the image source folder, with forward slashes.
        var relativeFilePath = row.Substring(urlText.Length, urlEnd - urlText.Length);

        // Local path uses the platform separator; the blob name keeps forward slashes.
        var fullImagePath = Path.Combine(imageSourceFolder, relativeFilePath.Replace('/', Path.DirectorySeparatorChar));
        BlobClient blobClientImage = defaultContainer.GetBlobClient("./" + localFolderName + "/" + relativeFilePath);
        await blobClientImage.UploadAsync(fullImagePath, true);
    }
}
			


In [1]:
// Upload every image listed in the generated .jsonl file.
await UploadImageToAzureAsync(
    imageFolderPath,
    jsonlFilePath);

# Submit automl job

## Install Azure Tools in Powershell

### Log in to Azure in PowerShell

In [1]:

#!pwsh
az login


]


# Use Azure SDKs to call into AzureML

In [1]:
using Azure.ResourceManager;
using Azure.ResourceManager.MachineLearning;
using System;
using System.Threading.Tasks;
using Azure.Core;
using Azure.ResourceManager;
using Azure.ResourceManager.Resources;
using Azure.Identity;
using Azure.ResourceManager.MachineLearning.Models;
using Azure;

In [1]:
// Authenticate with the default Azure credential and create the ARM client.
var armClient = new ArmClient(
    new DefaultAzureCredential());

// Collection of subscriptions reachable through the client.
var subscription = armClient.GetSubscriptions();

## Select the subscription in which you want to run the machine learning operations.

In [1]:
// Pick the subscription whose display name matches subscriptionName (throws if none matches).
var selectedSubscription = subscription.First(sub => sub.Data.DisplayName == subscriptionName);


## Select your  resource group.

In [1]:
// Resource groups of the selected subscription.
ResourceGroupCollection resourceGroups = selectedSubscription.GetResourceGroups();

// Pick the resource group whose name matches resourceGroupName (throws if none matches).
var selectedrg = resourceGroups.First(rg => rg.Data.Name == resourceGroupName);


## Select your workspace 

In [1]:
// Fetch the workspace named workspaceName from the selected resource group.
var workspaceResponse = await selectedrg.GetMachineLearningWorkspaceAsync(workspaceName);
MachineLearningWorkspaceResource selectedWorkspace = workspaceResponse.Value;


## Select your environment .

In [1]:
 // Take the first environment container returned for the selected workspace.
 var env= selectedWorkspace.GetEnvironmentContainers().First();
 // Disabled alternative kept for reference: filter for a Microsoft-created GPU environment
 // and take its first version.
 //var imageSupportedEnvi= env.Where(e => e.Data.Name.EndsWith("GPU") &&  e.Data.SystemData.CreatedBy == "Microsoft").First();
 //EnvironmentVersionResource environmentResource=imageSupportedEnvi.GetEnvironmentVersions().First();
 // Trailing expression without a semicolon: its value is displayed as the cell output.
 env.Data.Name

AzureML-pytorch-1.10-ubuntu18.04-py38-cuda11-gpu

## Select compute 

In [1]:
// ARM id of the selected workspace, assembled from the selections made above.
var workspaceArmId = $"/subscriptions/{selectedSubscription.Data.SubscriptionId}/resourceGroups/{selectedrg.Data.Name}/providers/Microsoft.MachineLearningServices/workspaces/{selectedWorkspace.Data.Name}";

// ARM id of the compute target inside that workspace.
var computeId = $"{workspaceArmId}/computes/{computeName}";


In [1]:
/// <summary>
/// Builds an AutoML image-classification job definition and creates it in the given workspace.
/// </summary>
/// <remarks>
/// The training data is the MLTable folder at
/// azureml://datastores/workspaceblobstore/paths/&lt;selectedFolder&gt;/TrainMlTable, where
/// <c>selectedFolder</c> is the notebook-level variable. The label column is "label".
/// </remarks>
/// <param name="resourceGroup">Resource group that contains the workspace.</param>
/// <param name="workspaceName">Name of the machine learning workspace.</param>
/// <param name="id">Id under which the job is created.</param>
/// <param name="experimentName">Experiment name assigned to the job.</param>
/// <param name="environmentId">Environment id assigned to the job.</param>
/// <param name="computeId">Compute id assigned to the job.</param>
/// <returns>The job resource returned by the create-or-update operation.</returns>
public async Task<MachineLearningJobResource> SubmitAutoMLImageClassificationAsync(
    ResourceGroupResource resourceGroup,
    string workspaceName,
    string id,
    string experimentName,
    string environmentId,
    string computeId)
{
    MachineLearningWorkspaceResource workspace = await resourceGroup.GetMachineLearningWorkspaces().GetAsync(workspaceName);

    // Training input: the MLTable uploaded to the default workspaceblobstore.
    var trainingTableUri = new Uri($"azureml://datastores/workspaceblobstore/paths/{selectedFolder}/TrainMlTable");
    var trainingInput = new MLTableJobInput(trainingTableUri)
    {
        Mode = InputDeliveryMode.EvalMount,
        Description = "Train data",
    };
    var dataSettings = new ImageVerticalDataSettings("label", new TrainingDataSettings(trainingInput));

    // Limits for the job as a whole.
    var limitSettings = new ImageLimitSettings
    {
        MaxConcurrentTrials = 2,
        MaxTrials = 10,
        Timeout = TimeSpan.FromHours(2)
    };

    // Limits and early-termination policy for the hyperparameter sweep.
    var sweepLimits = new ImageSweepLimitSettings
    {
        MaxConcurrentTrials = 4,
        MaxTrials = 20
    };
    var sweepSettings = new ImageSweepSettings(sweepLimits, SamplingAlgorithmType.Random)
    {
        EarlyTermination = new BanditPolicy() { SlackFactor = 0.2f, EvaluationInterval = 3 },
    };

    // Two search spaces, one per model family.
    var transformerSpace = new ImageModelDistributionSettingsClassification
    {
        ModelName = "choice('vitb16r224', 'vits16r224')",
        LearningRate = "uniform(0.001, 0.01)",
        NumberOfEpochs = "choice(15, 30)",
    };
    var resnetSpace = new ImageModelDistributionSettingsClassification
    {
        ModelName = "choice('seresnext', 'resnet50')",
        LearningRate = "uniform(0.001, 0.01)",
        NumberOfEpochs = "choice(0, 2)",
    };
    var searchSpaces = new List<ImageModelDistributionSettingsClassification> { transformerSpace, resnetSpace };

    AutoMLVertical classificationTask = new ImageClassification(dataSettings, limitSettings)
    {
        LogVerbosity = LogVerbosity.Info,
        PrimaryMetric = ClassificationPrimaryMetrics.Accuracy,
        SweepSettings = sweepSettings,
        SearchSpace = searchSpaces,
    };

    // Short random suffix so that display names differ between submissions.
    var displayNameSuffix = Guid.NewGuid().ToString("n").Substring(0, 6);

    var jobDefinition = new AutoMLJob(classificationTask)
    {
        ExperimentName = experimentName,
        DisplayName = "AutoMLJob ImageClassification-" + displayNameSuffix,
        EnvironmentId = environmentId,
        IsArchived = false,
        ComputeId = computeId,
        Resources = new ResourceConfiguration
        {
            InstanceCount = 3,
        },
        Properties = new Dictionary<string, string>
        {
            { "property-name", "property-value" },
        },
        Tags = new Dictionary<string, string>
        {
            { "tag-name", "tag-value" },
        },
        EnvironmentVariables = new Dictionary<string, string>()
        {
            { "env-var", "env-var-value" }
        },
        Description = "This is a description of test AutoMLJob for multi-class Image classification job using fridge items dataset",
    };

    var jobData = new MachineLearningJobData(jobDefinition);
    ArmOperation<MachineLearningJobResource> submission = await workspace
        .GetMachineLearningJobs()
        .CreateOrUpdateAsync(WaitUntil.Completed, id, jobData);

    return submission.Value;
}

# Submit Image classification  automl Job

In [1]:
// Generate job id.
/// <summary>
/// Generates a random string made only of ASCII letters.
/// </summary>
/// <param name="size">Number of characters to generate; zero or a negative value yields an empty string.</param>
/// <param name="lowerCase">True for letters a-z, false for letters A-Z.</param>
/// <returns>A string of <paramref name="size"/> random letters in the requested case.</returns>
public string RandomString(int size, bool lowerCase)
{
    // Pick letters directly in the requested case. Lower-casing afterwards with the current
    // culture can introduce non-ASCII characters (for example 'I' under Turkish casing rules).
    char firstLetter = lowerCase ? 'a' : 'A';

    StringBuilder builder = new StringBuilder();
    Random random = new Random();
    for (int i = 0; i < size; i++)
    {
        builder.Append((char)(firstLetter + random.Next(26)));
    }

    return builder.ToString();
}

In [1]:
// Random 15-letter lower-case job id.
var jobId = RandomString(15, true);

// Submit the AutoML image-classification job with the selections made in the cells above.
MachineLearningJobResource job = await SubmitAutoMLImageClassificationAsync(
    selectedrg,
    selectedWorkspace.Data.Name,
    jobId,
    experimentName,
    env.Data.Id,
    computeId);


# Monitor remote job status

In [1]:
/// <summary>
/// Polls the job every 20 seconds, printing its status, until the status is
/// Completed, Failed or Canceled.
/// </summary>
/// <param name="ws">Workspace that owns the job.</param>
/// <param name="id">Id of the job to poll.</param>
private static async Task WaitForJobToFinishAsync(
    MachineLearningWorkspaceResource ws,
    string id)
{
    // Pause between two consecutive status checks (in milliseconds).
    const int SleepIntervalMs = 20 * 1000;

    Console.WriteLine($"Starting to poll the status of Job Id: {id}");
    while (true)
    {
        MachineLearningJobResource jobResource = await ws.GetMachineLearningJobs().GetAsync(id);
        var status = jobResource.Data.Properties.Status;
        Console.WriteLine($"DateTime: {DateTime.Now}, Experiment Name:'{jobResource.Data.Properties.ExperimentName}' status returned: '{status}'.");

        // Stop as soon as the job has reached one of the three end states.
        bool finished = status == JobStatus.Completed
            || status == JobStatus.Failed
            || status == JobStatus.Canceled;
        if (finished)
        {
            return;
        }

        await Task
            .Delay(SleepIntervalMs)
            .ConfigureAwait(false);
    }
}

In [1]:
// Block this cell until the submitted job reaches an end state.
await WaitForJobToFinishAsync(
    selectedWorkspace,
    jobId);

DateTime: 6/17/2022 11:37:03 AM, Experiment Name:'ImageClassificationAzureSDKTest' status returned: 'Running'.


: System.OperationCanceledException: Command :SubmitCode: await WaitForJobToFinishAsync(selectedWorkspace,jo ... cancelled.

# Consume model ???

### Download completed job artifacts 

- Job artifacts?
- Model / ONNX file?
- Score
- Child jobs => best job

