In [1]:

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build the workspace client from the local config file, authenticating with
# DefaultAzureCredential.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

# Fetch version "1" of the registered "wind_bench" data asset and print the
# storage URI it refers to.
data_asset = ml_client.data.get("wind_bench", version="1")

print(data_asset.path)


Found the config file in: /config.json


azureml://subscriptions/8a889cf2-7b3d-4003-824e-b503f56604b0/resourcegroups/rg-jef-ml/workspaces/ml-gpu/datastores/workspaceblobstore/paths/UI/2023-11-29_114659_UTC/wind_bench.parquet/


In [2]:
import pandas as pd
from azureml.fsspec import AzureMachineLearningFileSystem

# Open the data asset's storage location as a filesystem object.
fs = AzureMachineLearningFileSystem(data_asset.path)

# Collect every parquet file below the asset root. Despite its name, `dirs`
# holds file paths (see the printed example).
dirs = fs.glob('**/*.parquet')
first_parquet_path = dirs[0]

print(first_parquet_path)

# Load a single column from the first file and summarise it.
rotation_column = "Tower mid rotation y [deg]"
with fs.open(first_parquet_path) as parquet_file:
    df = pd.read_parquet(parquet_file, columns=[rotation_column])
df.describe()


UI/2023-11-29_114659_UTC/wind_bench.parquet/id=Exp0/5657695901cb4b018ed3e459b9c10e21-0.parquet


Unnamed: 0,Tower mid rotation y [deg]
count,12000.0
mean,-0.040897
std,0.008514
min,-0.075
25%,-0.046597
50%,-0.040464
75%,-0.034768
max,-0.016649


In [4]:
import pyarrow.parquet as pq


# Read only the tower-rotation column of the first parquet file as a pyarrow
# Table. The Table gets its own name so that `df` (a pandas DataFrame in the
# cell that reads the same file with pandas) is not silently rebound to an
# object of a different type.
with fs.open(dirs[0]) as parquet_file:
    rotation_table = pq.read_table(
        parquet_file, columns=["Tower mid rotation y [deg]"]
    )

# Display the Arrow schema: the column type plus the embedded pandas metadata.
rotation_table.schema


Tower mid rotation y [deg]: double
-- schema metadata --
pandas: '{"index_columns": ["Time"], "column_indexes": [{"name": null, "f' + 24267

In [None]:

from azure.ai.ml import MLClient, command, Input
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.identity import DefaultAzureCredential

# Workspace client built from the local config file.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

# Registered data asset that the job will list.
data_asset = ml_client.data.get("wind_bench", version="1")

# Expose the asset to the job as a read-only mounted folder.
mounted_data = Input(
    path=data_asset.id,
    type=AssetTypes.URI_FOLDER,
    mode=InputOutputModes.RO_MOUNT,
)

# to successfully create a job, customize the parameters below based on your workspace resources
job = command(
    command='ls "${{inputs.data}}"',
    inputs={"data": mounted_data},
    environment="azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
)
returned_job = ml_client.jobs.create_or_update(job)

In [5]:

from azure.ai.ml import command
from azure.ai.ml.entities import Data
from azure.ai.ml import Input, Output
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient.from_config(credential=DefaultAzureCredential())

# Take the workspace coordinates from the client that was just built from the
# config file instead of hard-coding them. The datastore URIs below then
# always refer to the same workspace the job is submitted to.
subscription_id = ml_client.subscription_id
resource_group = ml_client.resource_group_name
workspace = ml_client.workspace_name

# Datastore-relative locations of the source folder and of the split result.
input_parquet_file = 'UI/2023-11-29_114659_UTC/wind_bench.parquet'
output_parquet_file = 'UI/2023-11-29_114659_UTC/wind_bench_1000_1028.parquet'


def _workspace_blob_uri(relative_path):
    """Return the long-form azureml:// URI of a path in `workspaceblobstore`.

    Args:
        relative_path: Path relative to the root of the datastore.

    Returns:
        The URI string built from the workspace coordinates defined above.
    """
    return (
        "azureml://subscriptions/{}/resourcegroups/{}/workspaces/{}"
        "/datastores/workspaceblobstore/paths/{}"
    ).format(subscription_id, resource_group, workspace, relative_path)


inputs = {
    "input_parquet": Input(
        type=AssetTypes.URI_FOLDER,
        path=_workspace_blob_uri(input_parquet_file),
    )
}

outputs = {
    "output_parquet": Output(
        type=AssetTypes.URI_FOLDER,
        path=_workspace_blob_uri(output_parquet_file),
    )
}

job = command(
    code="../scripts/dataset",  # local path where the code is stored
    # NOTE(review): the 1000 / 1028 passed to -n / -s also appear in the output
    # file name above; keep the two in sync when changing them.
    command="python split_ml_parquet.py -i ${{inputs.input_parquet}} -o ${{outputs.output_parquet}} -n 1000 -s 1028",
    inputs=inputs,
    outputs=outputs,
    environment="wind_env:2",
    compute="cpu-32G",
)

# submit the command
returned_job = ml_client.create_or_update(job)
# get a URL for the status of the job
returned_job.studio_url

Found the config file in: /config.json


'https://ml.azure.com/runs/frosty_truck_yd37n2kkh5?wsid=/subscriptions/8a889cf2-7b3d-4003-824e-b503f56604b0/resourcegroups/rg-jef-ml/workspaces/ml-gpu&tid=8de15a81-f1b0-42ee-86ae-ca75c1b8ba65'

In [11]:
from azure.ai.ml import command
from azure.ai.ml.entities import Data
from azure.ai.ml import Input, Output
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Workspace client built from the local config file.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

# Registered data asset handed to the training script; print its asset id.
data_asset = ml_client.data.get("wind_bench_100_128", version="1")
print(data_asset.id)

# Single job input: the asset, mounted read-only as a folder.
training_data = Input(
    path=data_asset.id,
    type=AssetTypes.URI_FOLDER,
    mode=InputOutputModes.RO_MOUNT,
)
inputs = {"input_parquet": training_data}

job = command(
    # Local path where the code is stored; the command below is resolved
    # relative to it.
    code="..",
    command="python scripts/virtual/Train.py ${{inputs.input_parquet}}",
    inputs=inputs,
    environment="wind_env:3",
    compute="cpu-32G",
)

# Submit the job, then end the cell on its Studio URL so the notebook shows it.
returned_job = ml_client.create_or_update(job)
returned_job.studio_url

Found the config file in: /config.json


/subscriptions/8a889cf2-7b3d-4003-824e-b503f56604b0/resourceGroups/rg-jef-ml/providers/Microsoft.MachineLearningServices/workspaces/ml-gpu/data/wind_bench_100_128/versions/1


[32mUploading wind_bench (46.57 MBs): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 46573998/46573998 [00:02<00:00, 19230400.67i

'https://ml.azure.com/runs/cyan_bear_rmlf80tlst?wsid=/subscriptions/8a889cf2-7b3d-4003-824e-b503f56604b0/resourcegroups/rg-jef-ml/workspaces/ml-gpu&tid=8de15a81-f1b0-42ee-86ae-ca75c1b8ba65'