# Gold Price Time Series Forecasting in Azure Machine Learning Studio

This notebook implements a full MLOps workflow for gold price forecasting using Azure ML managed and serverless services.

In [None]:
# 1. Setup & Initialization
!pip install azure-ai-ml azure-identity mlflow scikit-learn pandas seaborn matplotlib
from azure.ai.ml import MLClient, automl
from azure.ai.ml.entities import Data, Model, ResourceConfiguration, CodeConfiguration
from azure.identity import DefaultAzureCredential
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# 2. Connect to Azure ML Workspace
# Authenticate once and reuse the same credential/client objects in later cells.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential)

# Echo the workspace name as a quick sanity check that the client is bound.
connected_workspace = ml_client.workspace_name
print(f"Connected to workspace: {connected_workspace}")

In [None]:
# 3. Data Preparation & Exploration
# Load the raw CSV, parse the date column and order the rows chronologically.
# The frame is built in one chain (no inplace=True), so re-running this cell
# always starts again from the file on disk instead of mutating earlier state.
df = (
    pd.read_csv("gold_price_50_years_1975_2025.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .sort_values("date")
    .reset_index(drop=True)
)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
df.info()
print(df.head())
print(df.isnull().sum())
print(df.describe())

# Explicit figure/axes so the plot does not rely on pyplot's implicit state.
fig, ax = plt.subplots(figsize=(12, 5))
sns.lineplot(data=df, x="date", y="close", ax=ax)
ax.set_title("Gold Price Over Time (1975–2025)")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price (USD)")
plt.show()

In [None]:
# 4. Register Data Asset in Azure ML
from azure.ai.ml.constants import AssetTypes
# Describe the local CSV as a versioned single-file data asset and register it
# in the workspace so later cells can refer to it as "azureml:gold-prices-data:1".
# NOTE(review): AutoML jobs generally expect tabular (MLTable) training data;
# confirm that a URI_FILE asset is accepted by the forecasting job that consumes it.
gold_data = Data(
    path="gold_price_50_years_1975_2025.csv",
    type=AssetTypes.URI_FILE,
    name="gold-prices-data",
    version="1",
    description="Historical gold prices 1975–2025",
    tags=dict(source="uploaded-csv", target="close"),
)
ml_client.data.create_or_update(gold_data)
print("Gold prices dataset successfully registered!")

In [None]:
# 5. Train Model with AutoML (Serverless)
from azure.ai.ml import Input
# Build the AutoML forecasting job. Only data/metric arguments belong in the
# factory call; forecasting settings and run limits are configured through the
# job's dedicated setters below.
# NOTE(review): AutoML training data is normally an MLTable asset; the asset
# referenced here is registered as a single CSV file (uri_file) — confirm the
# service accepts it or register an MLTable instead.
forecast_job = automl.forecasting(
    experiment_name="gold-price-forecasting",
    training_data=Input(type="uri_file", path="azureml:gold-prices-data:1"),
    target_column_name="close",
    primary_metric="normalized_root_mean_squared_error",
    # Forecasting validates with rolling-origin cross-validation. Percentage
    # based validation/test splits (validation_data_size / test_data_size) are
    # documented as unsupported for forecasting, so they are not passed.
    n_cross_validations="auto",
)

# Time-series configuration (previously passed as a plain dict, which is not
# the ForecastingSettings object the job expects).
forecast_job.set_forecast_settings(
    time_column_name="date",
    forecast_horizon=30,
    frequency="D",
    target_lags=[1, 2, 3, 5, 7, 14],
    target_rolling_window_size=7,
)

# Run limits. The early-stopping switch is named enable_early_termination in
# the SDK; timeout/max_trials are limits, not factory arguments.
forecast_job.set_limits(
    timeout_minutes=120,
    max_trials=30,
    enable_early_termination=True,
)

# No named compute is set; attaching a resource configuration requests
# serverless compute of the given VM size.
forecast_job.resources = ResourceConfiguration(instance_type="Standard_DS3_v2", instance_count=1)

submitted_job = ml_client.jobs.create_or_update(forecast_job)
print(f"AutoML job submitted: {submitted_job.name}")
print(submitted_job.studio_url)

In [None]:
# 6. Monitor Job & Retrieve Best Model
# Block until the AutoML job reaches a terminal state. Reading the status once
# right after submission would almost always report a non-terminal state and
# leave the following cells without a trained model.
ml_client.jobs.stream(submitted_job.name)

completed = ml_client.jobs.get(submitted_job.name)
print(f"Job Status: {completed.status}")

if completed.status == "Completed":
    # NOTE(review): this looks up a registered model whose name equals the job
    # name; AutoML may not have registered one, so the empty case is handled
    # instead of indexing [0] (which raised IndexError).
    best_model = next(iter(ml_client.models.list(name=completed.name)), None)
    if best_model is None:
        print(f"No registered model named {completed.name!r} was found.")
    else:
        print(f"Model: {best_model.name}, Version: {best_model.version}")

In [None]:
# 7. Register the Model (if needed)
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes
# The MLflow model artifact is produced by the best child run of the AutoML
# job, not by the parent job itself. The parent job advertises that run through
# the "automl_best_child_run_id" tag; if the tag is missing we fall back to the
# parent job name, which is what this cell used before.
# NOTE(review): confirm the tag is populated for this job type in your workspace.
parent_job_tags = ml_client.jobs.get(submitted_job.name).tags or {}
best_run_id = parent_job_tags.get("automl_best_child_run_id", submitted_job.name)

model = Model(
    name="gold-forecasting-model",
    version="1",
    description="Best AutoML model for gold price forecasting",
    path=f"azureml://jobs/{best_run_id}/outputs/artifacts/outputs/mlflow-model",
    type=AssetTypes.MLFLOW_MODEL,
    tags={"model_type": "forecasting", "target": "close"}
)
ml_client.models.create_or_update(model)

In [None]:
# 8. Batch Inference Pipeline
%%writefile batch_predict.py
import os, pandas as pd, mlflow, numpy as np
def _resolve_mlflow_dir(root):
    """Return the directory at or directly below ``root`` that contains ``MLmodel``.

    Falls back to ``root`` itself when no ``MLmodel`` file is found, so the
    loader still receives the originally configured path.
    """
    if os.path.isfile(os.path.join(root, "MLmodel")):
        return root
    if os.path.isdir(root):
        # Sorted for a deterministic pick if several sub-folders qualify.
        for entry in sorted(os.listdir(root)):
            candidate = os.path.join(root, entry)
            if os.path.isfile(os.path.join(candidate, "MLmodel")):
                return candidate
    return root


def init():
    """Load the model once and publish it as the module-level ``model``.

    The model location is read from the ``AZUREML_MODEL_DIR`` environment
    variable. The MLflow folder is often nested one level below that
    directory, so the folder holding the ``MLmodel`` file is resolved first.

    Raises:
        RuntimeError: if ``AZUREML_MODEL_DIR`` is not set.
    """
    global model
    model_dir = os.environ.get("AZUREML_MODEL_DIR")
    if not model_dir:
        # Fail with a clear message instead of handing None to the loader.
        raise RuntimeError("AZUREML_MODEL_DIR is not set; cannot locate the model to load.")
    model = mlflow.sklearn.load_model(_resolve_mlflow_dir(model_dir))
def run(mini_batch):
    """Score every CSV file in ``mini_batch`` with the loaded model.

    Args:
        mini_batch: iterable of CSV file paths. Each file must contain a
            ``date`` column plus whatever columns the model's ``predict``
            expects.

    Returns:
        A DataFrame with ``date`` and ``predicted_price`` columns holding the
        predictions of all files, in input order. An empty mini-batch yields
        an empty DataFrame with the same two columns.
    """
    frames = []
    for path in mini_batch:
        data = pd.read_csv(path)
        preds = model.predict(data)
        frames.append(pd.DataFrame({"date": data["date"], "predicted_price": preds}))
    if not frames:
        # pd.concat raises ValueError on an empty list; return an empty,
        # correctly shaped result instead.
        return pd.DataFrame(columns=["date", "predicted_price"])
    return pd.concat(frames)

In [None]:
# 8b. Define and Run Pipeline
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import Input
from azure.ai.ml import Input, command

# The step is declared once, outside the pipeline function, with typed input
# *definitions*. Inside a @pipeline function, pipeline parameters are binding
# placeholders: they can be passed when calling a component, but they cannot be
# used as the input definitions given to command().
# NOTE(review): batch_predict.py only defines init()/run() and has no script
# entry point, so "python batch_predict.py" will not score anything until one
# is added. The curated environment below is also an assumption — confirm it
# contains the packages the script imports (mlflow, pandas).
_batch_predict_step = command(
    name="gold_price_batch_predict",
    code="./",
    command=(
        "python batch_predict.py "
        "--input_data ${{inputs.input_data}} "
        "--model_path ${{inputs.model_path}}"
    ),
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    inputs={
        "input_data": Input(type="uri_file"),
        "model_path": Input(type="mlflow_model"),
    },
)


@pipeline(default_compute="serverless")
def batch_pipeline(input_data, model_path):
    """Single-step pipeline that runs the batch prediction script.

    Args:
        input_data: the CSV data asset to score (uri_file).
        model_path: the registered MLflow model to score with (mlflow_model).

    The step declares no outputs, so the function returns nothing; a pipeline
    function may only return a mapping of output names to step outputs.
    """
    _batch_predict_step(input_data=input_data, model_path=model_path)


# The Input types are given explicitly: the default type is a folder, and a
# model reference is only resolved against the model registry when the input
# is typed as a model.
pipeline_job = batch_pipeline(
    input_data=Input(type="uri_file", path="azureml:gold-prices-data:1"),
    model_path=Input(type="mlflow_model", path="azureml:gold-forecasting-model:1"),
)
pipeline_run = ml_client.jobs.create_or_update(pipeline_job)
print(f"Pipeline submitted: {pipeline_run.name}")

In [None]:
# 9. Deploy Model to Managed Online Endpoint
%%writefile score.py
import json, os, pandas as pd, mlflow
def _resolve_mlflow_dir(root):
    """Return the directory at or directly below ``root`` that contains ``MLmodel``.

    Falls back to ``root`` itself when no ``MLmodel`` file is found, so the
    loader still receives the originally configured path.
    """
    if os.path.isfile(os.path.join(root, "MLmodel")):
        return root
    if os.path.isdir(root):
        # Sorted for a deterministic pick if several sub-folders qualify.
        for entry in sorted(os.listdir(root)):
            candidate = os.path.join(root, entry)
            if os.path.isfile(os.path.join(candidate, "MLmodel")):
                return candidate
    return root


def init():
    """Load the model once at container start-up into the global ``model``.

    The model location is read from the ``AZUREML_MODEL_DIR`` environment
    variable. The MLflow folder is often nested one level below that
    directory, so the folder holding the ``MLmodel`` file is resolved first.

    Raises:
        RuntimeError: if ``AZUREML_MODEL_DIR`` is not set.
    """
    global model
    model_dir = os.environ.get("AZUREML_MODEL_DIR")
    if not model_dir:
        # Fail with a clear message instead of handing None to the loader.
        raise RuntimeError("AZUREML_MODEL_DIR is not set; cannot locate the model to load.")
    model = mlflow.sklearn.load_model(_resolve_mlflow_dir(model_dir))
def run(raw_data):
    """Score one request and return the predictions as a JSON string.

    Args:
        raw_data: JSON text whose ``"data"`` entry holds the records to score.

    Returns:
        JSON text of the form ``{"predictions": [...]}``.
    """
    payload = json.loads(raw_data)
    frame = pd.DataFrame(payload["data"])
    predictions = model.predict(frame)
    body = {"predictions": predictions.tolist()}
    return json.dumps(body)

In [None]:
# 9b. Deploy Endpoint
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, CodeConfiguration, OnlineRequestSettings
# Step 1: create (or update) the key-authenticated endpoint and wait for it.
endpoint = ManagedOnlineEndpoint(
    name="gold-forecasting-endpoint",
    description="Real-time gold price forecast",
    auth_mode="key",
)
ml_client.begin_create_or_update(endpoint).result()

# Step 2: describe the deployment — registered model, scoring code, runtime
# environment and VM sizing — then create it and wait for completion.
scoring_code = CodeConfiguration(code="./", scoring_script="score.py")
request_limits = OnlineRequestSettings(request_timeout_ms=60000)
deployment = ManagedOnlineDeployment(
    name="gold-forecasting-deploy",
    endpoint_name="gold-forecasting-endpoint",
    model="azureml:gold-forecasting-model:1",
    code_configuration=scoring_code,
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1",
    instance_type="Standard_DS2_v2",
    instance_count=1,
    request_settings=request_limits,
)
ml_client.begin_create_or_update(deployment).result()

# Step 3: route all traffic to the new deployment and push the endpoint update.
endpoint.traffic = {"gold-forecasting-deploy": 100}
ml_client.begin_create_or_update(endpoint).result()

In [None]:
# 10. Test the Live Endpoint
import requests, json
# Look up the endpoint's scoring URI and its access keys.
ep = ml_client.online_endpoints.get("gold-forecasting-endpoint")
keys = ml_client.online_endpoints.get_keys("gold-forecasting-endpoint")

# NOTE(review): this sample carries the target column ("close") and no "date"
# column, while the model was trained with "date" as the time column — confirm
# the payload matches the schema the deployed model expects.
sample = {"data": [{"open": 3050.9, "high": 3061.2, "low":3050.0, "close":3056.1}]}
headers = {
    "Authorization": f"Bearer {keys.primary_key}",
    "Content-Type": "application/json"
}

# A timeout keeps the cell from hanging forever on an unresponsive endpoint
# (60 s mirrors the deployment's request_timeout_ms), and raise_for_status()
# surfaces HTTP errors instead of trying to parse an error body as a result.
response = requests.post(
    ep.scoring_uri,
    data=json.dumps(sample),
    headers=headers,
    timeout=60,
)
response.raise_for_status()
print(response.json())

In [None]:
# 11. Responsible AI Dashboard
from azure.ai.ml import automl
from azure.ai.ml import Input
# Build and submit a Responsible AI job for the registered forecasting model,
# using the registered gold price data as training data and "close" as target.
# NOTE(review): `automl.rai` does not appear to be part of the public
# azure.ai.ml.automl API — confirm it exists in the installed SDK version.
# Responsible AI dashboards are usually assembled from the RAI components of
# the "azureml" registry inside a pipeline job; verify before relying on this cell.
rai_job = automl.rai(
    experiment_name="rai-gold-forecasting",
    model_input="azureml:gold-forecasting-model:1",
    train_data=Input(path="azureml:gold-prices-data:1"),
    target_column_name="close",
    compute="serverless"
)
# Submit the job to the workspace.
ml_client.jobs.create_or_update(rai_job)