In [122]:
import pandas as pd
import numpy as np
import glob

path = './Sensor_Data/'
all_files = glob.glob(path + "/*.csv")

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

sensor_data = pd.concat(li, axis=0, ignore_index=True)

In [191]:
sensor_data_filter = sensor_data.filter(items=['animal_activity', "temp_without_drink_cycles", 'time', 'CowID'])
sensor_data_filter['time']=pd.to_datetime(sensor_data_filter['time'])
calv_feature = pd.read_excel("./Sensor_Data/Calving_Features.xlsx")
calv_feature = calv_feature.filter(items=['ID', "DAT_TIME_FCAL"])

In [192]:
sensor_data_filter = sensor_data_filter.set_index('time')
sensor_data_aggre = sensor_data_filter.groupby("CowID").resample('H').mean()
sensor_data_aggre = sensor_data_aggre.drop(['CowID'], axis=1)
sensor_data_aggre.reset_index(inplace=True) 
sensor_data_aggre.head()

Unnamed: 0,CowID,time,animal_activity,temp_without_drink_cycles
0,10,2018-12-04 16:00:00,3.72,39.38
1,10,2018-12-04 17:00:00,3.4,39.74
2,10,2018-12-04 18:00:00,2.42,39.63
3,10,2018-12-04 19:00:00,1.42,39.88
4,10,2018-12-04 20:00:00,1.54,40.37


In [193]:
calv_feature['DAT_TIME_FCAL']=pd.to_datetime(calv_feature['DAT_TIME_FCAL'])
calv_feature = calv_feature.assign(Date=calv_feature.DAT_TIME_FCAL.dt.round('H'))
calv_feature.head()

Unnamed: 0,ID,DAT_TIME_FCAL,Date
0,11262,2018-09-04 13:30:00,2018-09-04 14:00:00
1,11313,2018-11-22 05:00:00,2018-11-22 05:00:00
2,11343,2018-11-03 01:20:00,2018-11-03 01:00:00
3,11390,2018-09-15 01:20:00,2018-09-15 01:00:00
4,11433,2018-07-12 11:00:00,2018-07-12 11:00:00


In [324]:
def merge_10_days_before_calving(original_data,calve_data,cowId):
    filtered_original_data = original_data[original_data["CowID"]==cowId]
    filtered_calve_data = calve_data[calve_data["CowID"]==cowId]
    result = pd.DataFrame(data = None, columns= filtered_original_data.columns)
    for index, row in filtered_calve_data.iterrows():
        tmp = filtered_original_data[(filtered_original_data.time >= row.time - pd.to_timedelta("20day")) & (filtered_original_data.time <= row.time)]
        result = result.append(tmp)
    return result
    

In [325]:
merge_data = sensor_data_aggre.merge(calv_feature, left_on=['CowID','time'], right_on=['ID','Date'],how='left')
merge_data['isCalving'] = pd.notna(merge_data['ID']).astype(int)
merge_data = merge_data.filter(items=['animal_activity', "temp_without_drink_cycles","CowID","isCalving","time"])
merge_data = merge_data.dropna()
calve_time = merge_data[merge_data['isCalving']==1]
merge_data_filter = pd.DataFrame(data = None, columns= merge_data.columns)
for cowId in calve_time["CowID"]:
    time_merge_df = merge_10_days_before_calving(merge_data,calve_time,cowId)
    merge_data_filter = merge_data_filter.append(time_merge_df)
merge_data_filter['CowID'] = merge_data_filter['CowID'].astype(float)
merge_data_filter['isCalving'] = merge_data_filter['isCalving'].astype(int)
merge_data_filter = merge_data_filter.drop_duplicates()
merge_data_filter.head()

Unnamed: 0,animal_activity,temp_without_drink_cycles,CowID,isCalving,time
96,2.54,40.04,10.0,0,2018-12-08 16:00:00
97,2.32,39.56,10.0,0,2018-12-08 17:00:00
98,1.06,39.77,10.0,0,2018-12-08 18:00:00
99,1.05,40.17,10.0,0,2018-12-08 19:00:00
100,2.12,40.37,10.0,0,2018-12-08 20:00:00


In [344]:
max(merge_data_filter['time'])

Timestamp('2019-10-02 08:00:00')

In [327]:
label = "isCalving"
test_labels = merge_data_filter[label].values

In [328]:
from azureml.automl.core.forecasting_parameters import ForecastingParameters

forecast_parameters = ForecastingParameters(time_column_name='time', 
                                            forecast_horizon=50,
                                            short_series_handling_configuration='auto',
                                            freq = 'H',
                                            target_lags='auto')

In [329]:
from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
import logging

automl_config = AutoMLConfig(task='forecasting',
                             primary_metric='normalized_root_mean_squared_error',
                             experiment_timeout_minutes=15,
                             enable_early_stopping=True,
                             training_data=merge_data_filter,
                             label_column_name=label,
                             n_cross_validations=2,
                            time_column_name='time',                                            
                             forecast_horizon=50,
                            short_series_handling_configuration='auto',
                            freq = 'H',
                            target_lags='auto',
                            time_series_id_column_names = 'CowID')
#                              enable_ensembling=False,
#                              **forecasting_parameters)

In [330]:
ws = Workspace.from_config()
experiment = Experiment(ws, "Tutorial-automl-forecasting")
local_run = experiment.submit(automl_config, show_output=True)
best_run, fitted_model = local_run.get_output()

Running on local machine
Parent Run ID: AutoML_d6301b53-e971-4543-85e3-c391125b6975

Current status: DatasetFeaturization. Beginning to featurize the dataset.
Current status: DatasetFeaturizationCompleted. Completed featurizing the dataset.
Heuristic parameters: Target_Lag = '[0]'.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.
Current status: DatasetFeaturization. Beginning to featurize the CV split.
Current status: DatasetFeaturizationCompleted. Completed featurizing the CV split.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Frequency detection
STATUS:       PASSED
DESCRIPTION:  The time series was analyzed, all data points are aligned with detected frequency.
              

*********************

In [331]:
label_query = test_labels.copy().astype(np.float)
label_query.fill(np.nan)
label_fcst, data_trans = fitted_model.forecast(
    forecast_destination=pd.Timestamp(2020, 1, 1))

In [343]:
data_trans

Unnamed: 0_level_0,Unnamed: 1_level_0,animal_activity,temp_without_drink_cycles,animal_activity_WASNULL,temp_without_drink_cycles_WASNULL,grain_CowID,_automl_year,_automl_half,_automl_quarter,_automl_month,_automl_day,_automl_hour,_automl_am_pm,_automl_hour12,_automl_wday,_automl_qday,_automl_week,_automl_target_col
time,CowID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2018-07-06 04:00:00,9659.00,4.58,39.74,1,1,311,2018,2,3,7,6,4,0,4,4,6,27,0.00
2018-07-06 05:00:00,9659.00,4.58,39.74,1,1,311,2018,2,3,7,6,5,0,5,4,6,27,0.00
2018-07-06 06:00:00,9659.00,4.58,39.74,1,1,311,2018,2,3,7,6,6,0,6,4,6,27,0.00
2018-07-06 07:00:00,9659.00,4.58,39.74,1,1,311,2018,2,3,7,6,7,0,7,4,6,27,0.00
2018-07-06 08:00:00,9659.00,4.58,39.74,1,1,311,2018,2,3,7,6,8,0,8,4,6,27,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-28 00:00:00,12478.00,4.58,39.74,1,1,816,2021,1,2,5,28,0,0,0,4,58,21,0.00
2021-05-28 00:00:00,12479.00,4.58,39.74,1,1,817,2021,1,2,5,28,0,0,0,4,58,21,0.00
2021-05-28 00:00:00,12481.00,4.58,39.74,1,1,818,2021,1,2,5,28,0,0,0,4,58,21,0.00
2021-05-28 00:00:00,12482.00,4.58,39.74,1,1,819,2021,1,2,5,28,0,0,0,4,58,21,0.00


### Azure Setup (Ignore now)

In [5]:
import os, glob, shutil

if not os.path.exists('./data'):
    os.mkdir('./data')
for f in glob.glob('./Sensor_Data/*.csv'):
    shutil.move(f, './data/')
print("Data moved!")

Data moved!


In [6]:
# acount metadata

AZURE_SUBSCRIPTION_ID = '32fda87f-7ad1-4d47-9078-eaa80469c690'
ML_WORKSPACE_NAME = 'dairyFarmDemo'
AZURE_IOT_HUB_NAME = 'dairyfarm5412'
RESOURCE_GROUP_NAME = 'cs5412-final-project'
LOCATION = 'East US 2'
STORAGE_ACCOUNT_NAME = 'dairydatastorage'
STORAGE_ACCOUNT_KEY = 'bLrvIMcD0g3P1LiCTYKnqSWWkEyeah7RWsfZdSRfz7U4DiWt/oHyPPL0XThvvy4WyTN0SUqdB9WTzTY9QTqiRw=='
STORAGE_ACCOUNT_CONTAINER = 'container5412'
# DATASTORE_NAME = 'dairydata'

In [9]:
import os, glob, shutil
from azureml.core import Workspace
workspace_name = ML_WORKSPACE_NAME
subscription_id = AZURE_SUBSCRIPTION_ID
resource_group = RESOURCE_GROUP_NAME
location = LOCATION

if not os.path.exists('./aml_config'):
    os.mkdir('./aml_config')
ws = Workspace.from_config()

Note, we have launched a browser for you to login. For old experience with device code, use "az login --use-device-code"


Performing interactive authentication. Please follow the instructions on the terminal.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [10]:
import azureml.core
import pandas as pd
from azureml.core import Workspace

output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output, index=['']).T

  pd.set_option('display.max_colwidth', -1)


Unnamed: 0,Unnamed: 1
SDK version,1.27.0
Subscription ID,32fda87f-7ad1-4d47-9078-eaa80469c690
Workspace,dairyFarmDemo
Resource Group,cs5412-final-project
Location,eastus


In [11]:
from azureml.core import Datastore, Workspace, Datastore, Dataset

datastore_name = DATASTORE_NAME
datastore = Datastore.get(ws, datastore_name)
# label_datastore_path = (datastore,'/label_data_large.csv')
# label_dataset = Dataset.Tabular.from_delimited_files(path=label_datastore_path)

# record_datastore_path = (datastore,'/record_data_large.csv')
# record_dataset = Dataset.Tabular.from_delimited_files(path=record_datastore_path)

# print("Data retrieved!")

NameError: name 'DATASTORE_NAME' is not defined