In [None]:
# Based on the AutoGluon tutorial on multimodal tabular prediction:
# https://auto.gluon.ai/dev/tutorials/tabular_prediction/tabular-multimodal.html

In [None]:
%%sh
pip -q install autogluon
pip -q uninstall -y torch

In [None]:
# Remote archive holding the dataset, the local folder it is downloaded into,
# and the folder inside it that holds the processed CSV files and images.
zip_file = 'https://automl-mm-bench.s3.amazonaws.com/petfinder_kaggle.zip'
download_dir = './ag_petfinder_tutorial'
dataset_path = f'{download_dir}/petfinder_processed'

In [None]:
from autogluon.core.utils.loaders import load_zip

# Fetch the PetFinder archive referenced by zip_file and extract it under download_dir.
load_zip.unzip(zip_file, unzip_dir=download_dir)

In [None]:
import pandas as pd

# Load the 'train' split and the 'dev' split (used here as the test data);
# the first CSV column becomes the DataFrame index.
train_data, test_data = (
    pd.read_csv(f'{dataset_path}/{split}.csv', index_col=0)
    for split in ('train', 'dev')
)

In [None]:
# Peek at the first three training rows.
train_data.head(3)

In [None]:
# Column to predict, and the column that stores the image path(s) of each row.
label, image_col = 'AdoptionSpeed', 'Images'

In [None]:
# Inspect the raw image-column value of the first training row.
train_data[image_col].iloc[0]

In [None]:
# The image column is treated as a ';'-separated list of paths: keep only the
# first entry for every row, in both splits.
train_data[image_col] = train_data[image_col].apply(lambda paths: paths.partition(';')[0])
test_data[image_col] = test_data[image_col].apply(lambda paths: paths.partition(';')[0])

# Show the first row again to confirm a single path remains.
train_data[image_col].iloc[0]

In [None]:
import os 

def path_expander(path, base_folder):
    """Resolve every ';'-separated entry of ``path`` against ``base_folder``.

    Each entry is joined onto ``base_folder`` and converted to an absolute
    path; the results are returned joined back together with ';'.
    """
    expanded = []
    for relative in path.split(';'):
        expanded.append(os.path.abspath(os.path.join(base_folder, relative)))
    return ';'.join(expanded)

# Rewrite the image paths of both splits as absolute paths rooted at dataset_path;
# Series.apply forwards the base_folder keyword to path_expander.
train_data[image_col] = train_data[image_col].apply(path_expander, base_folder=dataset_path)
test_data[image_col] = test_data[image_col].apply(path_expander, base_folder=dataset_path)

# Show the first row again to check the expanded path.
train_data[image_col].iloc[0]

In [None]:
# Preview the training rows after the image-path rewrite.
train_data.head(3)

In [None]:
# Pick the second training row (positional index 1) as a worked example.
example_row = train_data.iloc[1]

example_row

In [None]:
# Free-text 'Description' field of the example row.
example_row['Description']

In [None]:
from IPython.display import Image, display

# Render the image referenced by the example row. The column is looked up via
# image_col rather than a repeated string literal, so it stays in sync with the
# column name configured for the rest of the notebook.
example_image = example_row[image_col]
pil_img = Image(filename=example_image)  # IPython.display.Image object
display(pil_img)

In [None]:
# Optional: uncomment the next line to train on a 500-row subsample for a faster trial run.
#train_data = train_data.sample(500, random_state=0)

In [None]:
from autogluon.tabular import FeatureMetadata

# Build feature metadata from the training DataFrame and print it for inspection.
feature_metadata = FeatureMetadata.from_df(train_data)
print(feature_metadata)

In [None]:
# Tag the image column with the 'image_path' special type and rebind
# feature_metadata to the returned object.
# NOTE(review): re-running this cell applies the tag to already-tagged metadata - confirm that is harmless.
feature_metadata = feature_metadata.add_special_types({image_col: ['image_path']})

print(feature_metadata)

In [None]:
from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config

# Fetch the hyperparameter configuration registered under the name 'multimodal'
# and display it.
hyperparameters = get_hyperparameter_config('multimodal')
hyperparameters

In [None]:
from autogluon.tabular import TabularPredictor

# Train a predictor for `label` using the multimodal hyperparameters and the
# feature metadata that marks the image column.
# time_limit is 2 * 3600, i.e. two hours expressed in seconds.
predictor = TabularPredictor(label=label).fit(
    train_data=train_data,
    hyperparameters=hyperparameters,
    feature_metadata=feature_metadata,
    time_limit=2*3600,
)

In [None]:
# Show the leaderboard of the models trained by `predictor`.
predictor.leaderboard()

In [None]:
# Third training row; the list index [[2]] keeps the result a one-row DataFrame.
train_data.iloc[[2]]

In [None]:
# Render the image of the third training row. The column is looked up via
# image_col rather than a repeated string literal, so it stays in sync with the
# column name configured for the rest of the notebook.
example_image = train_data.iloc[2][image_col]
pil_img = Image(filename=example_image)  # IPython.display.Image object
display(pil_img)

In [None]:
# Predict the label for the third training row.
predictor.predict(train_data.iloc[[2]])    # the input must be a DataFrame (hence iloc[[2]]), not a Series

In [None]:
# Second run aimed at best quality: same data, hyperparameters and feature
# metadata as before, with the 'best_quality' preset and a budget of
# 12 * 3600 seconds (twelve hours).
best_predictor = TabularPredictor(label=label).fit(
    train_data=train_data,
    presets='best_quality',
    hyperparameters=hyperparameters,
    feature_metadata=feature_metadata,
    time_limit=12 * 3600,
)

In [None]:
# Show the leaderboard of the models trained by `best_predictor`.
best_predictor.leaderboard()