In [None]:
"""Notebook that shares functionality with the Colab shared with CGFP. Used to make sure nothing breaks before updating the Huggingface model."""

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the local `cgfp`
# package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from cgfp.training.models import MultiTaskModel
from cgfp.inference.inference import inference, inference_handler
from transformers import AutoTokenizer
import torch
from pathlib import Path
import yaml

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Use the first CUDA GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
# Bare expression so the chosen device is shown as the cell output.
device

'cuda:0'

In [4]:
# Directory that holds the project's scripts and the `config_train.yaml`
# file read below.
# NOTE(review): this is an absolute path on one user's machine; anyone else
# running the notebook has to edit it — consider making it configurable.
SCRIPT_DIR = Path("/home/tnief/1-Projects/good-food-purchasing/scripts")

In [5]:
# Parse `config_train.yaml` from the scripts directory into `config`.
# The file is opened through the path instance (rather than the unbound
# `Path.open`) and with an explicit UTF-8 encoding so that parsing does not
# depend on the platform's default text encoding. `safe_load` is used, so
# the YAML cannot construct arbitrary Python objects.
with (SCRIPT_DIR / "config_train.yaml").open(encoding="utf-8") as file:
    config = yaml.safe_load(file)

In [6]:
# Checkpoint path to evaluate, taken from the "eval" section of the config.
CHECKPOINT = config["eval"]["eval_checkpoint"]
# Echo it so the saved notebook records which checkpoint was used.
print(CHECKPOINT)

/net/projects/cgfp/model-files/roberta_20240910_1156_final_subtypes_full_sft


In [7]:
# Instantiate the project's MultiTaskModel from the evaluation checkpoint.
model = MultiTaskModel.from_pretrained(CHECKPOINT)

In [8]:
# Load the tokenizer from the same checkpoint path as the model.
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)

In [9]:
# Single-example check: run one product description through `inference`
# and display the predicted value for each output field.
text = "IW WG BRAN MUFFIN"
# Alternative input to try instead:
# text = "frozen peas and carrots"
result = inference(
    model,
    tokenizer,
    text,
    device,
    assertion=False,
    confidence_score=False,
)
result

{'Food Product Group': 'Condiments & Snacks',
 'Food Product Category': 'Condiments & Snacks',
 'Primary Food Product Category': 'Condiments & Snacks',
 'Basic Type': 'muffin',
 'Flavor/Cut': 'None',
 'Shape': 'None',
 'Skin': 'None',
 'Seed/Bone': 'None',
 'Processing': 'None',
 'Cooked/Cleaned': 'None',
 'WG/WGR': 'whole grain rich',
 'Dietary Concern': 'None',
 'Additives': 'None',
 'Dietary Accommodation': 'None',
 'Frozen': 'None',
 'Packaging': 'ss',
 'Commodity': 'None'}

In [10]:
# Same example, but with `combine_name=True`: the displayed result is a single
# comma-separated string instead of one entry per field.
result = inference(
    model,
    tokenizer,
    text,
    device,
    assertion=False,
    confidence_score=False,
    combine_name=True,
)
result

'muffin, whole grain rich, ss'

In [11]:
# Where the spreadsheet to classify lives; DATA_DIR is also passed to
# `inference_handler` as the directory to save results in.
DATA_DIR = "/net/projects/cgfp/data/test/"
FILENAME = "TestData_11.22.23.xlsx"
# Name of the spreadsheet column handed to `inference_handler` as `input_column`.
INPUT_COLUMN = "Product Type"
# Join with pathlib instead of "+" so the path stays correct even if DATA_DIR
# is later edited to drop its trailing slash. Converted back to `str` so the
# value and type match what the plain concatenation produced.
INPUT_PATH = str(Path(DATA_DIR) / FILENAME)

In [12]:
# Sheet selector handed to `inference_handler` as `sheet_name`.
# NOTE(review): presumably 0 means the first worksheet — confirm against the handler.
SHEET_NUMBER = 0
# Passed to `inference_handler` as `assertion`; when enabled it
# filters results that have mismatched food product groups and categories.
ASSERTION = False # filters results that have mismatched food product groups and categories

In [14]:
# TODO: Add option for output file name
# Classify the whole input sheet. The handler prints where the classified
# file was saved, and the returned table is displayed as the cell output.
inference_handler(
    model,
    tokenizer,
    input_path=INPUT_PATH,
    save_dir=DATA_DIR,
    device=device,
    sheet_name=SHEET_NUMBER,
    input_column=INPUT_COLUMN,
    assertion=ASSERTION,
)

Classification completed! File saved to /net/projects/cgfp/data/test/TestData_11.22.23_classified.xlsx


Unnamed: 0,Product Type,Center Product ID,Food Product Group,Food Product Category,Primary Food Product Category,Basic Type,Sub-Type 1,Sub-Type 2,Sub-Type 3,Flavor/Cut,...,Seed/Bone,Processing,Cooked/Cleaned,WG/WGR,Dietary Concern,Additives,Dietary Accommodation,Frozen,Packaging,Commodity
0,IW WG BRAN MUFFIN,JAMAC/PR133,Condiments & Snacks,Condiments & Snacks,Condiments & Snacks,muffin,,,,,...,,,,whole grain rich,,,,,ss,
1,IW WG BANANA MUFFIN,JAMAC/PR135,Condiments & Snacks,Condiments & Snacks,Condiments & Snacks,muffin,,,,,...,,,,whole grain rich,,,,,ss,
2,IW WG SUPER DONUT,JAMAC/SB106,Condiments & Snacks,Condiments & Snacks,Condiments & Snacks,pastry,,,,,...,,,,whole grain rich,,,,,,
3,IW WG CORN MUFFINS,JAMAC/PR136,Condiments & Snacks,Condiments & Snacks,Condiments & Snacks,muffin,corn,,,,...,,,,whole grain rich,,,,,ss,
4,IW WG BLUEBERRY MUFFIN,JAMAC/PR134,Condiments & Snacks,Condiments & Snacks,Condiments & Snacks,muffin,blueberry,,,,...,,,,whole grain rich,,,,,ss,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,RED & GREEN PEPPER STRIPS,JAMAC/V510,Produce,Vegetables,Vegetables,pepper,bell,,,,...,,,,,,,,,,
103,PEAS & CARROTS,JAMAC/V845,Produce,Vegetables,Vegetables,vegetable,carrot,blend,,,...,,,,,,,,,,
104,IQF SLICED ZUCCHINI,JAMAC/V432,Produce,Vegetables,Vegetables,squash,zucchini,,,,...,,,,,,,,,,
105,PEAS & CARROTS,JAMAC/V946,Produce,Vegetables,Vegetables,vegetable,carrot,blend,,,...,,,,,,,,,,
