In [1]:
import os
import re
import pickle
import numpy as np
from dotenv import dotenv_values
from langchain import PromptTemplate, LLMChain, OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, HumanMessage, AIMessage

In [2]:
# Read the OpenAI API key from the .env file located one directory above the notebook
config = dotenv_values("../.env")
OPENAI_API_KEY = config["OPENAI_API_KEY"]
# Also expose the key through the process environment
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [None]:
# Maps dataset labels to the natural-language class names shown to the model
# (used below to verbalise the column labels of the training demonstrations).
labels_to_text = {
    # Key kept as-is; the value is spelled so that, once lower-cased, it matches
    # the "i don't know" entry of text_to_label.
    "I dont'know": "I don't know",
    "Date": "date",
    "Book/name": "name of book",
    "Language": "language",
    "Person/name": "name of person",
    "BookFormatType": "book format type",
    "OfferItemCondition": "offer item condition",
    "ItemAvailability": "item availability",
    "price": "price",
    "currency": "currency",
    "Review": "review",
    "Number": "number",
    "IdentifierAT": "identifier",
    "URL": "url",
    "Place/name": "name of place",
    "Event/name": "name of event",
    "EventStatusType": "event status",
    "EventAttendanceModeEnumeration": "event attendance mode",
    "telephone": "telephone",
    "email": "email",
    "category": "category",
    "Duration": "duration",
    "streetAddress": "street address",
    "addressLocality": "locality of address",
    "LocalBusiness/name": "name of local business",
    "priceRange": "price range",
    "openingHours": "opening hours",
    "faxNumber": "fax number",
    "Country": "country",
    "postalCode": "postal code",
    "addressRegion": "region of address",
    "Photograph": "photograph",
    "Movie/name": "name of movie",
    "Rating": "rating",
    "MusicArtistAT": "music artist",
    "MusicAlbum/name": "name of music album",
    "MusicRecording/name": "name of music recording",
    "weight": "weight",
    "GenderType": "gender type",
    "Product/name": "name of product",
    "DeliveryMethod": "delivery method",
    "Organization": "organization",
    "Book/description": "description of book",
    "CreativeWork": "creative work",
    "Boolean": "boolean",
    "DateTime": "date and time",
    "CreativeWork/name": "name of creative work",
    "Event/description": "description of event",
    "PostalAddress": "postal address",
    "Time": "time",
    "Hotel/name": "name of hotel",
    "CoordinateAT": "coordinate",
    "Hotel/description": "description of hotel",
    "LocationFeatureSpecification": "location feature",
    "paymentAccepted": "payment accepted",
    "Brand": "brand",
    "MonetaryAmount": "monetary amount",
    "JobPosting/name": "name of job posting",
    "OccupationalExperienceRequirements": "occupational experience requirements",
    "EducationalOccupationalCredential": "educational occupational credential",
    "workHours": "work hours",
    "CategoryCode": "category code",
    "JobPosting/description": "description of job posting",
    "DayOfWeek": "day of week",
    "Movie/description": "description of movie",
    "Museum/name": "name of museum",
    "ItemList": "item list",
    "Distance": "distance",
    "unitCode": "unit code",
    "ProductModel": "product model",
    "unitText": "unit text",
    "QuantitativeValue": "quantitative value",
    "Product/description": "description of product",
    "Recipe/name": "name of recipe",
    "Mass": "mass",
    "Energy": "energy",
    "RestrictedDiet": "restricted diet",
    "Recipe/description": "description of recipe",
    "Restaurant/name": "name of restaurant",
    # "description of restaurant" is one of the classes offered in the prompts,
    # so its label needs a verbalisation as well.
    # NOTE(review): label spelling follows the other */description entries — confirm against the dataset.
    "Restaurant/description": "description of restaurant",
    "SportsEvent/name": "name of sports event",
    "SportsTeam": "sports team",
    "TVEpisode/name": "name of TV episode",
    "CreativeWorkSeries": "creative work series"
}

In [None]:
# Dictionary to map ChatGPT answers to label set: synonyms can be added here.
# Answers are lower-cased before they are looked up, so every key must be
# lower-case or it can never match.
text_to_label = {
    "i don't know": "-",
    "date": "Date",
    "name of book": "Book/name",
    "language": "Language",
    "name of person": "Person/name",
    "book format type": "BookFormatType",
    "offer item condition": "OfferItemCondition",
    "item availability": "ItemAvailability",
    "price": "price",
    "currency": "currency",
    "review": "Review",
    "number": "Number",
    "identifier": "IdentifierAT",
    "url": "URL",
    "name of place": "Place/name",
    "name of event": "Event/name",
    "event status": "EventStatusType",
    "event attendance mode": "EventAttendanceModeEnumeration",
    "telephone": "telephone",
    "email": "email",
    "category": "category",
    "duration": "Duration",
    "street address": "streetAddress",
    "locality of address": "addressLocality",
    "name of local business": "LocalBusiness/name",
    "price range": "priceRange",
    "opening hours": "openingHours",
    "fax number": "faxNumber",
    "country": "Country",
    "postal code": "postalCode",
    "region of address": "addressRegion",
    "photograph": "Photograph",
    "name of movie": "Movie/name",
    "rating": "Rating",
    "music artist": "MusicArtistAT",
    "name of music album": "MusicAlbum/name",
    # Synonym: the prompts offer this class as "name of album"
    "name of album": "MusicAlbum/name",
    "name of music recording": "MusicRecording/name",
    "weight": "weight",
    "gender type": "GenderType",
    "name of product": "Product/name",
    "delivery method": "DeliveryMethod",
    "organization": "Organization",
    "description of book": "Book/description",
    "creative work": "CreativeWork",
    "boolean": "Boolean",
    "date and time": "DateTime",
    "name of creative work": "CreativeWork/name",
    "description of event": "Event/description",
    "postal address": "PostalAddress",
    "time": "Time",
    "name of hotel": "Hotel/name",
    "coordinate": "CoordinateAT",
    "description of hotel": "Hotel/description",
    "location feature": "LocationFeatureSpecification",
    "payment accepted": "paymentAccepted",
    "brand": "Brand",
    "monetary amount": "MonetaryAmount",
    "name of job posting": "JobPosting/name",
    "occupational experience requirements": "OccupationalExperienceRequirements",
    "educational occupational credential": "EducationalOccupationalCredential",
    "work hours": "workHours",
    "category code": "CategoryCode",
    "description of job posting": "JobPosting/description",
    "day of week": "DayOfWeek",
    "description of movie": "Movie/description",
    "name of museum": "Museum/name",
    "item list": "ItemList",
    "distance": "Distance",
    "unit code": "unitCode",
    "product model": "ProductModel",
    "unit text": "unitText",
    "quantitative value": "QuantitativeValue",
    "description of product": "Product/description",
    "name of recipe": "Recipe/name",
    "mass": "Mass",
    "energy": "Energy",
    "restricted diet": "RestrictedDiet",
    "description of recipe": "Recipe/description",
    "name of restaurant": "Restaurant/name",
    # Offered as a class in the prompts but previously unmapped.
    # NOTE(review): label spelling follows the other */description entries — confirm against the dataset.
    "description of restaurant": "Restaurant/description",
    "name of sports event": "SportsEvent/name",
    "sports team": "SportsTeam",
    # Was "name of TV episode": unreachable because lookups are lower-cased
    "name of tv episode": "TVEpisode/name",
    "creative work series": "CreativeWorkSeries"
}

## Load test (and training) set

In [None]:
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open('data/cta-train-table-wise.pkl', "rb") as train_file:
    train = pickle.load(train_file)
with open('data/cta-test-table-wise.pkl', "rb") as test_file:
    test = pickle.load(test_file)

# For every entry, position 1 is used as the model input and position 2 holds
# the labels of its columns.
examples = [entry[1] for entry in test]
labels = [label for entry in test for label in entry[2]]

train_examples = [entry[1] for entry in train]
# One "Column k: <class name>" line per column, used as the demonstration answer
train_example_labels = [
    "\n".join(
        f"Column {position}: {labels_to_text[label]}"
        for position, label in enumerate(entry[2], start=1)
    ).strip()
    for entry in train
]

In [None]:
# Chat model client used by every prompting setup below (temperature fixed at 0)
chat = ChatOpenAI(
    model='gpt-3.5-turbo-0301',
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

## Choose setup: zero-shot, one-shot or five-shot

In [None]:
# Zero-shot table + instructions + roles:
# two system messages (task, instructions) followed by the table to classify.
task_prompt = "Your task is to classify the columns of a given table with only one of the following classes that are separated with comma: description of event, description of restaurant, locality of address, postal code, region of address, country, price range, telephone, date, name of restaurant, payment accepted, day of week, review, organization, date and time, coordinate, name of event, event attendance mode, event status, currency, time, description of hotel, name of hotel, location feature, rating, fax number, email, photograph, name of music recording, music artist, name of album, duration."
instruction_prompt = "Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a class that best represents the meaning of all cells in the column. 4. Answer with the selected class for each columns with the format Column1: class."

preds = []
for example in examples:
    messages = [
        SystemMessage(content=task_prompt),
        SystemMessage(content=instruction_prompt),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    # Keep only the text of the reply; it is parsed in the evaluation section
    preds.append(res.content)

In [None]:
import random
# One-shot table + instructions + roles:
# same system messages as the zero-shot setup, preceded by one solved training table.
task_prompt = "Your task is to classify the columns of a given table with only one of the following classes that are separated with comma: description of event, description of restaurant, locality of address, postal code, region of address, country, price range, telephone, date, name of restaurant, payment accepted, day of week, review, organization, date and time, coordinate, name of event, event attendance mode, event status, currency, time, description of hotel, name of hotel, location feature, rating, fax number, email, photograph, name of music recording, music artist, name of album, duration."
instruction_prompt = "Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a class that best represents the meaning of all cells in the column. 4. Answer with the selected class for each columns with the format Column1: class."

preds = []
for example in examples:
    # A fresh random training table is drawn for every test table
    index = random.randint(0, len(train_examples)-1)
    messages = [
        SystemMessage(content=task_prompt),
        SystemMessage(content=instruction_prompt),
        # Demonstration: training table as the question, its verbalised labels as the answer
        HumanMessage(content=f"Classify these table columns: {train_examples[index]}"),
        AIMessage(content=f"{train_example_labels[index]}"),
        HumanMessage(content=f"Classify these table columns: {example}"),
    ]
    res = chat(messages)
    preds.append(res.content)

In [None]:
import random
# Five-shot table + instructions + roles:
# same system messages as the zero-shot setup, preceded by five solved training tables.
task_prompt = "Your task is to classify the columns of a given table with only one of the following classes that are separated with comma: description of event, description of restaurant, locality of address, postal code, region of address, country, price range, telephone, date, name of restaurant, payment accepted, day of week, review, organization, date and time, coordinate, name of event, event attendance mode, event status, currency, time, description of hotel, name of hotel, location feature, rating, fax number, email, photograph, name of music recording, music artist, name of album, duration."
instruction_prompt = "Your instructions are: 1. Look at the input given to you and make a table out of it. 2. Look at the cell values in detail. 3. For each column, select a class that best represents the meaning of all cells in the column. 4. Answer with the selected class for each columns with the format Column1: class."

# Never ask for more demonstrations than there are training tables
demo_count = min(5, len(train_examples))

preds = []
for example in examples:
    messages = [
        SystemMessage(content=task_prompt),
        SystemMessage(content=instruction_prompt),
    ]

    # Draw the demonstrations without replacement so that the same training
    # table cannot be shown twice in one prompt (independent randint draws could
    # repeat an index).
    for index in random.sample(range(len(train_examples)), demo_count):
        messages.append(HumanMessage(content=f"Classify these table columns: {train_examples[index]}"))
        messages.append(AIMessage(content=f"{train_example_labels[index]}"))

    messages.append(HumanMessage(content=f"Classify these table columns: {example}"))
    res = chat(messages)
    preds.append(res.content)

In [None]:
# Show the first ten raw model answers as a quick sanity check
preds[:10]

In [None]:
# Save predictions in a file.
# NOTE(review): the file name says "zero-shot" regardless of which setup cell
# produced `preds`; adjust it when saving one-shot or five-shot runs.
file_name='predictions/chat-table-zero-shot.pkl'
# Create the output directory on first use instead of failing in open()
os.makedirs(os.path.dirname(file_name), exist_ok=True)
# The context manager closes the file even if pickling raises
with open(file_name,'wb') as f:
    pickle.dump(preds,f)

## Evaluation

In [None]:
# Map each raw model answer onto the label space, one label per table column
predictions = []
i = 0
for j, table_preds in enumerate(preds):
    # Number of columns of the table: used to cut extra answers and pad missing ones
    table_number = len(test[j][2])

    # Keep only the segment that follows the first "Class:" marker, when present
    if "Class:" in table_preds:
        table_preds = table_preds.split("Class:")[1]

    # Pick the separator: ":" wins over "-", otherwise a plain comma-separated list.
    # With ":" or "-" the first piece is the text before the first answer, so it is skipped.
    if ":" in table_preds:
        separator, start = ":", 1
    elif "-" in table_preds:
        separator, start = "-", 1
    else:
        separator, start = ",", 0
    end = start + table_number

    col_preds = table_preds.split(separator)[start:end]

    for pred in col_preds:
        i += 1

        # Cut at the first line break, comma, parenthesis and full stop, in that order
        for delimiter in ("\n", ",", "(", "."):
            if delimiter in pred:
                pred = pred.split(delimiter)[0].strip()
        # Lower-case prediction
        pred = pred.strip().lower()

        if pred not in text_to_label:
            print(f"For test example {i} out of label space prediction: {pred}")
            predictions.append('-')
        else:
            predictions.append(text_to_label[pred])

    # Fewer answers than columns: fill the remaining columns with '-'
    missing = table_number - len(col_preds)
    if missing > 0:
        predictions.extend(['-'] * missing)
        i += missing

### Calculate Precision, Recall, Macro-F1 and Micro-F1

In [None]:
def calculate_f1_scores(y_tests, y_preds, num_classes, label_types=None):
    """Compute micro/macro F1, precision and recall from gold and predicted labels.

    The last class (index ``num_classes - 1``) is treated as the out-of-label
    class: it takes part in the confusion matrix but is left out of every
    aggregate and of the per-class report.

    Args:
        y_tests: Gold labels, one per column.
        y_preds: Predicted labels, aligned with ``y_tests``. A prediction that
            is not listed in ``label_types`` is counted as the last
            (out-of-label) class instead of raising.
        num_classes: Side length of the confusion matrix, last class included.
        label_types: Ordered class names; the position of a name is its class
            index. Defaults to the module-level ``types`` list.

    Returns:
        ``[evaluation, per_class_eval]`` where ``evaluation`` holds "Micro-F1",
        "Macro-F1", "Precision" and "Recall" (the last three macro-averaged over
        all classes but the last) and ``per_class_eval`` maps each class name
        except the last to its "Precision", "Recall" and "F1". Undefined ratios
        (zero denominator) are reported as 0.

    Raises:
        ValueError: If a gold label is not listed in ``label_types``.
    """
    if label_types is None:
        label_types = types

    # First occurrence wins, like list.index
    class_index = {}
    for position, name in enumerate(label_types):
        class_index.setdefault(name, position)

    gold_ids = []
    for label in y_tests:
        if label not in class_index:
            raise ValueError(f"{label!r} is not in list")
        gold_ids.append(class_index[label])

    # A predicted label outside the evaluated classes is simply a wrong answer:
    # book it under the out-of-label class rather than aborting the evaluation.
    out_of_label = num_classes - 1
    pred_ids = [class_index.get(label, out_of_label) for label in y_preds]

    def _ratio(numerator, denominator):
        # 0/0 is reported as 0 so that empty classes do not produce NaN
        return float(numerator / denominator) if denominator else 0.0

    # Confusion matrix: rows are predicted classes, columns are gold classes
    cm = np.zeros(shape=(num_classes, num_classes))
    for i in range(len(gold_ids)):
        cm[pred_ids[i]][gold_ids[i]] += 1

    tp = np.diag(cm)
    fp = cm.sum(axis=1) - tp  # predicted as the class, gold is another one
    fn = cm.sum(axis=0) - tp  # gold is the class, predicted as another one

    class_p = [_ratio(tp[j], tp[j] + fp[j]) for j in range(num_classes)]
    class_r = [_ratio(tp[j], tp[j] + fn[j]) for j in range(num_classes)]
    class_f1s = [
        _ratio(2 * class_p[j] * class_r[j], class_p[j] + class_r[j])
        for j in range(num_classes)
    ]

    # Aggregates skip the last (out-of-label) class
    all_tp = tp[:-1].sum()
    all_fp = fp[:-1].sum()
    all_fn = fn[:-1].sum()

    macro_f1 = sum(class_f1s[:-1]) / (num_classes - 1)
    p = sum(class_p[:-1]) / (num_classes - 1)
    r = sum(class_r[:-1]) / (num_classes - 1)
    micro_f1 = _ratio(all_tp, all_tp + (1 / 2 * (all_fp + all_fn)))

    per_class_eval = {}
    for index, t in enumerate(label_types[:-1]):
        per_class_eval[t] = {"Precision": class_p[index], "Recall": class_r[index], "F1": class_f1s[index]}

    evaluation = {
        "Micro-F1": micro_f1,
        "Macro-F1": macro_f1,
        "Precision": p,
        "Recall": r
    }

    return [evaluation, per_class_eval]

In [None]:
# Class inventory: the distinct gold labels (sorted so the class order is the
# same on every run) followed by "-" as the final out-of-label class.
types = sorted(set(labels)) + ["-"]
# Size the confusion matrix from the inventory instead of a hard-coded count
evaluation, per_class_eval = calculate_f1_scores(labels, predictions, len(types))

In [None]:
# Aggregate scores: Micro-F1, Macro-F1, Precision and Recall
evaluation

In [None]:
# Precision, Recall and F1 for each class except the final "-" class
per_class_eval

## Error Analysis

In [None]:
# "-" means the model replied with out of label or with I don't know
# Print every column whose mapped prediction differs from the gold label and count them
errors = 0
for i, predicted in enumerate(predictions):
    actual = labels[i]
    if predicted == actual:
        continue
    errors += 1
    print(f"Predicted as {predicted} when it was {actual}")
errors

### Re-load previous preds files

In [None]:
# Replace `preds` with the answers stored by an earlier five-shot run.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open('predictions/chat-table-five-shot.pkl', "rb") as f:
    preds = pickle.load(f)