In [5]:
import os
import json

# Location of the prediction dump: <folder>/<filename>, joined with the
# platform's path separator.
prediction_folder = "predictions"
prediction_filename = "prediction_all_bert_lstm.json"
prediction_filepath = os.path.join(prediction_folder, prediction_filename)

def load_prediction(filepath):
    """Load prediction records from a JSON-lines file.

    Args:
        filepath: Path to a text file holding one JSON document per line.

    Returns:
        list: The decoded JSON value of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    instances = []
    # Iterate over the file line by line instead of read().split("\n")[:-1]:
    # that slice silently dropped the final record whenever the file did not
    # end with a newline. UTF-8 is stated explicitly so decoding does not
    # depend on the machine's locale.
    with open(filepath, 'r', encoding='utf-8') as jsonfile:
        for line in jsonfile:
            line = line.strip()
            if not line:
                # Blank lines carry no record; skip them rather than crash.
                continue
            instances.append(json.loads(line))
    return instances

# Read every prediction record from disk.
instances = load_prediction(prediction_filepath)

# Keep only the records whose predicted label differs from the adjudicated label.
errors = list(
    filter(
        lambda record: record['adjudicated_label'] != record['pred_label'],
        instances,
    )
)

## Generate an HTML file that displays 100 randomly sampled errors

In [17]:
import random

# Table layout for one error: each inner tuple is one table row, each entry is
# the dict key of the image URL shown in that column (None = empty spacer cell).
_ERROR_ROW_LAYOUT = (
    ('context8_url', 'context9_url', 'context10_url'),    # 3 context tweets before
    (None, 'anchor_url', None),                           # anchor tweet, middle column
    ('context11_url', 'context12_url', 'context13_url'),  # 3 context tweets after
)
# Lengths carry an explicit "px": the page declares <!DOCTYPE html>, and in
# standards mode a unitless non-zero CSS length is invalid and gets ignored.
_CELL_STYLE = 'style="width: 400px; border-bottom: 1px solid black;"'
_IMAGE_STYLE = 'style="display: block; margin-left: auto; margin-right: auto; max-width: 450px; max-height: 650px;"'


def _image_cell(url):
    """Return one <td> containing an <img> for ``url``, escaped for attribute use."""
    from html import escape  # function-scope import keeps this block self-contained

    return f'<td {_CELL_STYLE}>\n<img src="{escape(str(url), quote=True)}" {_IMAGE_STYLE}>\n</td>\n'


def _error_rows(error):
    """Return the three <tr> rows (before / anchor / after) for one error dict."""
    rows = []
    for row_keys in _ERROR_ROW_LAYOUT:
        cells = [
            f'<td {_CELL_STYLE}></td>\n' if key is None else _image_cell(error[key])
            for key in row_keys
        ]
        rows.append('<tr>\n' + ''.join(cells) + '</tr>\n')
    return ''.join(rows)


def get_html(errors, num_samples=100, html_filename='errors.html', seed=None):
    """Write an HTML page showing a random sample of prediction errors.

    Every sampled error becomes three table rows of images: the three context
    tweets before the anchor (``context8_url`` .. ``context10_url``), the
    anchor tweet in the middle column (``anchor_url``), and the three context
    tweets after it (``context11_url`` .. ``context13_url``).

    Args:
        errors: Sequence of dicts, each providing the seven ``*_url`` keys
            listed above.
        num_samples: Maximum number of errors to render. When fewer errors are
            available, all of them are rendered instead of raising ValueError.
        html_filename: Path of the HTML file to create or overwrite.
        seed: Optional seed for a private random generator, so the same sample
            can be regenerated on a re-run. ``None`` (the default) draws from
            the global ``random`` state.

    Raises:
        KeyError: If a sampled error lacks one of the expected URL keys.
    """
    rng = random if seed is None else random.Random(seed)
    # Clamp the sample size: random.sample raises ValueError when asked for
    # more items than the population holds.
    sampled_errors = rng.sample(errors, min(num_samples, len(errors)))

    with open(html_filename, 'w', encoding='utf-8') as htmlfile:
        htmlfile.write(
            '<!DOCTYPE html>\n<html>\n<head>\n<meta charset="utf-8">\n'
            '<title>Error Example</title>\n</head>\n<body>\n'
        )
        htmlfile.write(
            '<table style="margin-left: auto; margin-right: auto; '
            'border: 1px solid black; line-height: 1.0em; width: 1200px">\n'
        )

        for error in sampled_errors:
            htmlfile.write(_error_rows(error))

        # Close everything that was opened so the document is well-formed.
        htmlfile.write('</table>\n</body>\n</html>\n')

    print("Done")
            
# Sample 100 errors and write them to errors.html (get_html's defaults, spelled out).
get_html(errors, num_samples=100, html_filename='errors.html')

Done


In [3]:
# Print the full contents of the errors text file into the cell output.
error_text_filename = 'errors_text'
with open(error_text_filename, encoding='utf-8') as txtfile:  # mode defaults to 'r'
    print(txtfile.read())

############################################################
############################################################
-------------- Tweet 1 --------------
Tyreek Hill Named In Child Abuse Police Report, Chiefs are Aware https://t.co/ZQ97uavEe6 https://t.co/ABcp630FJT
-------------- Tweet 2 --------------
Baltimore Ravens CB Earl Thomas Can Have My Jersey Number ... For Free!!! Breaking News Earl Thomas was facing a MAJOR issue in his first days as a Baltimore Raven ... his beloved #29 jersey was... https://t.co/1jEVO61hYn
-------------- Tweet 3 --------------
Mama June Arrested for Felony Drug Possession ... After Domestic Incident with Boyfriend EXCLUSIVE Mama June was arrested Wednesday after a domestic incident with her boyfriend... https://t.co/kRs9dv1lMy
-------------- Tweet 4 --------------
Cuba Gooding Jr. Partying at a Spring Break Pool Party in Miami Beach https://t.co/e2EcENIdhV https://t.co/qyD33OkdmR
-------------- Tweet 5 --------------
Cuba Gooding Jr. Full Party Mod