In [90]:
import json
import pandas as pd
from os import listdir
from os.path import isfile, join

In [108]:
# Discard duplicate evaluations when the second does not provide more information.
def createEvaluationsList(resultsDir):
    """Return the result files in ``resultsDir``, keeping one file per evaluation id.

    The evaluation id is the second ``#``-separated field of a file's first
    line. When several files share an id, the one with the most lines after
    that first line is kept; on a tie the file listed later wins.

    Args:
        resultsDir: Directory holding the result files. Entries that are not
            regular files are ignored.

    Returns:
        List of paths (``resultsDir`` joined with the file name) of the kept
        files, in directory-listing order.

    Raises:
        IndexError: If the first line of a file contains no ``#`` separator.
    """
    resultFiles = [join(resultsDir, name) for name in listdir(resultsDir)
                   if isfile(join(resultsDir, name))]

    # Best candidate seen so far for each evaluation id.
    evaluations = {}

    for fileName in resultFiles:
        # fileName already contains resultsDir; joining it a second time
        # breaks for relative directories such as "data".
        with open(fileName, 'r') as file:
            firstLine = file.readline()
            evaluationId = firstLine.split('#')[1]
            # Number of lines that follow the first line.
            numLines = sum(1 for _ in file)

        best = evaluations.get(evaluationId)
        if best is None or numLines >= best["lines"]:
            evaluations[evaluationId] = {"file": fileName, "lines": numLines}

    # Filter once at the end: removing entries from resultFiles while looping
    # over it makes the iterator skip files, leaving duplicates unprocessed.
    keptFiles = {entry["file"] for entry in evaluations.values()}
    return [fileName for fileName in resultFiles if fileName in keptFiles]

In [92]:
# Read the data of an evaluation into a DataFrame.
def readEvaluationData(evaluationFileName,
                       evaluationsDir="../annotation/claim-justification_annotator/public/"):
    """Load an evaluation JSON file into a pandas DataFrame.

    Args:
        evaluationFileName: Name of the JSON file, joined onto
            ``evaluationsDir`` (an absolute path overrides the directory).
        evaluationsDir: Directory that holds the evaluation files. Defaults
            to the annotator's ``public`` folder used by this notebook.

    Returns:
        The DataFrame produced by ``pd.read_json`` for that file.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If pandas cannot parse the file content as JSON.
    """
    # The file is opened and parsed once; the earlier extra json.loads pass
    # produced a value that was never used.
    with open(join(evaluationsDir, evaluationFileName), 'r', encoding='utf-8') as evaluationJson:
        return pd.read_json(evaluationJson)

In [93]:
# Find json_file_id when given simple id
def findJsonFileId(claimId,evaluationData):
    """Look up the ``json_file_id`` of the row whose ``id`` equals ``claimId``.

    ``Series.item()`` is used for the extraction, so exactly one row has to
    match; otherwise pandas raises ``ValueError``.
    """
    matches = evaluationData['id'] == claimId
    return evaluationData.loc[matches, "json_file_id"].item()

In [129]:
# Find claim and justification given json_file_id
def findClamJustification(jsonFileId, evaluationData):
    """Return the ``(claim, justification)`` pair stored for ``jsonFileId``.

    The row is selected by its ``json_file_id`` column. Both values are
    extracted with ``Series.item()``, which raises ``ValueError`` unless
    exactly one row matches.
    """
    matches = evaluationData['json_file_id'] == jsonFileId
    row = evaluationData.loc[matches]
    claimText = row["claim"].item()
    justificationText = row["justification"].item()
    return claimText, justificationText

In [147]:
def aggregateAnnotationResults(evaluationsList):
    """Tally the labels assigned to every claim across the given result files.

    Each result file is read as follows:
      * line 1: the third ``#``-separated field names the evaluation JSON
        file, which is loaded with ``readEvaluationData``;
      * line 2: skipped;
      * remaining lines: comma-separated fields with double quotes removed.
        Field 0 is the numeric claim id; fields 1-4 are the flags for
        "distortion", "emphasis", "unfounded" and "unclear", where the
        string "true" marks the assigned label.

    A row is counted only when exactly one flag is "true". Rows with no
    label or with several labels are reported with ``print`` and skipped.
    Blank lines are ignored.

    Args:
        evaluationsList: Iterable of result-file paths.

    Returns:
        Tuple ``(results, basicInfo)``, both keyed by ``json_file_id``:
        ``results`` maps to the per-label counts and ``basicInfo`` maps to
        ``{"claim": ..., "justification": ...}``.
    """
    # Order matters: it fixes the key order of every per-claim count dict.
    labelNames = ("distortion", "emphasis", "unfounded", "unclear")

    # Keep all annotation results in the results object.
    results = {}

    # Keep basic information (claim, justification).
    basicInfo = {}

    # For each kept evaluation.
    for file in evaluationsList:
        # The context manager closes the handle even if parsing fails below.
        with open(file, 'r') as resultsFile:
            resultsLines = resultsFile.readlines()

        currEvaluation = resultsLines[0].split('#')[2].replace("\n",'')
        evaluationData = readEvaluationData(currEvaluation)

        # For each data line.
        for line in resultsLines[2:]:

            # A blank (e.g. trailing) line carries no annotation.
            if not line.strip():
                continue

            dataList = [field.replace('"', '').strip() for field in line.split(',')]

            resultId = dataList[0]
            jsonFileId = findJsonFileId(int(resultId),evaluationData)

            # Inspect every flag instead of stopping at the first "true", so
            # rows carrying more than one label are actually detected.
            assigned = [name for name, flag in zip(labelNames, dataList[1:5])
                        if flag == "true"]

            if not assigned:
                print("Error. Claim is Unlabeld.")
                continue

            if len(assigned) > 1:
                print("Error. Claim should be assigned only one label.")
                continue

            # Create the counters the first time a claim is encountered.
            result = results.setdefault(jsonFileId, dict.fromkeys(labelNames, 0))
            result[assigned[0]] += 1

            claimText, justificationText = findClamJustification(jsonFileId,evaluationData)
            basicInfo[jsonFileId] = {"claim": claimText,
                                     "justification": justificationText}

    return results, basicInfo

In [148]:
# Collect the result files under ../data (createEvaluationsList is meant to keep
# one file per evaluation id), then count the labels given to each claim.
evaluationsList = createEvaluationsList("../data")
# annotationResults: per-claim label counts; basicInfo: claim/justification text.
annotationResults, basicInfo = aggregateAnnotationResults(evaluationsList)

In [150]:
# Column names of the aggregated table.
uniqueId = "json_file_id"
claim = "claim"
just  = "justification"
label = "justification_label"
resultsColumns = [uniqueId, claim, just, label]

# One record per annotated claim. The label with the highest count is chosen;
# max() returns the first such label in the counts' key order when counts tie.
dataList = []
for jsonFileId, votes in annotationResults.items():
    info = basicInfo[jsonFileId]
    dataList.append({
        uniqueId: jsonFileId,
        claim: info[claim],
        just: info[just],
        label: max(votes, key=votes.get),
    })

data = pd.DataFrame(dataList, columns=resultsColumns)
display(data)

Unnamed: 0,json_file_id,claim,justification,justification_label
0,12134.json,We have less Americans working now than in the...,"However, Hartzler was talking about the entire...",distortion
1,238.json,"When Obama was sworn into office, he DID NOT u...",Ellison used a Koran that once belonged to Tho...,emphasis
2,7891.json,Having organizations parading as being social ...,"However, we have two professors who say the la...",unfounded
3,8169.json,Nearly half of Oregons children are poor.,"In fact, if you use federal definitions for po...",unclear
4,929.json,On attacks by Republicans that various program...,Obama's point is that some perspective is in o...,distortion
5,9416.json,When armed civilians stop mass shootings with ...,Rubens said when armed civilians stop mass sho...,emphasis
6,1122.json,The health care reform plan would set limits s...,There is no such practice in the comparative e...,unfounded
7,1880.json,Bill White has a long history of trying to lim...,Did White's positions which he hasn't backed ...,unclear
8,12803.json,John McCains chief economic adviser during the...,"Kaine said ""John McCains chief economic advise...",distortion
9,5409.json,"21,000 Wisconsin residents got jobs in 2011, b...","Vinehout said she ""did a little bit of math"" w...",emphasis
